- for (;;) {
-
- /* --- Input buffering --- */
-
- if (b < 8) {
- if (av >= avl)
- break;
- a |= *av++ << b;
- b += MPW_BITS;
- }
-
- /* --- Do the work in the middle --- */
-
- aa |= (mpd)(gfx_sqrtab[U8(a)]) << bb;
- bb += 16;
- a >>= 8;
- b -= 8;
-
- /* --- Output buffering --- */
-
- if (bb >= MPW_BITS) {
- *dv++ = MPW(aa);
- if (dv >= dvl)
- return;
- aa >>= MPW_BITS;
- bb -= MPW_BITS;
- }
- }
-
- /* --- Flush the input buffer --- */
-
- if (b) for (;;) {
- aa |= (mpd)(gfx_sqrtab[U8(a)]) << bb;
- bb += 16;
- if (bb > MPW_BITS) {
- *dv++ = MPW(aa);
- if (dv >= dvl)
- return;
- aa >>= MPW_BITS;
- bb -= MPW_BITS;
- }
- a >>= 8;
- if (b <= 8)
- break;
- else
- b -= 8;
- }
+ while (av < avl) {
+ a = *av++;
+ /* ..., 7, 6, 5, 4, 3, 2, 1, 0 */
+ SWIZZLE_EXCH(a, 0, 1); /* ..., 7, 6, 5, 4, 3, 2, 0, 1 */
+ SWIZZLE_EXCH(a, 0, 2); /* ..., 7, 6, 5, 4, 3, 1, 0, 2 */
+ SWIZZLE_EXCH(a, 0, 3); /* ..., 7, 6, 5, 4, 2, 1, 0, 3 */
+#if MPW_BITS > 16
+ SWIZZLE_EXCH(a, 0, 4); /* ..., 7, 6, 5, 3, 2, 1, 0, 4 */
+#endif
+#if MPW_BITS > 32
+ SWIZZLE_EXCH(a, 0, 5); /* ..., 7, 6, 4, 3, 2, 1, 0, 5 */
+#endif
+#if MPW_BITS > 64
+ SWIZZLE_EXCH(a, 0, 6); /* ..., 7, 5, 4, 3, 2, 1, 0, 6 */
+#endif
+#if MPW_BITS > 128
+# error "unsupported limb width: extend `gfx-sqr.c'"
+#endif