/* The ChaCha feedforward step, used at the end of the core function. Here,
* @y@ contains the original input matrix; @z@ contains the final one, and is
- * updated. This is the same as Salsa20.
+ * updated. This is the same as Salsa20, only without the final permutation.
*/
-#define CHACHA_FFWD(z, y) SALSA20_FFWD(z, y)
+#define CHACHA_FFWD(z, y) do { /* NB: z and y are expanded inside the loop, so each is evaluated once per iteration */ \
+ int _i; /* index local to this do-block */ \
+ for (_i = 0; _i < 16; _i++) (z)[_i] += (y)[_i]; /* add y into z entry by entry, indices 0..15, in place */ \
+} while (0)
/* Various numbers of rounds, unrolled. Read from @y@, and write to @z@. */
#define CHACHA_4R(z, y) \