debian/rules: Run tests twice, once without any detected CPU features.
[catacomb] / symm / salsa20-core.h
CommitLineData
194e93f2
MW
1/* -*-c-*-
2 *
3 * Salsa20 core definitions
4 *
5 * (c) 2015 Straylight/Edgeware
6 */
7
8#ifndef CATACOMB_SALSA20_CORE_H
9#define CATACOMB_SALSA20_CORE_H
10
11#ifdef __cplusplus
12 extern "C" {
13#endif
14
15/*----- Header files ------------------------------------------------------*/
16
17#include <mLib/bits.h>
18#include <mLib/macros.h>
19
20#ifndef CATACOMB_SALSA20_H
21# include "salsa20.h"
22#endif
23
24/*----- Magic constants ---------------------------------------------------*/
25
/* The Salsa20 constant words.  Written out as little-endian bytes in the
 * order A, B, C, D, each set spells `expand NN-byte k', where NN is the key
 * length in bytes (32, 16, or 10).  Only the B and C words depend on the key
 * length, so the definitions are grouped by word rather than by key size.
 * The 80-bit set is provided for completeness's sake.
 */

/* Word A: `expa', shared by every key size. */
#define SALSA20_A256 0x61707865
#define SALSA20_A128 SALSA20_A256
#define SALSA20_A80 SALSA20_A256

/* Word B: `nd 3' for 256-bit keys, `nd 1' for the shorter ones. */
#define SALSA20_B256 0x3320646e
#define SALSA20_B128 0x3120646e
#define SALSA20_B80 SALSA20_B128

/* Word C: `2-by', `6-by', or `0-by', finishing off the key length. */
#define SALSA20_C256 0x79622d32
#define SALSA20_C128 0x79622d36
#define SALSA20_C80 0x79622d30

/* Word D: `te k', shared by every key size. */
#define SALSA20_D256 0x6b206574
#define SALSA20_D128 SALSA20_D256
#define SALSA20_D80 SALSA20_D256
43
44/*----- The Salsa20 core function -----------------------------------------*/
45
/* The Salsa20 quarter-round.  Take the four words of the matrix @y@ at
 * indices @a@, @b@, @c@, and @d@, and store the mixed results at the same
 * indices of @z@.  The outputs are produced in the order @b@, @c@, @d@,
 * @a@, each one feeding into the next.  Every input word is read before the
 * output word at the same index is written, so @z@ and @y@ may be the same
 * matrix.
 */
#define SALSA20_QR(z, y, a, b, c, d) do {				\
  (z)[b] = ROL32((y)[a] + (y)[d],  7) ^ (y)[b];				\
  (z)[c] = ROL32((z)[b] + (y)[a],  9) ^ (y)[c];				\
  (z)[d] = ROL32((z)[c] + (z)[b], 13) ^ (y)[d];				\
  (z)[a] = ROL32((z)[d] + (z)[c], 18) ^ (y)[a];				\
} while (0)
55
/* The Salsa20 double-round: one pass of quarter-rounds down the columns of
 * the 4x4 matrix followed by one pass along its rows.  The column pass reads
 * @y@ and writes @z@; the row pass then updates @z@ in place.
 */
#define SALSA20_DR(z, y) do {						\
  /* Columns: from @y@ into @z@. */					\
  {									\
    SALSA20_QR(z, y,  0,  4,  8, 12);					\
    SALSA20_QR(z, y,  5,  9, 13,  1);					\
    SALSA20_QR(z, y, 10, 14,  2,  6);					\
    SALSA20_QR(z, y, 15,  3,  7, 11);					\
  }									\
  /* Rows: @z@ in place. */						\
  {									\
    SALSA20_QR(z, z,  0,  1,  2,  3);					\
    SALSA20_QR(z, z,  5,  6,  7,  4);					\
    SALSA20_QR(z, z, 10, 11,  8,  9);					\
    SALSA20_QR(z, z, 15, 12, 13, 14);					\
  }									\
} while (0)
69
/* The Salsa20 feedforward step which finishes the core function: add each
 * of the sixteen words of the original input matrix @y@ into the
 * corresponding word of the final matrix @z@.  Only @z@ is modified.
 */
#define SALSA20_FFWD(z, y) do {						\
  int _k = 0;								\
  while (_k < 16) { (z)[_k] += (y)[_k]; _k++; }				\
} while (0)
78
/* Fixed numbers of rounds, fully unrolled.  In each case the first
 * double-round reads @y@ and writes @z@, and all of the later ones update
 * @z@ in place.
 */
#define SALSA20_4R(z, y) do {						\
  SALSA20_DR(z, y);							\
  SALSA20_DR(z, z);							\
} while (0)
#define SALSA20_8R(z, y) do {						\
  SALSA20_4R(z, y);							\
  SALSA20_4R(z, z);							\
} while (0)
#define SALSA20_12R(z, y) do {						\
  SALSA20_4R(z, y);							\
  SALSA20_8R(z, z);							\
} while (0)
#define SALSA20_20R(z, y) do {						\
  SALSA20_8R(z, y);							\
  SALSA20_12R(z, z);							\
} while (0)
88
/* Apply @n@ rounds using a loop rather than unrolled code; @n@ must be
 * even.  (The rolled form seems to be faster, probably because it fits in
 * cache better.)  The first double-round reads @y@ and writes @z@; the
 * remaining @n/2 - 1@ double-rounds update @z@ in place.
 */
#define SALSA20_nR(z, y, n) do {					\
  int _r = 0;								\
  SALSA20_DR(z, y);							\
  while (_r < (n)/2 - 1) { SALSA20_DR(z, z); _r++; }			\
} while (0)
98
/* Advance the block counter held in the Salsa20 state matrix @a@.  The word
 * at index 8 is incremented and reduced to 32 bits; if it wraps round to
 * zero then the carry is added into the word at index 9.
 */
#define SALSA20_STEP(a) do {						\
  (a)[8] = U32((a)[8] + 1);						\
  if (!(a)[8]) (a)[9] += 1;						\
} while (0)
102
103/*----- Buffering and output ----------------------------------------------*
104 *
105 * These macros are also used by ChaCha.
106 */
107
/* Emit a complete block of output: store each of the sixteen words of the
 * matrix @a@ at the output pointer @d@ using @STORE32_L@, stepping @d@ on
 * by four bytes after each word, so that it finishes just past the new
 * material.
 */
#define SALSA20_GENFULL(a, d) do {					\
  int _w = 0;								\
									\
  while (_w < 16) {							\
    STORE32_L((d), (a)[_w]);						\
    (d) += 4;								\
    _w++;								\
  }									\
} while (0)
116
/* Encrypt a complete block: XOR the contents of the input buffer at @s@
 * with the Salsa20 matrix @a@, one 32-bit word at a time, and write the
 * result to @d@.  Words are read with @LOAD32_L@ and written with
 * @STORE32_L@; both @s@ and @d@ are advanced past the block.
 */
#define SALSA20_MIXFULL(a, d, s) do {					\
  int _w = 0;								\
									\
  while (_w < 16) {							\
    uint32 _t = LOAD32_L(s);						\
    (s) += 4;								\
    _t ^= (a)[_w];							\
    STORE32_L((d), _t);							\
    (d) += 4;								\
    _w++;								\
  }									\
} while (0)
130
/* Load the buffer of the context @ctx@ from the matrix @a@, ready for
 * output to be handed out in pieces smaller than a block: word @i@ of @a@
 * is stored with @STORE32_L@ at byte offset @4*i@ of @ctx->buf@, and then
 * the buffer index @ctx->bufi@ is reset to zero.
 */
#define SALSA20_PREPBUF(ctx, a) do {					\
  int _w = 0;								\
  while (_w < 16) {							\
    STORE32_L((ctx)->buf + 4*_w, (a)[_w]);				\
    _w++;								\
  }									\
  (ctx)->bufi = 0;							\
} while (0)
139
/* Consume at most @n@ bytes of buffered output from the context @ctx@; the
 * count is capped at the @SALSA20_OUTSZ - ctx->bufi@ bytes still unread in
 * the buffer, and @n@ is reduced by the number actually consumed.
 *
 *   * If @d@ is null, the bytes are merely skipped: only @ctx->bufi@ moves
 *     on, and @s@ is left alone.
 *
 *   * If @d@ is not null but @s@ is, the bytes are copied to @d@, which is
 *     advanced.
 *
 *   * If neither is null, the bytes are XORed with the input at @s@ and
 *     written to @d@; both pointers are advanced.
 */
#define SALSA20_OUTBUF(ctx, d, s, n) do {				\
  size_t _take = (n);							\
  size_t _avail = SALSA20_OUTSZ - (ctx)->bufi;				\
									\
  if (_take > _avail) _take = _avail;					\
  (n) -= _take;								\
  if (!(d)) {								\
    (ctx)->bufi += _take;						\
  } else if (!(s)) {							\
    for (; _take; _take--)						\
      *(d)++ = (ctx)->buf[(ctx)->bufi++];				\
  } else {								\
    for (; _take; _take--)						\
      *(d)++ = (ctx)->buf[(ctx)->bufi++] ^ *(s)++;			\
  }									\
} while (0)
153
154/*----- Variants and naming -----------------------------------------------*/
155
/* The round counts for which complete sets of definitions are generated:
 * apply the macro @_@ to each of 8, 12, and 20 in turn.
 */
#define SALSA20_VARS(_) _(8) _(12) _(20)

/* Build the externally-facing name for the @r@-round variant from a @base@
 * and a suffix @suff@.  The 8- and 12-round variants have the round count
 * pasted onto @base@; the 20-round variant uses @base@ as it stands.  The
 * pieces are joined using mLib's @GLUE@.  Listed in the same order as
 * @SALSA20_VARS@.
 */
#define SALSA20__DECOR_8(base, suff) GLUE(base##8, suff)
#define SALSA20__DECOR_12(base, suff) GLUE(base##12, suff)
#define SALSA20__DECOR_20(base, suff) GLUE(base, suff)
#define SALSA20_DECOR(base, r, suff) SALSA20__DECOR_##r(base, suff)

/* The standard name of each variant as a string literal, available at
 * preprocessing time as @SALSA20_NAME_@ followed by the round count.
 */
#define SALSA20_NAME_8 "salsa20/8"
#define SALSA20_NAME_12 "salsa20/12"
#define SALSA20_NAME_20 "salsa20"
169
170/*----- That's all, folks -------------------------------------------------*/
171
172#ifdef __cplusplus
173 }
174#endif
175
176#endif