mdw@git.distorted.org.uk Git - catacomb/blame_incremental

... / ...

Commit	Line	Data
	1	/* -*-c-*-
	2	*
	3	* Salsa20 core definitions
	4	*
	5	* (c) 2015 Straylight/Edgeware
	6	*/
	7
	8	#ifndef CATACOMB_SALSA20_CORE_H
	9	#define CATACOMB_SALSA20_CORE_H
	10
	11	#ifdef __cplusplus
	12	extern "C" {
	13	#endif
	14
	15	/*----- Header files ------------------------------------------------------*/
	16
	17	#include <mLib/bits.h>
	18	#include <mLib/macros.h>
	19
	20	#ifndef CATACOMB_SALSA20_H
	21	# include "salsa20.h"
	22	#endif
	23
	24	/*----- Magic constants ---------------------------------------------------*/
	25
	/* The Salsa20 constants are four 32-bit words which spell out an ASCII
	 * string when each word is read least-significant byte first.  For
	 * 256-bit keys the string is `expand 32-byte k'...
	 */
	#define SALSA20_A256 0x61707865		/* "expa" */
	#define SALSA20_B256 0x3320646e		/* "nd 3" */
	#define SALSA20_C256 0x79622d32		/* "2-by" */
	#define SALSA20_D256 0x6b206574		/* "te k" */

	/* ... for 128-bit keys it is `expand 16-byte k', so only the two middle
	 * words change...
	 */
	#define SALSA20_A128 SALSA20_A256	/* "expa" */
	#define SALSA20_B128 0x3120646e		/* "nd 1" */
	#define SALSA20_C128 0x79622d36		/* "6-by" */
	#define SALSA20_D128 SALSA20_D256	/* "te k" */

	/* ... and for 80-bit keys, provided for completeness's sake, it is
	 * `expand 10-byte k'.
	 */
	#define SALSA20_A80 SALSA20_A128	/* "expa" */
	#define SALSA20_B80 SALSA20_B128	/* "nd 1" */
	#define SALSA20_C80 0x79622d30		/* "0-by" */
	#define SALSA20_D80 SALSA20_D128	/* "te k" */
	43
	44	/*----- The Salsa20 core function -----------------------------------------*/
	45
	46	/* It makes life somewhat easier if we don't actually store and maintain the
	47	* input matrix in the textbook order. Instead, we rotate the columns other
	48	* than the leftmost one upwards, so that the constants which were originally
	49	* along the diagonal end up on the top row. We'll need to undo this
	50	* permutation on output, but that's not too terrible an imposition.
	51	*
	52	* The permutation we're applying to the matrix elements is this:
	53	*
	54	*     [  0  1  2  3 ]        [  0  5 10 15 ]
	55	*     [  4  5  6  7 ]  -->   [  4  9 14  3 ]
	56	*     [  8  9 10 11 ]        [  8 13  2  7 ]
	57	*     [ 12 13 14 15 ]        [ 12  1  6 11 ]
	58	*
	59	* and as a result, we need to apply this inverse permutation to figure out
	60	* which indices to use in the double-round macro and elsewhere.
	61	*
	62	*     [  0 13 10  7 ]
	63	*     [  4  1 14 11 ]
	64	*     [  8  5  2 15 ]
	65	*     [ 12  9  6  3 ]
	66	*/
	67
	/* The Salsa20 quarter-round.
	 *
	 * Take the four words of the matrix @y@ found at indices @a@, @b@, @c@,
	 * and @d@, mix them, and store the results at the same indices of @z@.
	 * The words are settled in the order @b@, @c@, @d@, @a@; each new word is
	 * the old one XORed with a rotated sum of two words which are already
	 * settled (or not yet touched).  Every old word is read before its
	 * replacement is written, so @z@ and @y@ may name the same matrix.
	 *
	 * The arguments are expanded several times each, so they should be free
	 * of side effects.
	 */
	#define SALSA20_QR(z, y, a, b, c, d) do { \
	  (z)[b] = ROL32((y)[d] + (y)[a],  7) ^ (y)[b]; \
	  (z)[c] = ROL32((y)[a] + (z)[b],  9) ^ (y)[c]; \
	  (z)[d] = ROL32((z)[b] + (z)[c], 13) ^ (y)[d]; \
	  (z)[a] = ROL32((z)[c] + (z)[d], 18) ^ (y)[a]; \
	} while (0)
	77
	/* The Salsa20 double-round.
	 *
	 * Read the matrix @y@ and leave the result of two rounds in @z@.  The
	 * first four quarter-rounds read @y@ and between them write every word of
	 * @z@; the last four then work on @z@ in place.  Because the matrix is
	 * held in permuted order, the second half uses the index sets given by
	 * the rows of the inverse-permutation table, rather than the textbook row
	 * indices.
	 */
	#define SALSA20_DR(z, y) do { \
	  /* First half: one quarter-round per stored column, from @y@ to @z@. */ \
	  SALSA20_QR(z, y,  0,  4,  8, 12); \
	  SALSA20_QR(z, y,  1,  5,  9, 13); \
	  SALSA20_QR(z, y,  2,  6, 10, 14); \
	  SALSA20_QR(z, y,  3,  7, 11, 15); \
	  /* Second half: the remaining four quarter-rounds, in place on @z@. */ \
	  SALSA20_QR(z, z,  0, 13, 10,  7); \
	  SALSA20_QR(z, z,  1, 14, 11,  4); \
	  SALSA20_QR(z, z,  2, 15,  8,  5); \
	  SALSA20_QR(z, z,  3, 12,  9,  6); \
	} while (0)
	91
	/* The Salsa20 feedforward step, used at the end of the core function.
	 *
	 * Here, @y@ is the original input matrix and @z@ is the matrix after the
	 * rounds; both are in the permuted storage order.  Each word of @z@ is
	 * replaced by the sum of the corresponding words of @z@ and @y@, and at
	 * the same time the words are moved back into canonical order, ready for
	 * output.  The first column keeps its place; the other columns are
	 * rotated, which is done in place by walking each cycle of the permutation
	 * with a single temporary.
	 *
	 * The temporary @_t@ must be a @uint32@, like the matrix words: parking a
	 * sum in a plain @int@ would rely on an implementation-defined conversion
	 * whenever the sum exceeds @INT_MAX@.
	 */
	#define SALSA20_FFWD(z, y) do { \
	  const uint32 *_y = (y); \
	  uint32 *_z = (z); \
	  uint32 _t; \
	  /* Column 0 is not permuted: just add. */ \
	  _z[ 0] = _z[ 0] + _y[ 0]; _z[ 4] = _z[ 4] + _y[ 4]; \
	  _z[ 8] = _z[ 8] + _y[ 8]; _z[12] = _z[12] + _y[12]; \
	  /* Column 1: the four-cycle 1 <- 13 <- 9 <- 5 <- 1. */ \
	  _t = _z[ 1] + _y[ 1]; _z[ 1] = _z[13] + _y[13]; \
	  _z[13] = _z[ 9] + _y[ 9]; _z[ 9] = _z[ 5] + _y[ 5]; _z[ 5] = _t; \
	  /* Column 2: the two swaps 2 <-> 10 and 6 <-> 14. */ \
	  _t = _z[ 2] + _y[ 2]; _z[ 2] = _z[10] + _y[10]; _z[10] = _t; \
	  _t = _z[ 6] + _y[ 6]; _z[ 6] = _z[14] + _y[14]; _z[14] = _t; \
	  /* Column 3: the four-cycle 3 <- 7 <- 11 <- 15 <- 3. */ \
	  _t = _z[ 3] + _y[ 3]; _z[ 3] = _z[ 7] + _y[ 7]; \
	  _z[ 7] = _z[11] + _y[11]; _z[11] = _z[15] + _y[15]; _z[15] = _t; \
	} while (0)
	109
	/* Fixed numbers of rounds, fully unrolled.  Each macro reads the matrix
	 * @y@ and leaves the result in @z@: the first double-round goes from @y@
	 * to @z@, and every later one works on @z@ in place.  The larger counts
	 * are built by chaining the smaller ones.
	 */
	#define SALSA20_4R(z, y) do { \
	  SALSA20_DR(z, y); \
	  SALSA20_DR(z, z); \
	} while (0)
	#define SALSA20_8R(z, y) do { \
	  SALSA20_4R(z, y); \
	  SALSA20_4R(z, z); \
	} while (0)
	#define SALSA20_12R(z, y) do { \
	  SALSA20_4R(z, y); \
	  SALSA20_8R(z, z); \
	} while (0)
	#define SALSA20_20R(z, y) do { \
	  SALSA20_8R(z, y); \
	  SALSA20_12R(z, z); \
	} while (0)
	119
	/* Apply @n@ rounds using a loop rather than unrolled code; @n@ must be
	 * even.  (This seems to be faster, probably because it fits in cache
	 * better.)  The first double-round reads @y@ and writes @z@; the
	 * remaining @n/2 - 1@ double-rounds work on @z@ in place.  Note that @n@
	 * is expanded in the loop condition, so it is evaluated repeatedly.
	 */
	#define SALSA20_nR(z, y, n) do { \
	  int _i = 0; \
	  SALSA20_DR(z, y); \
	  while (_i < (n)/2 - 1) { \
	    SALSA20_DR(z, z); \
	    _i++; \
	  } \
	} while (0)
	129
	/* Step the counter in the Salsa20 state matrix @a@.  Word 8 is
	 * incremented modulo 2^32; if it wraps round to zero, the carry is added
	 * to word 5.
	 */
	#define SALSA20_STEP(a) do { \
	  (a)[8] = U32((a)[8] + 1); \
	  if (!(a)[8]) (a)[5] += 1; \
	} while (0)
	133
	134	/*----- Buffering and output ----------------------------------------------
	135	*
	136	* These macros are also used by ChaCha.
	137	*/
	138
	/* Write the whole Salsa20 matrix @a@ to the output buffer at @d@: each of
	 * the sixteen words is stored with @STORE32_L@, and @d@ is advanced past
	 * the four bytes just written, so it ends up 64 bytes further on.
	 */
	#define SALSA20_GENFULL(a, d) do { \
	  int _i = 0; \
	  \
	  while (_i < 16) { \
	    STORE32_L((d), (a)[_i]); \
	    (d) += 4; \
	    _i++; \
	  } \
	} while (0)
	147
	/* XOR a full block of input from the buffer at @s@ with the Salsa20
	 * matrix @a@, writing the result to @d@; both @s@ and @d@ are advanced
	 * past the block.  Each word is fetched completely into a temporary
	 * before anything is stored, so the load from @s@ is always finished
	 * before the matching store to @d@ begins.
	 */
	#define SALSA20_MIXFULL(a, d, s) do { \
	  uint32 _x; \
	  int _i = 0; \
	  \
	  while (_i < 16) { \
	    _x = LOAD32_L(s); (s) += 4; \
	    _x ^= (a)[_i]; \
	    STORE32_L((d), _x); (d) += 4; \
	    _i++; \
	  } \
	} while (0)
	161
	/* Load the buffer of the context @ctx@ from the matrix @a@, ready for
	 * handing out partial blocks of output: word @i@ of @a@ is stored with
	 * @STORE32_L@ at byte offset @4 i@ of the buffer, and the buffer index
	 * @bufi@ is reset to zero.
	 */
	#define SALSA20_PREPBUF(ctx, a) do { \
	  int _i = 0; \
	  while (_i < 16) { \
	    STORE32_L((ctx)->buf + 4*_i, (a)[_i]); \
	    _i++; \
	  } \
	  (ctx)->bufi = 0; \
	} while (0)
	170
	/* Hand out at most @n@ bytes of buffered output from the context @ctx@.
	 *
	 * The number of bytes actually consumed is the smaller of @n@ and the
	 * space left in the buffer (@SALSA20_OUTSZ - bufi@); @n@ is decreased by
	 * that amount and @bufi@ is advanced by it.  What happens to the bytes
	 * depends on the pointers:
	 *
	 *   * if @d@ is null, the bytes are simply skipped, and @s@ is left
	 *     alone;
	 *
	 *   * otherwise, if @s@ is not null, each buffered byte is XORed with the
	 *     next input byte at @s@ and stored at @d@, and both pointers are
	 *     advanced;
	 *
	 *   * otherwise the buffered bytes are copied to @d@ unchanged, and @d@
	 *     is advanced.
	 *
	 * The arguments @d@, @s@ and @n@ must be modifiable lvalues.
	 */
	#define SALSA20_OUTBUF(ctx, d, s, n) do { \
	  size_t _n = (n), _left = SALSA20_OUTSZ - (ctx)->bufi; \
	  if (_n > _left) _n = _left; \
	  (n) -= _n; \
	  if (!(d)) { \
	    (ctx)->bufi += _n; \
	  } else if (s) { \
	    while (_n--) *(d)++ = (ctx)->buf[(ctx)->bufi++] ^ *(s)++; \
	  } else { \
	    while (_n--) *(d)++ = (ctx)->buf[(ctx)->bufi++]; \
	  } \
	} while (0)
	184
	185	/*----- Variants and naming -----------------------------------------------*/
	186
	/* The common numbers of rounds, for which definitions are generated.
	 * The argument is the name of a one-argument macro, which is invoked once
	 * for each round count, in the order 8, 12, 20.
	 */
	#define SALSA20_VARS(each) each(8) each(12) each(20)
	189
	/* Constructing externally-facing names.  @SALSA20_DECOR(base, r, suff)@
	 * selects a helper according to the round count @r@: for 20 rounds the
	 * name is just @base@ glued to @suff@, while for 8 and 12 rounds the
	 * round count is pasted onto @base@ first.
	 */
	#define SALSA20_DECOR(stem, nr, tail) SALSA20__DECOR_##nr(stem, tail)
	#define SALSA20__DECOR_8(stem, tail) GLUE(stem##8, tail)
	#define SALSA20__DECOR_12(stem, tail) GLUE(stem##12, tail)
	#define SALSA20__DECOR_20(stem, tail) GLUE(stem, tail)
	195
	/* Preprocessor-time table of the standard names, one string per
	 * supported round count.
	 */
	#define SALSA20_NAME_8  "salsa20/8"
	#define SALSA20_NAME_12 "salsa20/12"
	#define SALSA20_NAME_20 "salsa20"
	200
	201	/*----- That's all, folks -------------------------------------------------*/
	202
	203	#ifdef __cplusplus
	204	}
	205	#endif
	206
	207	#endif