3 Reference implementation of the Kalyna block cipher (DSTU 7624:2014), all block and key length variants
5 Authors: Ruslan Kiianchuk, Ruslan Mordvinov, Roman Oliynykov
9 #include "transformations.h"
/*
 * Allocates a cipher context for the requested block and key size.
 *
 * block_size  block length; compared against kBLOCK_128, 256 and kBLOCK_512.
 * key_size    key length; compared against kKEY_128, kKEY_256 and kKEY_512.
 *
 * The visible branches accept these block/key pairs: 128/128, 128/256,
 * 256/256, 256/512 and 512/512.  ctx->nb and ctx->nk receive the block and key
 * length divided by kBITS_IN_WORD.  Unsupported sizes are reported on stderr.
 * The state (nb words) and ctx->nr + 1 round keys (nb words each) are then
 * allocated zero-filled with calloc.
 *
 * NOTE(review): the malloc() result is dereferenced without a NULL check.
 * NOTE(review): ctx->nr is read below, but its assignment and the statements
 * that follow each error message are not visible here - confirm them.
 */
13 kalyna_t
* KalynaInit(size_t block_size
, size_t key_size
) {
15 kalyna_t
* ctx
= (kalyna_t
*)malloc(sizeof(kalyna_t
));
17 if (block_size
== kBLOCK_128
) {
18 ctx
->nb
= kBLOCK_128
/ kBITS_IN_WORD
;
19 if (key_size
== kKEY_128
) {
20 ctx
->nk
= kKEY_128
/ kBITS_IN_WORD
;
22 } else if (key_size
== kKEY_256
){
23 ctx
->nk
= kKEY_256
/ kBITS_IN_WORD
;
26 fprintf(stderr
, "Error: unsupported key size.\n");
/* NOTE(review): this branch compares against the literal 256 while the other
 * branches use the kBLOCK_* constants. */
29 } else if (block_size
== 256) {
30 ctx
->nb
= kBLOCK_256
/ kBITS_IN_WORD
;
31 if (key_size
== kKEY_256
) {
32 ctx
->nk
= kKEY_256
/ kBITS_IN_WORD
;
34 } else if (key_size
== kKEY_512
){
35 ctx
->nk
= kKEY_512
/ kBITS_IN_WORD
;
38 fprintf(stderr
, "Error: unsupported key size.\n");
41 } else if (block_size
== kBLOCK_512
) {
42 ctx
->nb
= kBLOCK_512
/ kBITS_IN_WORD
;
43 if (key_size
== kKEY_512
) {
44 ctx
->nk
= kKEY_512
/ kBITS_IN_WORD
;
47 fprintf(stderr
, "Error: unsupported key size.\n");
51 fprintf(stderr
, "Error: unsupported block size.\n");
/* Cipher state: nb zero-initialised 64-bit words. */
55 ctx
->state
= (uint64_t*)calloc(ctx
->nb
, sizeof(uint64_t));
56 if (ctx
->state
== NULL
)
57 perror("Could not allocate memory for cipher state.");
/* Table of nr + 1 round-key pointers.
 * NOTE(review): the element size is written as sizeof(uint64_t**); the
 * elements are uint64_t*, so sizeof(uint64_t*) would state the intent. */
59 ctx
->round_keys
= (uint64_t**)calloc(ctx
->nr
+ 1, sizeof(uint64_t**));
60 if (ctx
->round_keys
== NULL
)
61 perror("Could not allocate memory for cipher round keys.");
/* One nb-word buffer per round key.
 * NOTE(review): in the visible lines a failed allocation is only reported via
 * perror(); confirm that execution does not continue with a NULL pointer. */
63 for (i
= 0; i
< ctx
->nr
+ 1; ++i
) {
64 ctx
->round_keys
[i
] = (uint64_t*)calloc(ctx
->nb
, sizeof(uint64_t));
65 if (ctx
->round_keys
[i
] == NULL
)
66 perror("Could not allocate memory for cipher round keys.");
/*
 * Releases the memory owned by a cipher context.
 *
 * The visible statements free each of the ctx->nr + 1 round-key buffers and
 * then the round_keys pointer table itself.
 *
 * NOTE(review): the release of ctx->state and of ctx, and the value returned,
 * are not visible here - confirm them.
 */
72 int KalynaDelete(kalyna_t
* ctx
) {
75 for (i
= 0; i
< ctx
->nr
+ 1; ++i
) {
76 free(ctx
->round_keys
[i
]);
78 free(ctx
->round_keys
);
85 void SubBytes(kalyna_t
* ctx
) {
87 uint64_t* s
= ctx
->state
; /* For shorter expressions. */
88 for (i
= 0; i
< ctx
->nb
; ++i
) {
89 ctx
->state
[i
] = sboxes_enc
[0][s
[i
] & 0x00000000000000FFULL
] |
90 ((uint64_t)sboxes_enc
[1][(s
[i
] & 0x000000000000FF00ULL
) >> 8] << 8) |
91 ((uint64_t)sboxes_enc
[2][(s
[i
] & 0x0000000000FF0000ULL
) >> 16] << 16) |
92 ((uint64_t)sboxes_enc
[3][(s
[i
] & 0x00000000FF000000ULL
) >> 24] << 24) |
93 ((uint64_t)sboxes_enc
[0][(s
[i
] & 0x000000FF00000000ULL
) >> 32] << 32) |
94 ((uint64_t)sboxes_enc
[1][(s
[i
] & 0x0000FF0000000000ULL
) >> 40] << 40) |
95 ((uint64_t)sboxes_enc
[2][(s
[i
] & 0x00FF000000000000ULL
) >> 48] << 48) |
96 ((uint64_t)sboxes_enc
[3][(s
[i
] & 0xFF00000000000000ULL
) >> 56] << 56);
100 void InvSubBytes(kalyna_t
* ctx
) {
102 uint64_t* s
= ctx
->state
; /* For shorter expressions. */
103 for (i
= 0; i
< ctx
->nb
; ++i
) {
104 ctx
->state
[i
] = sboxes_dec
[0][s
[i
] & 0x00000000000000FFULL
] |
105 ((uint64_t)sboxes_dec
[1][(s
[i
] & 0x000000000000FF00ULL
) >> 8] << 8) |
106 ((uint64_t)sboxes_dec
[2][(s
[i
] & 0x0000000000FF0000ULL
) >> 16] << 16) |
107 ((uint64_t)sboxes_dec
[3][(s
[i
] & 0x00000000FF000000ULL
) >> 24] << 24) |
108 ((uint64_t)sboxes_dec
[0][(s
[i
] & 0x000000FF00000000ULL
) >> 32] << 32) |
109 ((uint64_t)sboxes_dec
[1][(s
[i
] & 0x0000FF0000000000ULL
) >> 40] << 40) |
110 ((uint64_t)sboxes_dec
[2][(s
[i
] & 0x00FF000000000000ULL
) >> 48] << 48) |
111 ((uint64_t)sboxes_dec
[3][(s
[i
] & 0xFF00000000000000ULL
) >> 56] << 56);
/*
 * Row-shift layer (encryption direction).
 *
 * Views the state as bytes and builds a new byte buffer in which, for every
 * row, the byte in column col is stored at column (col + shift) % nb.  The new
 * buffer is then converted back to words and installed as ctx->state.
 *
 * NOTE(review): the declaration and update of `shift` are not visible here;
 * the test row % (sizeof(uint64_t) / nb) == 0 presumably guards the update.
 * NOTE(review): INDEX() is defined elsewhere - presumably it addresses the
 * byte at (row, column).
 */
116 void ShiftRows(kalyna_t
* ctx
) {
/* WordsToBytes casts the word pointer it is given, so `state` refers to the
 * buffer currently held in ctx->state. */
120 uint8_t* state
= WordsToBytes(ctx
->nb
, ctx
->state
);
/* NOTE(review): the malloc() result is used without a NULL check. */
121 uint8_t* nstate
= (uint8_t*) malloc(ctx
->nb
* sizeof(uint64_t));
123 for (row
= 0; row
< sizeof(uint64_t); ++row
) {
124 if (row
% (sizeof(uint64_t) / ctx
->nb
) == 0)
126 for (col
= 0; col
< ctx
->nb
; ++col
) {
127 INDEX(nstate
, row
, (col
+ shift
) % ctx
->nb
) = INDEX(state
, row
, col
);
/* ctx->state now refers to the freshly allocated buffer.
 * NOTE(review): confirm that the previous buffer is released afterwards. */
131 ctx
->state
= BytesToWords(ctx
->nb
* sizeof(uint64_t), nstate
);
/*
 * Row-shift layer (decryption direction).
 *
 * Views the state as bytes and builds a new byte buffer in which, for every
 * row, column col receives the byte found at column (col + shift) % nb of the
 * current state.  This is the opposite data movement to ShiftRows.  The new
 * buffer is then converted back to words and installed as ctx->state.
 *
 * NOTE(review): the declaration and update of `shift` are not visible here;
 * the test row % (sizeof(uint64_t) / nb) == 0 presumably guards the update.
 * NOTE(review): INDEX() is defined elsewhere - presumably it addresses the
 * byte at (row, column).
 */
135 void InvShiftRows(kalyna_t
* ctx
) {
/* WordsToBytes casts the word pointer it is given, so `state` refers to the
 * buffer currently held in ctx->state. */
139 uint8_t* state
= WordsToBytes(ctx
->nb
, ctx
->state
);
/* NOTE(review): the malloc() result is used without a NULL check. */
140 uint8_t* nstate
= (uint8_t*) malloc(ctx
->nb
* sizeof(uint64_t));
142 for (row
= 0; row
< sizeof(uint64_t); ++row
) {
143 if (row
% (sizeof(uint64_t) / ctx
->nb
) == 0)
145 for (col
= 0; col
< ctx
->nb
; ++col
) {
146 INDEX(nstate
, row
, col
) = INDEX(state
, row
, (col
+ shift
) % ctx
->nb
);
/* ctx->state now refers to the freshly allocated buffer.
 * NOTE(review): confirm that the previous buffer is released afterwards. */
150 ctx
->state
= BytesToWords(ctx
->nb
* sizeof(uint64_t), nstate
);
/*
 * Multiplies two bytes and returns a byte; used by MatrixMultiply on state
 * bytes and matrix coefficients.
 *
 * The visible statements iterate kBITS_IN_BYTE times and XOR
 * kREDUCTION_POLYNOMIAL into x.
 *
 * NOTE(review): presumably a shift-and-add multiplication in GF(2^8) with
 * modular reduction; the accumulation and shift steps are not visible here -
 * confirm them.
 */
155 uint8_t MultiplyGF(uint8_t x
, uint8_t y
) {
159 for (i
= 0; i
< kBITS_IN_BYTE
; ++i
) {
165 x
^= kREDUCTION_POLYNOMIAL
;
/*
 * Multiplies the cipher state by an 8x8 byte matrix.
 *
 * ctx     context whose state is transformed; ctx->state[col] is overwritten
 *         for every col in 0 .. nb - 1.
 * matrix  coefficient matrix; matrix[row][b] multiplies state byte (b, col).
 *
 * For each column, byte `row` of the result word is the XOR over b of
 * MultiplyGF(state byte (b, col), matrix[row][b]).
 *
 * NOTE(review): the declarations and the per-iteration resets of `product`
 * and `result` are not visible here - confirm them.
 */
171 void MatrixMultiply(kalyna_t
* ctx
, uint8_t matrix
[8][8]) {
/* Byte view of the state; WordsToBytes casts the pointer it is given. */
175 uint8_t* state
= WordsToBytes(ctx
->nb
, ctx
->state
);
177 for (col
= 0; col
< ctx
->nb
; ++col
) {
/* Both inner loops count down and stop on >= 0, so row and b must be signed. */
179 for (row
= sizeof(uint64_t) - 1; row
>= 0; --row
) {
181 for (b
= sizeof(uint64_t) - 1; b
>= 0; --b
) {
182 product
^= MultiplyGF(INDEX(state
, b
, col
), matrix
[row
][b
]);
/* Places the product in byte `row` of the word.  The shift is written as
 * row * sizeof(uint64_t), i.e. row * 8 bits per byte. */
184 result
|= (uint64_t)product
<< (row
* sizeof(uint64_t));
186 ctx
->state
[col
] = result
;
190 void MixColumns(kalyna_t
* ctx
) {
191 MatrixMultiply(ctx
, mds_matrix
);
194 void InvMixColumns(kalyna_t
* ctx
) {
195 MatrixMultiply(ctx
, mds_inv_matrix
);
199 void EncipherRound(kalyna_t
* ctx
) {
205 void DecipherRound(kalyna_t
* ctx
) {
211 void AddRoundKey(int round
, kalyna_t
* ctx
) {
213 for (i
= 0; i
< ctx
->nb
; ++i
) {
214 ctx
->state
[i
] = ctx
->state
[i
] + ctx
->round_keys
[round
][i
];
218 void SubRoundKey(int round
, kalyna_t
* ctx
) {
220 for (i
= 0; i
< ctx
->nb
; ++i
) {
221 ctx
->state
[i
] = ctx
->state
[i
] - ctx
->round_keys
[round
][i
];
226 void AddRoundKeyExpand(uint64_t* value
, kalyna_t
* ctx
) {
228 for (i
= 0; i
< ctx
->nb
; ++i
) {
229 ctx
->state
[i
] = ctx
->state
[i
] + value
[i
];
234 void XorRoundKey(int round
, kalyna_t
* ctx
) {
236 for (i
= 0; i
< ctx
->nb
; ++i
) {
237 ctx
->state
[i
] = ctx
->state
[i
] ^ ctx
->round_keys
[round
][i
];
242 void XorRoundKeyExpand(uint64_t* value
, kalyna_t
* ctx
) {
244 for (i
= 0; i
< ctx
->nb
; ++i
) {
245 ctx
->state
[i
] = ctx
->state
[i
] ^ value
[i
];
/*
 * Rotates an array of 64-bit words by one position towards index 0.
 *
 * state_size   number of words in state_value.
 * state_value  array rotated in place: word 1 moves to slot 0, word 2 to
 *              slot 1, and so on; the former word 0 ends up in the last slot.
 *
 * An empty array (state_size == 0) is left untouched.
 */
void Rotate(size_t state_size, uint64_t* state_value) {
    size_t i;
    uint64_t first;

    /* Without this guard, state_size - 1 would wrap around to SIZE_MAX. */
    if (state_size == 0) {
        return;
    }

    first = state_value[0];
    for (i = 1; i < state_size; ++i) {
        state_value[i - 1] = state_value[i];
    }
    state_value[state_size - 1] = first;
}
/*
 * Shifts every 64-bit word of an array left by one bit, in place.
 *
 * state_size   number of words in state_value.
 * state_value  array to shift.
 *
 * Words are shifted independently: the top bit of each word is discarded and
 * is not carried into the neighbouring word.
 */
void ShiftLeft(size_t state_size, uint64_t* state_value) {
    size_t remaining = state_size;
    uint64_t* word = state_value;

    while (remaining > 0) {
        *word = *word << 1;
        ++word;
        --remaining;
    }
}
/*
 * Rotates the byte view of a word array by 2 * state_size + 3 bytes towards
 * the start of the buffer.
 *
 * state_size   number of 64-bit words in state_value.
 * state_value  array rotated in place.
 *
 * NOTE(review): the malloc() result is used without a NULL check, and the
 * release of `buffer` is not visible here - confirm it.
 */
267 void RotateLeft(size_t state_size
, uint64_t* state_value
) {
268 size_t rotate_bytes
= 2 * state_size
+ 3;
269 size_t bytes_num
= state_size
* (kBITS_IN_WORD
/ kBITS_IN_BYTE
);
/* WordsToBytes casts the pointer it is given, so `bytes` refers to the
 * caller's array. */
271 uint8_t* bytes
= WordsToBytes(state_size
, state_value
);
272 uint8_t* buffer
= (uint8_t*) malloc(rotate_bytes
);
274 /* Rotate bytes in memory: save the leading rotate_bytes bytes, move the
 * remainder to the front, then append the saved bytes at the end. */
275 memcpy(buffer
, bytes
, rotate_bytes
);
276 memmove(bytes
, bytes
+ rotate_bytes
, bytes_num
- rotate_bytes
);
277 memcpy(bytes
+ bytes_num
- rotate_bytes
, buffer
, rotate_bytes
);
/* This assignment only changes the local parameter.  The caller sees the
 * rotation because the bytes above were modified in place.
 * NOTE(review): bytes_num is a byte count, while the visible loop in
 * BytesToWords indexes 64-bit words up to its length argument - confirm. */
279 state_value
= BytesToWords(bytes_num
, bytes
);
/*
 * Computes the intermediate key kt used by the round-key schedule.
 *
 * key  cipher key, ctx->nk words.
 * ctx  context; its state is used as scratch space and overwritten.
 * kt   output buffer receiving ctx->nb words.
 *
 * NOTE(review): the steps applied between the three key-mixing calls, and the
 * release of k0/k1, are not visible here - confirm them.
 * NOTE(review): the malloc() results are used without NULL checks.
 */
285 void KeyExpandKt(uint64_t* key
, kalyna_t
* ctx
, uint64_t* kt
) {
286 uint64_t* k0
= (uint64_t*) malloc(ctx
->nb
* sizeof(uint64_t));
287 uint64_t* k1
= (uint64_t*) malloc(ctx
->nb
* sizeof(uint64_t));
/* Start from an all-zero state whose first word holds nb + nk + 1. */
289 memset(ctx
->state
, 0, ctx
->nb
* sizeof(uint64_t));
290 ctx
->state
[0] += ctx
->nb
+ ctx
->nk
+ 1;
/* When key and block have the same length, k0 and k1 are both the whole key;
 * otherwise k0 is the first nb words of the key and k1 the following nb. */
292 if (ctx
->nb
== ctx
->nk
) {
293 memcpy(k0
, key
, ctx
->nb
* sizeof(uint64_t));
294 memcpy(k1
, key
, ctx
->nb
* sizeof(uint64_t));
296 memcpy(k0
, key
, ctx
->nb
* sizeof(uint64_t));
297 memcpy(k1
, key
+ ctx
->nb
, ctx
->nb
* sizeof(uint64_t));
/* Mix the key halves into the state: add k0, XOR k1, add k0. */
300 AddRoundKeyExpand(k0
, ctx
);
302 XorRoundKeyExpand(k1
, ctx
);
304 AddRoundKeyExpand(k0
, ctx
);
/* The resulting state is the intermediate key. */
306 memcpy(kt
, ctx
->state
, ctx
->nb
* sizeof(uint64_t));
/*
 * Fills round-key slots ctx->round_keys[round] from the cipher key and the
 * intermediate key kt.
 *
 * key  cipher key, ctx->nk words.
 * kt   intermediate key, ctx->nb words (see KeyExpandKt).
 * ctx  context; its state is used as scratch space and round_keys is filled.
 *
 * NOTE(review): the enclosing loop, the way `round` advances, the statements
 * guarded by the ctx->nr == round tests, the steps between the key-mixing
 * calls and the release of the three scratch buffers are not visible here -
 * confirm them.  Presumably only even-numbered round keys are produced, with
 * KeyExpandOdd filling the rest.
 * NOTE(review): the malloc() results are used without NULL checks.
 */
313 void KeyExpandEven(uint64_t* key
, uint64_t* kt
, kalyna_t
* ctx
) {
315 uint64_t* initial_data
= (uint64_t*) malloc(ctx
->nk
* sizeof(uint64_t));
316 uint64_t* kt_round
= (uint64_t*) malloc(ctx
->nb
* sizeof(uint64_t));
317 uint64_t* tmv
= (uint64_t*) malloc(ctx
->nb
* sizeof(uint64_t));
/* Working copy of the key, and a constant with bit 0 of every 16-bit lane set
 * in each word. */
320 memcpy(initial_data
, key
, ctx
->nk
* sizeof(uint64_t));
321 for (i
= 0; i
< ctx
->nb
; ++i
) {
322 tmv
[i
] = 0x0001000100010001;
/* kt_round = kt + tmv, computed through the state buffer. */
326 memcpy(ctx
->state
, kt
, ctx
->nb
* sizeof(uint64_t));
327 AddRoundKeyExpand(tmv
, ctx
);
328 memcpy(kt_round
, ctx
->state
, ctx
->nb
* sizeof(uint64_t));
/* Load the first nb words of the working key and mix kt_round into them
 * (add, XOR, add). */
330 memcpy(ctx
->state
, initial_data
, ctx
->nb
* sizeof(uint64_t));
332 AddRoundKeyExpand(kt_round
, ctx
);
334 XorRoundKeyExpand(kt_round
, ctx
);
336 AddRoundKeyExpand(kt_round
, ctx
);
/* Store the result as round key number `round`. */
338 memcpy(ctx
->round_keys
[round
], ctx
->state
, ctx
->nb
* sizeof(uint64_t));
340 if (ctx
->nr
== round
)
/* Keys longer than the block supply a second round key from the words that
 * follow the first nb words of the working key. */
343 if (ctx
->nk
!= ctx
->nb
) {
346 ShiftLeft(ctx
->nb
, tmv
);
348 memcpy(ctx
->state
, kt
, ctx
->nb
* sizeof(uint64_t));
349 AddRoundKeyExpand(tmv
, ctx
);
350 memcpy(kt_round
, ctx
->state
, ctx
->nb
* sizeof(uint64_t));
352 memcpy(ctx
->state
, initial_data
+ ctx
->nb
, ctx
->nb
* sizeof(uint64_t));
354 AddRoundKeyExpand(kt_round
, ctx
);
356 XorRoundKeyExpand(kt_round
, ctx
);
358 AddRoundKeyExpand(kt_round
, ctx
);
360 memcpy(ctx
->round_keys
[round
], ctx
->state
, ctx
->nb
* sizeof(uint64_t));
362 if (ctx
->nr
== round
)
/* Prepare the next pass: shift the constant and rotate the working key by one
 * word. */
366 ShiftLeft(ctx
->nb
, tmv
);
367 Rotate(ctx
->nk
, initial_data
);
375 void KeyExpandOdd(kalyna_t
* ctx
) {
377 for (i
= 1; i
< ctx
->nr
; i
+= 2) {
378 memcpy(ctx
->round_keys
[i
], ctx
->round_keys
[i
- 1], ctx
->nb
* sizeof(uint64_t));
379 RotateLeft(ctx
->nb
, ctx
->round_keys
[i
]);
/*
 * Runs the key schedule for `key`, filling ctx->round_keys.
 *
 * key  cipher key, ctx->nk words.
 * ctx  initialised context; its state is overwritten as scratch space by the
 *      helpers called here.
 *
 * The visible statements allocate an nb-word intermediate key, compute it
 * with KeyExpandKt and pass it to KeyExpandEven.
 *
 * NOTE(review): the remaining steps (presumably KeyExpandOdd and the release
 * of kt) are not visible here - confirm them.
 * NOTE(review): the malloc() result is used without a NULL check.
 */
383 void KalynaKeyExpand(uint64_t* key
, kalyna_t
* ctx
) {
384 uint64_t* kt
= (uint64_t*) malloc(ctx
->nb
* sizeof(uint64_t));
385 KeyExpandKt(key
, ctx
, kt
);
386 KeyExpandEven(key
, kt
, ctx
);
/*
 * Encrypts one block.
 *
 * plaintext   input block, ctx->nb words.
 * ctx         context with expanded round keys; its state is overwritten.
 * ciphertext  output buffer receiving ctx->nb words.
 *
 * NOTE(review): the declaration and initial value of `round`, and the
 * per-round step executed next to XorRoundKey inside the loop, are not
 * visible here - confirm them.
 */
392 void KalynaEncipher(uint64_t* plaintext
, kalyna_t
* ctx
, uint64_t* ciphertext
) {
/* Load the block into the working state. */
394 memcpy(ctx
->state
, plaintext
, ctx
->nb
* sizeof(uint64_t));
/* Key addition before the rounds. */
396 AddRoundKey(round
, ctx
);
/* Rounds 1 .. nr - 1 each XOR in their own round key. */
397 for (round
= 1; round
< ctx
->nr
; ++round
) {
399 XorRoundKey(round
, ctx
);
/* The last round key (index nr) is added rather than XORed. */
402 AddRoundKey(ctx
->nr
, ctx
);
404 memcpy(ciphertext
, ctx
->state
, ctx
->nb
* sizeof(uint64_t));
/*
 * Decrypts one block.
 *
 * ciphertext  input block, ctx->nb words.
 * ctx         context with expanded round keys; its state is overwritten.
 * plaintext   output buffer receiving ctx->nb words.
 *
 * NOTE(review): the declaration and initial value of `round`, the per-round
 * step executed next to XorRoundKey, and the steps between the loop and the
 * final copy are not visible here - confirm them.
 */
407 void KalynaDecipher(uint64_t* ciphertext
, kalyna_t
* ctx
, uint64_t* plaintext
) {
/* Load the block into the working state. */
409 memcpy(ctx
->state
, ciphertext
, ctx
->nb
* sizeof(uint64_t));
/* Key subtraction before the rounds. */
411 SubRoundKey(round
, ctx
);
/* Rounds nr - 1 down to 1 each XOR in their own round key. */
412 for (round
= ctx
->nr
- 1; round
> 0; --round
) {
414 XorRoundKey(round
, ctx
);
419 memcpy(plaintext
, ctx
->state
, ctx
->nb
* sizeof(uint64_t));
/*
 * Gives a byte view of an array of 64-bit words.
 *
 * length  number of words in `words`.
 * words   array to view; it may be modified in place (see below).
 *
 * No copy is made: `bytes` is the `words` pointer cast to uint8_t*, so it
 * aliases the caller's array.  The visible loop replaces each word by
 * ReverseWord(word).
 *
 * NOTE(review): the condition under which the reversal loop runs (presumably
 * an endianness check) and the return statement are not visible here -
 * confirm them.
 */
423 uint8_t* WordsToBytes(size_t length
, uint64_t* words
) {
427 for (i
= 0; i
< length
; ++i
) {
428 words
[i
] = ReverseWord(words
[i
]);
431 bytes
= (uint8_t*)words
;
/*
 * Gives a 64-bit word view of a byte buffer.
 *
 * length  loop bound for the reversal below.
 * bytes   buffer to view; it may be modified in place (see below).
 *
 * No copy is made: `words` is the `bytes` pointer cast to uint64_t*, so it
 * aliases the caller's buffer.  The visible loop replaces words[0 .. length-1]
 * by their ReverseWord() images.
 *
 * NOTE(review): the visible callers pass a byte count (nb * sizeof(uint64_t),
 * bytes_num), but the loop uses `length` as a word count.  If the loop runs,
 * it would index past the end of the buffer - confirm the intended unit.
 * NOTE(review): the condition under which the loop runs (presumably an
 * endianness check) and the return statement are not visible here.
 * NOTE(review): the cast assumes `bytes` is suitably aligned for uint64_t.
 */
435 uint64_t* BytesToWords(size_t length
, uint8_t* bytes
) {
437 uint64_t* words
= (uint64_t*)bytes
;
439 for (i
= 0; i
< length
; ++i
) {
440 words
[i
] = ReverseWord(words
[i
]);
/*
 * Reverses the byte order of a 64-bit word.
 *
 * word  value to convert.
 *
 * Returns the value whose byte k equals byte 7 - k of `word`, e.g.
 * 0x0102030405060708 -> 0x0807060504030201.  The result does not depend on
 * the host byte order.
 */
uint64_t ReverseWord(uint64_t word) {
    size_t i;
    uint64_t reversed = 0;

    /* Peel bytes off the low end of `word` and push them into the low end of
     * `reversed`; after sizeof(uint64_t) steps the order is mirrored. */
    for (i = 0; i < sizeof(uint64_t); ++i) {
        reversed = (reversed << 8) | (word & 0xFFU);
        word >>= 8;
    }
    return reversed;
}
461 unsigned int num
= 1;
462 /* Check the least significant byte value to determine endianness */
463 return (*((uint8_t*)&num
) == 0);
466 void PrintState(size_t length
, uint64_t* state
) {
468 for (i
= length
- 1; i
>= 0; --i
) {
469 printf("%16.16llx", state
[i
]);