/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <stdint.h>
#include <stddef.h>

/*
 * The context holds the 16-word input state; the struct body is assumed
 * here (variants of this file keep it in a separate header), but the
 * code below uses only the input array.
 */
struct chacha_ctx {
	uint32_t input[16];
};
typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((uint8_t)(v) & U8C(0xFF))
#define U32V(v) ((uint32_t)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

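/*
 * The U32V mask in ROTL32 matters on platforms where int is wider than
 * 32 bits: after integer promotion the left shift can carry bits above
 * bit 31, and the mask truncates the result back to a 32-bit rotate.
 */
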
#define U8TO32_LITTLE(p) \
	(((uint32_t)((p)[0])      ) | \
	 ((uint32_t)((p)[1]) <<  8) | \
	 ((uint32_t)((p)[2]) << 16) | \
	 ((uint32_t)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)      ); \
		(p)[1] = U8V((v) >>  8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

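/*
 * These byte-wise load/store macros assemble and emit little-endian
 * 32-bit words one byte at a time, so the cipher produces identical
 * output regardless of host byte order or alignment.
 */
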
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

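/*
 * QUARTERROUND is the ChaCha add-rotate-xor quarter round, with the
 * rotation distances 16, 12, 8 and 7. The round loop in
 * chacha_encrypt_bytes applies it first to the four columns of the
 * 4x4-word state and then to the four diagonals.
 */
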
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

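/*
 * One of these printable 16-byte constants fills state words 0..3;
 * chacha_keysetup below picks sigma or tau by key length.
 */
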
void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits) {
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if(kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
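	/*
	 * For a 128-bit key, k was not advanced above, so the same 16 key
	 * bytes fill both input[4..7] and input[8..11].
	 */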
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

void chacha_ivsetup(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter) {
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

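/*
 * After keysetup and ivsetup the 16-word state is laid out as:
 *   input[0..3]   constant (sigma or tau)
 *   input[4..11]  key
 *   input[12..13] 64-bit block counter
 *   input[14..15] 64-bit IV
 */
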
void chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes) {
	uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
	uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
	uint8_t *ctarget = NULL;
	uint8_t tmp[64];
	unsigned int i;

	if(!bytes)
		return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for(;;) {
		/* For a final partial block, run through a 64-byte scratch buffer. */
		if(bytes < 64) {
			for(i = 0; i < bytes; ++i) {
				tmp[i] = m[i];
			}
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		for(i = 20; i > 0; i -= 2) {
			/* Four column rounds... */
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			/* ...then four diagonal rounds. */
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		/* Feed the original input words forward into the output block. */
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));
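
		/*
		 * Encryption is a plain keystream XOR, so decryption is the
		 * same operation applied to the ciphertext.
		 */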
		j12 = PLUSONE(j12);
		if(!j12) {
			j13 = PLUSONE(j13);
			/* stopping at 2^70 bytes per nonce is user's responsibility */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if(bytes <= 64) {
			if(bytes < 64) {
				/* Copy the partial block back out of the scratch buffer. */
				for(i = 0; i < bytes; ++i) {
					ctarget[i] = c[i];
				}
			}
			x->input[12] = j12;
			x->input[13] = j13;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}
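
/*
 * Example usage: a minimal round-trip sketch added for illustration; it
 * is not part of the original file. The all-zero key and IV and the
 * CHACHA_EXAMPLE guard macro are placeholders invented here.
 */
#ifdef CHACHA_EXAMPLE
#include <stdio.h>
#include <string.h>

int main(void) {
	chacha_ctx ctx;
	uint8_t key[32] = { 0 };	/* placeholder; use random key bytes in practice */
	uint8_t iv[8] = { 0 };		/* placeholder 64-bit IV; never reuse with one key */
	uint8_t msg[] = "attack at dawn";
	uint8_t enc[sizeof(msg)], dec[sizeof(msg)];

	/* Encrypt. */
	chacha_keysetup(&ctx, key, 256);
	chacha_ivsetup(&ctx, iv, NULL);	/* NULL counter starts at block 0 */
	chacha_encrypt_bytes(&ctx, msg, enc, sizeof(msg));

	/* Decrypt by re-running the same keystream over the ciphertext. */
	chacha_keysetup(&ctx, key, 256);
	chacha_ivsetup(&ctx, iv, NULL);
	chacha_encrypt_bytes(&ctx, enc, dec, sizeof(dec));

	printf("round trip %s\n", memcmp(msg, dec, sizeof(msg)) == 0 ? "ok" : "FAILED");
	return 0;
}
#endif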