blob: d9e3eecc218acc31561370e931261aeba07be90e [file] [log] [blame]
Rik Snelc494e072006-11-29 18:59:44 +11001/* gf128mul.c - GF(2^128) multiplication functions
2 *
3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
4 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
5 *
6 * Based on Dr Brian Gladman's (GPL'd) work published at
Adrian-Ken Rueegsegger8c882f62009-03-04 14:43:52 +08007 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
Rik Snelc494e072006-11-29 18:59:44 +11008 * See the original copyright notice below.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 */
15
16/*
17 ---------------------------------------------------------------------------
18 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
19
20 LICENSE TERMS
21
22 The free distribution and use of this software in both source and binary
23 form is allowed (with or without changes) provided that:
24
25 1. distributions of this source code include the above copyright
26 notice, this list of conditions and the following disclaimer;
27
28 2. distributions in binary form include the above copyright
29 notice, this list of conditions and the following disclaimer
30 in the documentation and/or other associated materials;
31
32 3. the copyright holder's name is not used to endorse products
33 built using this software without specific written permission.
34
35 ALTERNATIVELY, provided that this notice is retained in full, this product
36 may be distributed under the terms of the GNU General Public License (GPL),
37 in which case the provisions of the GPL apply INSTEAD OF those given above.
38
39 DISCLAIMER
40
41 This software is provided 'as is' with no explicit or implied warranties
42 in respect of its properties, including, but not limited to, correctness
43 and/or fitness for purpose.
44 ---------------------------------------------------------------------------
45 Issue 31/01/2006
46
Eric Biggers63be5b52017-02-14 13:43:27 -080047 This file provides fast multiplication in GF(2^128) as required by several
Rik Snelc494e072006-11-29 18:59:44 +110048 cryptographic authentication modes
49*/
50
51#include <crypto/gf128mul.h>
52#include <linux/kernel.h>
53#include <linux/module.h>
54#include <linux/slab.h>
55
56#define gf128mul_dat(q) { \
57 q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
58 q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
59 q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
60 q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
61 q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
62 q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
63 q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
64 q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
65 q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
66 q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
67 q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
68 q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
69 q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
70 q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
71 q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
72 q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
73 q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
74 q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
75 q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
76 q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
77 q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
78 q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
79 q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
80 q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
81 q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
82 q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
83 q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
84 q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
85 q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
86 q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
87 q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
88 q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
89}
90
91/* Given the value i in 0..255 as the byte overflow when a field element
Lucas De Marchi25985ed2011-03-30 22:57:33 -030092 in GHASH is multiplied by x^8, this function will return the values that
Rik Snelc494e072006-11-29 18:59:44 +110093 are generated in the lo 16-bit word of the field value by applying the
94 modular polynomial. The values lo_byte and hi_byte are returned via the
95 macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
96 memory as required by a suitable definition of this macro operating on
97 the table above
98*/
99
100#define xx(p, q) 0x##p##q
101
102#define xda_bbe(i) ( \
103 (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
104 (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
105 (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
106 (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
107)
108
109#define xda_lle(i) ( \
110 (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
111 (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
112 (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
113 (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
114)
115
116static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
117static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
118
Eric Biggers63be5b52017-02-14 13:43:27 -0800119/*
120 * The following functions multiply a field element by x or by x^8 in
121 * the polynomial field representation. They use 64-bit word operations
122 * to gain speed but compensate for machine endianness and hence work
Rik Snelc494e072006-11-29 18:59:44 +1100123 * correctly on both styles of machine.
124 */
125
126static void gf128mul_x_lle(be128 *r, const be128 *x)
127{
128 u64 a = be64_to_cpu(x->a);
129 u64 b = be64_to_cpu(x->b);
130 u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
131
132 r->b = cpu_to_be64((b >> 1) | (a << 63));
133 r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
134}
135
136static void gf128mul_x_bbe(be128 *r, const be128 *x)
137{
138 u64 a = be64_to_cpu(x->a);
139 u64 b = be64_to_cpu(x->b);
140 u64 _tt = gf128mul_table_bbe[a >> 63];
141
142 r->a = cpu_to_be64((a << 1) | (b >> 63));
143 r->b = cpu_to_be64((b << 1) ^ _tt);
144}
145
Rik Snelf19f5112007-09-19 20:23:13 +0800146void gf128mul_x_ble(be128 *r, const be128 *x)
147{
148 u64 a = le64_to_cpu(x->a);
149 u64 b = le64_to_cpu(x->b);
150 u64 _tt = gf128mul_table_bbe[b >> 63];
151
152 r->a = cpu_to_le64((a << 1) ^ _tt);
153 r->b = cpu_to_le64((b << 1) | (a >> 63));
154}
155EXPORT_SYMBOL(gf128mul_x_ble);
156
Rik Snelc494e072006-11-29 18:59:44 +1100157static void gf128mul_x8_lle(be128 *x)
158{
159 u64 a = be64_to_cpu(x->a);
160 u64 b = be64_to_cpu(x->b);
161 u64 _tt = gf128mul_table_lle[b & 0xff];
162
163 x->b = cpu_to_be64((b >> 8) | (a << 56));
164 x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
165}
166
167static void gf128mul_x8_bbe(be128 *x)
168{
169 u64 a = be64_to_cpu(x->a);
170 u64 b = be64_to_cpu(x->b);
171 u64 _tt = gf128mul_table_bbe[a >> 56];
172
173 x->a = cpu_to_be64((a << 8) | (b >> 56));
174 x->b = cpu_to_be64((b << 8) ^ _tt);
175}
176
177void gf128mul_lle(be128 *r, const be128 *b)
178{
179 be128 p[8];
180 int i;
181
182 p[0] = *r;
183 for (i = 0; i < 7; ++i)
184 gf128mul_x_lle(&p[i + 1], &p[i]);
185
Mathias Krause62542662011-07-08 17:21:21 +0800186 memset(r, 0, sizeof(*r));
Rik Snelc494e072006-11-29 18:59:44 +1100187 for (i = 0;;) {
188 u8 ch = ((u8 *)b)[15 - i];
189
190 if (ch & 0x80)
191 be128_xor(r, r, &p[0]);
192 if (ch & 0x40)
193 be128_xor(r, r, &p[1]);
194 if (ch & 0x20)
195 be128_xor(r, r, &p[2]);
196 if (ch & 0x10)
197 be128_xor(r, r, &p[3]);
198 if (ch & 0x08)
199 be128_xor(r, r, &p[4]);
200 if (ch & 0x04)
201 be128_xor(r, r, &p[5]);
202 if (ch & 0x02)
203 be128_xor(r, r, &p[6]);
204 if (ch & 0x01)
205 be128_xor(r, r, &p[7]);
206
207 if (++i >= 16)
208 break;
209
210 gf128mul_x8_lle(r);
211 }
212}
213EXPORT_SYMBOL(gf128mul_lle);
214
215void gf128mul_bbe(be128 *r, const be128 *b)
216{
217 be128 p[8];
218 int i;
219
220 p[0] = *r;
221 for (i = 0; i < 7; ++i)
222 gf128mul_x_bbe(&p[i + 1], &p[i]);
223
Mathias Krause62542662011-07-08 17:21:21 +0800224 memset(r, 0, sizeof(*r));
Rik Snelc494e072006-11-29 18:59:44 +1100225 for (i = 0;;) {
226 u8 ch = ((u8 *)b)[i];
227
228 if (ch & 0x80)
229 be128_xor(r, r, &p[7]);
230 if (ch & 0x40)
231 be128_xor(r, r, &p[6]);
232 if (ch & 0x20)
233 be128_xor(r, r, &p[5]);
234 if (ch & 0x10)
235 be128_xor(r, r, &p[4]);
236 if (ch & 0x08)
237 be128_xor(r, r, &p[3]);
238 if (ch & 0x04)
239 be128_xor(r, r, &p[2]);
240 if (ch & 0x02)
241 be128_xor(r, r, &p[1]);
242 if (ch & 0x01)
243 be128_xor(r, r, &p[0]);
244
245 if (++i >= 16)
246 break;
247
248 gf128mul_x8_bbe(r);
249 }
250}
251EXPORT_SYMBOL(gf128mul_bbe);
252
253/* This version uses 64k bytes of table space.
254 A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers63be5b52017-02-14 13:43:27 -0800255 value in GF(2^128). If we consider a GF(2^128) value in
Rik Snelc494e072006-11-29 18:59:44 +1100256 the buffer's lowest byte, we can construct a table of
257 the 256 16 byte values that result from the 256 values
258 of this byte. This requires 4096 bytes. But we also
259 need tables for each of the 16 higher bytes in the
260 buffer as well, which makes 64 kbytes in total.
261*/
262/* additional explanation
263 * t[0][BYTE] contains g*BYTE
264 * t[1][BYTE] contains g*x^8*BYTE
265 * ..
266 * t[15][BYTE] contains g*x^120*BYTE */
Rik Snelc494e072006-11-29 18:59:44 +1100267struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
268{
269 struct gf128mul_64k *t;
270 int i, j, k;
271
272 t = kzalloc(sizeof(*t), GFP_KERNEL);
273 if (!t)
274 goto out;
275
276 for (i = 0; i < 16; i++) {
277 t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
278 if (!t->t[i]) {
279 gf128mul_free_64k(t);
280 t = NULL;
281 goto out;
282 }
283 }
284
285 t->t[0]->t[1] = *g;
286 for (j = 1; j <= 64; j <<= 1)
287 gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
288
289 for (i = 0;;) {
290 for (j = 2; j < 256; j += j)
291 for (k = 1; k < j; ++k)
292 be128_xor(&t->t[i]->t[j + k],
293 &t->t[i]->t[j], &t->t[i]->t[k]);
294
295 if (++i >= 16)
296 break;
297
298 for (j = 128; j > 0; j >>= 1) {
299 t->t[i]->t[j] = t->t[i - 1]->t[j];
300 gf128mul_x8_bbe(&t->t[i]->t[j]);
301 }
302 }
303
304out:
305 return t;
306}
307EXPORT_SYMBOL(gf128mul_init_64k_bbe);
308
309void gf128mul_free_64k(struct gf128mul_64k *t)
310{
311 int i;
312
313 for (i = 0; i < 16; i++)
Alex Cope75aa0a72016-11-14 11:02:54 -0800314 kzfree(t->t[i]);
315 kzfree(t);
Rik Snelc494e072006-11-29 18:59:44 +1100316}
317EXPORT_SYMBOL(gf128mul_free_64k);
318
Rik Snelc494e072006-11-29 18:59:44 +1100319void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
320{
321 u8 *ap = (u8 *)a;
322 be128 r[1];
323 int i;
324
325 *r = t->t[0]->t[ap[15]];
326 for (i = 1; i < 16; ++i)
327 be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
328 *a = *r;
329}
330EXPORT_SYMBOL(gf128mul_64k_bbe);
331
332/* This version uses 4k bytes of table space.
333 A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers63be5b52017-02-14 13:43:27 -0800334 value in GF(2^128). If we consider a GF(2^128) value in a
Rik Snelc494e072006-11-29 18:59:44 +1100335 single byte, we can construct a table of the 256 16 byte
336 values that result from the 256 values of this byte.
337 This requires 4096 bytes. If we take the highest byte in
338 the buffer and use this table to get the result, we then
339 have to multiply by x^120 to get the final value. For the
340 next highest byte the result has to be multiplied by x^112
341 and so on. But we can do this by accumulating the result
342 in an accumulator starting with the result for the top
343 byte. We repeatedly multiply the accumulator value by
344 x^8 and then add in (i.e. xor) the 16 bytes of the next
345 lower byte in the buffer, stopping when we reach the
346 lowest byte. This requires a 4096 byte table.
347*/
348struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
349{
350 struct gf128mul_4k *t;
351 int j, k;
352
353 t = kzalloc(sizeof(*t), GFP_KERNEL);
354 if (!t)
355 goto out;
356
357 t->t[128] = *g;
358 for (j = 64; j > 0; j >>= 1)
359 gf128mul_x_lle(&t->t[j], &t->t[j+j]);
360
361 for (j = 2; j < 256; j += j)
362 for (k = 1; k < j; ++k)
363 be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
364
365out:
366 return t;
367}
368EXPORT_SYMBOL(gf128mul_init_4k_lle);
369
370struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
371{
372 struct gf128mul_4k *t;
373 int j, k;
374
375 t = kzalloc(sizeof(*t), GFP_KERNEL);
376 if (!t)
377 goto out;
378
379 t->t[1] = *g;
380 for (j = 1; j <= 64; j <<= 1)
381 gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
382
383 for (j = 2; j < 256; j += j)
384 for (k = 1; k < j; ++k)
385 be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
386
387out:
388 return t;
389}
390EXPORT_SYMBOL(gf128mul_init_4k_bbe);
391
392void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
393{
394 u8 *ap = (u8 *)a;
395 be128 r[1];
396 int i = 15;
397
398 *r = t->t[ap[15]];
399 while (i--) {
400 gf128mul_x8_lle(r);
401 be128_xor(r, r, &t->t[ap[i]]);
402 }
403 *a = *r;
404}
405EXPORT_SYMBOL(gf128mul_4k_lle);
406
407void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
408{
409 u8 *ap = (u8 *)a;
410 be128 r[1];
411 int i = 0;
412
413 *r = t->t[ap[0]];
414 while (++i < 16) {
415 gf128mul_x8_bbe(r);
416 be128_xor(r, r, &t->t[ap[i]]);
417 }
418 *a = *r;
419}
420EXPORT_SYMBOL(gf128mul_4k_bbe);
421
422MODULE_LICENSE("GPL");
423MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");