blob: c050cf6f5aa91648562f039c496638a7bc25c13f [file] [log] [blame]
Rik Snelc494e072006-11-29 18:59:44 +11001/* gf128mul.c - GF(2^128) multiplication functions
2 *
3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
4 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
5 *
6 * Based on Dr Brian Gladman's (GPL'd) work published at
Adrian-Ken Rueegsegger8c882f62009-03-04 14:43:52 +08007 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
Rik Snelc494e072006-11-29 18:59:44 +11008 * See the original copyright notice below.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 */
15
16/*
17 ---------------------------------------------------------------------------
18 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
19
20 LICENSE TERMS
21
22 The free distribution and use of this software in both source and binary
23 form is allowed (with or without changes) provided that:
24
25 1. distributions of this source code include the above copyright
26 notice, this list of conditions and the following disclaimer;
27
28 2. distributions in binary form include the above copyright
29 notice, this list of conditions and the following disclaimer
30 in the documentation and/or other associated materials;
31
32 3. the copyright holder's name is not used to endorse products
33 built using this software without specific written permission.
34
35 ALTERNATIVELY, provided that this notice is retained in full, this product
36 may be distributed under the terms of the GNU General Public License (GPL),
37 in which case the provisions of the GPL apply INSTEAD OF those given above.
38
39 DISCLAIMER
40
41 This software is provided 'as is' with no explicit or implied warranties
42 in respect of its properties, including, but not limited to, correctness
43 and/or fitness for purpose.
44 ---------------------------------------------------------------------------
45 Issue 31/01/2006
46
Eric Biggers63be5b52017-02-14 13:43:27 -080047 This file provides fast multiplication in GF(2^128) as required by several
Rik Snelc494e072006-11-29 18:59:44 +110048 cryptographic authentication modes
49*/
50
51#include <crypto/gf128mul.h>
52#include <linux/kernel.h>
53#include <linux/module.h>
54#include <linux/slab.h>
55
56#define gf128mul_dat(q) { \
57 q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
58 q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
59 q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
60 q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
61 q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
62 q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
63 q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
64 q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
65 q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
66 q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
67 q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
68 q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
69 q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
70 q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
71 q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
72 q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
73 q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
74 q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
75 q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
76 q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
77 q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
78 q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
79 q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
80 q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
81 q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
82 q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
83 q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
84 q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
85 q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
86 q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
87 q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
88 q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
89}
90
91/* Given the value i in 0..255 as the byte overflow when a field element
Lucas De Marchi25985ed2011-03-30 22:57:33 -030092 in GHASH is multiplied by x^8, this function will return the values that
Rik Snelc494e072006-11-29 18:59:44 +110093 are generated in the lo 16-bit word of the field value by applying the
94 modular polynomial. The values lo_byte and hi_byte are returned via the
95 macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
96 memory as required by a suitable definition of this macro operating on
97 the table above
98*/
99
Rik Snelc494e072006-11-29 18:59:44 +1100100#define xda_bbe(i) ( \
Eric Biggers2416e4f2017-02-14 13:43:28 -0800101 (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
102 (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
103 (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
104 (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
Rik Snelc494e072006-11-29 18:59:44 +1100105)
106
107#define xda_lle(i) ( \
Eric Biggers2416e4f2017-02-14 13:43:28 -0800108 (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
109 (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
110 (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
111 (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
Rik Snelc494e072006-11-29 18:59:44 +1100112)
113
114static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
115static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
116
Eric Biggers63be5b52017-02-14 13:43:27 -0800117/*
118 * The following functions multiply a field element by x or by x^8 in
119 * the polynomial field representation. They use 64-bit word operations
120 * to gain speed but compensate for machine endianness and hence work
Rik Snelc494e072006-11-29 18:59:44 +1100121 * correctly on both styles of machine.
122 */
123
124static void gf128mul_x_lle(be128 *r, const be128 *x)
125{
126 u64 a = be64_to_cpu(x->a);
127 u64 b = be64_to_cpu(x->b);
128 u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
129
130 r->b = cpu_to_be64((b >> 1) | (a << 63));
131 r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
132}
133
134static void gf128mul_x_bbe(be128 *r, const be128 *x)
135{
136 u64 a = be64_to_cpu(x->a);
137 u64 b = be64_to_cpu(x->b);
138 u64 _tt = gf128mul_table_bbe[a >> 63];
139
140 r->a = cpu_to_be64((a << 1) | (b >> 63));
141 r->b = cpu_to_be64((b << 1) ^ _tt);
142}
143
Rik Snelf19f5112007-09-19 20:23:13 +0800144void gf128mul_x_ble(be128 *r, const be128 *x)
145{
146 u64 a = le64_to_cpu(x->a);
147 u64 b = le64_to_cpu(x->b);
148 u64 _tt = gf128mul_table_bbe[b >> 63];
149
150 r->a = cpu_to_le64((a << 1) ^ _tt);
151 r->b = cpu_to_le64((b << 1) | (a >> 63));
152}
153EXPORT_SYMBOL(gf128mul_x_ble);
154
Rik Snelc494e072006-11-29 18:59:44 +1100155static void gf128mul_x8_lle(be128 *x)
156{
157 u64 a = be64_to_cpu(x->a);
158 u64 b = be64_to_cpu(x->b);
159 u64 _tt = gf128mul_table_lle[b & 0xff];
160
161 x->b = cpu_to_be64((b >> 8) | (a << 56));
162 x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
163}
164
165static void gf128mul_x8_bbe(be128 *x)
166{
167 u64 a = be64_to_cpu(x->a);
168 u64 b = be64_to_cpu(x->b);
169 u64 _tt = gf128mul_table_bbe[a >> 56];
170
171 x->a = cpu_to_be64((a << 8) | (b >> 56));
172 x->b = cpu_to_be64((b << 8) ^ _tt);
173}
174
175void gf128mul_lle(be128 *r, const be128 *b)
176{
177 be128 p[8];
178 int i;
179
180 p[0] = *r;
181 for (i = 0; i < 7; ++i)
182 gf128mul_x_lle(&p[i + 1], &p[i]);
183
Mathias Krause62542662011-07-08 17:21:21 +0800184 memset(r, 0, sizeof(*r));
Rik Snelc494e072006-11-29 18:59:44 +1100185 for (i = 0;;) {
186 u8 ch = ((u8 *)b)[15 - i];
187
188 if (ch & 0x80)
189 be128_xor(r, r, &p[0]);
190 if (ch & 0x40)
191 be128_xor(r, r, &p[1]);
192 if (ch & 0x20)
193 be128_xor(r, r, &p[2]);
194 if (ch & 0x10)
195 be128_xor(r, r, &p[3]);
196 if (ch & 0x08)
197 be128_xor(r, r, &p[4]);
198 if (ch & 0x04)
199 be128_xor(r, r, &p[5]);
200 if (ch & 0x02)
201 be128_xor(r, r, &p[6]);
202 if (ch & 0x01)
203 be128_xor(r, r, &p[7]);
204
205 if (++i >= 16)
206 break;
207
208 gf128mul_x8_lle(r);
209 }
210}
211EXPORT_SYMBOL(gf128mul_lle);
212
213void gf128mul_bbe(be128 *r, const be128 *b)
214{
215 be128 p[8];
216 int i;
217
218 p[0] = *r;
219 for (i = 0; i < 7; ++i)
220 gf128mul_x_bbe(&p[i + 1], &p[i]);
221
Mathias Krause62542662011-07-08 17:21:21 +0800222 memset(r, 0, sizeof(*r));
Rik Snelc494e072006-11-29 18:59:44 +1100223 for (i = 0;;) {
224 u8 ch = ((u8 *)b)[i];
225
226 if (ch & 0x80)
227 be128_xor(r, r, &p[7]);
228 if (ch & 0x40)
229 be128_xor(r, r, &p[6]);
230 if (ch & 0x20)
231 be128_xor(r, r, &p[5]);
232 if (ch & 0x10)
233 be128_xor(r, r, &p[4]);
234 if (ch & 0x08)
235 be128_xor(r, r, &p[3]);
236 if (ch & 0x04)
237 be128_xor(r, r, &p[2]);
238 if (ch & 0x02)
239 be128_xor(r, r, &p[1]);
240 if (ch & 0x01)
241 be128_xor(r, r, &p[0]);
242
243 if (++i >= 16)
244 break;
245
246 gf128mul_x8_bbe(r);
247 }
248}
249EXPORT_SYMBOL(gf128mul_bbe);
250
251/* This version uses 64k bytes of table space.
252 A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers63be5b52017-02-14 13:43:27 -0800253 value in GF(2^128). If we consider a GF(2^128) value in
Rik Snelc494e072006-11-29 18:59:44 +1100254 the buffer's lowest byte, we can construct a table of
255 the 256 16 byte values that result from the 256 values
256 of this byte. This requires 4096 bytes. But we also
257 need tables for each of the 16 higher bytes in the
258 buffer as well, which makes 64 kbytes in total.
259*/
260/* additional explanation
261 * t[0][BYTE] contains g*BYTE
262 * t[1][BYTE] contains g*x^8*BYTE
263 * ..
264 * t[15][BYTE] contains g*x^120*BYTE */
Rik Snelc494e072006-11-29 18:59:44 +1100265struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
266{
267 struct gf128mul_64k *t;
268 int i, j, k;
269
270 t = kzalloc(sizeof(*t), GFP_KERNEL);
271 if (!t)
272 goto out;
273
274 for (i = 0; i < 16; i++) {
275 t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
276 if (!t->t[i]) {
277 gf128mul_free_64k(t);
278 t = NULL;
279 goto out;
280 }
281 }
282
283 t->t[0]->t[1] = *g;
284 for (j = 1; j <= 64; j <<= 1)
285 gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
286
287 for (i = 0;;) {
288 for (j = 2; j < 256; j += j)
289 for (k = 1; k < j; ++k)
290 be128_xor(&t->t[i]->t[j + k],
291 &t->t[i]->t[j], &t->t[i]->t[k]);
292
293 if (++i >= 16)
294 break;
295
296 for (j = 128; j > 0; j >>= 1) {
297 t->t[i]->t[j] = t->t[i - 1]->t[j];
298 gf128mul_x8_bbe(&t->t[i]->t[j]);
299 }
300 }
301
302out:
303 return t;
304}
305EXPORT_SYMBOL(gf128mul_init_64k_bbe);
306
307void gf128mul_free_64k(struct gf128mul_64k *t)
308{
309 int i;
310
311 for (i = 0; i < 16; i++)
Alex Cope75aa0a72016-11-14 11:02:54 -0800312 kzfree(t->t[i]);
313 kzfree(t);
Rik Snelc494e072006-11-29 18:59:44 +1100314}
315EXPORT_SYMBOL(gf128mul_free_64k);
316
Rik Snelc494e072006-11-29 18:59:44 +1100317void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
318{
319 u8 *ap = (u8 *)a;
320 be128 r[1];
321 int i;
322
323 *r = t->t[0]->t[ap[15]];
324 for (i = 1; i < 16; ++i)
325 be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
326 *a = *r;
327}
328EXPORT_SYMBOL(gf128mul_64k_bbe);
329
330/* This version uses 4k bytes of table space.
331 A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers63be5b52017-02-14 13:43:27 -0800332 value in GF(2^128). If we consider a GF(2^128) value in a
Rik Snelc494e072006-11-29 18:59:44 +1100333 single byte, we can construct a table of the 256 16 byte
334 values that result from the 256 values of this byte.
335 This requires 4096 bytes. If we take the highest byte in
336 the buffer and use this table to get the result, we then
337 have to multiply by x^120 to get the final value. For the
338 next highest byte the result has to be multiplied by x^112
339 and so on. But we can do this by accumulating the result
340 in an accumulator starting with the result for the top
341 byte. We repeatedly multiply the accumulator value by
342 x^8 and then add in (i.e. xor) the 16 bytes of the next
343 lower byte in the buffer, stopping when we reach the
344 lowest byte. This requires a 4096 byte table.
345*/
346struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
347{
348 struct gf128mul_4k *t;
349 int j, k;
350
351 t = kzalloc(sizeof(*t), GFP_KERNEL);
352 if (!t)
353 goto out;
354
355 t->t[128] = *g;
356 for (j = 64; j > 0; j >>= 1)
357 gf128mul_x_lle(&t->t[j], &t->t[j+j]);
358
359 for (j = 2; j < 256; j += j)
360 for (k = 1; k < j; ++k)
361 be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
362
363out:
364 return t;
365}
366EXPORT_SYMBOL(gf128mul_init_4k_lle);
367
368struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
369{
370 struct gf128mul_4k *t;
371 int j, k;
372
373 t = kzalloc(sizeof(*t), GFP_KERNEL);
374 if (!t)
375 goto out;
376
377 t->t[1] = *g;
378 for (j = 1; j <= 64; j <<= 1)
379 gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
380
381 for (j = 2; j < 256; j += j)
382 for (k = 1; k < j; ++k)
383 be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
384
385out:
386 return t;
387}
388EXPORT_SYMBOL(gf128mul_init_4k_bbe);
389
390void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
391{
392 u8 *ap = (u8 *)a;
393 be128 r[1];
394 int i = 15;
395
396 *r = t->t[ap[15]];
397 while (i--) {
398 gf128mul_x8_lle(r);
399 be128_xor(r, r, &t->t[ap[i]]);
400 }
401 *a = *r;
402}
403EXPORT_SYMBOL(gf128mul_4k_lle);
404
405void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
406{
407 u8 *ap = (u8 *)a;
408 be128 r[1];
409 int i = 0;
410
411 *r = t->t[ap[0]];
412 while (++i < 16) {
413 gf128mul_x8_bbe(r);
414 be128_xor(r, r, &t->t[ap[i]]);
415 }
416 *a = *r;
417}
418EXPORT_SYMBOL(gf128mul_4k_bbe);
419
420MODULE_LICENSE("GPL");
421MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");