ANDROID: crypto: gf128mul - Add ble multiplication functions

Adding ble multiplication to GF128mul, and fixing up comments.

The ble multiplication functions multiply GF(2^128) elements in the
ble format. This format is preferable because the bits within each
byte map to polynomial coefficients in the natural order (lowest order
bit = coefficient of lowest degree polynomial term), and the bytes are
stored in little endian order which matches the endianness of most
modern CPUs.

These new functions will be used by the HEH algorithm.

Signed-off-by: Alex Cope <alexcope@google.com>
Bug: 32975945
Signed-off-by: Eric Biggers <ebiggers@google.com>
Change-Id: I39a58e8ee83e6f9b2e6bd51738f816dbfa2f3a47
This commit is contained in:
Alex Cope 2017-01-10 16:47:49 -08:00 committed by Eric Biggers
parent 3eaf06b785
commit ce2ace45d9
2 changed files with 117 additions and 27 deletions

View file

@ -44,7 +44,7 @@
---------------------------------------------------------------------------
Issue 31/01/2006
This file provides fast multiplication in GF(128) as required by several
This file provides fast multiplication in GF(2^128) as required by several
cryptographic authentication modes
*/
@ -130,9 +130,10 @@
static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
/* These functions multiply a field element by x, by x^4 and by x^8
* in the polynomial field representation. It uses 32-bit word operations
* to gain speed but compensates for machine endianess and hence works
/*
* The following functions multiply a field element by x or by x^8 in
* the polynomial field representation. They use 64-bit word operations
* to gain speed but compensate for machine endianness and hence work
* correctly on both styles of machine.
*/
@ -187,6 +188,16 @@ static void gf128mul_x8_bbe(be128 *x)
x->b = cpu_to_be64((b << 8) ^ _tt);
}
/*
 * Multiply the field element at x by x^8, in place, in the ble format.
 *
 * Both halves are read as little-endian 64-bit words: x->b supplies the
 * upper 64 bits of the 128-bit value and x->a the lower 64 bits.  The value
 * is shifted left by one byte, and the byte shifted out of the top selects
 * the gf128mul_table_be entry that is XORed into the low word.
 *
 * NOTE(review): the container type is named be128 while the words are
 * converted with the le64 helpers -- presumably intentional for the ble
 * layout; confirm against the be128 definition.
 */
static void gf128mul_x8_ble(be128 *x)
{
	const u64 hi = le64_to_cpu(x->b);
	const u64 lo = le64_to_cpu(x->a);
	const u64 carry = hi >> 56;	/* byte shifted out of the top */

	/* Both inputs were loaded above, so the store order does not matter. */
	x->a = cpu_to_le64((lo << 8) ^ gf128mul_table_be[carry]);
	x->b = cpu_to_le64((hi << 8) | (lo >> 56));
}
void gf128mul_lle(be128 *r, const be128 *b)
{
be128 p[8];
@ -263,9 +274,48 @@ void gf128mul_bbe(be128 *r, const be128 *b)
}
EXPORT_SYMBOL(gf128mul_bbe);
/*
 * Slow generic multiplication in the ble format: *r = *r * *b.
 *
 * p[k] is filled with the original *r multiplied by x^k for k = 0..7.
 * The bytes of *b are then consumed from byte 15 down to byte 0: for every
 * set bit k of the current byte, p[k] is XORed into the accumulator, and
 * between bytes the accumulator is multiplied by x^8.
 *
 * r and b must not point to the same element: *r is cleared before the
 * first byte of *b is read.
 */
void gf128mul_ble(be128 *r, const be128 *b)
{
	/* b is only read, so keep its byte view const-qualified. */
	const u8 *bytes = (const u8 *)b;
	be128 p[8];
	int i;

	p[0] = *r;
	for (i = 0; i < 7; ++i)
		gf128mul_x_ble(&p[i + 1], &p[i]);

	memset(r, 0, sizeof(*r));
	for (i = 0;;) {
		u8 ch = bytes[15 - i];

		if (ch & 0x80)
			be128_xor(r, r, &p[7]);
		if (ch & 0x40)
			be128_xor(r, r, &p[6]);
		if (ch & 0x20)
			be128_xor(r, r, &p[5]);
		if (ch & 0x10)
			be128_xor(r, r, &p[4]);
		if (ch & 0x08)
			be128_xor(r, r, &p[3]);
		if (ch & 0x04)
			be128_xor(r, r, &p[2]);
		if (ch & 0x02)
			be128_xor(r, r, &p[1]);
		if (ch & 0x01)
			be128_xor(r, r, &p[0]);

		/* No x^8 step after the last (lowest) byte. */
		if (++i >= 16)
			break;

		gf128mul_x8_ble(r);
	}
}
EXPORT_SYMBOL(gf128mul_ble);
/* This version uses 64k bytes of table space.
A 16 byte buffer has to be multiplied by a 16 byte key
value in GF(128). If we consider a GF(128) value in
value in GF(2^128). If we consider a GF(2^128) value in
the buffer's lowest byte, we can construct a table of
the 256 16 byte values that result from the 256 values
of this byte. This requires 4096 bytes. But we also
@ -399,7 +449,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe);
/* This version uses 4k bytes of table space.
A 16 byte buffer has to be multiplied by a 16 byte key
value in GF(128). If we consider a GF(128) value in a
value in GF(2^128). If we consider a GF(2^128) value in a
single byte, we can construct a table of the 256 16 byte
values that result from the 256 values of this byte.
This requires 4096 bytes. If we take the highest byte in
@ -457,6 +507,28 @@ out:
}
EXPORT_SYMBOL(gf128mul_init_4k_bbe);
/*
 * Allocate and populate a 4k lookup table for multiplication by *g in the
 * ble format.
 *
 * The table comes zeroed from kzalloc(), so t[0] stays zero.  t[1] is *g,
 * each power-of-two index 2j holds t[j] multiplied by x, and every other
 * index j + k (j a power of two, 0 < k < j) holds t[j] XOR t[k].
 *
 * Returns the new table, or NULL if the allocation fails.
 */
struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g)
{
	struct gf128mul_4k *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
	int base, off;

	if (!tbl)
		return NULL;

	tbl->t[1] = *g;

	/* Single-bit indices 2, 4, ..., 128: repeated multiplication by x. */
	for (base = 1; base < 128; base += base)
		gf128mul_x_ble(&tbl->t[base << 1], &tbl->t[base]);

	/* Multi-bit indices: leading bit's entry combined with the remainder. */
	for (base = 2; base < 256; base <<= 1) {
		for (off = 1; off < base; off++)
			be128_xor(&tbl->t[base + off], &tbl->t[base],
				  &tbl->t[off]);
	}

	return tbl;
}
EXPORT_SYMBOL(gf128mul_init_4k_ble);
void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
{
u8 *ap = (u8 *)a;
@ -487,5 +559,20 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
}
EXPORT_SYMBOL(gf128mul_4k_bbe);
/*
 * Multiply *a in place, in the ble format, using the lookup table t (as
 * produced by gf128mul_init_4k_ble()).
 *
 * The bytes of *a are consumed from byte 15 down to byte 0: the table entry
 * for byte 15 seeds the accumulator, and for each following byte the
 * accumulator is multiplied by x^8 before that byte's table entry is XORed
 * in.  The result is written back to *a only after all bytes were read.
 */
void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t)
{
	const u8 *bytes = (const u8 *)a;
	be128 acc = t->t[bytes[15]];
	int pos;

	for (pos = 14; pos >= 0; pos--) {
		gf128mul_x8_ble(&acc);
		be128_xor(&acc, &acc, &t->t[bytes[pos]]);
	}

	*a = acc;
}
EXPORT_SYMBOL(gf128mul_4k_ble);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");

View file

@ -43,7 +43,7 @@
---------------------------------------------------------------------------
Issue Date: 31/01/2006
An implementation of field multiplication in Galois Field GF(128)
An implementation of field multiplication in Galois Field GF(2^128)
*/
#ifndef _CRYPTO_GF128MUL_H
@ -65,7 +65,7 @@
* are left and the lsb's are right. char b[16] is an array and b[0] is
* the first octet.
*
* 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
* 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
* b[0] b[1] b[2] b[3] b[13] b[14] b[15]
*
* Every bit is a coefficient of some power of X. We can store the bits
@ -99,21 +99,21 @@
*
* bbe on a little endian machine u32 x[4]:
*
* MS x[0] LS MS x[1] LS
* MS x[0] LS MS x[1] LS
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
* 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88
*
* MS x[2] LS MS x[3] LS
* MS x[2] LS MS x[3] LS
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
* 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24
*
* ble on a little endian machine
*
* MS x[0] LS MS x[1] LS
* MS x[0] LS MS x[1] LS
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
* 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32
*
* MS x[2] LS MS x[3] LS
* MS x[2] LS MS x[3] LS
* ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
* 95...88 87...80 79...72 71...64 127.120 119.112 111.104 103..96
*
@ -127,7 +127,7 @@
* machines this will automatically be aligned to wordsize, and on a 64-bit
* machine also.
*/
/* Multiply a GF128 field element by x. Field elements are held in arrays
/* Multiply a GF(2^128) field element by x. Field elements are held in arrays
of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower
indexed bits placed in the more numerically significant bit positions
within bytes.
@ -135,45 +135,47 @@
On little endian machines the bit indexes translate into the bit
positions within four 32-bit words in the following way
MS x[0] LS MS x[1] LS
MS x[0] LS MS x[1] LS
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39
MS x[2] LS MS x[3] LS
MS x[2] LS MS x[3] LS
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103
On big endian machines the bit indexes translate into the bit
positions within four 32-bit words in the following way
MS x[0] LS MS x[1] LS
MS x[0] LS MS x[1] LS
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63
MS x[2] LS MS x[3] LS
MS x[2] LS MS x[3] LS
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127
*/
/* A slow generic version of gf_mul, implemented for lle and bbe
* It multiplies a and b and puts the result in a */
/* A slow generic version of gf_mul, implemented for lle, bbe, and ble.
* It multiplies a and b and puts the result in a
*/
void gf128mul_lle(be128 *a, const be128 *b);
void gf128mul_bbe(be128 *a, const be128 *b);
void gf128mul_ble(be128 *a, const be128 *b);
/* multiply by x in ble format, needed by XTS */
/* multiply by x in ble format, needed by XTS and HEH */
void gf128mul_x_ble(be128 *a, const be128 *b);
/* 4k table optimization */
/*
 * Lookup table with one be128 entry per possible byte value (0..255).
 * gf128mul_init_4k_ble() stores the multiplier in t[1] and leaves t[0]
 * zeroed; gf128mul_4k_ble() indexes the table with the bytes of the
 * operand being multiplied.
 */
struct gf128mul_4k {
be128 t[256];
};
struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g);
void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t);
void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t);
void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t);
static inline void gf128mul_free_4k(struct gf128mul_4k *t)
{
@ -181,16 +183,17 @@ static inline void gf128mul_free_4k(struct gf128mul_4k *t)
}
/* 64k table optimization, implemented for lle and bbe */
/* 64k table optimization, implemented for lle and bbe */
struct gf128mul_64k {
/*
 * Sixteen pointers to 4k tables.
 * NOTE(review): presumably one table per byte position of the 16-byte
 * operand -- confirm against gf128mul_init_64k_lle()/_bbe().
 */
struct gf128mul_4k *t[16];
};
/* first initialize with the constant factor with which you
* want to multiply and then call gf128_64k_lle with the other
* factor in the first argument, the table in the second and a
* scratch register in the third. Afterwards *a = *r. */
/* First initialize with the constant factor with which you
* want to multiply and then call gf128mul_64k_bbe with the other
* factor in the first argument, and the table in the second.
* Afterwards, the result is stored in *a.
*/
struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g);
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
void gf128mul_free_64k(struct gf128mul_64k *t);