* cipher/chacha20-ppc.c (vec_add_ctr_u64, ADD_U64): New.
(_gcry_chacha20_ppc8_blocks1, _gcry_chacha20_ppc8_blocks4) (_gcry_chacha20_poly1305_ppc8_blocks4): Use ADD_U64 when incrementing counter. -- Patch fixes 32-bit overflow for PowerPC ChaCha20 implementation. In typical use case, overflow happens after 256 GiB bytes of output. Typical use case here means use of 96-bit or 64-bit IV which causes lower 32-bits of counter to start from zero. Signed-off-by: Jussi Kivilinna <[hidden email]> --- cipher/chacha20-ppc.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/cipher/chacha20-ppc.c b/cipher/chacha20-ppc.c index 985f2fcd..4a21b837 100644 --- a/cipher/chacha20-ppc.c +++ b/cipher/chacha20-ppc.c @@ -88,6 +88,24 @@ vec_store_le(vector4x_u32 vec, unsigned long offset, unsigned char *ptr) } +static ASM_FUNC_ATTR_INLINE vector4x_u32 +vec_add_ctr_u64(vector4x_u32 v, vector4x_u32 a) +{ +#ifdef WORDS_BIGENDIAN + static const vector16x_u8 swap32 = + { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }; + vector2x_u64 vec, add, sum; + + vec = (vector2x_u64)vec_perm((vector16x_u8)v, (vector16x_u8)v, swap32); + add = (vector2x_u64)vec_perm((vector16x_u8)a, (vector16x_u8)a, swap32); + sum = vec + add; + return (vector4x_u32)vec_perm((vector16x_u8)sum, (vector16x_u8)sum, swap32); +#else + return (vector4x_u32)((vector2x_u64)(v) + (vector2x_u64)(a)); +#endif +} + + /********************************************************************** 2-way && 1-way chacha20 **********************************************************************/ @@ -115,6 +133,9 @@ vec_store_le(vector4x_u32 vec, unsigned long offset, unsigned char *ptr) ROTATE(x1, rotate_7); \ WORD_ROL(x1, rol_x1); +#define ADD_U64(v,a) \ + (v = vec_add_ctr_u64(v, a)) + unsigned int ASM_FUNC_ATTR _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks) @@ -152,7 +173,7 @@ _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, v5 = state1; v6 = state2; v7 = state3; - v7 += counter_1; + 
ADD_U64(v7, counter_1); for (i = 20; i > 0; i -= 2) { @@ -166,12 +187,12 @@ _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, v1 += state1; v2 += state2; v3 += state3; - state3 += counter_1; /* update counter */ + ADD_U64(state3, counter_1); /* update counter */ v4 += state0; v5 += state1; v6 += state2; v7 += state3; - state3 += counter_1; /* update counter */ + ADD_U64(state3, counter_1); /* update counter */ v0 ^= vec_load_le(0 * 16, src); v1 ^= vec_load_le(1 * 16, src); @@ -214,7 +235,7 @@ _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, v1 += state1; v2 += state2; v3 += state3; - state3 += counter_1; /* update counter */ + ADD_U64(state3, counter_1); /* update counter */ v0 ^= vec_load_le(0 * 16, src); v1 ^= vec_load_le(1 * 16, src); @@ -339,7 +360,7 @@ _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src, v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123); v14 += vec_splat(state3, 2); v15 += vec_splat(state3, 3); - state3 += counter_4; /* update counter */ + ADD_U64(state3, counter_4); /* update counter */ transpose_4x4(v0, v1, v2, v3); transpose_4x4(v4, v5, v6, v7); @@ -554,7 +575,7 @@ _gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src, v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123); v14 += vec_splat(state3, 2); v15 += vec_splat(state3, 3); - state3 += counter_4; /* update counter */ + ADD_U64(state3, counter_4); /* update counter */ transpose_4x4(v0, v1, v2, v3); transpose_4x4(v4, v5, v6, v7); -- 2.27.0 _______________________________________________ Gcrypt-devel mailing list [hidden email] http://lists.gnupg.org/mailman/listinfo/gcrypt-devel |
* tests/basic.c (check_one_cipher_ctr_reset)
(check_one_cipher_ctr_overflow): New. (check_one_cipher): Add counter overflow tests for ChaCha20 and CTR mode. -- Patch adds counter overflow tests to check for correct counter handling in bulk processing implementations. Signed-off-by: Jussi Kivilinna <[hidden email]> --- tests/basic.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/tests/basic.c b/tests/basic.c index 1d12c4a2..4beeeed9 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -9415,6 +9415,210 @@ err_out_free: +static int +check_one_cipher_ctr_reset (gcry_cipher_hd_t hd, int algo, int mode, + u32 ctr_high_bits, int be_ctr, + int pass) +{ + unsigned char iv[16] = { 0 }; + unsigned char swap; + unsigned int ivlen; + u32 ctr_low_bits; + int err; + int i; + + /* This should be largest parallel block processing count in any + * implementation negated. Currently for CTR this is 32 and, for + * ChaCha20, count is 8. */ + ctr_low_bits = (mode == GCRY_CIPHER_MODE_CTR) ? -32 : -8; + + gcry_cipher_reset (hd); + + if (mode == GCRY_CIPHER_MODE_CTR) + ivlen = get_algo_mode_blklen(algo, GCRY_CIPHER_MODE_ECB); + else + ivlen = 16; + + /* Little-endian fill. */ + for (i = 0; i < 4; i++) + iv[i + 0] = (ctr_low_bits >> (i * 8)) & 0xff; + for (i = 0; i < 4; i++) + iv[i + 4] = (ctr_high_bits >> (i * 8)) & 0xff; + + if (be_ctr) + { + /* Swap to big-endian. 
*/ + for (i = 0; i < ivlen / 2; i++) + { + swap = iv[i]; + iv[i] = iv[ivlen - (i + 1)]; + iv[ivlen - (i + 1)] = swap; + } + } + + clutter_vector_registers(); + if (mode == GCRY_CIPHER_MODE_CTR) + err = gcry_cipher_setctr (hd, iv, ivlen); + else + err = gcry_cipher_setiv (hd, iv, ivlen); + + if (err) + { + fail ("pass %d, algo %d, mode %d, gcry_cipher_setiv failed: %s\n", + pass, algo, mode, gpg_strerror (err)); + gcry_cipher_close (hd); + return -1; + } + + return 0; +} + +static int +check_one_cipher_ctr_overflow (int algo, int mode, int flags, + const char *key, size_t nkey, + const unsigned char *plain, size_t nplain, + unsigned long ctr_high_bits, int be_ctr, + int pass) +{ + gcry_cipher_hd_t hd; + unsigned char *out; + unsigned char *enc_result; + int keylen; + gcry_error_t err = 0; + unsigned int firstlen; + unsigned int leftlen; + unsigned int blklen; + unsigned int pos; + unsigned int i; + + out = malloc (nplain); + enc_result = malloc (nplain); + if (!out || !enc_result) + { + fail ("pass %d, algo %d, mode %d, malloc failed\n", + pass, algo, mode); + goto err_out_free; + } + + assert (nkey == 64); + assert (nplain > 0); + assert ((nplain % 16) == 0); + + keylen = gcry_cipher_get_algo_keylen (algo); + if (!keylen) + { + fail ("pass %d, algo %d, mode %d, gcry_cipher_get_algo_keylen failed\n", + pass, algo, mode); + goto err_out_free; + } + + if (keylen < 40 / 8 || keylen > 32) + { + fail ("pass %d, algo %d, mode %d, keylength problem (%d)\n", + pass, algo, mode, keylen); + goto err_out_free; + } + + err = gcry_cipher_open (&hd, algo, mode, flags); + if (err) + { + fail ("pass %d, algo %d, mode %d, gcry_cipher_open failed: %s\n", + pass, algo, mode, gpg_strerror (err)); + goto err_out_free; + } + + clutter_vector_registers(); + err = gcry_cipher_setkey (hd, key, keylen); + if (err) + { + fail ("pass %d, algo %d, mode %d, gcry_cipher_setkey failed: %s\n", + pass, algo, mode, gpg_strerror (err)); + gcry_cipher_close (hd); + goto err_out_free; + } + + if 
(check_one_cipher_ctr_reset (hd, algo, mode, ctr_high_bits, be_ctr, + pass) < 0) + goto err_out_free; + + /* Non-bulk processing. */ + for (i = 0; i < nplain; i += 16) + { + clutter_vector_registers(); + err = gcry_cipher_encrypt (hd, out + i, 16, plain + i, 16); + if (err) + { + fail ("pass %d, algo %d, mode %d, gcry_cipher_encrypt failed: %s\n", + pass, algo, mode, gpg_strerror (err)); + gcry_cipher_close (hd); + goto err_out_free; + } + } + + memcpy (enc_result, out, nplain); + + /* Test with different bulk processing sizes. */ + for (blklen = 2 * 16; blklen <= 32 * 16; blklen *= 2) + { + /* Move bulk processing start offset, test at different spots to + * test bulk counter calculation thoroughly. */ + for (firstlen = 16; firstlen < 8 * 64; firstlen += 16) + { + if (check_one_cipher_ctr_reset (hd, algo, mode, ctr_high_bits, be_ctr, + pass) < 0) + goto err_out_free; + + clutter_vector_registers(); + err = gcry_cipher_encrypt (hd, out, firstlen, plain, firstlen); + if (err) + { + fail ("pass %d, algo %d, mode %d, gcry_cipher_encrypt " + "failed: %s\n", pass, algo, mode, gpg_strerror (err)); + gcry_cipher_close (hd); + goto err_out_free; + } + + leftlen = nplain - firstlen; + pos = firstlen; + while (leftlen) + { + unsigned int currlen = leftlen > blklen ? 
blklen : leftlen; + + clutter_vector_registers(); + err = gcry_cipher_encrypt (hd, out + pos, currlen, plain + pos, + currlen); + if (err) + { + fail ("pass %d, algo %d, mode %d, block len %d, first len %d," + "gcry_cipher_encrypt failed: %s\n", pass, algo, mode, + blklen, firstlen, gpg_strerror (err)); + gcry_cipher_close (hd); + goto err_out_free; + } + + pos += currlen; + leftlen -= currlen; + } + + if (memcmp (enc_result, out, nplain)) + fail ("pass %d, algo %d, mode %d, block len %d, first len %d, " + "encrypt mismatch\n", pass, algo, mode, blklen, firstlen); + } + } + + gcry_cipher_close (hd); + + free (enc_result); + free (out); + return 0; + +err_out_free: + free (enc_result); + free (out); + return -1; +} + + static void check_one_cipher (int algo, int mode, int flags) { @@ -9491,6 +9695,34 @@ check_one_cipher (int algo, int mode, int flags) 50)) goto out; + /* Pass 6: Counter overflow tests for ChaCha20 and CTR mode. */ + if (mode == GCRY_CIPHER_MODE_STREAM && algo == GCRY_CIPHER_CHACHA20) + { + /* 32bit overflow test (little-endian counter) */ + if (check_one_cipher_ctr_overflow (algo, mode, flags, key, 64, plain, + medium_buffer_size, 0UL, + 0, 60)) + goto out; + /* 64bit overflow test (little-endian counter) */ + if (check_one_cipher_ctr_overflow (algo, mode, flags, key, 64, plain, + medium_buffer_size, 0xffffffffUL, + 0, 61)) + goto out; + } + else if (mode == GCRY_CIPHER_MODE_CTR) + { + /* 32bit overflow test (big-endian counter) */ + if (check_one_cipher_ctr_overflow (algo, mode, flags, key, 64, plain, + medium_buffer_size, 0UL, + 1, 62)) + goto out; + /* 64bit overflow test (big-endian counter) */ + if (check_one_cipher_ctr_overflow (algo, mode, flags, key, 64, plain, + medium_buffer_size, 0xffffffffUL, + 1, 63)) + goto out; + } + out: free (plain); } -- 2.27.0 _______________________________________________ Gcrypt-devel mailing list [hidden email] http://lists.gnupg.org/mailman/listinfo/gcrypt-devel |
In reply to this post by Jussi Kivilinna-2
* src/g10lib.h (NOINLINE_FUNC): New attribute macro.
* src/misc.c (__gcry_burn_stack): Add NOINLINE_FUNC attribute. -- LTO can cause inline of __gcry_burn_stack and result tail-call to _gcry_fast_wipememory and defeat tail-call prevention in _gcry_burn_stack macro. Mark __gcry_burn_stack with 'noinline' attribute to prevent unwanted inlining of this function in LTO builds. Signed-off-by: Jussi Kivilinna <[hidden email]> --- src/g10lib.h | 6 ++++++ src/misc.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/g10lib.h b/src/g10lib.h index c85e6649..ffd71018 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -75,6 +75,12 @@ #define GCC_ATTR_UNUSED #endif +#if __GNUC__ > 3 +#define NOINLINE_FUNC __attribute__((noinline)) +#else +#define NOINLINE_FUNC +#endif + #if __GNUC__ >= 3 #define LIKELY(expr) __builtin_expect( !!(expr), 1 ) #define UNLIKELY(expr) __builtin_expect( !!(expr), 0 ) diff --git a/src/misc.c b/src/misc.c index 283e3a72..4db2d9a4 100644 --- a/src/misc.c +++ b/src/misc.c @@ -545,7 +545,7 @@ _gcry_fast_wipememory2 (void *ptr, int set, size_t len) } -void +void NOINLINE_FUNC __gcry_burn_stack (unsigned int bytes) { #ifdef HAVE_VLA -- 2.27.0 _______________________________________________ Gcrypt-devel mailing list [hidden email] http://lists.gnupg.org/mailman/listinfo/gcrypt-devel |
Free forum by Nabble | Edit this page |