* cipher/rijndael-aesni.c (aesni_ocb_checksum, aesni_ocb_enc)
(aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Split assembly blocks with more than 4 memory operands into smaller blocks. * cipher/sha512-ssse3-i386.c (W2): Split big assembly block into three smaller blocks. -- On i386, with -O0, assembly blocks with many memory operands cause compiler errors such as: rijndael-aesni.c:2815:7: error: 'asm' operand has impossible constraints The fix is to split the assembly blocks so that the number of operands per block is reduced. GnuPG-bug-id: 5257 Signed-off-by: Jussi Kivilinna <[hidden email]> --- cipher/rijndael-aesni.c | 137 +++++++++++++++++++++---------------- cipher/sha512-ssse3-i386.c | 18 +++-- 2 files changed, 90 insertions(+), 65 deletions(-) diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index 747ef662..95ec4c2b 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -2271,16 +2271,18 @@ aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext, "vpxor %[ptr1], %%ymm1, %%ymm1\n\t" "vpxor %[ptr2], %%ymm2, %%ymm2\n\t" "vpxor %[ptr3], %%ymm3, %%ymm3\n\t" - "vpxor %[ptr4], %%ymm0, %%ymm0\n\t" - "vpxor %[ptr5], %%ymm4, %%ymm4\n\t" - "vpxor %[ptr6], %%ymm5, %%ymm5\n\t" - "vpxor %[ptr7], %%ymm7, %%ymm7\n\t" : : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), - [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), - [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)) + : "memory" ); + asm volatile ("vpxor %[ptr4], %%ymm0, %%ymm0\n\t" + "vpxor %[ptr5], %%ymm4, %%ymm4\n\t" + "vpxor %[ptr6], %%ymm5, %%ymm5\n\t" + "vpxor %[ptr7], %%ymm7, %%ymm7\n\t" + : + : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) @@ -2325,16 +2327,18 @@ aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext, "vxorpd %[ptr1], %%ymm1, %%ymm1\n\t" "vxorpd
%[ptr2], %%ymm2, %%ymm2\n\t" "vxorpd %[ptr3], %%ymm3, %%ymm3\n\t" - "vxorpd %[ptr4], %%ymm0, %%ymm0\n\t" - "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t" - "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t" - "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t" : : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)), [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)), [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)), - [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)), - [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), + [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)) + : "memory" ); + asm volatile ("vxorpd %[ptr4], %%ymm0, %%ymm0\n\t" + "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t" + "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t" + "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t" + : + : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)), [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)), [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)), [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2)) @@ -2718,28 +2722,35 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, "aesenclast %[tmpbuf0],%%xmm8\n\t" "aesenclast %[tmpbuf1],%%xmm9\n\t" "aesenclast %[tmpbuf2],%%xmm10\n\t" - "aesenclast %%xmm5, %%xmm11\n\t" + : + : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), + [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), + [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)), + [lxfkey] "m" (*lxf_key) + : "memory" ); + asm volatile ("aesenclast %%xmm5, %%xmm11\n\t" "pxor %[lxfkey], %%xmm11\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" - "movdqu %%xmm3, %[outbuf2]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [lxfkey] "m" (*lxf_key) + : "memory" ); + asm volatile ("movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "movdqu %%xmm8, %[outbuf4]\n\t" - "movdqu %%xmm9, %[outbuf5]\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), + [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), + [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) + : + : "memory" ); + asm volatile ("movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" 
"movdqu %%xmm11, %[outbuf7]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), - [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), - [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), - [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), - [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), - [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)), + : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)), [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)), [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE)) - : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), - [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), - [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)), - [lxfkey] "m" (*lxf_key) + : : "memory" ); outbuf += 8*BLOCKSIZE; @@ -2816,17 +2827,18 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, "movdqu %%xmm1, %[outbuf0]\n\t" "pxor %[tmpbuf1],%%xmm2\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" - "pxor %[tmpbuf2],%%xmm3\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), + [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("pxor %[tmpbuf2],%%xmm3\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), - [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), - [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) - : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), - [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), - [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) + : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) : "memory" ); outbuf += 4*BLOCKSIZE; @@ -3199,28 +3211,34 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, "aesdeclast %[tmpbuf0],%%xmm8\n\t" "aesdeclast %[tmpbuf1],%%xmm9\n\t" "aesdeclast %[tmpbuf2],%%xmm10\n\t" - "aesdeclast %%xmm5, %%xmm11\n\t" + : + : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), + [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), + [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) + : 
"memory" ); + asm volatile ("aesdeclast %%xmm5, %%xmm11\n\t" "pxor %[lxfkey], %%xmm11\n\t" "movdqu %%xmm1, %[outbuf0]\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" - "movdqu %%xmm3, %[outbuf2]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [lxfkey] "m" (*lxf_key) + : "memory" ); + asm volatile ("movdqu %%xmm3, %[outbuf2]\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" "movdqu %%xmm8, %[outbuf4]\n\t" - "movdqu %%xmm9, %[outbuf5]\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), + [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), + [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)) + : + : "memory" ); + asm volatile ("movdqu %%xmm9, %[outbuf5]\n\t" "movdqu %%xmm10, %[outbuf6]\n\t" "movdqu %%xmm11, %[outbuf7]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), - [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), - [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), - [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)), - [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)), - [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)), + : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)), [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)), [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE)) - : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), - [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), - [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)), - [lxfkey] "m" (*lxf_key) + : : "memory" ); outbuf += 8*BLOCKSIZE; @@ -3292,17 +3310,18 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, "movdqu %%xmm1, %[outbuf0]\n\t" "pxor %[tmpbuf1],%%xmm2\n\t" "movdqu %%xmm2, %[outbuf1]\n\t" - "pxor %[tmpbuf2],%%xmm3\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), + [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("pxor %[tmpbuf2],%%xmm3\n\t" "movdqu %%xmm3, %[outbuf2]\n\t" "pxor %%xmm5, %%xmm4\n\t" "movdqu %%xmm4, %[outbuf3]\n\t" - : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)), - [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)), - 
[outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)), [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) - : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)), - [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)), - [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) + : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)) : "memory" ); outbuf += 4*BLOCKSIZE; @@ -3461,16 +3480,18 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, "movdqu %[abuf1], %%xmm2\n\t" "movdqu %[abuf2], %%xmm3\n\t" "movdqu %[abuf3], %%xmm4\n\t" - "movdqu %[abuf4], %%xmm8\n\t" - "movdqu %[abuf5], %%xmm9\n\t" - "movdqu %[abuf6], %%xmm10\n\t" - "movdqu %[abuf7], %%xmm11\n\t" : : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)), [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)), [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)), - [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)), - [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)), + [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[abuf4], %%xmm8\n\t" + "movdqu %[abuf5], %%xmm9\n\t" + "movdqu %[abuf6], %%xmm10\n\t" + "movdqu %[abuf7], %%xmm11\n\t" + : + : [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)), [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)), [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)), [abuf7] "m" (*(abuf + 7 * BLOCKSIZE)) diff --git a/cipher/sha512-ssse3-i386.c b/cipher/sha512-ssse3-i386.c index 4b12cee4..0fc98d8e 100644 --- a/cipher/sha512-ssse3-i386.c +++ b/cipher/sha512-ssse3-i386.c @@ -228,7 +228,11 @@ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) = asm volatile ("movdqu %[w_t_m_2], %%xmm2;\n\t" \ "movdqa %%xmm2, %%xmm0;\n\t" \ "movdqu %[w_t_m_15], %%xmm5;\n\t" \ - "movdqa %%xmm5, %%xmm3;\n\t" \ + : \ + : [w_t_m_2] "m" (w[(i)-2]), \ + [w_t_m_15] "m" (w[(i)-15]) \ + : "memory" ); \ + asm volatile ("movdqa %%xmm5, %%xmm3;\n\t" \ "psrlq $(61-19), %%xmm0;\n\t" \ "psrlq $(8-7), %%xmm3;\n\t" \ "pxor %%xmm2, %%xmm0;\n\t" \ @@ -251,17 +255,17 @@ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) = "movdqu %[w_t_m_16], %%xmm2;\n\t" 
\ "pxor %%xmm4, %%xmm3;\n\t" \ "movdqu %[w_t_m_7], %%xmm1;\n\t" \ - "paddq %%xmm3, %%xmm0;\n\t" \ + : \ + : [w_t_m_7] "m" (w[(i)-7]), \ + [w_t_m_16] "m" (w[(i)-16]) \ + : "memory" ); \ + asm volatile ("paddq %%xmm3, %%xmm0;\n\t" \ "paddq %%xmm2, %%xmm0;\n\t" \ "paddq %%xmm1, %%xmm0;\n\t" \ "movdqu %%xmm0, %[w_t_m_0];\n\t" \ "paddq %[k], %%xmm0;\n\t" \ : [w_t_m_0] "=m" (w[(i)-0]) \ - : [k] "m" (K[i]), \ - [w_t_m_2] "m" (w[(i)-2]), \ - [w_t_m_7] "m" (w[(i)-7]), \ - [w_t_m_15] "m" (w[(i)-15]), \ - [w_t_m_16] "m" (w[(i)-16]) \ + : [k] "m" (K[i]) \ : "memory" ) unsigned int ASM_FUNC_ATTR -- 2.27.0 _______________________________________________ Gcrypt-devel mailing list [hidden email] http://lists.gnupg.org/mailman/listinfo/gcrypt-devel |
Free forum by Nabble | Edit this page |