[PATCH] Split inline assembly blocks with many memory operands

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] Split inline assembly blocks with many memory operands

Jussi Kivilinna-2
* cipher/rijndael-aesni.c (aesni_ocb_checksum, aesni_ocb_enc)
(aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Split assembly blocks
with more than 4 memory operands into smaller blocks.
* cipher/sha512-ssse3-i386.c (W2): Split big assembly block into
three smaller blocks.
--

On i386, with -O0, assembly blocks with many memory operands cause
a compiler error such as:
 rijndael-aesni.c:2815:7: error: 'asm' operand has impossible constraints

The fix is to split the assembly blocks so that the number of operands
per block is reduced.

GnuPG-bug-id: 5257
Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/rijndael-aesni.c    | 137 +++++++++++++++++++++----------------
 cipher/sha512-ssse3-i386.c |  18 +++--
 2 files changed, 90 insertions(+), 65 deletions(-)

diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 747ef662..95ec4c2b 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -2271,16 +2271,18 @@ aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
  "vpxor %[ptr1], %%ymm1, %%ymm1\n\t"
  "vpxor %[ptr2], %%ymm2, %%ymm2\n\t"
  "vpxor %[ptr3], %%ymm3, %%ymm3\n\t"
- "vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
- "vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
- "vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
- "vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
  :
  : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
   [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
   [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
-  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)),
-  [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+ : "memory" );
+  asm volatile ("vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
+ "vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
+ "vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
+ "vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
+ :
+ : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
   [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
   [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
   [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
@@ -2325,16 +2327,18 @@ aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
  "vxorpd %[ptr1], %%ymm1, %%ymm1\n\t"
  "vxorpd %[ptr2], %%ymm2, %%ymm2\n\t"
  "vxorpd %[ptr3], %%ymm3, %%ymm3\n\t"
- "vxorpd %[ptr4], %%ymm0, %%ymm0\n\t"
- "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t"
- "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t"
- "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t"
  :
  : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
   [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
   [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
-  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)),
-  [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+ : "memory" );
+  asm volatile ("vxorpd %[ptr4], %%ymm0, %%ymm0\n\t"
+ "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t"
+ "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t"
+ "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t"
+ :
+ : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
   [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
   [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
   [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
@@ -2718,28 +2722,35 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
  "aesenclast %[tmpbuf0],%%xmm8\n\t"
  "aesenclast %[tmpbuf1],%%xmm9\n\t"
  "aesenclast %[tmpbuf2],%%xmm10\n\t"
- "aesenclast %%xmm5,    %%xmm11\n\t"
+ :
+ : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
+  [lxfkey] "m" (*lxf_key)
+ : "memory" );
+  asm volatile ("aesenclast %%xmm5,    %%xmm11\n\t"
  "pxor   %[lxfkey], %%xmm11\n\t"
  "movdqu %%xmm1,    %[outbuf0]\n\t"
  "movdqu %%xmm2,    %[outbuf1]\n\t"
- "movdqu %%xmm3,    %[outbuf2]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [lxfkey] "m" (*lxf_key)
+ : "memory" );
+  asm volatile ("movdqu %%xmm3,    %[outbuf2]\n\t"
  "movdqu %%xmm4,    %[outbuf3]\n\t"
  "movdqu %%xmm8,    %[outbuf4]\n\t"
- "movdqu %%xmm9,    %[outbuf5]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+ :
+ : "memory" );
+  asm volatile ("movdqu %%xmm9,    %[outbuf5]\n\t"
  "movdqu %%xmm10,   %[outbuf6]\n\t"
  "movdqu %%xmm11,   %[outbuf7]\n\t"
- : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-  [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
-  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
-  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)),
-  [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+ : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
   [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
   [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
- : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
-  [lxfkey] "m" (*lxf_key)
+ :
  : "memory" );
 
   outbuf += 8*BLOCKSIZE;
@@ -2816,17 +2827,18 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
     "movdqu %%xmm1,    %[outbuf0]\n\t"
     "pxor   %[tmpbuf1],%%xmm2\n\t"
     "movdqu %%xmm2,    %[outbuf1]\n\t"
-    "pxor   %[tmpbuf2],%%xmm3\n\t"
+    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+    : "memory" );
+      asm volatile ("pxor   %[tmpbuf2],%%xmm3\n\t"
     "movdqu %%xmm3,    %[outbuf2]\n\t"
     "pxor   %%xmm5,    %%xmm4\n\t"
     "movdqu %%xmm4,    %[outbuf3]\n\t"
-    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-      [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
       [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-      [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+    : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
     : "memory" );
 
       outbuf += 4*BLOCKSIZE;
@@ -3199,28 +3211,34 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
  "aesdeclast %[tmpbuf0],%%xmm8\n\t"
  "aesdeclast %[tmpbuf1],%%xmm9\n\t"
  "aesdeclast %[tmpbuf2],%%xmm10\n\t"
- "aesdeclast %%xmm5,    %%xmm11\n\t"
+ :
+ : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+ : "memory" );
+  asm volatile ("aesdeclast %%xmm5,    %%xmm11\n\t"
  "pxor   %[lxfkey], %%xmm11\n\t"
  "movdqu %%xmm1,    %[outbuf0]\n\t"
  "movdqu %%xmm2,    %[outbuf1]\n\t"
- "movdqu %%xmm3,    %[outbuf2]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [lxfkey] "m" (*lxf_key)
+ : "memory" );
+  asm volatile ("movdqu %%xmm3,    %[outbuf2]\n\t"
  "movdqu %%xmm4,    %[outbuf3]\n\t"
  "movdqu %%xmm8,    %[outbuf4]\n\t"
- "movdqu %%xmm9,    %[outbuf5]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+ :
+ : "memory" );
+  asm volatile ("movdqu %%xmm9,    %[outbuf5]\n\t"
  "movdqu %%xmm10,   %[outbuf6]\n\t"
  "movdqu %%xmm11,   %[outbuf7]\n\t"
- : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-  [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
-  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
-  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)),
-  [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+ : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
   [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
   [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
- : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
-  [lxfkey] "m" (*lxf_key)
+ :
  : "memory" );
 
   outbuf += 8*BLOCKSIZE;
@@ -3292,17 +3310,18 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
     "movdqu %%xmm1,    %[outbuf0]\n\t"
     "pxor   %[tmpbuf1],%%xmm2\n\t"
     "movdqu %%xmm2,    %[outbuf1]\n\t"
-    "pxor   %[tmpbuf2],%%xmm3\n\t"
+    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+    : "memory" );
+      asm volatile ("pxor   %[tmpbuf2],%%xmm3\n\t"
     "movdqu %%xmm3,    %[outbuf2]\n\t"
     "pxor   %%xmm5,    %%xmm4\n\t"
     "movdqu %%xmm4,    %[outbuf3]\n\t"
-    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-      [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
       [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-      [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+    : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
     : "memory" );
 
       outbuf += 4*BLOCKSIZE;
@@ -3461,16 +3480,18 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
  "movdqu %[abuf1],  %%xmm2\n\t"
  "movdqu %[abuf2],  %%xmm3\n\t"
  "movdqu %[abuf3],  %%xmm4\n\t"
- "movdqu %[abuf4],  %%xmm8\n\t"
- "movdqu %[abuf5],  %%xmm9\n\t"
- "movdqu %[abuf6],  %%xmm10\n\t"
- "movdqu %[abuf7],  %%xmm11\n\t"
  :
  : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)),
   [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)),
   [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)),
-  [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)),
-  [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)),
+  [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+ : "memory" );
+  asm volatile ("movdqu %[abuf4],  %%xmm8\n\t"
+ "movdqu %[abuf5],  %%xmm9\n\t"
+ "movdqu %[abuf6],  %%xmm10\n\t"
+ "movdqu %[abuf7],  %%xmm11\n\t"
+ :
+ : [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)),
   [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)),
   [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)),
   [abuf7] "m" (*(abuf + 7 * BLOCKSIZE))
diff --git a/cipher/sha512-ssse3-i386.c b/cipher/sha512-ssse3-i386.c
index 4b12cee4..0fc98d8e 100644
--- a/cipher/sha512-ssse3-i386.c
+++ b/cipher/sha512-ssse3-i386.c
@@ -228,7 +228,11 @@ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
  asm volatile ("movdqu %[w_t_m_2], %%xmm2;\n\t" \
       "movdqa %%xmm2, %%xmm0;\n\t" \
       "movdqu %[w_t_m_15], %%xmm5;\n\t" \
-      "movdqa %%xmm5, %%xmm3;\n\t" \
+      : \
+      : [w_t_m_2] "m" (w[(i)-2]), \
+        [w_t_m_15] "m" (w[(i)-15]) \
+      : "memory" ); \
+ asm volatile ("movdqa %%xmm5, %%xmm3;\n\t" \
       "psrlq $(61-19), %%xmm0;\n\t" \
       "psrlq $(8-7), %%xmm3;\n\t" \
       "pxor %%xmm2, %%xmm0;\n\t" \
@@ -251,17 +255,17 @@ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
       "movdqu %[w_t_m_16], %%xmm2;\n\t" \
       "pxor %%xmm4, %%xmm3;\n\t" \
       "movdqu %[w_t_m_7], %%xmm1;\n\t" \
-      "paddq %%xmm3, %%xmm0;\n\t" \
+      : \
+      : [w_t_m_7] "m" (w[(i)-7]), \
+        [w_t_m_16] "m" (w[(i)-16]) \
+      : "memory" ); \
+ asm volatile ("paddq %%xmm3, %%xmm0;\n\t" \
       "paddq %%xmm2, %%xmm0;\n\t" \
       "paddq %%xmm1, %%xmm0;\n\t" \
       "movdqu %%xmm0, %[w_t_m_0];\n\t" \
       "paddq %[k], %%xmm0;\n\t" \
       : [w_t_m_0] "=m" (w[(i)-0]) \
-      : [k] "m" (K[i]), \
-        [w_t_m_2] "m" (w[(i)-2]), \
-        [w_t_m_7] "m" (w[(i)-7]), \
-        [w_t_m_15] "m" (w[(i)-15]), \
-        [w_t_m_16] "m" (w[(i)-16]) \
+      : [k] "m" (K[i]) \
       : "memory" )
 
 unsigned int ASM_FUNC_ATTR
--
2.27.0


_______________________________________________
Gcrypt-devel mailing list
[hidden email]
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel