[PATCH 1/5] Fix building with x86-64 medium and large memory models


Jussi Kivilinna
* cipher/cast5-amd64.S [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
(GET_EXTERN_POINTER): Load 64-bit address instead of 32-bit.
* cipher/rijndael.c (do_encrypt, do_decrypt)
[USE_AMD64_ASM && !HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS]: Load
table pointer through register instead of generic reference.
--
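
A rough illustration (not part of the patch, with a hypothetical
symbol "tab"): the two forms assemble to different relocation types:

    leaq    tab, %rax       /* R_X86_64_32S: sign-extended 32-bit
                               absolute address; links only while
                               tab lies within reach of 32 bits  */
    movabsq $tab, %rax      /* R_X86_64_64: full 64-bit absolute
                               address; valid under the medium
                               and large code models             */

The rijndael.c change is the same problem on the compiler side: with
the "g" constraint GCC may substitute the operand as an immediate
(ending up as, say, "movq $encT, %r10"), which again only encodes a
sign-extended 32-bit value.  "r" forces GCC to load the address into
a register itself, using whatever sequence the active -mcmodel
requires.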

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/cast5-amd64.S |    2 +-
 cipher/rijndael.c    |    4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index a5f078e3..608fb64e 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -24,7 +24,7 @@
      defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
 
 #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
-#  define GET_EXTERN_POINTER(name, reg) leaq name, reg
+#  define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
 #else
 #  define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
 #endif
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 66ea0f3a..8637195a 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -752,7 +752,7 @@ do_encrypt (const RIJNDAEL_context *ctx,
                   "+d" (ax),
                   "+c" (rounds)
                 : "0" (_gcry_aes_amd64_encrypt_block),
-                  [encT] "g" (encT)
+                  [encT] "r" (encT)
                 : "cc", "memory", "r8", "r9", "r10", "r11");
   return ret;
 # endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */
@@ -1135,7 +1135,7 @@ do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
                   "+d" (ax),
                   "+c" (rounds)
                 : "0" (_gcry_aes_amd64_decrypt_block),
-                  [dectabs] "g" (&dec_tables)
+                  [dectabs] "r" (&dec_tables)
                 : "cc", "memory", "r8", "r9", "r10", "r11");
   return ret;
 # endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */


[PATCH 2/5] cast5-amd64: use 64-bit relocation with large PIC memory model

Jussi Kivilinna
* cipher/cast5-amd64.S [__code_model_large__]
(GET_EXTERN_POINTER): New.
--
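
Annotated sketch of the new sequence (label and register names as in
the diff below):

  1: leaq 1b(%rip), reg                       /* runtime address of 1b */
     movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14  /* 64-bit link-time      */
                                              /* offset from 1b to GOT */
     movabsq $name@GOT, %r15                  /* 64-bit offset of the  */
                                              /* symbol's GOT slot     */
     addq %r14, reg                           /* runtime GOT address   */
     movq (reg, %r15), reg                    /* load symbol's address */
                                              /* from its GOT slot     */

This mirrors the prologue GCC itself emits for -mcmodel=large -fPIC:
neither the GOT nor the symbol can be assumed to lie within ±2 GiB of
the code, so both offsets need 64-bit relocations.  %r14 and %r15 are
pushed and popped around the sequence because the macro cannot know
which registers its caller has free.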

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/cast5-amd64.S |   15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index 608fb64e..c04015a2 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -26,7 +26,20 @@
 #if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
 #  define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
 #else
-#  define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#  ifdef __code_model_large__
+#    define GET_EXTERN_POINTER(name, reg) \
+       pushq %r15; \
+       pushq %r14; \
+    1: leaq 1b(%rip), reg; \
+       movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \
+       movabsq $name@GOT, %r15; \
+       addq %r14, reg; \
+       popq %r14; \
+       movq (reg, %r15), reg; \
+       popq %r15;
+#  else
+#    define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#  endif
 #endif
 
 #ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS


[PATCH 3/5] Move data in AMD64 assembly to text section

Jussi Kivilinna
* cipher/camellia-aesni-avx-amd64.S: Move data to .text section to
ensure that RIP relative addressing of data will work.
* cipher/camellia-aesni-avx2-amd64.S: Ditto.
* cipher/chacha20-avx2-amd64.S: Ditto.
* cipher/chacha20-ssse3-amd64.S: Ditto.
* cipher/des-amd64.S: Ditto.
* cipher/serpent-avx2-amd64.S: Ditto.
* cipher/sha1-avx-amd64.S: Ditto.
* cipher/sha1-avx-bmi2-amd64.S: Ditto.
* cipher/sha1-ssse3-amd64.S: Ditto.
* cipher/sha256-avx-amd64.S: Ditto.
* cipher/sha256-avx2-bmi2-amd64.S: Ditto.
* cipher/sha256-ssse3-amd64.S: Ditto.
* cipher/sha512-avx-amd64.S: Ditto.
* cipher/sha512-avx2-bmi2-amd64.S: Ditto.
* cipher/sha512-ssse3-amd64.S: Ditto.
--
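
Background: all of these files reference their constant tables with
RIP-relative addressing.  Under the small code model the data is
guaranteed to sit within ±2 GiB of the code, so a 32-bit PC-relative
relocation always reaches it; under the medium and large models the
data sections may be placed beyond that range and the relocation can
overflow at link time.  Keeping the read-only tables in .text pins
them next to the code that uses them, e.g. (hypothetical label):

    .text
    .align 16
  .Lconst:
    .long 0x0f0f0f0f

    leaq .Lconst(%rip), %rax   /* PC-relative offset always in range:
                                  same section as the code itself    */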

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/camellia-aesni-avx-amd64.S  |    5 +----
 cipher/camellia-aesni-avx2-amd64.S |    3 +--
 cipher/chacha20-avx2-amd64.S       |    1 -
 cipher/chacha20-ssse3-amd64.S      |    1 -
 cipher/des-amd64.S                 |    1 -
 cipher/serpent-avx2-amd64.S        |    1 -
 cipher/sha1-avx-amd64.S            |    3 +--
 cipher/sha1-avx-bmi2-amd64.S       |    3 +--
 cipher/sha1-ssse3-amd64.S          |    3 +--
 cipher/sha256-avx-amd64.S          |    1 -
 cipher/sha256-avx2-bmi2-amd64.S    |    1 -
 cipher/sha256-ssse3-amd64.S        |    1 -
 cipher/sha512-avx-amd64.S          |    2 --
 cipher/sha512-avx2-bmi2-amd64.S    |    2 --
 cipher/sha512-ssse3-amd64.S        |    2 --
 15 files changed, 5 insertions(+), 25 deletions(-)

diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 5a3a3cbc..8022934f 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -629,7 +629,7 @@
  vmovdqu y6, 14 * 16(rio); \
  vmovdqu y7, 15 * 16(rio);
 
-.data
+.text
 .align 16
 
 #define SHUFB_BYTES(idx) \
@@ -773,7 +773,6 @@
 .L0f0f0f0f:
  .long 0x0f0f0f0f
 
-.text
 
 .align 8
 ELF(.type   __camellia_enc_blk16,@function;)
@@ -1702,7 +1701,6 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
  vpsllq $(64-(nror)), out, out; \
  vpaddd t0, out, out;
 
-.data
 
 .align 16
 .Linv_shift_row_and_unpcklbw:
@@ -1735,7 +1733,6 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
 .Lsigma6:
  .long 0xB3E6C1FD, 0xB05688C2;
 
-.text
 
 .align 8
 ELF(.type  __camellia_avx_setup128,@function;)
diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S
index 26381df0..897e4aee 100644
--- a/cipher/camellia-aesni-avx2-amd64.S
+++ b/cipher/camellia-aesni-avx2-amd64.S
@@ -613,7 +613,7 @@
  vmovdqu y6, 14 * 32(rio); \
  vmovdqu y7, 15 * 32(rio);
 
-.data
+.text
 .align 32
 
 #define SHUFB_BYTES(idx) \
@@ -752,7 +752,6 @@
 .L0f0f0f0f:
  .long 0x0f0f0f0f
 
-.text
 
 .align 8
 ELF(.type   __camellia_enc_blk32,@function;)
diff --git a/cipher/chacha20-avx2-amd64.S b/cipher/chacha20-avx2-amd64.S
index 12bed35b..8c085bad 100644
--- a/cipher/chacha20-avx2-amd64.S
+++ b/cipher/chacha20-avx2-amd64.S
@@ -947,7 +947,6 @@ _gcry_chacha20_amd64_avx2_blocks:
  ret
 ELF(.size _gcry_chacha20_amd64_avx2_blocks,.-_gcry_chacha20_amd64_avx2_blocks;)
 
-.data
 .align 16
 .LC:
 .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13       /* pshufb rotate by 16 */
diff --git a/cipher/chacha20-ssse3-amd64.S b/cipher/chacha20-ssse3-amd64.S
index a1a843fa..c04010e7 100644
--- a/cipher/chacha20-ssse3-amd64.S
+++ b/cipher/chacha20-ssse3-amd64.S
@@ -623,7 +623,6 @@ _gcry_chacha20_amd64_ssse3_blocks:
  ret
 ELF(.size _gcry_chacha20_amd64_ssse3_blocks,.-_gcry_chacha20_amd64_ssse3_blocks;)
 
-.data
 .align 16;
 .LC:
 .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13       /* pshufb rotate by 16 */
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index 307d2112..1b7cfba8 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -766,7 +766,6 @@ _gcry_3des_amd64_cfb_dec:
  ret;
 ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
 
-.data
 .align 16
 .L_s1:
  .quad 0x0010100001010400, 0x0000000000000000
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 2902dab5..8d60a159 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -1113,7 +1113,6 @@ _gcry_serpent_avx2_ocb_auth:
  ret;
 ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;)
 
-.data
 .align 16
 
 /* For CTR-mode IV byteswap */
diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S
index 3b3a6d11..b14603bf 100644
--- a/cipher/sha1-avx-amd64.S
+++ b/cipher/sha1-avx-amd64.S
@@ -58,7 +58,7 @@
 
 /* Constants */
 
-.data
+.text
 #define K1  0x5A827999
 #define K2  0x6ED9EBA1
 #define K3  0x8F1BBCDC
@@ -214,7 +214,6 @@
  * _gcry_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
  *                                  size_t nblks)
  */
-.text
 .globl _gcry_sha1_transform_amd64_avx
 ELF(.type _gcry_sha1_transform_amd64_avx,@function)
 .align 16
diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S
index 22bcbb3c..b267693f 100644
--- a/cipher/sha1-avx-bmi2-amd64.S
+++ b/cipher/sha1-avx-bmi2-amd64.S
@@ -59,7 +59,7 @@
 
 /* Constants */
 
-.data
+.text
 #define K1  0x5A827999
 #define K2  0x6ED9EBA1
 #define K3  0x8F1BBCDC
@@ -212,7 +212,6 @@
  * _gcry_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
  *                                      size_t nblks)
  */
-.text
 .globl _gcry_sha1_transform_amd64_avx_bmi2
 ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
 .align 16
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
index 98a19e60..2b439476 100644
--- a/cipher/sha1-ssse3-amd64.S
+++ b/cipher/sha1-ssse3-amd64.S
@@ -58,7 +58,7 @@
 
 /* Constants */
 
-.data
+.text
 #define K1  0x5A827999
 #define K2  0x6ED9EBA1
 #define K3  0x8F1BBCDC
@@ -226,7 +226,6 @@
  * _gcry_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
  *                                   size_t nblks)
  */
-.text
 .globl _gcry_sha1_transform_amd64_ssse3
 ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
 .align 16
diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index 8bf26bd7..6953855b 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -496,7 +496,6 @@ _gcry_sha256_transform_amd64_avx:
  ret
 
 
-.data
 .align 16
 .LK256:
  .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 74b60631..85e663fe 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -763,7 +763,6 @@ _gcry_sha256_transform_amd64_avx2:
 
  ret
 
-.data
 .align 64
 .LK256:
  .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index 9ec87e46..a9213e41 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -516,7 +516,6 @@ _gcry_sha256_transform_amd64_ssse3:
  ret
 
 
-.data
 .align 16
 .LK256:
  .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index 699c271b..446a8b4e 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -368,8 +368,6 @@ _gcry_sha512_transform_amd64_avx:
 ;;; Binary Data
 */
 
-.data
-
 .align 16
 
 /* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index 02f95af6..05bef64c 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -735,8 +735,6 @@ _gcry_sha512_transform_amd64_avx2:
 /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
 /*;; Binary Data */
 
-.data
-
 .align 64
 /* K[t] used in SHA512 hashing */
 .LK512:
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index c721bcf2..51193b36 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -373,8 +373,6 @@ _gcry_sha512_transform_amd64_ssse3:
 ;;; Binary Data
 */
 
-.data
-
 .align 16
 
 /* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */


[PATCH 4/5] chacha20-armv7-neon: fix to use fast code path when memory is aligned

Jussi Kivilinna
* cipher/chacha20-armv7-neon.S (UNALIGNED_LDMIA4): Uncomment
instruction for jump to aligned code path.
--
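
Note: both paths compute the same result, so this is purely a
performance fix.  With the branch commented out, the result of the
"tst ptr, #3" alignment check was ignored and every call went through
the slower NEON-assisted unaligned load, meaning word-aligned buffers
paid the penalty too.  Restoring "beq 1f" lets aligned pointers jump
straight to the plain ldmia path.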

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/chacha20-armv7-neon.S |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cipher/chacha20-armv7-neon.S b/cipher/chacha20-armv7-neon.S
index 4d3340b3..c1971fc7 100644
--- a/cipher/chacha20-armv7-neon.S
+++ b/cipher/chacha20-armv7-neon.S
@@ -54,7 +54,7 @@
 
 #define UNALIGNED_LDMIA4(ptr, l0, l1, l2, l3) \
         tst ptr, #3; \
-        /*beq 1f;*/ \
+        beq 1f; \
         vpush {d0-d1}; \
         vld1.32 {d0-d1}, [ptr]; \
         add ptr, #16; \


[PATCH 5/5] rijndael-ssse3: fix function calls from assembly blocks

Jussi Kivilinna
* cipher/rijndael-ssse3-amd64.c (PUSH_STACK_PTR, POP_STACK_PTR): New.
(vpaes_ssse3_prepare_enc, vpaes_ssse3_prepare_dec)
(_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption)
(do_vpaes_ssse3_enc, do_vpaes_ssse3_dec): Use PUSH_STACK_PTR and
POP_STACK_PTR.
--
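
Background: the AMD64 SysV ABI reserves a 128-byte "red zone" below
%rsp that leaf functions may use for locals without moving the stack
pointer.  GCC does not know that an asm block performs a call, so it
may still treat the containing function as a leaf; the call then
pushes its return address at -8(%rsp), right on top of any spill
living there.  Sketch of the failure and the fix (with a hypothetical
helper function):

    movq    %rax, -8(%rsp)   /* compiler spills a local into the
                                red zone of a "leaf" function     */
    callq   helper           /* pushes return address at -8(%rsp),
                                silently clobbering the spill     */

    subq    $128, %rsp       /* PUSH_STACK_PTR: step past the     */
    callq   helper           /* red zone before the call ...      */
    addq    $128, %rsp       /* POP_STACK_PTR: ... restore after  */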

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/rijndael-ssse3-amd64.c |   31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 78d8234d..da5339e3 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -110,6 +110,8 @@ extern void _gcry_aes_ssse3_decrypt_core(void);
                   : \
                   : "r" (ssse3_state) \
                   : "memory" )
+# define PUSH_STACK_PTR
+# define POP_STACK_PTR
 #else
 # define SSSE3_STATE_SIZE 1
 # define vpaes_ssse3_prepare() (void)ssse3_state
@@ -124,18 +126,27 @@ extern void _gcry_aes_ssse3_decrypt_core(void);
                   "pxor %%xmm7,  %%xmm7 \n\t" \
                   "pxor %%xmm8,  %%xmm8 \n\t" \
                   ::: "memory" )
+/* Old GCC versions use red-zone of AMD64 SYSV ABI and stack pointer is
+ * not properly adjusted for assembly block. Therefore stack pointer
+ * needs to be manually corrected. */
+# define PUSH_STACK_PTR "subq $128, %%rsp;\n\t"
+# define POP_STACK_PTR  "addq $128, %%rsp;\n\t"
 #endif
 
 #define vpaes_ssse3_prepare_enc() \
     vpaes_ssse3_prepare(); \
-    asm volatile ("callq *%q[core] \n\t" \
+    asm volatile (PUSH_STACK_PTR \
+                  "callq *%q[core] \n\t" \
+                  POP_STACK_PTR \
                   : \
                   : [core] "r" (_gcry_aes_ssse3_enc_preload) \
                   : "rax", "cc", "memory" )
 
 #define vpaes_ssse3_prepare_dec() \
     vpaes_ssse3_prepare(); \
-    asm volatile ("callq *%q[core] \n\t" \
+    asm volatile (PUSH_STACK_PTR \
+                  "callq *%q[core] \n\t" \
+                  POP_STACK_PTR \
                   : \
                   : [core] "r" (_gcry_aes_ssse3_dec_preload) \
                   : "rax", "cc", "memory" )
@@ -155,7 +166,9 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
                 "leaq %[buf], %%rdx" "\n\t"
                 "movl %[dir], %%ecx" "\n\t"
                 "movl %[rotoffs], %%r8d" "\n\t"
+                PUSH_STACK_PTR
                 "callq *%q[core]" "\n\t"
+                POP_STACK_PTR
                 :
                 : [core] "r" (&_gcry_aes_ssse3_schedule_core),
                   [key] "m" (*key),
@@ -208,7 +221,9 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
                 "leaq %[buf], %%rdx" "\n\t"
                 "movl %[dir], %%ecx" "\n\t"
                 "movl %[rotoffs], %%r8d" "\n\t"
+                PUSH_STACK_PTR
                 "callq *%q[core]" "\n\t"
+                POP_STACK_PTR
                 :
                 : [core] "r" (_gcry_aes_ssse3_schedule_core),
                   [key] "m" (ctx->keyschdec32[0][0]),
@@ -231,7 +246,9 @@ do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds)
   unsigned int middle_rounds = nrounds - 1;
   const void *keysched = ctx->keyschenc32;
 
-  asm volatile ("callq *%q[core]" "\n\t"
+  asm volatile (PUSH_STACK_PTR
+ "callq *%q[core]" "\n\t"
+ POP_STACK_PTR
  : "+a" (middle_rounds), "+d" (keysched)
  : [core] "r" (_gcry_aes_ssse3_encrypt_core)
  : "rcx", "rsi", "rdi", "cc", "memory");
@@ -246,10 +263,12 @@ do_vpaes_ssse3_dec (const RIJNDAEL_context *ctx, unsigned int nrounds)
   unsigned int middle_rounds = nrounds - 1;
   const void *keysched = ctx->keyschdec32;
 
-  asm volatile ("callq *%q[core]" "\n\t"
-                : "+a" (middle_rounds), "+d" (keysched)
+  asm volatile (PUSH_STACK_PTR
+ "callq *%q[core]" "\n\t"
+ POP_STACK_PTR
+ : "+a" (middle_rounds), "+d" (keysched)
  : [core] "r" (_gcry_aes_ssse3_decrypt_core)
-                : "rcx", "rsi", "cc", "memory");
+ : "rcx", "rsi", "cc", "memory");
 }
 
 

