[PATCH] Add configure option to force enable 'soft' HW feature bits

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] Add configure option to force enable 'soft' HW feature bits

Jussi Kivilinna-2
* configure.ac (force_soft_hwfeatures)
(ENABLE_FORCE_SOFT_HWFEATURES): New.
* src/hwf-x86.c (detect_x86_gnuc): Enable HWF_INTEL_FAST_SHLD
and HWF_INTEL_FAST_VPGATHER if ENABLE_FORCE_SOFT_HWFEATURES enabled.
--

This patch allows HW features that are fast on only select CPU models to
be enabled on all CPUs. For example, the SHLD instruction is fast only on
select Intel processors and should not be used on others. This
configuration option allows these 'soft' HW features to be enabled on all
CPUs for testing purposes.

Current 'soft' HW features are:
 - "intel-fast-shld": supported by all x86 (but very slow on most)
 - "intel-fast-vpgather": supported by all x86 with AVX2 (but slow on
   most)

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 configure.ac  | 14 ++++++++++++++
 src/hwf-x86.c | 23 +++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/configure.ac b/configure.ac
index ce17d9f4..8aba8ece 100644
--- a/configure.ac
+++ b/configure.ac
@@ -566,6 +566,15 @@ AC_ARG_ENABLE(large-data-tests,
 AC_MSG_RESULT($large_data_tests)
 AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests)
 
+# Implementation of --enable-force-soft-hwfeatures
+AC_MSG_CHECKING([whether 'soft' HW feature bits are forced on])
+AC_ARG_ENABLE([force-soft-hwfeatures],
+              AS_HELP_STRING([--enable-force-soft-hwfeatures],
+                             [Enable forcing 'soft' HW feature bits on]),
+              [force_soft_hwfeatures=$enableval],
+              [force_soft_hwfeatures=no])
+AC_MSG_RESULT($force_soft_hwfeatures)
+
 
 # Implementation of the --with-capabilities switch.
 # Check whether we want to use Linux capabilities
@@ -2434,6 +2443,11 @@ if test x"$drngsupport" = xyes ; then
 fi
 
 
+if test x"$force_soft_hwfeatures" = xyes ; then
+  AC_DEFINE(ENABLE_FORCE_SOFT_HWFEATURES, 1,
+            [Enable forcing 'soft' HW feature bits on (for testing).])
+fi
+
 # Define conditional sources and config.h symbols depending on the
 # selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.
 
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index 796e874f..9a9ed6d3 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -301,6 +301,29 @@ detect_x86_gnuc (void)
       avoid_vpgather |= 1;
     }
 
+#ifdef ENABLE_FORCE_SOFT_HWFEATURES
+  /* Soft HW features mark functionality that is available on all systems
+   * but not feasible to use because of slow HW implementation. */
+
+  /* SHLD is faster at rotating register than actual ROR/ROL instructions
+   * on older Intel systems (~sandy-bridge era). However, SHLD is very
+   * slow on almost anything else and later Intel processors have faster
+   * ROR/ROL. Therefore in regular build HWF_INTEL_FAST_SHLD is enabled
+   * only for those Intel processors that benefit from the SHLD
+   * instruction. Enabled here unconditionally as requested. */
+  result |= HWF_INTEL_FAST_SHLD;
+
+  /* VPGATHER instructions are used for look-up table based
+   * implementations which require VPGATHER to be fast enough to beat
+   * regular parallelized look-up table implementations (see Twofish).
+   * So far, only Intel processors beginning with skylake have had
+   * VPGATHER fast enough to be enabled. AMD Zen3 comes close to
+   * being feasible, but not quite (where twofish-avx2 is few percent
+   * slower than twofish-3way). Enable VPGATHER here unconditionally
+   * as requested. */
+  avoid_vpgather = 0;
+#endif
+
 #ifdef ENABLE_PCLMUL_SUPPORT
   /* Test bit 1 for PCLMUL.  */
   if (features & 0x00000002)
--
2.27.0


_______________________________________________
Gcrypt-devel mailing list
[hidden email]
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Reply | Threaded
Open this post in threaded view
|

[PATCH] Define HW-feature flags per architecture

Jussi Kivilinna-2
* random/rand-internal.h (_gcry_rndhw_poll_slow): Add requested length
parameter.
* random/rndhw.c (_gcry_rndhw_poll_slow): Limit accounted bytes to 50%
(or 25% for RDRAND) - this code is moved from caller side.
* random/rndlinux.c (_gcry_rndlinux_gather_random): Move
HWF_INTEL_RDRAND check to _gcry_rndhw_poll_slow.
* src/g10lib.h (HWF_PADLOCK_*, HWF_INTEL_*): Define only if
HAVE_CPU_ARCH_X86.
(HWF_ARM_*): Define only if HAVE_CPU_ARCH_ARM.
(HWF_PPC_*): Define only if HAVE_CPU_ARCH_PPC.
(HWF_S390X_*): Define only if HAVE_CPU_ARCH_S390X.
--

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 random/rand-internal.h |  2 +-
 random/rndhw.c         | 15 ++++++++++++---
 random/rndlinux.c      | 17 ++++-------------
 src/g10lib.h           | 34 ++++++++++++++++++++++------------
 4 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/random/rand-internal.h b/random/rand-internal.h
index d99c6671..34221569 100644
--- a/random/rand-internal.h
+++ b/random/rand-internal.h
@@ -141,7 +141,7 @@ void _gcry_rndhw_poll_fast (void (*add)(const void*, size_t,
                             enum random_origins origin);
 size_t _gcry_rndhw_poll_slow (void (*add)(const void*, size_t,
                                           enum random_origins),
-                              enum random_origins origin);
+                              enum random_origins origin, size_t req_length);
 
 
 
diff --git a/random/rndhw.c b/random/rndhw.c
index 2829382c..3cf9acc3 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -198,24 +198,33 @@ _gcry_rndhw_poll_fast (void (*add)(const void*, size_t, enum random_origins),
 
 
 /* Read 64 bytes from a hardware RNG and return the number of bytes
-   actually read.  */
+   actually read.  However hardware source is let account only
+   for up to 50% (or 25% for RDRAND) of the requested bytes.  */
 size_t
 _gcry_rndhw_poll_slow (void (*add)(const void*, size_t, enum random_origins),
-                       enum random_origins origin)
+                       enum random_origins origin, size_t req_length)
 {
   size_t nbytes = 0;
 
   (void)add;
   (void)origin;
 
+  req_length /= 2; /* Up to 50%. */
+
 #ifdef USE_DRNG
   if ((_gcry_get_hw_features () & HWF_INTEL_RDRAND))
-    nbytes += poll_drng (add, origin, 0);
+    {
+      req_length /= 2; /* Up to 25%. */
+      nbytes += poll_drng (add, origin, 0);
+    }
 #endif
 #ifdef USE_PADLOCK
   if ((_gcry_get_hw_features () & HWF_PADLOCK_RNG))
     nbytes += poll_padlock (add, origin, 0);
 #endif
 
+  if (nbytes > req_length)
+    nbytes = req_length;
+
   return nbytes;
 }
diff --git a/random/rndlinux.c b/random/rndlinux.c
index 04e2a464..7cbf6ac2 100644
--- a/random/rndlinux.c
+++ b/random/rndlinux.c
@@ -186,19 +186,10 @@ _gcry_rndlinux_gather_random (void (*add)(const void*, size_t,
     }
 
 
-  /* First read from a hardware source.  However let it account only
-     for up to 50% (or 25% for RDRAND) of the requested bytes.  */
-  n_hw = _gcry_rndhw_poll_slow (add, origin);
-  if ((_gcry_get_hw_features () & HWF_INTEL_RDRAND))
-    {
-      if (n_hw > length/4)
-        n_hw = length/4;
-    }
-  else
-    {
-      if (n_hw > length/2)
-        n_hw = length/2;
-    }
+  /* First read from a hardware source.  Note that _gcry_rndhw_poll_slow lets
+     it account only for up to 50% (or 25% for RDRAND) of the requested
+     bytes.  */
+  n_hw = _gcry_rndhw_poll_slow (add, origin, length);
   if (length > 1)
     length -= n_hw;
 
diff --git a/src/g10lib.h b/src/g10lib.h
index cba2e237..243997eb 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -217,6 +217,8 @@ char **_gcry_strtokenize (const char *string, const char *delim);
 
 
 /*-- src/hwfeatures.c --*/
+#if defined(HAVE_CPU_ARCH_X86)
+
 #define HWF_PADLOCK_RNG         (1 << 0)
 #define HWF_PADLOCK_AES         (1 << 1)
 #define HWF_PADLOCK_SHA         (1 << 2)
@@ -236,20 +238,28 @@ char **_gcry_strtokenize (const char *string, const char *delim);
 #define HWF_INTEL_RDTSC         (1 << 15)
 #define HWF_INTEL_SHAEXT        (1 << 16)
 
-#define HWF_ARM_NEON            (1 << 17)
-#define HWF_ARM_AES             (1 << 18)
-#define HWF_ARM_SHA1            (1 << 19)
-#define HWF_ARM_SHA2            (1 << 20)
-#define HWF_ARM_PMULL           (1 << 21)
+#elif defined(HAVE_CPU_ARCH_ARM)
+
+#define HWF_ARM_NEON            (1 << 0)
+#define HWF_ARM_AES             (1 << 1)
+#define HWF_ARM_SHA1            (1 << 2)
+#define HWF_ARM_SHA2            (1 << 3)
+#define HWF_ARM_PMULL           (1 << 4)
+
+#elif defined(HAVE_CPU_ARCH_PPC)
 
-#define HWF_PPC_VCRYPTO         (1 << 22)
-#define HWF_PPC_ARCH_3_00       (1 << 23)
-#define HWF_PPC_ARCH_2_07       (1 << 24)
+#define HWF_PPC_VCRYPTO         (1 << 0)
+#define HWF_PPC_ARCH_3_00       (1 << 1)
+#define HWF_PPC_ARCH_2_07       (1 << 2)
 
-#define HWF_S390X_MSA           (1 << 25)
-#define HWF_S390X_MSA_4         (1 << 26)
-#define HWF_S390X_MSA_8         (1 << 27)
-#define HWF_S390X_VX            (1 << 28)
+#elif defined(HAVE_CPU_ARCH_S390X)
+
+#define HWF_S390X_MSA           (1 << 0)
+#define HWF_S390X_MSA_4         (1 << 1)
+#define HWF_S390X_MSA_8         (1 << 2)
+#define HWF_S390X_VX            (1 << 3)
+
+#endif
 
 gpg_err_code_t _gcry_disable_hw_feature (const char *name);
 void _gcry_detect_hw_features (void);
--
2.27.0


_______________________________________________
Gcrypt-devel mailing list
[hidden email]
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Reply | Threaded
Open this post in threaded view
|

[PATCH] rijndael: remove unused use_xxx flags

Jussi Kivilinna-2
In reply to this post by Jussi Kivilinna-2
* cipher/rijndael-internal.h (RIJNDAEL_context_s): Remove unused
'use_padlock', 'use_aesni', 'use_ssse3', 'use_arm_ce', 'use_ppc_crypto'
and 'use_ppc9le_crypto'.
* cipher/rijndael.c (do_setkey): Do not set up 'use_padlock',
'use_aesni', 'use_ssse3', 'use_arm_ce', 'use_ppc_crypto' and
'use_ppc9le_crypto'.
--

Signed-off-by: Jussi Kivilinna <[hidden email]>
---
 cipher/rijndael-internal.h | 20 ++------------------
 cipher/rijndael.c          | 24 ------------------------
 2 files changed, 2 insertions(+), 42 deletions(-)

diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 447a773a..7e01f6b0 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -164,26 +164,10 @@ typedef struct RIJNDAEL_context_s
   } u2;
   int rounds;                         /* Key-length-dependent number of rounds.  */
   unsigned int decryption_prepared:1; /* The decryption key schedule is available.  */
-#ifdef USE_PADLOCK
-  unsigned int use_padlock:1;         /* Padlock shall be used.  */
-#endif /*USE_PADLOCK*/
 #ifdef USE_AESNI
-  unsigned int use_aesni:1;           /* AES-NI shall be used.  */
-  unsigned int use_avx:1;             /* AVX shall be used. */
-  unsigned int use_avx2:1;            /* AVX2 shall be used. */
+  unsigned int use_avx:1;             /* AVX shall be used by AES-NI implementation. */
+  unsigned int use_avx2:1;            /* AVX2 shall be used by AES-NI implementation. */
 #endif /*USE_AESNI*/
-#ifdef USE_SSSE3
-  unsigned int use_ssse3:1;           /* SSSE3 shall be used.  */
-#endif /*USE_SSSE3*/
-#ifdef USE_ARM_CE
-  unsigned int use_arm_ce:1;          /* ARMv8 CE shall be used.  */
-#endif /*USE_ARM_CE*/
-#ifdef USE_PPC_CRYPTO
-  unsigned int use_ppc_crypto:1;      /* PowerPC crypto shall be used.  */
-#endif /*USE_PPC_CRYPTO*/
-#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
-  unsigned int use_ppc9le_crypto:1;   /* POWER9 LE crypto shall be used.  */
-#endif
 #ifdef USE_S390X_CRYPTO
   byte km_func;
   byte km_func_xts;
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2b1aa5e5..6ab6d542 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -441,24 +441,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
   hwfeatures = _gcry_get_hw_features ();
 
   ctx->decryption_prepared = 0;
-#ifdef USE_PADLOCK
-  ctx->use_padlock = 0;
-#endif
-#ifdef USE_AESNI
-  ctx->use_aesni = 0;
-#endif
-#ifdef USE_SSSE3
-  ctx->use_ssse3 = 0;
-#endif
-#ifdef USE_ARM_CE
-  ctx->use_arm_ce = 0;
-#endif
-#ifdef USE_PPC_CRYPTO
-  ctx->use_ppc_crypto = 0;
-#endif
-#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
-  ctx->use_ppc9le_crypto = 0;
-#endif
 
   /* Setup default bulk encryption routines.  */
   memset (bulk_ops, 0, sizeof(*bulk_ops));
@@ -486,7 +468,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->prepare_decryption = _gcry_aes_aesni_prepare_decryption;
-      ctx->use_aesni = 1;
       ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX);
       ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2);
 
@@ -509,7 +490,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->prepare_decryption = _gcry_aes_padlock_prepare_decryption;
-      ctx->use_padlock = 1;
       memcpy (ctx->padlockkey, key, keylen);
     }
 #endif
@@ -522,7 +502,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->prepare_decryption = _gcry_aes_ssse3_prepare_decryption;
-      ctx->use_ssse3 = 1;
 
       /* Setup SSSE3 bulk encryption routines.  */
       bulk_ops->cfb_enc = _gcry_aes_ssse3_cfb_enc;
@@ -543,7 +522,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->prepare_decryption = _gcry_aes_armv8_ce_prepare_decryption;
-      ctx->use_arm_ce = 1;
 
       /* Setup ARM-CE bulk encryption routines.  */
       bulk_ops->cfb_enc = _gcry_aes_armv8_ce_cfb_enc;
@@ -565,7 +543,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption;
-      ctx->use_ppc_crypto = 1; /* same key-setup as USE_PPC_CRYPTO */
       ctx->use_ppc9le_crypto = 1;
 
       /* Setup PPC9LE bulk encryption routines.  */
@@ -588,7 +565,6 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption;
-      ctx->use_ppc_crypto = 1;
 
       /* Setup PPC8 bulk encryption routines.  */
       bulk_ops->cfb_enc = _gcry_aes_ppc8_cfb_enc;
--
2.27.0


_______________________________________________
Gcrypt-devel mailing list
[hidden email]
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel