From 206c6ffc854666f556eed9fad97d6a615c6a540a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?=
Date: Wed, 5 Apr 2017 20:58:46 +0200
Subject: [PATCH] Implement ffs() / fls() and use the latter to compute the MSB.

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

 * The portable fallback in cryb_ffs*() / cryb_fls*() did not compile
   ("for (i > 0)"), scanned down from the top bit even for ffs, never
   tested the top bit, shifted the int constant 1 by up to 62 bits in the
   long / long long variants, and returned a 0-based index.  It now works
   on the unsigned value and returns the 1-based bit position, or 0 for a
   zero argument.
 * The __builtin_ctz paths of cryb_ffs*() returned a 0-based index; they
   now add 1 so that they agree with __builtin_ffs*().
 * cryb_ffsl() / cryb_ffsll() tested HAVE___BUILTIN_CLZ and called
   __builtin_ctz(), truncating the argument to int.  They now use
   __builtin_ctzl() / __builtin_ctzll() under the matching HAVE_ macro.
 * cryb_flsl() / cryb_flsll() now test HAVE___BUILTIN_CLZL / _CLZLL, the
   macros for the builtins they actually call.
 * Left as submitted, needs confirmation from the author: the
   cryb_mpi_sub_abs.c hunk drops the "i > 0" bound from the scan for the
   most significant non-zero word.  Whether the difference can be all
   zero words at that point is not visible in the hunk.
 * This copy was rebuilt from an extraction that had collapsed all line
   breaks and stripped every <...> token.  Blank context lines were
   re-inserted to match the hunk line counts and indentation was restored
   as tabs; the placement in the second configure.ac hunk is a best
   guess.  "#include" lines whose target could not be determined are left
   bare and must be filled in from the tree.  The "index" blob hashes are
   as extracted and do not reflect the revised bitwise.h content.  Do not
   expect the patch to apply until those are resolved.

 configure.ac               |  51 +++++++++++----
 include/cryb/bitwise.h     | 129 +++++++++++++++++++++++++++++++++++++
 lib/mpi/cryb_mpi_add_abs.c |  10 +--
 lib/mpi/cryb_mpi_load.c    |  11 +---
 lib/mpi/cryb_mpi_sub_abs.c |  12 ++--
 5 files changed, 176 insertions(+), 37 deletions(-)

diff --git a/configure.ac b/configure.ac
index dab2eb5..b4d35db 100644
--- a/configure.ac
+++ b/configure.ac
@@ -21,19 +21,6 @@ AC_C_CONST
 AC_C_RESTRICT
 AC_C_VOLATILE
 AC_C_BIGENDIAN
-AC_TYPE_INT16_T
-AC_TYPE_INT32_T
-AC_TYPE_INT8_T
-AC_TYPE_INTMAX_T
-AC_TYPE_INTPTR_T
-AC_TYPE_OFF_T
-AC_TYPE_SIZE_T
-AC_TYPE_SSIZE_T
-AC_TYPE_UINT16_T
-AC_TYPE_UINT32_T
-AC_TYPE_UINT8_T
-AC_TYPE_UINTMAX_T
-AC_TYPE_UINTPTR_T
 
 # libtool
 LT_PREREQ([2.2.6])
@@ -46,15 +33,46 @@ PKG_INSTALLDIR
 # other programs
 AC_PROG_INSTALL
 
+############################################################################
+#
+# Types
+#
+AC_TYPE_INT16_T
+AC_TYPE_INT32_T
+AC_TYPE_INT8_T
+AC_TYPE_INTMAX_T
+AC_TYPE_INTPTR_T
+AC_TYPE_OFF_T
+AC_TYPE_SIZE_T
+AC_TYPE_SSIZE_T
+AC_TYPE_UINT16_T
+AC_TYPE_UINT32_T
+AC_TYPE_UINT8_T
+AC_TYPE_UINTMAX_T
+AC_TYPE_UINTPTR_T
+
 ############################################################################
 #
 # Headers and functions
 #
 
-AC_CHECK_HEADERS([endian.h sys/endian.h])
+AC_CHECK_HEADERS([endian.h sys/endian.h strings.h])
 AX_GCC_BUILTIN([__builtin_bswap16])
 AX_GCC_BUILTIN([__builtin_bswap32])
 AX_GCC_BUILTIN([__builtin_bswap64])
+AX_GCC_BUILTIN([__builtin_clz])
+AX_GCC_BUILTIN([__builtin_clzl])
+AX_GCC_BUILTIN([__builtin_clzll])
+AX_GCC_BUILTIN([__builtin_ctz])
+AX_GCC_BUILTIN([__builtin_ctzl])
+AX_GCC_BUILTIN([__builtin_ctzll])
+AX_GCC_BUILTIN([__builtin_ffs])
+AX_GCC_BUILTIN([__builtin_ffsl])
+AX_GCC_BUILTIN([__builtin_ffsll])
+# No compiler we know of has these
+#AX_GCC_BUILTIN([__builtin_fls])
+#AX_GCC_BUILTIN([__builtin_flsl])
+#AX_GCC_BUILTIN([__builtin_flsll])
 AC_CHECK_DECLS([
 	bswap16, bswap32, bswap64,
 	bswap16v, bswap32v, bswap64v,
@@ -76,6 +94,11 @@ AC_CHECK_DECLS([
 #include
 #endif
 ]])
+AC_CHECK_FUNCS([ffs ffsl ffsll fls flsl flsll], [], [], [[
+#if HAVE_STRINGS_H
+#include <strings.h>
+#endif
+]])
 AC_CHECK_FUNCS([strlcat strlcmp strlcpy])
 AC_CHECK_FUNCS([wcslcat wcslcmp wcslcpy])
 
diff --git a/include/cryb/bitwise.h b/include/cryb/bitwise.h
index 1430ff3..a1eff65 100644
--- a/include/cryb/bitwise.h
+++ b/include/cryb/bitwise.h
@@ -54,6 +54,135 @@ CRYB_ROL_ROR(64);
 
 #undef CRYB_ROL_ROR
 
+#if !HAVE_FFS
+#define ffs cryb_ffs
+#endif
+#if !HAVE_FFSL
+#define ffsl cryb_ffsl
+#endif
+#if !HAVE_FFSLL
+#define ffsll cryb_ffsll
+#endif
+#if !HAVE_FLS
+#define fls cryb_fls
+#endif
+#if !HAVE_FLSL
+#define flsl cryb_flsl
+#endif
+#if !HAVE_FLSLL
+#define flsll cryb_flsll
+#endif
+
+/*
+ * Find first set: return the 1-based position of the least significant
+ * set bit in n, or 0 if n is zero.
+ */
+static inline int cryb_ffs(int n) {
+#if HAVE___BUILTIN_FFS
+	return (__builtin_ffs(n));
+#elif HAVE___BUILTIN_CTZ
+	/* ctz counts from 0 and is undefined for 0 */
+	return (n ? __builtin_ctz(n) + 1 : 0);
+#else
+	unsigned int u = n;
+	int i;
+
+	if (u == 0)
+		return (0);
+	for (i = 1; (u & 1) == 0; ++i)
+		u >>= 1;
+	return (i);
+#endif
+}
+
+static inline int cryb_ffsl(long int n) {
+#if HAVE___BUILTIN_FFSL
+	return (__builtin_ffsl(n));
+#elif HAVE___BUILTIN_CTZL
+	/* ctzl counts from 0 and is undefined for 0 */
+	return (n ? __builtin_ctzl(n) + 1 : 0);
+#else
+	unsigned long int u = n;
+	int i;
+
+	if (u == 0)
+		return (0);
+	for (i = 1; (u & 1) == 0; ++i)
+		u >>= 1;
+	return (i);
+#endif
+}
+
+static inline int cryb_ffsll(long long int n) {
+#if HAVE___BUILTIN_FFSLL
+	return (__builtin_ffsll(n));
+#elif HAVE___BUILTIN_CTZLL
+	/* ctzll counts from 0 and is undefined for 0 */
+	return (n ? __builtin_ctzll(n) + 1 : 0);
+#else
+	unsigned long long int u = n;
+	int i;
+
+	if (u == 0)
+		return (0);
+	for (i = 1; (u & 1) == 0; ++i)
+		u >>= 1;
+	return (i);
+#endif
+}
+
+/*
+ * Find last set: return the 1-based position of the most significant
+ * set bit in n, or 0 if n is zero.
+ */
+static inline int cryb_fls(int n) {
+#if HAVE___BUILTIN_FLS
+	return (__builtin_fls(n));
+#elif HAVE___BUILTIN_CLZ
+	/* clz is undefined for 0 */
+	return (n ? (8 * sizeof n) - __builtin_clz(n) : 0);
+#else
+	unsigned int u = n;
+	int i;
+
+	for (i = 0; u != 0; ++i)
+		u >>= 1;
+	return (i);
+#endif
+}
+
+static inline int cryb_flsl(long int n) {
+#if HAVE___BUILTIN_FLSL
+	return (__builtin_flsl(n));
+#elif HAVE___BUILTIN_CLZL
+	/* clzl is undefined for 0 */
+	return (n ? (8 * sizeof n) - __builtin_clzl(n) : 0);
+#else
+	unsigned long int u = n;
+	int i;
+
+	for (i = 0; u != 0; ++i)
+		u >>= 1;
+	return (i);
+#endif
+}
+
+static inline int cryb_flsll(long long int n) {
+#if HAVE___BUILTIN_FLSLL
+	return (__builtin_flsll(n));
+#elif HAVE___BUILTIN_CLZLL
+	/* clzll is undefined for 0 */
+	return (n ? (8 * sizeof n) - __builtin_clzll(n) : 0);
+#else
+	unsigned long long int u = n;
+	int i;
+
+	for (i = 0; u != 0; ++i)
+		u >>= 1;
+	return (i);
+#endif
+}
+
 CRYB_END
 
 #endif
diff --git a/lib/mpi/cryb_mpi_add_abs.c b/lib/mpi/cryb_mpi_add_abs.c
index ea53072..258ae30 100644
--- a/lib/mpi/cryb_mpi_add_abs.c
+++ b/lib/mpi/cryb_mpi_add_abs.c
@@ -31,7 +31,9 @@
 
 #include
 #include
+#include
 
+#include
 #include
 
 #include "cryb_mpi_impl.h"
@@ -104,13 +106,7 @@ mpi_add_abs(cryb_mpi *X, const cryb_mpi *A, const cryb_mpi *B)
 	}
 	if (X->words[i] == 0)
 		--i;
-	/* compute msb of msw */
-	/* XXX should use flsl() */
-	for (X->msb = 31; X->msb > 0; --X->msb)
-		if (X->words[i] & (1 << X->msb))
-			break;
-	/* add msw offset */
-	X->msb += i * 32 + 1;
+	X->msb = i * 32 + flsl(X->words[i]);
 	X->neg = 0;
 	return (0);
 }
diff --git a/lib/mpi/cryb_mpi_load.c b/lib/mpi/cryb_mpi_load.c
index 9e2eb91..20f2a90 100644
--- a/lib/mpi/cryb_mpi_load.c
+++ b/lib/mpi/cryb_mpi_load.c
@@ -31,7 +31,9 @@
 
 #include
 #include
+#include
 
+#include
 #include
 #include
 
@@ -70,13 +72,6 @@ mpi_load(cryb_mpi *X, const uint8_t *a, size_t len)
 		--i;
 	CRYB_NO_DEFAULT_CASE;
 	}
-	/* i now points to the msw */
-	/* compute msb of msw */
-	/* XXX use flsl() */
-	for (X->msb = 31; X->msb > 0; --X->msb)
-		if (X->words[i] & (1 << X->msb))
-			break;
-	/* add msw offset */
-	X->msb += i * 32 + 1;
+	X->msb = i * 32 + flsl(X->words[i]);
 	return (0);
 }
diff --git a/lib/mpi/cryb_mpi_sub_abs.c b/lib/mpi/cryb_mpi_sub_abs.c
index a6dc00f..da6f46a 100644
--- a/lib/mpi/cryb_mpi_sub_abs.c
+++ b/lib/mpi/cryb_mpi_sub_abs.c
@@ -31,7 +31,9 @@
 
 #include
 #include
+#include
 
+#include
 #include
 
 #include "cryb_mpi_impl.h"
@@ -92,15 +94,9 @@ mpi_sub_abs(cryb_mpi *X, const cryb_mpi *A, const cryb_mpi *B)
 		c = cn;
 		++i;
 	}
-	while (i > 0 && X->words[i] == 0)
+	while (X->words[i] == 0)
 		--i;
-	/* compute msb of msw */
-	/* XXX use flsl() */
-	for (X->msb = 31; X->msb > 0; --X->msb)
-		if (X->words[i] & (1 << X->msb))
-			break;
-	/* add msw offset */
-	X->msb += i * 32 + 1;
+	X->msb = i * 32 + flsl(X->words[i]);
 	X->neg = 0;
 	return (0);
 }