Diffstat (limited to 'abseil-cpp/absl/random/internal/randen_hwaes.cc')
-rw-r--r--  abseil-cpp/absl/random/internal/randen_hwaes.cc | 87
1 file changed, 20 insertions(+), 67 deletions(-)
diff --git a/abseil-cpp/absl/random/internal/randen_hwaes.cc b/abseil-cpp/absl/random/internal/randen_hwaes.cc
index b5a3f90..f535f4c 100644
--- a/abseil-cpp/absl/random/internal/randen_hwaes.cc
+++ b/abseil-cpp/absl/random/internal/randen_hwaes.cc
@@ -23,49 +23,20 @@
#include <cstring>
#include "absl/base/attributes.h"
+#include "absl/numeric/int128.h"
#include "absl/random/internal/platform.h"
#include "absl/random/internal/randen_traits.h"
// ABSL_RANDEN_HWAES_IMPL indicates whether this file will contain
// a hardware accelerated implementation of randen, or whether it
// will contain stubs that exit the process.
-#if defined(ABSL_ARCH_X86_64) || defined(ABSL_ARCH_X86_32)
-// The platform.h directives are sufficient to indicate whether
-// we should build accelerated implementations for x86.
-#if (ABSL_HAVE_ACCELERATED_AES || ABSL_RANDOM_INTERNAL_AES_DISPATCH)
-#define ABSL_RANDEN_HWAES_IMPL 1
-#endif
-#elif defined(ABSL_ARCH_PPC)
-// The platform.h directives are sufficient to indicate whether
-// we should build accelerated implementations for PPC.
-//
-// NOTE: This has mostly been tested on 64-bit Power variants,
-// and not embedded cpus such as powerpc32-8540
#if ABSL_HAVE_ACCELERATED_AES
+// The following platforms have implemented RandenHwAes.
+#if defined(ABSL_ARCH_X86_64) || defined(ABSL_ARCH_X86_32) || \
+ defined(ABSL_ARCH_PPC) || defined(ABSL_ARCH_ARM) || \
+ defined(ABSL_ARCH_AARCH64)
#define ABSL_RANDEN_HWAES_IMPL 1
#endif
-#elif defined(ABSL_ARCH_ARM) || defined(ABSL_ARCH_AARCH64)
-// ARM is somewhat more complicated. We might support crypto natively...
-#if ABSL_HAVE_ACCELERATED_AES || \
- (defined(__ARM_NEON) && defined(__ARM_FEATURE_CRYPTO))
-#define ABSL_RANDEN_HWAES_IMPL 1
-
-#elif ABSL_RANDOM_INTERNAL_AES_DISPATCH && !defined(__APPLE__) && \
- (defined(__GNUC__) && __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ > 9)
-// ...or, on GCC, we can use an ASM directive to
-// instruct the assembler to allow crypto instructions.
-#define ABSL_RANDEN_HWAES_IMPL 1
-#define ABSL_RANDEN_HWAES_IMPL_CRYPTO_DIRECTIVE 1
-#endif
-#else
-// HWAES is unsupported by these architectures / platforms:
-// __myriad2__
-// __mips__
-//
-// Other architectures / platforms are unknown.
-//
-// See the Abseil documentation on supported macros at:
-// https://abseil.io/docs/cpp/platforms/macros
#endif
#if !defined(ABSL_RANDEN_HWAES_IMPL)
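When ABSL_RANDEN_HWAES_IMPL remains undefined, the rest of the file compiles stubs that exit the process instead of the accelerated routines, per the comment above. A minimal sketch of that stub pattern (the name and message below are illustrative, not the literal Abseil stubs):

#include <cstdio>
#include <cstdlib>

// Illustrative stand-in for the no-hardware-AES path: same idea as the
// real stubs, which report the missing support and terminate.
[[noreturn]] inline void RandenHwAesStubSketch() {
  std::fprintf(stderr, "RandenHwAes is not supported on this platform\n");
  std::exit(1);
}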
@@ -120,11 +91,6 @@ namespace {
using absl::random_internal::RandenTraits;
-// Randen operates on 128-bit vectors.
-struct alignas(16) u64x2 {
- uint64_t data[2];
-};
-
} // namespace
// TARGET_CRYPTO defines a crypto attribute for each architecture.
@@ -186,7 +152,7 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state,
}
// Enables native loads in the round loop by pre-swapping.
-inline ABSL_TARGET_CRYPTO void SwapEndian(u64x2* state) {
+inline ABSL_TARGET_CRYPTO void SwapEndian(absl::uint128* state) {
for (uint32_t block = 0; block < RandenTraits::kFeistelBlocks; ++block) {
Vector128Store(ReverseBytes(Vector128Load(state + block)), state + block);
}
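For readers of the hunk above: SwapEndian byte-reverses every 128-bit block once, so the round loop can use native-endian vector loads on big-endian targets. A scalar sketch of the same effect, assuming a raw byte view of the state (the real code goes through Vector128Load/Store):

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Illustrative byte-level SwapEndian: reverse each 16-byte block in
// place so subsequent 128-bit loads read native byte order.
inline void SwapEndianSketch(uint8_t* state, size_t num_blocks) {
  for (size_t block = 0; block < num_blocks; ++block) {
    std::reverse(state + 16 * block, state + 16 * (block + 1));
  }
}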
@@ -196,22 +162,6 @@ inline ABSL_TARGET_CRYPTO void SwapEndian(u64x2* state) {
#elif defined(ABSL_ARCH_ARM) || defined(ABSL_ARCH_AARCH64)
-// This asm directive will cause the file to be compiled with crypto extensions
-// whether or not the cpu-architecture supports it.
-#if ABSL_RANDEN_HWAES_IMPL_CRYPTO_DIRECTIVE
-asm(".arch_extension crypto\n");
-
-// Override missing defines.
-#if !defined(__ARM_NEON)
-#define __ARM_NEON 1
-#endif
-
-#if !defined(__ARM_FEATURE_CRYPTO)
-#define __ARM_FEATURE_CRYPTO 1
-#endif
-
-#endif
-
// Rely on the ARM NEON+Crypto advanced simd types, defined in <arm_neon.h>.
// uint8x16_t is the user alias for underlying __simd128_uint8_t type.
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
@@ -261,7 +211,7 @@ inline ABSL_TARGET_CRYPTO void SwapEndian(void*) {}
#elif defined(ABSL_ARCH_X86_64) || defined(ABSL_ARCH_X86_32)
// On x86 we rely on the aesni instructions
-#include <wmmintrin.h>
+#include <immintrin.h>
namespace {
@@ -270,7 +220,7 @@ namespace {
class Vector128 {
public:
// Convert from/to intrinsics.
- inline explicit Vector128(const __m128i& Vector128) : data_(Vector128) {}
+ inline explicit Vector128(const __m128i& v) : data_(v) {}
inline __m128i data() const { return data_; }
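The AesRound helper this Vector128 wrapper feeds into is, on x86, a single AES-NI instruction. A self-contained sketch without the wrapper (the function name is mine; _mm_aesenc_si128 itself is the real intrinsic):

#include <immintrin.h>

// One AES encryption round: ShiftRows, SubBytes, MixColumns on `state`,
// then XOR with `round_key`. Needs AES-NI, i.e. -maes or the
// ABSL_TARGET_CRYPTO function attribute this file defines.
inline __m128i AesRoundSketch(__m128i state, __m128i round_key) {
  return _mm_aesenc_si128(state, round_key);
}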
@@ -327,7 +277,7 @@ namespace {
// Block shuffle applies a shuffle to the entire state between AES rounds.
// Improved odd-even shuffle from "New criterion for diffusion property".
-inline ABSL_TARGET_CRYPTO void BlockShuffle(u64x2* state) {
+inline ABSL_TARGET_CRYPTO void BlockShuffle(absl::uint128* state) {
static_assert(RandenTraits::kFeistelBlocks == 16,
"Expecting 16 FeistelBlocks.");
@@ -374,8 +324,9 @@ inline ABSL_TARGET_CRYPTO void BlockShuffle(u64x2* state) {
// per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in
// parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel
// XORs are 'free' (included in the second AES instruction).
-inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound(
- u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
+inline ABSL_TARGET_CRYPTO const absl::uint128* FeistelRound(
+ absl::uint128* state,
+ const absl::uint128* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
static_assert(RandenTraits::kFeistelBlocks == 16,
"Expecting 16 FeistelBlocks.");
@@ -436,7 +387,8 @@ inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound(
// 2^64 queries if the round function is a PRF. This is similar to the b=8 case
// of Simpira v2, but more efficient than its generic construction for b=16.
inline ABSL_TARGET_CRYPTO void Permute(
- u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
+ absl::uint128* state,
+ const absl::uint128* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
// (Successfully unrolled; the first iteration jumps into the second half)
#ifdef __clang__
#pragma clang loop unroll_count(2)
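Permute, outlined: 16 Feistel rounds, each consuming eight round keys (one per branch pair) and followed by the inter-round block shuffle. A hedged sketch over the absl::uint128 view this change introduces; BlockShuffleSketch is the illustrative helper above:

#include <immintrin.h>
#include <cstddef>
#include "absl/numeric/int128.h"

void BlockShuffleSketch(absl::uint128* state);  // sketched earlier

// Illustrative moves between uint128 memory and SSE registers.
inline __m128i Load128(const absl::uint128* p) {
  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
}
inline void Store128(const __m128i v, absl::uint128* p) {
  _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
}

// Outline of Permute: kFeistelRounds == 16, 8 branch pairs per round.
inline void PermuteSketch(absl::uint128* state, const absl::uint128* keys) {
  for (size_t round = 0; round < 16; ++round) {
    for (size_t branch = 0; branch < 16; branch += 2) {
      const __m128i f =
          _mm_aesenc_si128(Load128(state + branch), Load128(keys++));
      Store128(_mm_aesenc_si128(f, Load128(state + branch + 1)),
               state + branch + 1);
    }
    BlockShuffleSketch(state);
  }
}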
@@ -473,10 +425,11 @@ void ABSL_TARGET_CRYPTO RandenHwAes::Absorb(const void* seed_void,
static_assert(RandenTraits::kStateBytes / sizeof(Vector128) == 16,
"Unexpected Randen kStateBlocks");
- auto* state =
- reinterpret_cast<u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(state_void);
+ auto* state = reinterpret_cast<absl::uint128 * ABSL_RANDOM_INTERNAL_RESTRICT>(
+ state_void);
const auto* seed =
- reinterpret_cast<const u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(seed_void);
+ reinterpret_cast<const absl::uint128 * ABSL_RANDOM_INTERNAL_RESTRICT>(
+ seed_void);
Vector128 b1 = Vector128Load(state + 1);
b1 ^= Vector128Load(seed + 0);
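The loads in this hunk follow a simple pattern: block 0 is the sponge capacity and is never touched by the seed, while each of the remaining 15 state blocks absorbs one seed block by XOR. In scalar form, as a hedged sketch:

#include "absl/numeric/int128.h"

// Illustrative Absorb: seed blocks 0..14 are XOR-ed into state blocks
// 1..15; the capacity block state[0] is left alone.
inline void AbsorbSketch(const absl::uint128* seed, absl::uint128* state) {
  for (int i = 1; i < 16; ++i) {  // kFeistelBlocks == 16
    state[i] ^= seed[i - 1];
  }
}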
@@ -545,8 +498,8 @@ void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys_void,
static_assert(RandenTraits::kCapacityBytes == sizeof(Vector128),
"Capacity mismatch");
- auto* state = reinterpret_cast<u64x2*>(state_void);
- const auto* keys = reinterpret_cast<const u64x2*>(keys_void);
+ auto* state = reinterpret_cast<absl::uint128*>(state_void);
+ const auto* keys = reinterpret_cast<const absl::uint128*>(keys_void);
const Vector128 prev_inner = Vector128Load(state);
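prev_inner at the end of this hunk is the backtracking-resistance feedback: Generate saves the capacity block, permutes the whole state, and XORs the saved value back in so a captured state cannot be run backwards to recover earlier output. A hedged sketch of that step:

#include "absl/numeric/int128.h"

void PermuteSketch(absl::uint128* state, const absl::uint128* keys);  // above

// Illustrative Generate feedback: save, permute, fold back.
inline void GenerateSketch(const absl::uint128* keys, absl::uint128* state) {
  const absl::uint128 prev_inner = state[0];
  PermuteSketch(state, keys);
  state[0] ^= prev_inner;
}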