Skip to content

Commit

Permalink
Merge #15250: Use RdSeed when available, and reduce RdRand load
Browse files Browse the repository at this point in the history
1435fab Use RdSeed when available, and reduce RdRand load (Pieter Wuille)

Pull request description:

  This introduces support for autodetecting and using the RdSeed instruction on x86/x86_64 systems.

  In addition:
  * In SeedFast, only 64 bits of entropy are generated through RdRand (256 was relatively slow).
  * In SeedStartup, 256 bits of entropy are generated, using RdSeed (preferably) or RdRand (otherwise).

Tree-SHA512: fb7d3e22e93e14592f4b07282aa79d7c3cc4e9debdd9978580b8d2562bbad345e289bf3f80de2c50c9b50b8bac2aa9b838f9f272f7f8d43f1efc0913aa8acce3
  • Loading branch information
laanwj committed Feb 18, 2019
2 parents f5a623e + 1435fab commit 29e82e4
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 39 deletions.
172 changes: 134 additions & 38 deletions src/random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,119 @@ static inline int64_t GetPerformanceCounter() noexcept
}

#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
// Availability flags for the hardware RNG instructions; they start out false and are
// set by InitHardwareRand() when CPUID reports the corresponding feature bit.
static bool rdrand_supported = false;
static bool g_rdrand_supported = false;
static bool g_rdseed_supported = false;
// RDRAND feature flag: bit 30 of ECX as returned by CPUID leaf 1.
static constexpr uint32_t CPUID_F1_ECX_RDRAND = 0x40000000;
// RDSEED feature flag: bit 18 of EBX as returned by CPUID leaf 7 (subleaf 0).
static constexpr uint32_t CPUID_F7_EBX_RDSEED = 0x00040000;
// When bit_RDRND / bit_RDSEED are defined as macros, verify at compile time that
// they agree with the constants above.
#ifdef bit_RDRND
static_assert(CPUID_F1_ECX_RDRAND == bit_RDRND, "Unexpected value for bit_RDRND");
#endif
#ifdef bit_RDSEED
static_assert(CPUID_F7_EBX_RDSEED == bit_RDSEED, "Unexpected value for bit_RDSEED");
#endif
/** Execute CPUID for the given leaf and subleaf.
*
* The resulting EAX, EBX, ECX and EDX values are stored in a, b, c and d respectively.
*/
static void inline GetCPUID(uint32_t leaf, uint32_t subleaf, uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d)
{
// We can't use __get_cpuid as it doesn't support subleafs.
#ifdef __GNUC__
__cpuid_count(leaf, subleaf, a, b, c, d);
#else
// Fallback: issue cpuid directly. The "0" and "2" constraints tie leaf to the EAX
// operand and subleaf to the ECX operand, so they are loaded as the instruction's inputs.
__asm__ ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(leaf), "2"(subleaf));
#endif
}

/** Query CPUID and record in the file-level flags whether RdRand and RdSeed are available. */
static void InitHardwareRand()
{
uint32_t eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx & CPUID_F1_ECX_RDRAND)) {
rdrand_supported = true;
// Leaf 1: the RDRAND feature bit is reported in ECX.
GetCPUID(1, 0, eax, ebx, ecx, edx);
if (ecx & CPUID_F1_ECX_RDRAND) {
g_rdrand_supported = true;
}
// Leaf 7, subleaf 0: the RDSEED feature bit is reported in EBX.
// NOTE(review): leaf 7 is queried without first checking the maximum supported basic
// leaf (CPUID leaf 0) -- confirm this is safe on CPUs that do not implement leaf 7.
GetCPUID(7, 0, eax, ebx, ecx, edx);
if (ebx & CPUID_F7_EBX_RDSEED) {
g_rdseed_supported = true;
}
}

/** Log which hardware entropy instructions (RdSeed and/or RdRand) were detected. */
static void ReportHardwareRand()
{
if (rdrand_supported) {
// This must be done in a separate function, as InitHardwareRand() may be indirectly called
// from global constructors, before logging is initialized.
// This must be done in a separate function, as InitHardwareRand() may be indirectly called
// from global constructors, before logging is initialized.
if (g_rdseed_supported) {
LogPrintf("Using RdSeed as additional entropy source\n");
}
if (g_rdrand_supported) {
LogPrintf("Using RdRand as an additional entropy source\n");
}
}

/** Read 64 bits of entropy using rdrand.
*
* Must only be called when RdRand is supported.
*
* NOTE(review): if every attempt fails, the function still returns whatever the last
* attempt left in the output register; the failure is not reported to the caller.
*/
static uint64_t GetRdRand() noexcept
{
// RdRand may very rarely fail. Invoke it up to 10 times in a loop to reduce this risk.
// The instruction is emitted as raw opcode bytes. After each attempt, setc copies the
// carry flag into ok, which the loops below treat as the success indicator.
#ifdef __i386__
// 32-bit build: perform two independent 32-bit reads, each with its own retry loop.
uint8_t ok;
uint32_t r1, r2;
for (int i = 0; i < 10; ++i) {
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdrand %eax
if (ok) break;
}
for (int i = 0; i < 10; ++i) {
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r2), "=q"(ok) :: "cc"); // rdrand %eax
if (ok) break;
}
// r1 supplies the low 32 bits of the result, r2 the high 32 bits.
return (((uint64_t)r2) << 32) | r1;
#elif defined(__x86_64__) || defined(__amd64__)
// 64-bit build: the extra 0x48 prefix byte selects the 64-bit form (rdrand %rax),
// so a single successful read yields all 64 bits.
uint8_t ok;
uint64_t r1;
for (int i = 0; i < 10; ++i) {
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdrand %rax
if (ok) break;
}
return r1;
#else
#error "RdRand is only supported on x86 and x86_64"
#endif
}

/** Read 64 bits of entropy using rdseed.
*
* Must only be called when RdSeed is supported.
*
* Unlike GetRdRand(), there is no retry limit: this function only returns once the
* instruction has reported success.
*/
static uint64_t GetRdSeed() noexcept
{
// RdSeed may fail when the HW RNG is overloaded. Loop indefinitely until enough entropy is gathered,
// but pause after every failure.
// The instruction is emitted as raw opcode bytes. After each attempt, setc copies the
// carry flag into ok, which the loops below treat as the success indicator.
#ifdef __i386__
// 32-bit build: perform two independent 32-bit reads, each retried until it succeeds.
uint8_t ok;
uint32_t r1, r2;
do {
__asm__ volatile (".byte 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdseed %eax
if (ok) break;
__asm__ volatile ("pause");
} while(true);
do {
__asm__ volatile (".byte 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r2), "=q"(ok) :: "cc"); // rdseed %eax
if (ok) break;
__asm__ volatile ("pause");
} while(true);
// r1 supplies the low 32 bits of the result, r2 the high 32 bits.
return (((uint64_t)r2) << 32) | r1;
#elif defined(__x86_64__) || defined(__amd64__)
// 64-bit build: the extra 0x48 prefix byte selects the 64-bit form (rdseed %rax),
// so a single successful read yields all 64 bits.
uint8_t ok;
uint64_t r1;
do {
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdseed %rax
if (ok) break;
__asm__ volatile ("pause");
} while(true);
return r1;
#else
#error "RdSeed is only supported on x86 and x86_64"
#endif
}

#else
/* Access to other hardware random number generators could be added here later,
* assuming it is sufficiently fast (in the order of a few hundred CPU cycles).
Expand All @@ -107,40 +201,40 @@ static void InitHardwareRand() {}
static void ReportHardwareRand() {}
#endif

static bool GetHardwareRand(unsigned char* ent32) noexcept {
/** Add 64 bits of entropy gathered from hardware to hasher. Do nothing if not supported. */
static void SeedHardwareFast(CSHA512& hasher) noexcept {
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
if (rdrand_supported) {
uint8_t ok;
// Not all assemblers support the rdrand instruction, write it in hex.
#ifdef __i386__
for (int iter = 0; iter < 4; ++iter) {
uint32_t r1, r2;
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0;" // rdrand %eax
".byte 0x0f, 0xc7, 0xf2;" // rdrand %edx
"setc %2" :
"=a"(r1), "=d"(r2), "=q"(ok) :: "cc");
if (!ok) return false;
WriteLE32(ent32 + 8 * iter, r1);
WriteLE32(ent32 + 8 * iter + 4, r2);
}
#else
uint64_t r1, r2, r3, r4;
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf0, " // rdrand %rax
"0x48, 0x0f, 0xc7, 0xf3, " // rdrand %rbx
"0x48, 0x0f, 0xc7, 0xf1, " // rdrand %rcx
"0x48, 0x0f, 0xc7, 0xf2; " // rdrand %rdx
"setc %4" :
"=a"(r1), "=b"(r2), "=c"(r3), "=d"(r4), "=q"(ok) :: "cc");
if (!ok) return false;
WriteLE64(ent32, r1);
WriteLE64(ent32 + 8, r2);
WriteLE64(ent32 + 16, r3);
WriteLE64(ent32 + 24, r4);
// A single GetRdRand() call supplies the 64 bits; the raw bytes of the value are
// written to the hasher.
if (g_rdrand_supported) {
uint64_t out = GetRdRand();
hasher.Write((const unsigned char*)&out, sizeof(out));
return;
}
#endif
return true;
}

/** Add 256 bits of entropy gathered from hardware to hasher. Do nothing if not supported. */
static void SeedHardwareSlow(CSHA512& hasher) noexcept {
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
// When we want 256 bits of entropy, prefer RdSeed over RdRand, as it's
// guaranteed to produce independent randomness on every call.
if (g_rdseed_supported) {
// Four 64-bit RdSeed reads make up the 256 bits.
for (int i = 0; i < 4; ++i) {
uint64_t out = GetRdSeed();
hasher.Write((const unsigned char*)&out, sizeof(out));
}
return;
}
// When falling back to RdRand, XOR together the results of 1024 calls to
// produce each of the four 64-bit values written to the hasher.
// This guarantees a reseeding occurs between each.
if (g_rdrand_supported) {
for (int i = 0; i < 4; ++i) {
uint64_t out = 0;
for (int j = 0; j < 1024; ++j) out ^= GetRdRand();
hasher.Write((const unsigned char*)&out, sizeof(out));
}
return;
}
#endif
return false;
}

static void RandAddSeedPerfmon(CSHA512& hasher)
Expand Down Expand Up @@ -407,8 +501,7 @@ static void SeedFast(CSHA512& hasher) noexcept
hasher.Write((const unsigned char*)&ptr, sizeof(ptr));

// Hardware randomness is very fast when available; use it always.
bool have_hw_rand = GetHardwareRand(buffer);
if (have_hw_rand) hasher.Write(buffer, sizeof(buffer));
SeedHardwareFast(hasher);

// High-precision timestamp
SeedTimestamp(hasher);
Expand Down Expand Up @@ -460,6 +553,9 @@ static void SeedStartup(CSHA512& hasher) noexcept
RAND_screen();
#endif

// Gather 256 bits of hardware randomness, if available
SeedHardwareSlow(hasher);

// Everything that the 'slow' seeder includes.
SeedSlow(hasher);

Expand Down
3 changes: 2 additions & 1 deletion src/random.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
* perform 'fast' seeding, consisting of mixing in:
* - A stack pointer (indirectly committing to calling thread and call stack)
* - A high-precision timestamp (rdtsc when available, c++ high_resolution_clock otherwise)
* - Hardware RNG (rdrand) when available.
* - 64 bits from the hardware RNG (rdrand) when available.
* These entropy sources are very fast, and only designed to protect against situations
* where a VM state restore/copy results in multiple systems with the same randomness.
* FastRandomContext on the other hand does not protect against this once created, but
Expand All @@ -48,6 +48,7 @@
*
* On first use of the RNG (regardless of what function is called first), all entropy
* sources used in the 'slow' seeder are included, but also:
* - 256 bits from the hardware RNG (rdseed or rdrand) when available.
* - (On Windows) Performance monitoring data from the OS.
* - (On Windows) Through OpenSSL, the screen contents.
*
Expand Down

0 comments on commit 29e82e4

Please sign in to comment.