Disabled external gits

This commit is contained in:
2022-04-07 18:46:57 +02:00
parent 88cb3426ad
commit 15e7120d6d
5316 changed files with 4563444 additions and 6 deletions

View File

@@ -0,0 +1,16 @@
# pcg32
This is a tiny self-contained C++ implementation of the PCG32 random number
generator, based on code by Melissa O'Neill available at http://www.pcg-random.org.
I decided to put together my own version because the official small
implementation lacks a C++ interface and various important features (e.g.
rewind/difference support, shuffling, floating point sample generation),
while the official C++ version is extremely complex and seems to be intended
for research on PRNGs involving the entire PCG family.
The file ``pcg32_8.h`` contains a vectorized implementation designed by myself
which runs eight PCG32 PRNGs in parallel. Expect to get a ~3-4x speedup when
generating single or double precision floats.
Wenzel Jakob
June 2016

View File

@@ -0,0 +1,109 @@
/*
* PCG Random Number Generation for C.
*
* Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For additional information about the PCG random number generation scheme,
* including its license and other licensing options, visit
*
* http://www.pcg-random.org
*/
/*
* This is the original demo application from the PCG library ported to the new API
*/
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "pcg32.h"
/**
 * Demo driver for the pcg32 generator.
 *
 * Seeds a single pcg32 instance with the fixed constants (42, 54) and then
 * prints, for each round: six raw 32-bit values, 65 coin tosses, 33 dice
 * rolls and a shuffled 52-card deck.
 *
 * Usage: <program> [rounds]
 *   rounds  optional decimal integer, number of rounds to print (default 5).
 *           Non-numeric or out-of-range values are rejected with a usage
 *           message and a non-zero exit status.
 */
int main(int argc, char** argv) {
    // Read command-line options
    int rounds = 5;
    if (argc > 1) {
        // strtol (unlike atoi) reports garbage and overflow, so a mistyped
        // argument is diagnosed instead of silently becoming "0 rounds".
        char *end = NULL;
        errno = 0;
        long parsed = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || errno == ERANGE ||
            parsed < INT_MIN || parsed > INT_MAX) {
            fprintf(stderr, "usage: %s [rounds]\n", argv[0]);
            return EXIT_FAILURE;
        }
        rounds = (int) parsed;
    }

    pcg32 rng;

    // You should *always* seed the RNG. The usual time to do it is the
    // point in time when you create RNG (typically at the beginning of the
    // program).
    //
    // pcg32::seed takes two 64-bit constants (the initial state, and the
    // rng sequence selector; rngs with different sequence selectors will
    // *never* have random sequences that coincide, at all)
    rng.seed(42u, 54u);

    printf("pcg32_random_r:\n"
           " - result: 32-bit unsigned int (uint32_t)\n"
           " - period: 2^64 (* 2^63 streams)\n"
           " - state type: pcg32_random_t (%zu bytes)\n"
           " - output func: XSH-RR\n"
           "\n",
           sizeof(pcg32));

    for (int round = 1; round <= rounds; ++round) {
        printf("Round %d:\n", round);

        /* Make some 32-bit numbers */
        printf(" 32bit:");
        for (int i = 0; i < 6; ++i)
            printf(" 0x%08x", rng.nextUInt());
        printf("\n");

        /* Toss some coins */
        printf(" Coins: ");
        for (int i = 0; i < 65; ++i)
            printf("%c", rng.nextUInt(2) ? 'H' : 'T');
        printf("\n");

        /* Roll some dice */
        printf(" Rolls:");
        for (int i = 0; i < 33; ++i) {
            printf(" %d", (int)rng.nextUInt(6) + 1);
        }
        printf("\n");

        /* Deal some cards: fill the deck with 0..51, then shuffle in place */
        enum { SUITS = 4, NUMBERS = 13, CARDS = 52 };
        char cards[CARDS];
        for (int i = 0; i < CARDS; ++i)
            cards[i] = i;
        rng.shuffle(cards, cards + CARDS);

        printf(" Cards:");
        static const char number[] = {'A', '2', '3', '4', '5', '6', '7',
                                      '8', '9', 'T', 'J', 'Q', 'K'};
        static const char suit[] = {'h', 'c', 'd', 's'};
        for (int i = 0; i < CARDS; ++i) {
            // card id -> rank (id / 4) and suit (id % 4)
            printf(" %c%c", number[cards[i] / SUITS], suit[cards[i] % SUITS]);
            // wrap the listing after every 22 cards
            if ((i + 1) % 22 == 0)
                printf("\n\t");
        }
        printf("\n");
        printf("\n");
    }
    return 0;
}

View File

@@ -0,0 +1,46 @@
pcg32_random_r:
- result: 32-bit unsigned int (uint32_t)
- period: 2^64 (* 2^63 streams)
- state type: pcg32_random_t (16 bytes)
- output func: XSH-RR
Round 1:
32bit: 0xa15c02b7 0x7b47f409 0xba1d3330 0x83d2f293 0xbfa4784b 0xcbed606e
Coins: HHTTTHTHHHTHTTTHHHHHTTTHHHTHTHTHTTHTTTHHHHHHTTTTHHTTTTTHTTTTTTTHT
Rolls: 3 4 1 1 2 2 3 2 4 3 2 4 3 3 5 2 3 1 3 1 5 1 4 1 5 6 4 6 6 2 6 3 3
Cards: Qd Ks 6d 3s 3d 4c 3h Td Kc 5c Jh Kd Jd As 4s 4h Ad Th Ac Jc 7s Qs
2s 7h Kh 2d 6c Ah 4d Qh 9h 6s 5s 2c 9c Ts 8d 9s 3c 8c Js 5d 2h 6h
7d 8s 9d 5h 8h Qc 7c Tc
Round 2:
32bit: 0x74ab93ad 0x1c1da000 0x494ff896 0x34462f2f 0xd308a3e5 0x0fa83bab
Coins: HHHHHHHHHHTHHHTHTHTHTHTTTTHHTTTHHTHHTHTTHHTTTHHHHHHTHTTHTHTTTTTTT
Rolls: 5 1 1 3 3 2 4 5 3 2 2 6 4 3 2 4 2 4 3 2 3 6 3 2 3 4 2 4 1 1 5 4 4
Cards: 7d 2s 7h Td 8s 3c 3d Js 2d Tc 4h Qs 5c 9c Th 2c Jc Qd 9d Qc 7s 3s
5s 6h 4d Jh 4c Ac 4s 5h 5d Kc 8h 8d Jd 9s Ad 6s 6c Kd 2h 3h Kh Ts
Qh 9h 6d As 7c Ks Ah 8c
Round 3:
32bit: 0x39af5f9f 0x04196b18 0xc3c3eb28 0xc076c60c 0xc693e135 0xf8f63932
Coins: HTTHHTTTTTHTTHHHTHTTHHTTHTHHTHTHTTTTHHTTTHHTHHTTHTTHHHTHHHTHTTTHT
Rolls: 5 1 5 3 2 2 4 5 3 3 1 3 4 6 3 2 3 4 2 2 3 1 5 2 4 6 6 4 2 4 3 3 6
Cards: Kd Jh Kc Qh 4d Qc 4h 9d 3c Kh Qs 8h 5c Jd 7d 8d 3h 7c 8s 3s 2h Ks
9c 9h 2c 8c Ad 7s 4s 2s 5h 6s 4c Ah 7h 5s Ac 3d 5d Qd As Tc 6h 9s
2d 6c 6d Td Jc Ts Th Js
Round 4:
32bit: 0x55ce6851 0x97a7726d 0x17e10815 0x58007d43 0x962fb148 0xb9bb55bd
Coins: HHTHHTTTTHTHHHHHTTHHHTTTHHTHTHTHTHHTTHTHHHHHHTHHTHHTHHTTTTHHTHHTT
Rolls: 6 6 3 2 3 4 2 6 4 2 6 3 2 3 5 5 3 4 4 6 6 2 6 5 4 4 6 1 6 1 3 6 5
Cards: Qd 8h 5d 8s 8d Ts 7h Th Qs Js 7s Kc 6h 5s 4d Ac Jd 7d 7c Td 2c 6s
5h 6d 3s Kd 9s Jh Kh As Ah 9h 3c Qh 9c 2d Tc 9d 2s 3d Ks 4h Qc Ad
Jc 8c 2h 3h 4s 4c 5c 6c
Round 5:
32bit: 0xfcef7cd6 0x1b488b5a 0xd0daf7ea 0x1d9a70f7 0x241a37cf 0x9a3857b7
Coins: HHHHTHHTTHTTHHHTTTHHTHTHTTTTHTTHTHTTTHHHTHTHTTHTTHTHHTHTHHHTHTHTT
Rolls: 5 4 1 2 6 1 3 1 5 6 3 6 2 1 4 4 5 2 1 5 6 5 6 4 4 4 5 2 6 4 3 5 6
Cards: 4d 9s Qc 9h As Qs 7s 4c Kd 6h 6s 2c 8c 5d 7h 5h Jc 3s 7c Jh Js Ks
Tc Jd Kc Th 3h Ts Qh Ad Td 3c Ah 2d 3d 5c Ac 8s 5s 9c 2h 6c 6d Kh
Qd 8d 7d 2s 8h 4h 9d 4s

209
cs440-acg/ext/pcg32/pcg32.h Normal file
View File

@@ -0,0 +1,209 @@
/*
* Tiny self-contained version of the PCG Random Number Generation for C++
* put together from pieces of the much larger C/C++ codebase.
* Wenzel Jakob, February 2015
*
* The PCG random number generator was developed by Melissa O'Neill
* <oneill@pcg-random.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For additional information about the PCG random number generation scheme,
* including its license and other licensing options, visit
*
* http://www.pcg-random.org
*/
#ifndef __PCG32_H
#define __PCG32_H 1
#define PCG32_DEFAULT_STATE 0x853c49e6748fea9bULL
#define PCG32_DEFAULT_STREAM 0xda3e39cb94b95bdbULL
#define PCG32_MULT 0x5851f42d4c957f2dULL
#include <inttypes.h>
#include <cmath>
#include <cassert>
#include <algorithm>
/**
 * \brief PCG32 pseudorandom number generator
 *
 * Holds a 64-bit state and a 64-bit (odd) increment; every call to
 * \ref nextUInt() advances the state by one linear congruential step
 * (multiplier \c PCG32_MULT, addend \c inc) and returns 32 output bits.
 */
struct pcg32 {
    /// Initialize the pseudorandom number generator with default seed
    pcg32() : state(PCG32_DEFAULT_STATE), inc(PCG32_DEFAULT_STREAM) {}

    /// Initialize the pseudorandom number generator with the \ref seed() function
    pcg32(uint64_t initstate, uint64_t initseq = 1u) { seed(initstate, initseq); }

    /**
     * \brief Seed the pseudorandom number generator
     *
     * Specified in two parts: a state initializer and a sequence selection
     * constant (a.k.a. stream id)
     *
     * \param initstate value added to the state between two warm-up steps
     * \param initseq   stream id; stored as <tt>(initseq << 1) | 1</tt> so
     *                  that the increment is always odd
     */
    void seed(uint64_t initstate, uint64_t initseq = 1) {
        state = 0U;
        inc = (initseq << 1u) | 1u;
        nextUInt();
        state += initstate;
        nextUInt();
    }

    /// Generate a uniformly distributed unsigned 32-bit random number
    uint32_t nextUInt() {
        uint64_t oldstate = state;
        // Advance the internal state (one LCG step)
        state = oldstate * PCG32_MULT + inc;
        // Output function: xorshift of the old state, then a rotation whose
        // amount is taken from the top five bits of the old state
        uint32_t xorshifted = (uint32_t) (((oldstate >> 18u) ^ oldstate) >> 27u);
        uint32_t rot = (uint32_t) (oldstate >> 59u);
        // (~rot + 1u) & 31 == (32 - rot) mod 32, which avoids a shift by 32
        return (xorshifted >> rot) | (xorshifted << ((~rot + 1u) & 31));
    }

    /**
     * \brief Generate a uniformly distributed number, r, where 0 <= r < bound
     *
     * \param bound exclusive upper limit; must be non-zero (it is used as a
     *              divisor)
     */
    uint32_t nextUInt(uint32_t bound) {
        // bound == 0 would divide by zero below; make the precondition
        // visible in debug builds.
        assert(bound != 0u);

        // To avoid bias, we need to make the range of the RNG a multiple of
        // bound, which we do by dropping output less than a threshold.
        // A naive scheme to calculate the threshold would be to do
        //
        //     uint32_t threshold = 0x100000000ull % bound;
        //
        // but 64-bit div/mod is slower than 32-bit div/mod (especially on
        // 32-bit platforms).  In essence, we do
        //
        //     uint32_t threshold = (0x100000000ull-bound) % bound;
        //
        // because this version will calculate the same modulus, but the LHS
        // value is less than 2^32.
        uint32_t threshold = (~bound+1u) % bound;

        // Uniformity guarantees that this loop will terminate.  In practice, it
        // should usually terminate quickly; on average (assuming all bounds are
        // equally likely), 82.25% of the time, we can expect it to require just
        // one iteration.  In the worst case, someone passes a bound of 2^31 + 1
        // (i.e., 2147483649), which invalidates almost 50% of the range.  In
        // practice, bounds are typically small and only a tiny amount of the range
        // is eliminated.
        for (;;) {
            uint32_t r = nextUInt();
            if (r >= threshold)
                return r % bound;
        }
    }

    /// Generate a single precision floating point value on the interval [0, 1)
    float nextFloat() {
        /* Trick from MTGP: generate an uniformly distributed
           single precision number in [1,2) and subtract 1. */
        union {
            uint32_t u;
            float f;
        } x;
        // Keep 23 random bits as mantissa; 0x3f800000 is the bit pattern of 1.0f
        x.u = (nextUInt() >> 9) | 0x3f800000u;
        return x.f - 1.0f;
    }

    /**
     * \brief Generate a double precision floating point value on the interval [0, 1)
     *
     * \remark Since the underlying random number generator produces 32 bit output,
     * only the first 32 mantissa bits will be filled (however, the resolution is still
     * finer than in \ref nextFloat(), which only uses 23 mantissa bits)
     */
    double nextDouble() {
        /* Trick from MTGP: generate an uniformly distributed
           double precision number in [1,2) and subtract 1. */
        union {
            uint64_t u;
            double d;
        } x;
        // Place the 32 random bits at the top of the 52-bit mantissa;
        // 0x3ff0000000000000 is the bit pattern of 1.0
        x.u = ((uint64_t) nextUInt() << 20) | 0x3ff0000000000000ULL;
        return x.d - 1.0;
    }

    /**
     * \brief Multi-step advance function (jump-ahead, jump-back)
     *
     * The method used here is based on Brown, "Random Number Generation
     * with Arbitrary Stride", Transactions of the American Nuclear
     * Society (Nov. 1994). The algorithm is very similar to fast
     * exponentiation.
     *
     * \param delta_ number of steps to advance; negative values go backwards
     */
    void advance(int64_t delta_) {
        uint64_t
            cur_mult = PCG32_MULT,
            cur_plus = inc,
            acc_mult = 1u,
            acc_plus = 0u;

        /* Even though delta is an unsigned integer, we can pass a signed
           integer to go backwards, it just goes "the long way round". */
        uint64_t delta = (uint64_t) delta_;

        while (delta > 0) {
            if (delta & 1) {
                acc_mult *= cur_mult;
                acc_plus = acc_plus * cur_mult + cur_plus;
            }
            // Square the current step: compose the affine map with itself
            cur_plus = (cur_mult + 1) * cur_plus;
            cur_mult *= cur_mult;
            delta /= 2;
        }
        state = acc_mult * state + acc_plus;
    }

    /**
     * \brief Draw uniformly distributed permutation and permute the
     * given STL container
     *
     * From: Knuth, TAoCP Vol. 2 (3rd ed.), Section 3.4.2
     *
     * \param begin random-access iterator to the first element
     * \param end   random-access iterator one past the last element
     *
     * An empty range is left untouched and consumes no random numbers.
     */
    template <typename Iterator> void shuffle(Iterator begin, Iterator end) {
        // Nothing to permute; return before forming `end - 1`, which would
        // step in front of `begin` for an empty range.
        if (begin == end)
            return;
        for (Iterator it = end - 1; it > begin; --it)
            std::iter_swap(it, begin + nextUInt((uint32_t) (it - begin + 1)));
    }

    /**
     * \brief Compute the distance between two PCG32 pseudorandom number generators
     *
     * Both generators must use the same increment (asserted).
     *
     * \return the number of steps that take \c other.state to \c state,
     *         reinterpreted as a signed 64-bit integer
     */
    int64_t operator-(const pcg32 &other) const {
        assert(inc == other.inc);

        uint64_t
            cur_mult = PCG32_MULT,
            cur_plus = inc,
            cur_state = other.state,
            the_bit = 1u,
            distance = 0u;

        // Resolve the distance one bit at a time, from least significant up:
        // whenever the current bit of the two states differs, apply the
        // 2^k-step jump and record that bit in the distance.
        while (state != cur_state) {
            if ((state & the_bit) != (cur_state & the_bit)) {
                cur_state = cur_state * cur_mult + cur_plus;
                distance |= the_bit;
            }
            assert((state & the_bit) == (cur_state & the_bit));
            the_bit <<= 1;
            cur_plus = (cur_mult + 1ULL) * cur_plus;
            cur_mult *= cur_mult;
        }

        return (int64_t) distance;
    }

    /// Equality operator
    bool operator==(const pcg32 &other) const { return state == other.state && inc == other.inc; }

    /// Inequality operator
    bool operator!=(const pcg32 &other) const { return state != other.state || inc != other.inc; }

    uint64_t state;  // RNG state.  All values are possible.
    uint64_t inc;    // Controls which RNG sequence (stream) is selected. Must *always* be odd.
};
#endif // __PCG32_H

View File

@@ -0,0 +1,284 @@
/*
* Vectorized AVX2 version of the PCG32 random number generator developed by
* Wenzel Jakob (June 2016)
*
* The PCG random number generator was developed by Melissa O'Neill
* <oneill@pcg-random.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For additional information about the PCG random number generation scheme,
* including its license and other licensing options, visit
*
* http://www.pcg-random.org
*/
#include "pcg32.h"
#include <immintrin.h>
#include <utility>
#if defined(_MSC_VER)
# define PCG32_ALIGN(amt) __declspec(align(amt))
# define PCG32_VECTORCALL __vectorcall
# define PCG32_INLINE __forceinline
#else
# define PCG32_ALIGN(amt) __attribute__ ((aligned(amt)))
# define PCG32_INLINE __attribute__ ((always_inline))
# if defined(__clang__)
# define PCG32_VECTORCALL __attribute__ ((vectorcall))
# else
# define PCG32_VECTORCALL
# endif
#endif
/**
 * \brief 8 parallel PCG32 pseudorandom number generators
 *
 * When compiled with AVX2 support, the eight 64-bit states and increments
 * are kept in two 256-bit registers each (generators 0-3 and 4-7). Otherwise
 * the struct falls back to an array of eight scalar \ref pcg32 instances.
 *
 * The array-based entry points (\c seed(), \c nextUInt(uint32_t[8]),
 * \c nextFloat(float[8]), \c nextDouble(double[8])) use unaligned loads and
 * stores, so caller-supplied buffers need no particular alignment.
 */
struct PCG32_ALIGN(32) pcg32_8 {
#if defined(__AVX2__)
    __m256i state[2]; // RNG state.  All values are possible.
    __m256i inc[2];   // Controls which RNG sequence (stream) is selected. Must *always* be odd.
#else
    /* Scalar fallback */
    pcg32 rng[8];
#endif

    /// Initialize the pseudorandom number generator with default seed
    pcg32_8() {
        PCG32_ALIGN(32) uint64_t initstate[8] = {
            PCG32_DEFAULT_STATE, PCG32_DEFAULT_STATE,
            PCG32_DEFAULT_STATE, PCG32_DEFAULT_STATE,
            PCG32_DEFAULT_STATE, PCG32_DEFAULT_STATE,
            PCG32_DEFAULT_STATE, PCG32_DEFAULT_STATE
        };

        // Same initial state everywhere, but a distinct stream id per lane
        PCG32_ALIGN(32) uint64_t initseq[8] =
            { 1, 2, 3, 4, 5, 6, 7, 8 };

        seed(initstate, initseq);
    }

    /// Initialize the pseudorandom number generator with the \ref seed() function
    pcg32_8(const uint64_t initstate[8], const uint64_t initseq[8]) {
        seed(initstate, initseq);
    }

#if defined(__AVX2__)
    /**
     * \brief Seed the pseudorandom number generator
     *
     * Specified in two parts: a state initializer and a sequence selection
     * constant (a.k.a. stream id)
     *
     * \param initstate eight state initializers, one per generator
     * \param initseq   eight stream ids, one per generator
     */
    void seed(const uint64_t initstate[8], const uint64_t initseq[8]) {
        const __m256i one = _mm256_set1_epi64x((long long) 1);

        state[0] = state[1] = _mm256_setzero_si256();

        // Unaligned loads: the inputs are plain arrays supplied by the
        // caller, which carry no 32-byte alignment guarantee.
        inc[0] = _mm256_or_si256(
            _mm256_slli_epi64(_mm256_loadu_si256((const __m256i *) &initseq[0]), 1),
            one);
        inc[1] = _mm256_or_si256(
            _mm256_slli_epi64(_mm256_loadu_si256((const __m256i *) &initseq[4]), 1),
            one);
        step();

        state[0] = _mm256_add_epi64(state[0], _mm256_loadu_si256((const __m256i *) &initstate[0]));
        state[1] = _mm256_add_epi64(state[1], _mm256_loadu_si256((const __m256i *) &initstate[4]));

        step();
    }

    /// Generate 8 uniformly distributed unsigned 32-bit random numbers
    void nextUInt(uint32_t result[8]) {
        _mm256_storeu_si256((__m256i *) result, step());
    }

    /// Generate 8 uniformly distributed unsigned 32-bit random numbers
    __m256i PCG32_VECTORCALL nextUInt() {
        return step();
    }

    /// Generate eight single precision floating point values on the interval [0, 1)
    __m256 PCG32_VECTORCALL nextFloat() {
        /* Trick from MTGP: generate an uniformly distributed
           single precision number in [1,2) and subtract 1. */

        // 0x3f800000 is the bit pattern of 1.0f
        const __m256i const1 = _mm256_set1_epi32((int) 0x3f800000u);

        __m256i value = step();
        __m256i fltval = _mm256_or_si256(_mm256_srli_epi32(value, 9), const1);

        return _mm256_sub_ps(_mm256_castsi256_ps(fltval),
                             _mm256_castsi256_ps(const1));
    }

    /// Generate eight single precision floating point values on the interval [0, 1)
    void nextFloat(float result[8]) {
        _mm256_storeu_ps(result, nextFloat());
    }

    /**
     * \brief Generate eight double precision floating point values on the interval [0, 1)
     *
     * \remark Since the underlying random number generator produces 32 bit output,
     * only the first 32 mantissa bits will be filled (however, the resolution is still
     * finer than in \ref nextFloat(), which only uses 23 mantissa bits)
     *
     * \return a pair holding the values of generators 0-3 (first) and 4-7 (second)
     */
    std::pair<__m256d, __m256d> nextDouble() {
        /* Trick from MTGP: generate an uniformly distributed
           double precision number in [1,2) and subtract 1. */

        // 0x3ff0000000000000 is the bit pattern of 1.0
        const __m256i const1 =
            _mm256_set1_epi64x((long long) 0x3ff0000000000000ull);

        __m256i value = step();

        // Widen the low / high four 32-bit outputs to 64-bit lanes
        __m256i lo = _mm256_cvtepu32_epi64(_mm256_castsi256_si128(value));
        __m256i hi = _mm256_cvtepu32_epi64(_mm256_extractf128_si256(value, 1));

        __m256i tlo = _mm256_or_si256(_mm256_slli_epi64(lo, 20), const1);
        __m256i thi = _mm256_or_si256(_mm256_slli_epi64(hi, 20), const1);

        __m256d flo = _mm256_sub_pd(_mm256_castsi256_pd(tlo),
                                    _mm256_castsi256_pd(const1));

        __m256d fhi = _mm256_sub_pd(_mm256_castsi256_pd(thi),
                                    _mm256_castsi256_pd(const1));

        return std::make_pair(flo, fhi);
    }

    /**
     * \brief Generate eight double precision floating point values on the interval [0, 1)
     *
     * \remark Since the underlying random number generator produces 32 bit output,
     * only the first 32 mantissa bits will be filled (however, the resolution is still
     * finer than in \ref nextFloat(), which only uses 23 mantissa bits)
     */
    void nextDouble(double result[8]) {
        std::pair<__m256d, __m256d> value = nextDouble();

        _mm256_storeu_pd(&result[0], value.first);
        _mm256_storeu_pd(&result[4], value.second);
    }

private:
    /// Advance all eight generators by one step and return their 32-bit outputs
    PCG32_INLINE __m256i PCG32_VECTORCALL step() {
        const __m256i pcg32_mult_l = _mm256_set1_epi64x((long long) (PCG32_MULT & 0xffffffffu));
        const __m256i pcg32_mult_h = _mm256_set1_epi64x((long long) (PCG32_MULT >> 32));
        const __m256i mask_l       = _mm256_set1_epi64x((long long) 0x00000000ffffffffull);
        // Permutation indices: gather the even 32-bit lanes (0, 2, 4, 6) into
        // the lower (shift0) or upper (shift1) half; the other half reads
        // lane 7, which is zero in the vectors these are applied to.
        const __m256i shift0       = _mm256_set_epi32(7, 7, 7, 7, 6, 4, 2, 0);
        const __m256i shift1       = _mm256_set_epi32(6, 4, 2, 0, 7, 7, 7, 7);
        const __m256i const32      = _mm256_set1_epi32(32);

        __m256i s0 = state[0], s1 = state[1];

        /* Extract low and high words for partial products below */
        __m256i s0_l = _mm256_and_si256(s0, mask_l);
        __m256i s0_h = _mm256_srli_epi64(s0, 32);
        __m256i s1_l = _mm256_and_si256(s1, mask_l);
        __m256i s1_h = _mm256_srli_epi64(s1, 32);

        /* Improve high bits using xorshift step */
        __m256i s0s   = _mm256_srli_epi64(s0, 18);
        __m256i s1s   = _mm256_srli_epi64(s1, 18);

        __m256i s0x   = _mm256_xor_si256(s0s, s0);
        __m256i s1x   = _mm256_xor_si256(s1s, s1);

        __m256i s0xs  = _mm256_srli_epi64(s0x, 27);
        __m256i s1xs  = _mm256_srli_epi64(s1x, 27);

        __m256i xors0 = _mm256_and_si256(mask_l, s0xs);
        __m256i xors1 = _mm256_and_si256(mask_l, s1xs);

        /* Use high bits to choose a bit-level rotation */
        __m256i rot0  = _mm256_srli_epi64(s0, 59);
        __m256i rot1  = _mm256_srli_epi64(s1, 59);

        /* 64 bit multiplication using 32 bit partial products :( */
        __m256i m0_hl = _mm256_mul_epu32(s0_h, pcg32_mult_l);
        __m256i m1_hl = _mm256_mul_epu32(s1_h, pcg32_mult_l);
        __m256i m0_lh = _mm256_mul_epu32(s0_l, pcg32_mult_h);
        __m256i m1_lh = _mm256_mul_epu32(s1_l, pcg32_mult_h);

        /* Assemble lower 32 bits, will be merged into one 256 bit vector below */
        xors0 = _mm256_permutevar8x32_epi32(xors0, shift0);
        rot0  = _mm256_permutevar8x32_epi32(rot0, shift0);
        xors1 = _mm256_permutevar8x32_epi32(xors1, shift1);
        rot1  = _mm256_permutevar8x32_epi32(rot1, shift1);

        /* Continue with partial products */
        __m256i m0_ll = _mm256_mul_epu32(s0_l, pcg32_mult_l);
        __m256i m1_ll = _mm256_mul_epu32(s1_l, pcg32_mult_l);

        __m256i m0h   = _mm256_add_epi64(m0_hl, m0_lh);
        __m256i m1h   = _mm256_add_epi64(m1_hl, m1_lh);

        __m256i m0hs  = _mm256_slli_epi64(m0h, 32);
        __m256i m1hs  = _mm256_slli_epi64(m1h, 32);

        __m256i s0n   = _mm256_add_epi64(m0hs, m0_ll);
        __m256i s1n   = _mm256_add_epi64(m1hs, m1_ll);

        __m256i xors  = _mm256_or_si256(xors0, xors1);
        __m256i rot   = _mm256_or_si256(rot0, rot1);

        state[0] = _mm256_add_epi64(s0n, inc[0]);
        state[1] = _mm256_add_epi64(s1n, inc[1]);

        /* Finally, rotate and return the result */
        __m256i result = _mm256_or_si256(
            _mm256_srlv_epi32(xors, rot),
            _mm256_sllv_epi32(xors, _mm256_sub_epi32(const32, rot))
        );

        return result;
    }
#else
    /**
     * \brief Seed the pseudorandom number generator
     *
     * Specified in two parts: a state initializer and a sequence selection
     * constant (a.k.a. stream id)
     */
    void seed(const uint64_t initstate[8], const uint64_t initseq[8]) {
        for (int i = 0; i < 8; ++i)
            rng[i].seed(initstate[i], initseq[i]);
    }

    /// Generate 8 uniformly distributed unsigned 32-bit random numbers
    void nextUInt(uint32_t result[8]) {
        for (int i = 0; i < 8; ++i)
            result[i] = rng[i].nextUInt();
    }

    /// Generate eight single precision floating point values on the interval [0, 1)
    void nextFloat(float result[8]) {
        for (int i = 0; i < 8; ++i)
            result[i] = rng[i].nextFloat();
    }

    /**
     * \brief Generate eight double precision floating point values on the interval [0, 1)
     *
     * \remark Since the underlying random number generator produces 32 bit output,
     * only the first 32 mantissa bits will be filled (however, the resolution is still
     * finer than in \ref nextFloat(), which only uses 23 mantissa bits)
     */
    void nextDouble(double result[8]) {
        for (int i = 0; i < 8; ++i)
            result[i] = rng[i].nextDouble();
    }
#endif
};