Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 07a0ca81 by Raymond Toy at 2024-03-15T20:53:00+00:00 Fix #276: Implement xoroshiro128**
- - - - - 335588ce by Raymond Toy at 2024-03-15T20:53:04+00:00 Merge branch 'issue-276-xoroshiro128starstar' into 'master'
Fix #276: Implement xoroshiro128**
Closes #276
See merge request cmucl/cmucl!192 - - - - -
7 changed files:
- bin/run-ansi-tests.sh - src/code/rand-xoroshiro.lisp - src/compiler/x86/arith.lisp - src/general-info/release-21f.md - src/i18n/locale/cmucl.pot - tests/rng.lisp - + tests/rng/test-128-star-star.c
Changes:
===================================== bin/run-ansi-tests.sh ===================================== @@ -33,14 +33,15 @@ shift $[$OPTIND - 1]
set -x if [ -d ../ansi-test ]; then - # We already have clone; make sure it's clean by stashing any changes. - (cd ../ansi-test; git stash) + # We already have clone; make sure it's clean by stashing any + # changes. Then pull any updates. + (cd ../ansi-test; git stash; git pull --rebase) else (cd ../; git clone https://gitlab.common-lisp.net/cmucl/ansi-test.git) fi
cd ../ansi-test -git checkout cmucl-expected-failures +git checkout issue-276-xoroshiro
make LISP="$LISP batch -noinit -nositeinit" # There should be no unexpected successes or failures; check these separately
===================================== src/code/rand-xoroshiro.lisp ===================================== @@ -10,7 +10,7 @@ ;;; ;;; ********************************************************************** ;;; -;;; Support for the xoroshiro128+ random number generator by David +;;; Support for the xoroshiro128** random number generator by David ;;; Blackman and Sebastiano Vigna (vigna@acm.org). See ;;; http://xoroshiro.di.unimi.it/.
@@ -225,127 +225,143 @@ ;;;; Random entries:
-;; Sparc and x86 have vops to implement xoroshiro-gen that are much -;; faster than the portable lisp version. Use them. -#+(or x86 sparc) +;; X86 has a vop to implement xoroshiro-gen that is about 4.5 times +;; faster than the portable lisp version below. For other +;; architectures, we use the portable version until a vop is written. +#+x86 (declaim (inline xoroshiro-gen)) -#+(or x86 sparc) +#+x86 (defun xoroshiro-gen (state) + _N"Generate the next 64-bit result from the xoroshiro128** generator + using the state in STATE, a simple-array of 2 double-floats. The + 64-bit result is returned as 2 32-bit values, with the high 32-bits + being the first value." (declare (type (simple-array double-float (2)) state) (optimize (speed 3) (safety 0))) (vm::xoroshiro-next state))
-#-(or x86 sparc) +#-x86 (defun xoroshiro-gen (state) + _N"Generate the next 64-bit result from the xoroshiro128** generator + using the state in STATE, a simple-array of 2 double-floats. The + 64-bit result is returned as 2 32-bit values, with the high 32-bits + being the first value." (declare (type (simple-array double-float (2)) state) (optimize (speed 3) (safety 0))) - ;; Portable implementation of the xoroshiro128+ generator. See - ;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c for the - ;; definitive definition. - ;; - ;; uint64_t s[2]; - ;; - ;; static inline uint64_t rotl(const uint64_t x, int k) { - ;; return (x << k) | (x >> (64 - k)); - ;; } - ;; - ;; uint64_t next(void) { - ;; const uint64_t s0 = s[0]; - ;; uint64_t s1 = s[1]; - ;; const uint64_t result = s0 + s1; - ;; - ;; s1 ^= s0; - ;; s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b - ;; s[1] = rotl(s1, 36); // c - ;; - ;; return result; - ;; } - ;; - (flet ((rotl-55 (x1 x0) - ;; Rotate [x1|x0] left 55 bits, returning the result as two - ;; values. - (declare (type (unsigned-byte 32) x0 x1) - (optimize (speed 3) (safety 0))) - ;; x << 55 - (let ((sl55-h (ldb (byte 32 0) (ash x0 (- 55 32)))) - (sl55-l 0)) - ;; x >> 9 - (let ((sr9-h (ash x1 -9)) - (sr9-l (ldb (byte 32 0) - (logior (ash x0 -9) - (ash x1 23))))) - (values (logior sl55-h sr9-h) - (logior sl55-l sr9-l))))) - (rotl-36 (x1 x0) - ;; Rotate [x1|x0] left 36 bits, returning the result as two - ;; values. - (declare (type (unsigned-byte 32) x0 x1) - (optimize (speed 3) (safety 0))) - ;; x << 36 - (let ((sl36-h (ldb (byte 32 0) (ash x0 4)))) - ;; x >> 28 - (let ((sr28-l (ldb (byte 32 0) - (logior (ash x0 -28) - (ash x1 4)))) - (sr28-h (ash x1 -28))) - (values (logior sl36-h sr28-h) - sr28-l)))) - (shl-14 (x1 x0) - ;; Shift [x1|x0] left by 14 bits, returning the result as - ;; two values. - (declare (type (unsigned-byte 32) x1 x0) - (optimize (speed 3) (safety 0))) - (values (ldb (byte 32 0) - (logior (ash x1 14) - (ash x0 (- 14 32)))) - (ldb (byte 32 0) - (ash x0 14)))) - (make-double (hi lo) - (kernel:make-double-float - (if (< hi #x80000000) - hi - (- hi #x100000000)) - lo))) + (flet + ((make-double (hi lo) + ;; Convert [hi lo] to a double-float, where hi and lo are the + ;; high and low 32 bits of a 64-bit double-float. + (declare (type (unsigned-byte 32) hi lo)) + (kernel:make-double-float + (if (< hi #x80000000) + hi + (- hi #x100000000)) + lo)) + (mul-shift (x1 x0 shift) + ;; Perform multiplication by 2^shift+1, by doing a shift and + ;; an add. + (declare (type (unsigned-byte 32) x1 x0) + (optimize (speed 3))) + (let* ((overflow (ash x0 (- (- 32 shift)))) + (s0 (ldb (byte 32 0) (ash x0 shift))) + (s1 (logior (ldb (byte 32 0) (ash x1 shift)) + overflow))) + (multiple-value-bind (sum0 carry) + (bignum:%add-with-carry s0 x0 0) + (multiple-value-bind (sum1) + (bignum:%add-with-carry s1 x1 carry) + (values sum1 sum0))))) + (shl (x1 x0 n) + ;; Shift left [x1 x0] by n bits, where 0 <= n < 31. + (declare (type (unsigned-byte 32) x1 x0) + (type (integer 0 31) n) + (optimize (speed 3))) + (let ((out (ldb (byte n (- 32 n)) x0))) + (values (logior (ldb (byte 32 0) (ash x1 n)) out) + (ldb (byte 32 0) (ash x0 n))))) + (rotl (x1 x0 n) + ;; Rotate left [x1 x0] by n bits where n < 32. + (declare (type (unsigned-byte 32) x1 x0) + (type (integer 0 31) n) + (optimize (speed 3))) + (let ((x1-hi (ldb (byte n (- 32 n)) x1)) + (x0-hi (ldb (byte n (- 32 n)) x0))) + ;; x1-hi and x0-hi are the bits that are shifted out of x1 + ;; and x0. + (values (logior (ldb (byte 32 0) (ash x1 n)) + x0-hi) + (logior (ldb (byte 32 0) (ash x0 n)) + x1-hi)))) + (rotr (x1 x0 n) + ;; Rotate right [x1 x0] by n bits where n < 32. + (declare (type (unsigned-byte 32) x1 x0) + (type (integer 0 31) n) + (optimize (speed 3))) + (let ((x1-lo (ldb (byte n 0) x1)) + (x0-lo (ldb (byte n 0) x0)) + (-n (- n)) + (32-n (- 32 n))) + ;; x1-lo and x0-lo are the bits of x1/x0 that would be + ;; shifted out. + (values (logior (ldb (byte 32 0) (ash x1 -n)) + (ldb (byte 32 0) (ash x0-lo 32-n))) + (logior (ldb (byte 32 0) (ash x0 -n)) + (ldb (byte 32 0) (ash x1-lo 32-n))))))) + (declare (inline mul-shift shl rotl rotr)) (let ((s0-1 0) (s0-0 0) (s1-1 0) - (s1-0 0)) - (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0)) - ;; Load the state to s0 and s1. s0-1 is the high 32-bit part and - ;; s0-0 is the low 32-bit part of the 64-bit value. Similarly - ;; for s1. + (s1-0 0) + (result-1 0) + (result-0 0)) + (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0 result-1 result-0)) + ;; [s0-1 s0-0] is the first 64-bit word of the state. (multiple-value-bind (x1 x0) (kernel:double-float-bits (aref state 0)) (setf s0-1 (ldb (byte 32 0) x1) s0-0 x0)) + ;; [s1-1 s1-0] is the second 64-bit word of the state. (multiple-value-bind (x1 x0) (kernel:double-float-bits (aref state 1)) (setf s1-1 (ldb (byte 32 0) x1) s1-0 x0))
- ;; Compute the 64-bit random value: s0 + s1 - (multiple-value-prog1 - (multiple-value-bind (sum-0 c) - (bignum::%add-with-carry s0-0 s1-0 0) - (values (bignum::%add-with-carry s0-1 s1-1 c) - sum-0)) - ;; s1 ^= s0 - (setf s1-1 (logxor s1-1 s0-1) - s1-0 (logxor s1-0 s0-0)) - ;; s[0] = rotl(s0,55) ^ s1 ^ (s1 << 14) - (multiple-value-setq (s0-1 s0-0) - (rotl-55 s0-1 s0-0)) - (setf s0-1 (logxor s0-1 s1-1) - s0-0 (logxor s0-0 s1-0)) - (multiple-value-bind (s14-1 s14-0) - (shl-14 s1-1 s1-0) - (setf s0-1 (logxor s0-1 s14-1) - s0-0 (logxor s0-0 s14-0))) - - (multiple-value-bind (r1 r0) - (rotl-36 s1-1 s1-0) - (setf (aref state 0) (make-double s0-1 s0-0) - (aref state 1) (make-double r1 r0))))))) + ;; [result-1 result-0] = s0 * 5 + (multiple-value-setq (result-1 result-0) + (mul-shift s0-1 s0-0 2)) + + ;; [result-1 result-0] = rotl(result, 7) + (multiple-value-setq (result-1 result-0) + (rotl result-1 result-0 7)) + + ;; [result-1 result-0] = result*9 + (multiple-value-setq (result-1 result-0) + (mul-shift result-1 result-0 3)) + + ;; s1 ^= s0 + (setf s1-1 (logxor s1-1 s0-1) + s1-0 (logxor s1-0 s0-0)) + (multiple-value-bind (s24-hi s24-lo) + (rotl s0-1 s0-0 24) + (declare (type (unsigned-byte 32) s24-hi s24-lo)) + ;; [s24-hi s24-lo] = rotl(s0, 24) ^ s1 + (setf s24-hi (logxor s24-hi s1-1) + s24-lo (logxor s24-lo s1-0)) + (multiple-value-bind (s16-hi s16-lo) + (shl s1-1 s1-0 16) + ;; [s24-hi s24-lo] = s24 ^ (s1 << 16) = state[0] + (setf s24-hi (logxor s24-hi s16-hi) + s24-lo (logxor s24-lo s16-lo)) + (setf (aref state 0) (make-double s24-hi s24-lo)))) + (multiple-value-bind (s37-hi s37-lo) + (rotr s1-1 s1-0 (- 64 37)) + ;; [s37-hi s37-lo] = rotl(s1, 37) = state[1]. But rotating + ;; left by 37 is the same as rotating right by 64-37. + (setf (aref state 1) (make-double s37-hi s37-lo))) + ;; Return result. + (values result-1 + result-0))))
;;; Size of the chunks returned by random-chunk. ;;; @@ -495,10 +511,10 @@ :format-arguments (list arg)))))
;; Jump function for the generator. See the jump function in -;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c +;; https://prng.di.unimi.it/xoroshiro128starstar.c (defun random-state-jump (&optional (rng-state *random-state*)) _N"Jump the RNG-STATE. This is equivalent to 2^64 calls to the - xoroshiro128+ generator. It can be used to generate 2^64 + xoroshiro128** generator. It can be used to generate 2^64 non-overlapping subsequences for parallel computations." (declare (type random-state rng-state)) (let ((state (random-state-state rng-state)) @@ -508,12 +524,12 @@ (s1-1 0)) (declare (type (unsigned-byte 32) s0-0 s0-1 s1-0 s1-1) (optimize (speed 3) (safety 0))) - ;; The constants are #xbeac0467eba5facb and #xd86b048b86aa9922, + ;; The constants are #xdf900294d8f554a5 and #x170865df4b3201fc, ;; and we process these numbers starting from the LSB. We want ot ;; process these in 32-bit chunks, so word-reverse the constants. - (dolist (jump '(#xeba5facb #xbeac0467 #x86aa9922 #xd86b048b)) - (declare (type (unsigned-byte 32) jump)) - (dotimes (b 32) + (dolist (jump '(#xdf900294d8f554a5 #x170865df4b3201fc)) + (declare (type (unsigned-byte 64) jump)) + (dotimes (b 64) (declare (fixnum b)) (when (logbitp b jump) (multiple-value-bind (x1 x0) @@ -534,4 +550,4 @@ x0))) (setf (aref state 0) (convert s0-1 s0-0)) (setf (aref state 1) (convert s1-1 s1-0))) - rng-state)) + rng-state))
===================================== src/compiler/x86/arith.lisp ===================================== @@ -1695,58 +1695,101 @@ (:temporary (:sc double-reg) s0) (:temporary (:sc double-reg) s1) (:temporary (:sc double-reg) t0) + (:temporary (:sc double-reg) t1) (:generator 10 + ;; See https://prng.di.unimi.it/xoroshiro128starstar.c for the official code. + ;; + ;; This is what we're implementing, where s[] is our state vector. + ;; + ;; static uint64_t s[2]; + ;; static inline uint64_t rotl(const uint64_t x, int k) { + ;; return (x << k) | (x >> (64 - k)); + ;; } + ;; + ;; uint64_t next(void) { + ;; const uint64_t s0 = s[0]; + ;; uint64_t s1 = s[1]; + ;; const uint64_t result = rotl(s0 * 5, 7) * 9; + ;; + ;; s1 ^= s0; + ;; s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b + ;; s[1] = rotl(s1, 37); // c + ;; + ;; return result; + ;; } + ;; s0 = state[0] (inst movsd s0 (make-ea :dword :base state - :disp (- (+ (* vm:vector-data-offset - vm:word-bytes) - (* 8 0)) - vm:other-pointer-type))) - ;; s1 = state[1] - (inst movsd s1 (make-ea :dword :base state - :disp (- (+ (* vm:vector-data-offset - vm:word-bytes) - (* 8 1)) - vm:other-pointer-type))) - ;; Compute result = s0 + s1 - (inst movapd t0 s0) - (inst paddq t0 s1) - ;; Save the 64-bit result as two 32-bit results + :disp (- (+ (* vm:vector-data-offset + vm:word-bytes) + (* 8 0)) + vm:other-pointer-type))) + ;; t0 = s0 * 5 = s0 << 2 + s0 + (inst movapd t0 s0) ; t0 = s0 + (inst psllq t0 2) ; t0 = t0 << 2 = 4*t0 + (inst paddq t0 s0) ; t0 = t0 + s0 = 5*t0 + + ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7) + ;; = rotl(s0*5, 7) + (inst movapd t1 t0) ; t1 = t0 + (inst psllq t1 7) ; t1 = t0 << 7 + (inst psrlq t0 (- 64 7)) ; t0 = t0 >> 57 + (inst orpd t0 t1) ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7) + + ;; t0 = t0 * 9 = t0 << 3 + t0 + ;; = rotl(s0*5, 7) * 9 + (inst movapd t1 t0) ; t1 = t0 + (inst psllq t1 3) ; t1 = t0 << 3 + (inst paddq t0 t1) ; t0 = t0 << 3 + t0 = 9*t0 + + ;; Save the result as two 32-bit results. r1 is the high 32 bits + ;; and r0 is the low 32. (inst movd r0 t0) (inst psrlq t0 32) (inst movd r1 t0)
- ;; s1 = s1 ^ s0 - (inst xorpd s1 s0) - - ;; s0 = rotl(s0,55) = s0 << 55 | s0 >> 9 - (inst movapd t0 s0) - (inst psllq s0 55) ; s0 = s0 << 55 - (inst psrlq t0 9) ; t0 = s0 >> 9 - (inst orpd s0 t0) ; s0 = rotl(s0, 55) - - (inst movapd t0 s1) - (inst xorpd s0 s1) ; s0 = s0 ^ s1 - (inst psllq t0 14) ; t0 = s1 << 14 - (inst xorpd s0 t0) ; s0 = s0 ^ t0 + ;; s1 = state[1] + (inst movsd s1 (make-ea :dword :base state + :disp (- (+ (* vm:vector-data-offset + vm:word-bytes) + (* 8 1)) + vm:other-pointer-type))) + (inst xorpd s1 s0) ; s1 = s1 ^ s0 + + ;; s0 can now be reused as a temp. + ;; s0 = rotl(s0, 24) + (inst movapd t0 s0) ; t0 = s0 + (inst psllq t0 24) ; t0 = s0 << 24 + (inst psrlq s0 (- 64 24)) ; s0 = s0 >> 40 + (inst orpd s0 t0) ; s0 = s0 | t0 = rotl(s0, 24) + + ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1 + (inst xorpd s0 s1) + + ;; s0 = s0 ^ (s1 << 16) + (inst movapd t0 s1) ; t0 = s1 + (inst psllq t0 16) ; t0 = s1 << 16 + (inst xorpd s0 t0) ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16) + + ;; Save s0 to state[0] (inst movsd (make-ea :dword :base state :disp (- (+ (* vm:vector-data-offset vm:word-bytes) (* 8 0)) vm:other-pointer-type)) - s0) + s0)
- ;; s1 = rotl(s1, 36) = s1 << 36 | s1 >> 28, using t0 as temp - (inst movapd t0 s1) - (inst psllq s1 36) - (inst psrlq t0 28) - (inst orpd s1 t0) + ;; s1 = rotl(s1, 37) + (inst movapd t0 s1) ; t0 = s1 + (inst psllq t0 37) ; t0 = s1 << 37 + (inst psrlq s1 (- 64 37)) ; s1 = s1 >> 27 + (inst orpd s1 t0) ; s1 = t0 | s1 = rotl(s1, 37)
+ ;; Save s1 to state[1] (inst movsd (make-ea :dword :base state :disp (- (+ (* vm:vector-data-offset vm:word-bytes) (* 8 1)) vm:other-pointer-type)) - s1))) -) - + s1))) +)
===================================== src/general-info/release-21f.md ===================================== @@ -24,6 +24,9 @@ public domain. `ext:stream-file-length` generic function. * Changes: * Update to ASDF 3.3.7 + * The RNG has changed from an old version of xoroshiro128+ to + xoroshiro128**. This means sequences of random numbers will be + different from before. See ~~#276~~. * ANSI compliance fixes: * Bug fixes: * Gitlab tickets: @@ -44,8 +47,7 @@ public domain. * ~~#266~~ Support "~user" in namestrings * ~~#271~~ Update ASDF to 3.3.7 * ~~#272~~ Move scavenge code for static vectors to its own function - * ~~#277~~ `float-ratio-float` returns least postive float for - ratios closer to that than zero. + * ~~#276~~ Implement xoroshiro128** generator for x86 * Other changes: * Improvements to the PCL implementation of CLOS: * Changes to building procedure:
===================================== src/i18n/locale/cmucl.pot ===================================== @@ -12220,6 +12220,14 @@ msgstr "" msgid "Argument is not a RANDOM-STATE, T, or NIL: ~S" msgstr ""
+#: src/code/rand-xoroshiro.lisp +msgid "" +"Generate the next 64-bit result from the xoroshiro128** generator\n" +" using the state in STATE, a simple-array of 2 double-floats. The\n" +" 64-bit result is returned as 2 32-bit values, with the high 32-bits\n" +" being the first value." +msgstr "" + #: src/code/rand-xoroshiro.lisp msgid "" "Generate a uniformly distributed pseudo-random number between zero\n" @@ -12233,7 +12241,7 @@ msgstr "" #: src/code/rand-xoroshiro.lisp msgid "" "Jump the RNG-STATE. This is equivalent to 2^64 calls to the\n" -" xoroshiro128+ generator. It can be used to generate 2^64\n" +" xoroshiro128** generator. It can be used to generate 2^64\n" " non-overlapping subsequences for parallel computations." msgstr ""
===================================== tests/rng.lisp ===================================== @@ -48,16 +48,18 @@ (assert-equal 0 (kernel::random-state-rand *test-state*)) (assert-equal nil (kernel::random-state-cached-p *test-state*))
- (dolist (item '((#x18d5f05c086e0086 (#x228f4926843b364d #x74dfe78e715c81be)) - (#x976f30b4f597b80b (#x5b6bd4558bd96a68 #x567b7f35650aea8f)) - (#xb1e7538af0e454f7 (#x13e5253e242fac52 #xed380e70d10ab60e)) - (#x011d33aef53a6260 (#x9d0764952ca00d8a #x5251a5cfedd2b4ef)) - (#xef590a651a72c279 (#xba4ef2b425bda963 #x172b965cf56c15ac)) - (#xd17a89111b29bf0f (#x458277a5e5f0a21b #xd1bccfad6564e8d)) - (#x529e44a0bc46f0a8 (#x2becb68d5a7194c7 #x3a6ec964899bb5f3)) - (#x665b7ff1e40d4aba (#xededfd481d0a19fe #x3ea213411827fe9d)) - (#x2c9010893532189b (#xd7bb59bcd8fba26f #x52de763d34fee090)) - (#x2a99cffa0dfa82ff (#xf96e892c62d6ff2e #xc0542ff85652f81e)))) + (dolist (item + ;; Results for xoroshiro128** + '((#x41db14eb317141fe (#x16dfbf3d760d0fa4 #xe9bfcf1ce2b9037c)) + (#xaa4ee6e025dfec01 (#xb237e99a3c7ad367 #x96819b1fec0e0432)) + (#xea080e50cb948fa5 (#xcc0fd8226093e0bc #x0e9aeaa496ce50ba)) + (#x647f057cff408a6e (#xd273573bfa97bfde #xcbb600d852a650de)) + (#x232ac586565d037e (#x75dc686d99e39c57 #x063de00338aafc75)) + (#xdf2da206813da6d6 (#x9616cabb961ebc4a #x292c044e7c310dd4)) + (#x00d17cb1b38c852f (#xca593a661127a754 #x45f633d7e759debd)) + (#xd7a1f881fc34e641 (#xe00fd868db5d20d3 #xcfcf3d31f5e1363e)) + (#x64853747af628d30 (#xa24296c5ebb11935 #xd782dda5f81cab25)) + (#xda40653710b7293d (#xfb4be9d4941ff086 #x75b6420eb8096c02)))) (destructuring-bind (value state) item (assert-equal value (64-bit-value *test-state*)) @@ -69,10 +71,13 @@ (kernel::make-random-object :state (kernel::init-random-state #x12345678) :rand 0 :cached-p nil)) - (dolist (result '((#x291ddf8e6f6a7b67 #x1f9018a12f9e031f) - (#x88a7aa12158558d0 #xe264d785ab1472d9) - (#x207e16f73c51e7ba #x999c8a0a9a8d87c0) - (#x28f8959d3bcf5ff1 #x38091e563ab6eb98))) + (dolist (result + ;; Results for xoroshiro128** jump function + '((#x19a22191480b0a4e #x43b3d7ee592dd4cf) + (#x76cb87035d0b6e99 #xb6827bcf2ef8267c) + (#x5125201dbdf76860 #x8984c075043869e2) + (#x2c06f0667255309f #xa48cbe2e60fc1d65) + )) (kernel:random-state-jump *test-state*) (assert-equal result (multiple-value-list (64-bit-rng-state *test-state*)))))
===================================== tests/rng/test-128-star-star.c ===================================== @@ -0,0 +1,143 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See http://creativecommons.org/publicdomain/zero/1.0/. */ + +#include <stdint.h> + +/* This is xoroshiro128** 1.0, one of our all-purpose, rock-solid, + small-state generators. It is extremely (sub-ns) fast and it passes all + tests we are aware of, but its state space is large enough only for + mild parallelism. + + For generating just floating-point numbers, xoroshiro128+ is even + faster (but it has a very mild bias, see notes in the comments). + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +static uint64_t s[2]; + +uint64_t next(void) { + const uint64_t s0 = s[0]; + uint64_t s1 = s[1]; + const uint64_t result = rotl(s0 * 5, 7) * 9; + + s1 ^= s0; + s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b + s[1] = rotl(s1, 37); // c + + return result; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^64 calls to next(); it can be used to generate 2^64 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(); + } + + s[0] = s0; + s[1] = s1; +} + + +/* This is the long-jump function for the generator. It is equivalent to + 2^96 calls to next(); it can be used to generate 2^32 starting points, + from each of which jump() will generate 2^32 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1 }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(); + } + + s[0] = s0; + s[1] = s1; +} + +/*********************************************************************/ + +#include <stdio.h> + +/* + * Print out the first 10 random numbers from the generator. This is + * used for the expected results in the rng unit test. + */ +void +test_rng() +{ + int k; + uint64_t r; + + + s[0] = 0x38f1dc39d1906b6full; + s[1] = 0xdfe4142236dd9517ull; + + printf(";; First 10 outputs\n"); + + for (k = 0; k < 10; ++k) { + r = next(); + printf("%2d: #x%016llx (#x%016llx #x%016llx)\n", k, r, s[0], s[1]); + } +} + +/* + * Print out the first 4 states from jumping generator by 2^64 steps. + * This is used for the expected results in the rng unit test. + */ +void +test_jump() +{ + int k; + + s[0] = 0x38f1dc39d1906b6full; + s[1] = 0xdfe4142236dd9517ull; + + printf(";; First 4 jumps\n"); + for (k = 0; k < 4; ++k) { + jump(); + printf("%2d: #x%016llx #x%016llx\n", k, s[0], s[1]); + } +} + + +int +main() +{ + test_rng(); + test_jump(); + + return 0; +}
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/34f33e19fd935515c72ee53...