[Git][cmucl/cmucl][master] 2 commits: Fix #276: Implement xoroshiro128**

15 Mar 2024


      Raymond Toy pushed to branch master at cmucl / cmucl


Commits:
07a0ca81 by Raymond Toy at 2024-03-15T20:53:00+00:00
Fix #276: Implement xoroshiro128**

- - - - -
335588ce by Raymond Toy at 2024-03-15T20:53:04+00:00
Merge branch 'issue-276-xoroshiro128starstar' into 'master'

Fix #276: Implement xoroshiro128**

Closes #276

See merge request cmucl/cmucl!192
- - - - -


7 changed files:

- bin/run-ansi-tests.sh
- src/code/rand-xoroshiro.lisp
- src/compiler/x86/arith.lisp
- src/general-info/release-21f.md
- src/i18n/locale/cmucl.pot
- tests/rng.lisp
- + tests/rng/test-128-star-star.c


Changes:

=====================================
bin/run-ansi-tests.sh
=====================================
@@ -33,14 +33,15 @@ shift $[$OPTIND - 1]
 
 set -x
 if [ -d ../ansi-test ]; then
-    # We already have clone; make sure it's clean by stashing any changes.
-    (cd ../ansi-test; git stash)
+    # We already have clone; make sure it's clean by stashing any
+    # changes.  Then pull any updates.
+    (cd ../ansi-test; git stash; git pull --rebase)
 else    
     (cd ../; git clone https://gitlab.common-lisp.net/cmucl/ansi-test.git)
 fi
 
 cd ../ansi-test
-git checkout cmucl-expected-failures
+git checkout issue-276-xoroshiro
 
 make LISP="$LISP batch -noinit -nositeinit"
 # There should be no unexpected successes or failures; check these separately


=====================================
src/code/rand-xoroshiro.lisp
=====================================
@@ -10,7 +10,7 @@
 ;;;
 ;;; **********************************************************************
 ;;;
-;;; Support for the xoroshiro128+ random number generator by David
+;;; Support for the xoroshiro128** random number generator by David
 ;;; Blackman and Sebastiano Vigna (vigna@acm.org). See
 ;;; http://xoroshiro.di.unimi.it/.
 
@@ -225,127 +225,143 @@
 
 ;;;; Random entries:
 
-;; Sparc and x86 have vops to implement xoroshiro-gen that are much
-;; faster than the portable lisp version.  Use them.
-#+(or x86 sparc)
+;; X86 has a vop to implement xoroshiro-gen that is about 4.5 times
+;; faster than the portable lisp version below.  For other
+;; architectures, we use the portable version until a vop is written.
+#+x86
 (declaim (inline xoroshiro-gen))
-#+(or x86 sparc)
+#+x86
 (defun xoroshiro-gen (state)
+  _N"Generate the next 64-bit result from the xoroshiro128** generator
+  using the state in STATE, a simple-array of 2 double-floats.  The
+  64-bit result is returned as 2 32-bit values, with the high 32-bits
+  being the first value."
   (declare (type (simple-array double-float (2)) state)
 	   (optimize (speed 3) (safety 0)))
   (vm::xoroshiro-next state))
 
-#-(or x86 sparc)
+#-x86
 (defun xoroshiro-gen (state)
+  _N"Generate the next 64-bit result from the xoroshiro128** generator
+  using the state in STATE, a simple-array of 2 double-floats.  The
+  64-bit result is returned as 2 32-bit values, with the high 32-bits
+  being the first value."
   (declare (type (simple-array double-float (2)) state)
 	   (optimize (speed 3) (safety 0)))
-  ;; Portable implementation of the xoroshiro128+ generator. See
-  ;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c for the
-  ;; definitive definition.
-  ;;
-  ;; uint64_t s[2];
-  ;;
-  ;; static inline uint64_t rotl(const uint64_t x, int k) {
-  ;; 	return (x << k) | (x >> (64 - k));
-  ;; }
-  ;;
-  ;; uint64_t next(void) {
-  ;; 	const uint64_t s0 = s[0];
-  ;; 	uint64_t s1 = s[1];
-  ;; 	const uint64_t result = s0 + s1;
-  ;;
-  ;; 	s1 ^= s0;
-  ;; 	s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
-  ;; 	s[1] = rotl(s1, 36); // c
-  ;;
-  ;; 	return result;
-  ;; }
-  ;;
-  (flet ((rotl-55 (x1 x0)
-	   ;; Rotate [x1|x0] left 55 bits, returning the result as two
-	   ;; values.
-	   (declare (type (unsigned-byte 32) x0 x1)
-		    (optimize (speed 3) (safety 0)))
-	   ;; x << 55
-	   (let ((sl55-h (ldb (byte 32 0) (ash x0 (- 55 32))))
-		 (sl55-l 0))
-	     ;; x >> 9
-	     (let ((sr9-h (ash x1 -9))
-		   (sr9-l (ldb (byte 32 0)
-			       (logior (ash x0 -9)
-				       (ash x1 23)))))
-	       (values (logior sl55-h sr9-h)
-		       (logior sl55-l sr9-l)))))
-	 (rotl-36 (x1 x0)
-	   ;; Rotate [x1|x0] left 36 bits, returning the result as two
-	   ;; values.
-	   (declare (type (unsigned-byte 32) x0 x1)
-		    (optimize (speed 3) (safety 0)))
-	   ;; x << 36
-	   (let ((sl36-h (ldb (byte 32 0) (ash x0 4))))
-	     ;; x >> 28
-	     (let ((sr28-l (ldb (byte 32 0)
-				(logior (ash x0 -28)
-					(ash x1 4))))
-		   (sr28-h (ash x1 -28)))
-	       (values (logior sl36-h sr28-h)
-		       sr28-l))))
-	 (shl-14 (x1 x0)
-	   ;; Shift [x1|x0] left by 14 bits, returning the result as
-	   ;; two values.
-	   (declare (type (unsigned-byte 32) x1 x0)
-		    (optimize (speed 3) (safety 0)))
-	   (values (ldb (byte 32 0)
-			(logior (ash x1 14)
-				(ash x0 (- 14 32))))
-		   (ldb (byte 32 0)
-			(ash x0 14))))
-	 (make-double (hi lo)
-	   (kernel:make-double-float
-	    (if (< hi #x80000000)
-		hi
-		(- hi #x100000000))
-	    lo)))
+  (flet
+      ((make-double (hi lo)
+	 ;; Convert [hi lo] to a double-float, where hi and lo are the
+	 ;; high and low 32 bits of a 64-bit double-float.
+	 (declare (type (unsigned-byte 32) hi lo))
+	 (kernel:make-double-float
+	  (if (< hi #x80000000)
+	      hi
+	      (- hi #x100000000))
+	  lo))
+       (mul-shift (x1 x0 shift)
+	 ;; Perform multiplication by 2^shift+1, by doing a shift and
+	 ;; an add.
+	 (declare (type (unsigned-byte 32) x1 x0)
+		  (optimize (speed 3)))
+	 (let* ((overflow (ash x0 (- (- 32 shift))))
+		(s0 (ldb (byte 32 0) (ash x0 shift)))
+		(s1 (logior (ldb (byte 32 0) (ash x1 shift))
+			    overflow)))
+	   (multiple-value-bind (sum0 carry)
+	       (bignum:%add-with-carry s0 x0 0)
+	     (multiple-value-bind (sum1)
+		 (bignum:%add-with-carry s1 x1 carry)
+	       (values sum1 sum0)))))
+       (shl (x1 x0 n)
+	 ;; Shift left [x1 x0] by n bits, where 0 <= n < 31.
+	 (declare (type (unsigned-byte 32) x1 x0)
+		  (type (integer 0 31) n)
+		  (optimize (speed 3)))
+	 (let ((out (ldb (byte n (- 32 n)) x0)))
+	   (values (logior (ldb (byte 32 0) (ash x1 n)) out)
+		   (ldb (byte 32 0) (ash x0 n)))))
+       (rotl (x1 x0 n)
+	 ;; Rotate left [x1 x0] by n bits where n < 32.
+	 (declare (type (unsigned-byte 32) x1 x0)
+		  (type (integer 0 31) n)
+		  (optimize (speed 3)))
+	 (let ((x1-hi (ldb (byte n (- 32 n)) x1))
+	       (x0-hi (ldb (byte n (- 32 n)) x0)))
+	   ;; x1-hi and x0-hi are the bits that are shifted out of x1
+	   ;; and x0.
+	   (values (logior (ldb (byte 32 0) (ash x1 n))
+			   x0-hi)
+		   (logior (ldb (byte 32 0) (ash x0 n))
+			   x1-hi))))
+       (rotr (x1 x0 n)
+	 ;; Rotate right [x1 x0] by n bits where n < 32.
+	 (declare (type (unsigned-byte 32) x1 x0)
+		  (type (integer 0 31) n)
+		  (optimize (speed 3)))
+	 (let ((x1-lo (ldb (byte n 0) x1))
+	       (x0-lo (ldb (byte n 0) x0))
+	       (-n (- n))
+	       (32-n (- 32 n)))
+	   ;; x1-lo and x0-lo are the bits of x1/x0 that would be
+	   ;; shifted out.
+	   (values (logior (ldb (byte 32 0) (ash x1 -n))
+			   (ldb (byte 32 0) (ash x0-lo 32-n)))
+		   (logior (ldb (byte 32 0) (ash x0 -n))
+			   (ldb (byte 32 0) (ash x1-lo 32-n)))))))
+    (declare (inline mul-shift shl rotl rotr))
     (let ((s0-1 0)
 	  (s0-0 0)
 	  (s1-1 0)
-	  (s1-0 0))
-      (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0))
-      ;; Load the state to s0 and s1. s0-1 is the high 32-bit part and
-      ;; s0-0 is the low 32-bit part of the 64-bit value.  Similarly
-      ;; for s1.
+	  (s1-0 0)
+	  (result-1 0)
+	  (result-0 0))
+      (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0 result-1 result-0))
+      ;; [s0-1 s0-0] is the first 64-bit word of the state.
       (multiple-value-bind (x1 x0)
 	  (kernel:double-float-bits (aref state 0))
 	(setf s0-1 (ldb (byte 32 0) x1)
 	      s0-0 x0))
+      ;; [s1-1 s1-0] is the second 64-bit word of the state.
       (multiple-value-bind (x1 x0)
 	  (kernel:double-float-bits (aref state 1))
 	(setf s1-1 (ldb (byte 32 0) x1)
 	      s1-0 x0))
 
-      ;; Compute the 64-bit random value: s0 + s1
-      (multiple-value-prog1
-	  (multiple-value-bind (sum-0 c)
-	      (bignum::%add-with-carry s0-0 s1-0 0)
-	    (values (bignum::%add-with-carry s0-1 s1-1 c)
-		    sum-0))
-	;; s1 ^= s0
-	(setf s1-1 (logxor s1-1 s0-1)
-	      s1-0 (logxor s1-0 s0-0))
-	;; s[0] = rotl(s0,55) ^ s1 ^ (s1 << 14)
-	(multiple-value-setq (s0-1 s0-0)
-	  (rotl-55 s0-1 s0-0))
-	(setf s0-1 (logxor s0-1 s1-1)
-	      s0-0 (logxor s0-0 s1-0))
-	(multiple-value-bind (s14-1 s14-0)
-	    (shl-14 s1-1 s1-0)
-	  (setf s0-1 (logxor s0-1 s14-1)
-		s0-0 (logxor s0-0 s14-0)))
-
-	(multiple-value-bind (r1 r0)
-	    (rotl-36 s1-1 s1-0)
-	  (setf (aref state 0) (make-double s0-1 s0-0)
-		(aref state 1) (make-double r1 r0)))))))
+      ;; [result-1 result-0] = s0 * 5
+      (multiple-value-setq (result-1 result-0)
+	(mul-shift s0-1 s0-0 2))
+
+      ;; [result-1 result-0] = rotl(result, 7)
+      (multiple-value-setq (result-1 result-0)
+	(rotl result-1 result-0 7))
+
+      ;; [result-1 result-0] = result*9
+      (multiple-value-setq (result-1 result-0)
+	(mul-shift result-1 result-0 3))
+
+      ;; s1 ^= s0
+      (setf s1-1 (logxor s1-1 s0-1)
+	    s1-0 (logxor s1-0 s0-0))
+      (multiple-value-bind (s24-hi s24-lo)
+	  (rotl s0-1 s0-0 24)
+	(declare (type (unsigned-byte 32) s24-hi s24-lo))
+	;; [s24-hi s24-lo] = rotl(s0, 24) ^ s1
+	(setf s24-hi (logxor s24-hi s1-1)
+	      s24-lo (logxor s24-lo s1-0))
+	(multiple-value-bind (s16-hi s16-lo)
+	    (shl s1-1 s1-0 16)
+	  ;; [s24-hi s24-lo] = s24 ^ (s1 << 16) = state[0]
+	  (setf s24-hi (logxor s24-hi s16-hi)
+		s24-lo (logxor s24-lo s16-lo))
+	  (setf (aref state 0) (make-double s24-hi s24-lo))))
+      (multiple-value-bind (s37-hi s37-lo)
+	  (rotr s1-1 s1-0 (- 64 37))
+	;; [s37-hi s37-lo] = rotl(s1, 37) = state[1].  But rotating
+	;; left by 37 is the same as rotating right by 64-37.
+	(setf (aref state 1) (make-double s37-hi s37-lo)))
+      ;; Return result.
+      (values result-1
+	      result-0))))
 
 ;;; Size of the chunks returned by random-chunk.
 ;;;
@@ -495,10 +511,10 @@
 	    :format-arguments (list arg)))))
 
 ;; Jump function for the generator.  See the jump function in
-;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c
+;; https://prng.di.unimi.it/xoroshiro128starstar.c
 (defun random-state-jump (&optional (rng-state *random-state*))
   _N"Jump the RNG-STATE.  This is equivalent to 2^64 calls to the
-  xoroshiro128+ generator.  It can be used to generate 2^64
+  xoroshiro128** generator.  It can be used to generate 2^64
   non-overlapping subsequences for parallel computations."
   (declare (type random-state rng-state))
   (let ((state (random-state-state rng-state))
@@ -508,12 +524,12 @@
 	(s1-1 0))
     (declare (type (unsigned-byte 32) s0-0 s0-1 s1-0 s1-1)
 	     (optimize (speed 3) (safety 0)))
-    ;; The constants are #xbeac0467eba5facb and #xd86b048b86aa9922,
+    ;; The constants are #xdf900294d8f554a5 and #x170865df4b3201fc,
     ;; and we process these numbers starting from the LSB.  We want ot
     ;; process these in 32-bit chunks, so word-reverse the constants.
-    (dolist (jump '(#xeba5facb #xbeac0467 #x86aa9922 #xd86b048b))
-      (declare (type (unsigned-byte 32) jump))
-      (dotimes (b 32)
+    (dolist (jump '(#xdf900294d8f554a5 #x170865df4b3201fc))
+      (declare (type (unsigned-byte 64) jump))
+      (dotimes (b 64)
 	(declare (fixnum b))
 	(when (logbitp b jump)
 	  (multiple-value-bind (x1 x0)
@@ -534,4 +550,4 @@
 	      x0)))
       (setf (aref state 0) (convert s0-1 s0-0))
       (setf (aref state 1) (convert s1-1 s1-0)))
-      rng-state))
+    rng-state))


=====================================
src/compiler/x86/arith.lisp
=====================================
@@ -1695,58 +1695,101 @@
   (:temporary (:sc double-reg) s0)
   (:temporary (:sc double-reg) s1)
   (:temporary (:sc double-reg) t0)
+  (:temporary (:sc double-reg) t1)
   (:generator 10
+    ;; See https://prng.di.unimi.it/xoroshiro128starstar.c for the official code.
+    ;;
+    ;; This is what we're implementing, where s[] is our state vector.
+    ;;
+    ;; static uint64_t s[2];
+    ;; static inline uint64_t rotl(const uint64_t x, int k) {
+    ;;   return (x << k) | (x >> (64 - k));
+    ;; }
+    ;;
+    ;; uint64_t next(void) {
+    ;;   const uint64_t s0 = s[0];
+    ;; 	 uint64_t s1 = s[1];
+    ;; 	 const uint64_t result = rotl(s0 * 5, 7) * 9;
+    ;; 
+    ;; 	 s1 ^= s0;
+    ;; 	 s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
+    ;; 	 s[1] = rotl(s1, 37); // c
+    ;; 
+    ;; 	 return result;
+    ;; }
+
     ;; s0 = state[0]
     (inst movsd s0 (make-ea :dword :base state
-			 :disp (- (+ (* vm:vector-data-offset
-					vm:word-bytes)
-				     (* 8 0))
-				  vm:other-pointer-type)))
-    ;; s1 = state[1]
-    (inst movsd s1 (make-ea :dword :base state
-			 :disp (- (+ (* vm:vector-data-offset
-					vm:word-bytes)
-				     (* 8 1))
-				  vm:other-pointer-type)))
-    ;; Compute result = s0 + s1
-    (inst movapd t0 s0)
-    (inst paddq t0 s1)
-    ;; Save the 64-bit result as two 32-bit results
+                            :disp (- (+ (* vm:vector-data-offset
+					   vm:word-bytes)
+				        (* 8 0))
+				     vm:other-pointer-type)))
+    ;; t0 = s0 * 5 = s0 << 2 + s0
+    (inst movapd t0 s0)                 ; t0 = s0
+    (inst psllq t0 2)                   ; t0 = t0 << 2 = 4*t0
+    (inst paddq t0 s0)                  ; t0 = t0 + s0 = 5*t0
+
+    ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7)
+    ;;    = rotl(s0*5, 7)
+    (inst movapd t1 t0)        ; t1 = t0
+    (inst psllq t1 7)          ; t1 = t0 << 7
+    (inst psrlq t0 (- 64 7))   ; t0 = t0 >> 57
+    (inst orpd t0 t1)          ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7)
+
+    ;; t0 = t0 * 9 = t0 << 3 + t0
+    ;;    = rotl(s0*5, 7) * 9
+    (inst movapd t1 t0)                 ; t1 = t0
+    (inst psllq t1 3)                   ; t1 = t0 << 3
+    (inst paddq t0 t1)                  ; t0 = t0 << 3 + t0 = 9*t0
+
+    ;; Save the result as two 32-bit results.  r1 is the high 32 bits
+    ;; and r0 is the low 32.
     (inst movd r0 t0)
     (inst psrlq t0 32)
     (inst movd r1 t0)
 
-    ;; s1 = s1 ^ s0
-    (inst xorpd s1 s0)
-
-    ;; s0 = rotl(s0,55) = s0 << 55 | s0 >> 9
-    (inst movapd t0 s0)
-    (inst psllq s0 55)			; s0 = s0 << 55
-    (inst psrlq t0 9)			; t0 = s0 >> 9
-    (inst orpd s0 t0)			; s0 = rotl(s0, 55)
-
-    (inst movapd t0 s1)
-    (inst xorpd s0 s1)			; s0 = s0 ^ s1
-    (inst psllq t0 14)			; t0 = s1 << 14
-    (inst xorpd s0 t0)			; s0 = s0 ^ t0
+    ;; s1 = state[1]
+    (inst movsd s1 (make-ea :dword :base state
+			    :disp (- (+ (* vm:vector-data-offset
+					   vm:word-bytes)
+				        (* 8 1))
+				     vm:other-pointer-type)))
+    (inst xorpd s1 s0)                  ; s1 = s1 ^ s0
+
+    ;; s0 can now be reused as a temp.
+    ;; s0 = rotl(s0, 24)
+    (inst movapd t0 s0)                 ; t0 = s0
+    (inst psllq t0 24)                  ; t0 = s0 << 24
+    (inst psrlq s0 (- 64 24))           ; s0 = s0 >> 40
+    (inst orpd s0 t0)                   ; s0 = s0 | t0 = rotl(s0, 24)
+
+    ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1
+    (inst xorpd s0 s1)
+
+    ;; s0 = s0 ^ (s1 << 16)
+    (inst movapd t0 s1)          ; t0 = s1
+    (inst psllq t0 16)           ; t0 = s1 << 16
+    (inst xorpd s0 t0)           ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16)
+
+    ;; Save s0 to state[0]
     (inst movsd (make-ea :dword :base state
 			 :disp (- (+ (* vm:vector-data-offset
 					vm:word-bytes)
 				     (* 8 0))
 				  vm:other-pointer-type))
-	  s0)
+          s0)
 
-    ;; s1 = rotl(s1, 36) = s1 << 36 | s1 >> 28, using t0 as temp
-    (inst movapd t0 s1)
-    (inst psllq s1 36)
-    (inst psrlq t0 28)
-    (inst orpd s1 t0)
+    ;; s1 = rotl(s1, 37)
+    (inst movapd t0 s1)                 ; t0 = s1
+    (inst psllq t0 37)                  ; t0 = s1 << 37
+    (inst psrlq s1 (- 64 37))           ; s1 = s1 >> 27
+    (inst orpd s1 t0)                   ; s1 = t0 | s1 = rotl(s1, 37)
 
+    ;; Save s1 to state[1]
     (inst movsd (make-ea :dword :base state
 			 :disp (- (+ (* vm:vector-data-offset
 					vm:word-bytes)
 				     (* 8 1))
 				  vm:other-pointer-type))
-	  s1)))
-)    
-    
+          s1)))
+)


=====================================
src/general-info/release-21f.md
=====================================
@@ -24,6 +24,9 @@ public domain.
       `ext:stream-file-length` generic function.
   * Changes:
     * Update to ASDF 3.3.7
+    * The RNG has changed from an old version of xoroshiro128+ to
+      xoroshiro128**.  This means sequences of random numbers will be
+      different from before.  See ~~#276~~.
   * ANSI compliance fixes:
   * Bug fixes:
   * Gitlab tickets:
@@ -44,8 +47,7 @@ public domain.
     * ~~#266~~ Support "~user" in namestrings
     * ~~#271~~ Update ASDF to 3.3.7
     * ~~#272~~ Move scavenge code for static vectors to its own function
-    * ~~#277~~ `float-ratio-float` returns least postive float for
-      ratios closer to that than zero.
+    * ~~#276~~ Implement xoroshiro128** generator for x86
   * Other changes:
   * Improvements to the PCL implementation of CLOS:
   * Changes to building procedure:


=====================================
src/i18n/locale/cmucl.pot
=====================================
@@ -12220,6 +12220,14 @@ msgstr ""
 msgid "Argument is not a RANDOM-STATE, T, or NIL: ~S"
 msgstr ""
 
+#: src/code/rand-xoroshiro.lisp
+msgid ""
+"Generate the next 64-bit result from the xoroshiro128** generator\n"
+"  using the state in STATE, a simple-array of 2 double-floats.  The\n"
+"  64-bit result is returned as 2 32-bit values, with the high 32-bits\n"
+"  being the first value."
+msgstr ""
+
 #: src/code/rand-xoroshiro.lisp
 msgid ""
 "Generate a uniformly distributed pseudo-random number between zero\n"
@@ -12233,7 +12241,7 @@ msgstr ""
 #: src/code/rand-xoroshiro.lisp
 msgid ""
 "Jump the RNG-STATE.  This is equivalent to 2^64 calls to the\n"
-"  xoroshiro128+ generator.  It can be used to generate 2^64\n"
+"  xoroshiro128** generator.  It can be used to generate 2^64\n"
 "  non-overlapping subsequences for parallel computations."
 msgstr ""
 


=====================================
tests/rng.lisp
=====================================
@@ -48,16 +48,18 @@
   (assert-equal 0 (kernel::random-state-rand *test-state*))
   (assert-equal nil (kernel::random-state-cached-p *test-state*))
 
-  (dolist (item '((#x18d5f05c086e0086 (#x228f4926843b364d #x74dfe78e715c81be))
-		  (#x976f30b4f597b80b (#x5b6bd4558bd96a68 #x567b7f35650aea8f))
-		  (#xb1e7538af0e454f7 (#x13e5253e242fac52 #xed380e70d10ab60e))
-		  (#x011d33aef53a6260 (#x9d0764952ca00d8a #x5251a5cfedd2b4ef))
-		  (#xef590a651a72c279 (#xba4ef2b425bda963 #x172b965cf56c15ac))
-		  (#xd17a89111b29bf0f (#x458277a5e5f0a21b #xd1bccfad6564e8d))
-		  (#x529e44a0bc46f0a8 (#x2becb68d5a7194c7 #x3a6ec964899bb5f3))
-		  (#x665b7ff1e40d4aba (#xededfd481d0a19fe #x3ea213411827fe9d))
-		  (#x2c9010893532189b (#xd7bb59bcd8fba26f #x52de763d34fee090))
-		  (#x2a99cffa0dfa82ff (#xf96e892c62d6ff2e #xc0542ff85652f81e))))
+  (dolist (item
+           ;; Results for xoroshiro128**
+           '((#x41db14eb317141fe (#x16dfbf3d760d0fa4 #xe9bfcf1ce2b9037c))
+             (#xaa4ee6e025dfec01 (#xb237e99a3c7ad367 #x96819b1fec0e0432))
+             (#xea080e50cb948fa5 (#xcc0fd8226093e0bc #x0e9aeaa496ce50ba))
+             (#x647f057cff408a6e (#xd273573bfa97bfde #xcbb600d852a650de))
+             (#x232ac586565d037e (#x75dc686d99e39c57 #x063de00338aafc75))
+             (#xdf2da206813da6d6 (#x9616cabb961ebc4a #x292c044e7c310dd4))
+             (#x00d17cb1b38c852f (#xca593a661127a754 #x45f633d7e759debd))
+             (#xd7a1f881fc34e641 (#xe00fd868db5d20d3 #xcfcf3d31f5e1363e))
+             (#x64853747af628d30 (#xa24296c5ebb11935 #xd782dda5f81cab25))
+             (#xda40653710b7293d (#xfb4be9d4941ff086 #x75b6420eb8096c02))))
     (destructuring-bind (value state)
 	item
       (assert-equal value (64-bit-value *test-state*))
@@ -69,10 +71,13 @@
 	(kernel::make-random-object :state (kernel::init-random-state #x12345678)
 				    :rand 0
 				    :cached-p nil))
-  (dolist (result '((#x291ddf8e6f6a7b67 #x1f9018a12f9e031f)
-		    (#x88a7aa12158558d0 #xe264d785ab1472d9)
-		    (#x207e16f73c51e7ba #x999c8a0a9a8d87c0)
-		    (#x28f8959d3bcf5ff1 #x38091e563ab6eb98)))
+  (dolist (result
+           ;; Results for xoroshiro128** jump function
+           '((#x19a22191480b0a4e #x43b3d7ee592dd4cf)
+             (#x76cb87035d0b6e99 #xb6827bcf2ef8267c)
+             (#x5125201dbdf76860 #x8984c075043869e2)
+             (#x2c06f0667255309f #xa48cbe2e60fc1d65)
+             ))
     (kernel:random-state-jump *test-state*)
     (assert-equal result (multiple-value-list
 			  (64-bit-rng-state *test-state*)))))


=====================================
tests/rng/test-128-star-star.c
=====================================
@@ -0,0 +1,143 @@
+/*  Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
+
+To the extent possible under law, the author has dedicated all copyright
+and related and neighboring rights to this software to the public domain
+worldwide. This software is distributed without any warranty.
+
+See <http://creativecommons.org/publicdomain/zero/1.0/>. */
+
+#include <stdint.h>
+
+/* This is xoroshiro128** 1.0, one of our all-purpose, rock-solid,
+   small-state generators. It is extremely (sub-ns) fast and it passes all
+   tests we are aware of, but its state space is large enough only for
+   mild parallelism.
+
+   For generating just floating-point numbers, xoroshiro128+ is even
+   faster (but it has a very mild bias, see notes in the comments).
+
+   The state must be seeded so that it is not everywhere zero. If you have
+   a 64-bit seed, we suggest to seed a splitmix64 generator and use its
+   output to fill s. */
+
+
+static inline uint64_t rotl(const uint64_t x, int k) {
+	return (x << k) | (x >> (64 - k));
+}
+
+
+static uint64_t s[2];
+
+uint64_t next(void) {
+	const uint64_t s0 = s[0];
+	uint64_t s1 = s[1];
+	const uint64_t result = rotl(s0 * 5, 7) * 9;
+
+	s1 ^= s0;
+	s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
+	s[1] = rotl(s1, 37); // c
+
+	return result;
+}
+
+
+/* This is the jump function for the generator. It is equivalent
+   to 2^64 calls to next(); it can be used to generate 2^64
+   non-overlapping subsequences for parallel computations. */
+
+void jump(void) {
+	static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
+
+	uint64_t s0 = 0;
+	uint64_t s1 = 0;
+	for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
+		for(int b = 0; b < 64; b++) {
+			if (JUMP[i] & UINT64_C(1) << b) {
+				s0 ^= s[0];
+				s1 ^= s[1];
+			}
+			next();
+		}
+
+	s[0] = s0;
+	s[1] = s1;
+}
+
+
+/* This is the long-jump function for the generator. It is equivalent to
+   2^96 calls to next(); it can be used to generate 2^32 starting points,
+   from each of which jump() will generate 2^32 non-overlapping
+   subsequences for parallel distributed computations. */
+
+void long_jump(void) {
+	static const uint64_t LONG_JUMP[] = { 0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1 };
+
+	uint64_t s0 = 0;
+	uint64_t s1 = 0;
+	for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
+		for(int b = 0; b < 64; b++) {
+			if (LONG_JUMP[i] & UINT64_C(1) << b) {
+				s0 ^= s[0];
+				s1 ^= s[1];
+			}
+			next();
+		}
+
+	s[0] = s0;
+	s[1] = s1;
+}
+
+/*********************************************************************/
+
+#include <stdio.h>
+
+/*
+ * Print out the first 10 random numbers from the generator.  This is
+ * used for the expected results in the rng unit test.
+ */
+void
+test_rng()
+{
+    int k;
+    uint64_t r;
+    
+    
+    s[0] = 0x38f1dc39d1906b6full;
+    s[1] = 0xdfe4142236dd9517ull;
+
+    printf(";; First 10 outputs\n");
+
+    for (k = 0; k < 10; ++k) {
+        r = next();
+        printf("%2d: #x%016llx (#x%016llx #x%016llx)\n", k, r, s[0], s[1]);
+    }
+}
+
+/*
+ * Print out the first 4 states from jumping generator by 2^64 steps.
+ * This is used for the expected results in the rng unit test.
+ */
+void
+test_jump()
+{
+    int k;
+
+    s[0] = 0x38f1dc39d1906b6full;
+    s[1] = 0xdfe4142236dd9517ull;
+
+    printf(";; First 4 jumps\n");
+    for (k = 0; k < 4; ++k) {
+        jump();
+        printf("%2d: #x%016llx #x%016llx\n", k, s[0], s[1]);
+    }
+}
+
+
+int
+main()
+{
+    test_rng();
+    test_jump();
+    
+    return 0;
+}



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/34f33e19fd935515c72ee53...

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/34f33e19fd935515c72ee53...
You're receiving this email because of your account on gitlab.common-lisp.net.

    

Raymond Toy (＠rtoy)

tags

participants (1)