Raymond Toy pushed to branch master at cmucl / cmucl

Commits:

7 changed files:

Changes:

  • bin/run-ansi-tests.sh
    ... ... @@ -33,14 +33,15 @@ shift $[$OPTIND - 1]
    33 33
     
    
    34 34
     set -x
    
    35 35
     if [ -d ../ansi-test ]; then
    
    36
    -    # We already have clone; make sure it's clean by stashing any changes.
    
    37
    -    (cd ../ansi-test; git stash)
    
    36
    +    # We already have clone; make sure it's clean by stashing any
    
    37
    +    # changes.  Then pull any updates.
    
    38
    +    (cd ../ansi-test; git stash; git pull --rebase)
    
    38 39
     else    
    
    39 40
         (cd ../; git clone https://gitlab.common-lisp.net/cmucl/ansi-test.git)
    
    40 41
     fi
    
    41 42
     
    
    42 43
     cd ../ansi-test
    
    43
    -git checkout cmucl-expected-failures
    
    44
    +git checkout issue-276-xoroshiro
    
    44 45
     
    
    45 46
     make LISP="$LISP batch -noinit -nositeinit"
    
    46 47
     # There should be no unexpected successes or failures; check these separately
    

  • src/code/rand-xoroshiro.lisp
    ... ... @@ -10,7 +10,7 @@
    10 10
     ;;;
    
    11 11
     ;;; **********************************************************************
    
    12 12
     ;;;
    
    13
    -;;; Support for the xoroshiro128+ random number generator by David
    
    13
    +;;; Support for the xoroshiro128** random number generator by David
    
    14 14
     ;;; Blackman and Sebastiano Vigna (vigna@acm.org). See
    
    15 15
     ;;; http://xoroshiro.di.unimi.it/.
    
    16 16
     
    
    ... ... @@ -225,127 +225,143 @@
    225 225
     
    
    226 226
     ;;;; Random entries:
    
    227 227
     
    
    228
    -;; Sparc and x86 have vops to implement xoroshiro-gen that are much
    
    229
    -;; faster than the portable lisp version.  Use them.
    
    230
    -#+(or x86 sparc)
    
    228
    +;; X86 has a vop to implement xoroshiro-gen that is about 4.5 times
    
    229
    +;; faster than the portable lisp version below.  For other
    
    230
    +;; architectures, we use the portable version until a vop is written.
    
    231
    +#+x86
    
    231 232
     (declaim (inline xoroshiro-gen))
    
    232
    -#+(or x86 sparc)
    
    233
    +#+x86
    
    233 234
     (defun xoroshiro-gen (state)
    
    235
    +  _N"Generate the next 64-bit result from the xoroshiro128** generator
    
    236
    +  using the state in STATE, a simple-array of 2 double-floats.  The
    
    237
    +  64-bit result is returned as 2 32-bit values, with the high 32-bits
    
    238
    +  being the first value."
    
    234 239
       (declare (type (simple-array double-float (2)) state)
    
    235 240
     	   (optimize (speed 3) (safety 0)))
    
    236 241
       (vm::xoroshiro-next state))
    
    237 242
     
    
    238
    -#-(or x86 sparc)
    
    243
    +#-x86
    
    239 244
     (defun xoroshiro-gen (state)
    
    245
    +  _N"Generate the next 64-bit result from the xoroshiro128** generator
    
    246
    +  using the state in STATE, a simple-array of 2 double-floats.  The
    
    247
    +  64-bit result is returned as 2 32-bit values, with the high 32-bits
    
    248
    +  being the first value."
    
    240 249
       (declare (type (simple-array double-float (2)) state)
    
    241 250
     	   (optimize (speed 3) (safety 0)))
    
    242
    -  ;; Portable implementation of the xoroshiro128+ generator. See
    
    243
    -  ;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c for the
    
    244
    -  ;; definitive definition.
    
    245
    -  ;;
    
    246
    -  ;; uint64_t s[2];
    
    247
    -  ;;
    
    248
    -  ;; static inline uint64_t rotl(const uint64_t x, int k) {
    
    249
    -  ;; 	return (x << k) | (x >> (64 - k));
    
    250
    -  ;; }
    
    251
    -  ;;
    
    252
    -  ;; uint64_t next(void) {
    
    253
    -  ;; 	const uint64_t s0 = s[0];
    
    254
    -  ;; 	uint64_t s1 = s[1];
    
    255
    -  ;; 	const uint64_t result = s0 + s1;
    
    256
    -  ;;
    
    257
    -  ;; 	s1 ^= s0;
    
    258
    -  ;; 	s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
    
    259
    -  ;; 	s[1] = rotl(s1, 36); // c
    
    260
    -  ;;
    
    261
    -  ;; 	return result;
    
    262
    -  ;; }
    
    263
    -  ;;
    
    264
    -  (flet ((rotl-55 (x1 x0)
    
    265
    -	   ;; Rotate [x1|x0] left 55 bits, returning the result as two
    
    266
    -	   ;; values.
    
    267
    -	   (declare (type (unsigned-byte 32) x0 x1)
    
    268
    -		    (optimize (speed 3) (safety 0)))
    
    269
    -	   ;; x << 55
    
    270
    -	   (let ((sl55-h (ldb (byte 32 0) (ash x0 (- 55 32))))
    
    271
    -		 (sl55-l 0))
    
    272
    -	     ;; x >> 9
    
    273
    -	     (let ((sr9-h (ash x1 -9))
    
    274
    -		   (sr9-l (ldb (byte 32 0)
    
    275
    -			       (logior (ash x0 -9)
    
    276
    -				       (ash x1 23)))))
    
    277
    -	       (values (logior sl55-h sr9-h)
    
    278
    -		       (logior sl55-l sr9-l)))))
    
    279
    -	 (rotl-36 (x1 x0)
    
    280
    -	   ;; Rotate [x1|x0] left 36 bits, returning the result as two
    
    281
    -	   ;; values.
    
    282
    -	   (declare (type (unsigned-byte 32) x0 x1)
    
    283
    -		    (optimize (speed 3) (safety 0)))
    
    284
    -	   ;; x << 36
    
    285
    -	   (let ((sl36-h (ldb (byte 32 0) (ash x0 4))))
    
    286
    -	     ;; x >> 28
    
    287
    -	     (let ((sr28-l (ldb (byte 32 0)
    
    288
    -				(logior (ash x0 -28)
    
    289
    -					(ash x1 4))))
    
    290
    -		   (sr28-h (ash x1 -28)))
    
    291
    -	       (values (logior sl36-h sr28-h)
    
    292
    -		       sr28-l))))
    
    293
    -	 (shl-14 (x1 x0)
    
    294
    -	   ;; Shift [x1|x0] left by 14 bits, returning the result as
    
    295
    -	   ;; two values.
    
    296
    -	   (declare (type (unsigned-byte 32) x1 x0)
    
    297
    -		    (optimize (speed 3) (safety 0)))
    
    298
    -	   (values (ldb (byte 32 0)
    
    299
    -			(logior (ash x1 14)
    
    300
    -				(ash x0 (- 14 32))))
    
    301
    -		   (ldb (byte 32 0)
    
    302
    -			(ash x0 14))))
    
    303
    -	 (make-double (hi lo)
    
    304
    -	   (kernel:make-double-float
    
    305
    -	    (if (< hi #x80000000)
    
    306
    -		hi
    
    307
    -		(- hi #x100000000))
    
    308
    -	    lo)))
    
    251
    +  (flet
    
    252
    +      ((make-double (hi lo)
    
    253
    +	 ;; Convert [hi lo] to a double-float, where hi and lo are the
    
    254
    +	 ;; high and low 32 bits of a 64-bit double-float.
    
    255
    +	 (declare (type (unsigned-byte 32) hi lo))
    
    256
    +	 (kernel:make-double-float
    
    257
    +	  (if (< hi #x80000000)
    
    258
    +	      hi
    
    259
    +	      (- hi #x100000000))
    
    260
    +	  lo))
    
    261
    +       (mul-shift (x1 x0 shift)
    
    262
    +	 ;; Perform multiplication by 2^shift+1, by doing a shift and
    
    263
    +	 ;; an add.
    
    264
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    265
    +		  (optimize (speed 3)))
    
    266
    +	 (let* ((overflow (ash x0 (- (- 32 shift))))
    
    267
    +		(s0 (ldb (byte 32 0) (ash x0 shift)))
    
    268
    +		(s1 (logior (ldb (byte 32 0) (ash x1 shift))
    
    269
    +			    overflow)))
    
    270
    +	   (multiple-value-bind (sum0 carry)
    
    271
    +	       (bignum:%add-with-carry s0 x0 0)
    
    272
    +	     (multiple-value-bind (sum1)
    
    273
    +		 (bignum:%add-with-carry s1 x1 carry)
    
    274
    +	       (values sum1 sum0)))))
    
    275
    +       (shl (x1 x0 n)
    
    276
    +	 ;; Shift left [x1 x0] by n bits, where 0 <= n < 31.
    
    277
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    278
    +		  (type (integer 0 31) n)
    
    279
    +		  (optimize (speed 3)))
    
    280
    +	 (let ((out (ldb (byte n (- 32 n)) x0)))
    
    281
    +	   (values (logior (ldb (byte 32 0) (ash x1 n)) out)
    
    282
    +		   (ldb (byte 32 0) (ash x0 n)))))
    
    283
    +       (rotl (x1 x0 n)
    
    284
    +	 ;; Rotate left [x1 x0] by n bits where n < 32.
    
    285
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    286
    +		  (type (integer 0 31) n)
    
    287
    +		  (optimize (speed 3)))
    
    288
    +	 (let ((x1-hi (ldb (byte n (- 32 n)) x1))
    
    289
    +	       (x0-hi (ldb (byte n (- 32 n)) x0)))
    
    290
    +	   ;; x1-hi and x0-hi are the bits that are shifted out of x1
    
    291
    +	   ;; and x0.
    
    292
    +	   (values (logior (ldb (byte 32 0) (ash x1 n))
    
    293
    +			   x0-hi)
    
    294
    +		   (logior (ldb (byte 32 0) (ash x0 n))
    
    295
    +			   x1-hi))))
    
    296
    +       (rotr (x1 x0 n)
    
    297
    +	 ;; Rotate right [x1 x0] by n bits where n < 32.
    
    298
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    299
    +		  (type (integer 0 31) n)
    
    300
    +		  (optimize (speed 3)))
    
    301
    +	 (let ((x1-lo (ldb (byte n 0) x1))
    
    302
    +	       (x0-lo (ldb (byte n 0) x0))
    
    303
    +	       (-n (- n))
    
    304
    +	       (32-n (- 32 n)))
    
    305
    +	   ;; x1-lo and x0-lo are the bits of x1/x0 that would be
    
    306
    +	   ;; shifted out.
    
    307
    +	   (values (logior (ldb (byte 32 0) (ash x1 -n))
    
    308
    +			   (ldb (byte 32 0) (ash x0-lo 32-n)))
    
    309
    +		   (logior (ldb (byte 32 0) (ash x0 -n))
    
    310
    +			   (ldb (byte 32 0) (ash x1-lo 32-n)))))))
    
    311
    +    (declare (inline mul-shift shl rotl rotr))
    
    309 312
         (let ((s0-1 0)
    
    310 313
     	  (s0-0 0)
    
    311 314
     	  (s1-1 0)
    
    312
    -	  (s1-0 0))
    
    313
    -      (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0))
    
    314
    -      ;; Load the state to s0 and s1. s0-1 is the high 32-bit part and
    
    315
    -      ;; s0-0 is the low 32-bit part of the 64-bit value.  Similarly
    
    316
    -      ;; for s1.
    
    315
    +	  (s1-0 0)
    
    316
    +	  (result-1 0)
    
    317
    +	  (result-0 0))
    
    318
    +      (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0 result-1 result-0))
    
    319
    +      ;; [s0-1 s0-0] is the first 64-bit word of the state.
    
    317 320
           (multiple-value-bind (x1 x0)
    
    318 321
     	  (kernel:double-float-bits (aref state 0))
    
    319 322
     	(setf s0-1 (ldb (byte 32 0) x1)
    
    320 323
     	      s0-0 x0))
    
    324
    +      ;; [s1-1 s1-0] is the second 64-bit word of the state.
    
    321 325
           (multiple-value-bind (x1 x0)
    
    322 326
     	  (kernel:double-float-bits (aref state 1))
    
    323 327
     	(setf s1-1 (ldb (byte 32 0) x1)
    
    324 328
     	      s1-0 x0))
    
    325 329
     
    
    326
    -      ;; Compute the 64-bit random value: s0 + s1
    
    327
    -      (multiple-value-prog1
    
    328
    -	  (multiple-value-bind (sum-0 c)
    
    329
    -	      (bignum::%add-with-carry s0-0 s1-0 0)
    
    330
    -	    (values (bignum::%add-with-carry s0-1 s1-1 c)
    
    331
    -		    sum-0))
    
    332
    -	;; s1 ^= s0
    
    333
    -	(setf s1-1 (logxor s1-1 s0-1)
    
    334
    -	      s1-0 (logxor s1-0 s0-0))
    
    335
    -	;; s[0] = rotl(s0,55) ^ s1 ^ (s1 << 14)
    
    336
    -	(multiple-value-setq (s0-1 s0-0)
    
    337
    -	  (rotl-55 s0-1 s0-0))
    
    338
    -	(setf s0-1 (logxor s0-1 s1-1)
    
    339
    -	      s0-0 (logxor s0-0 s1-0))
    
    340
    -	(multiple-value-bind (s14-1 s14-0)
    
    341
    -	    (shl-14 s1-1 s1-0)
    
    342
    -	  (setf s0-1 (logxor s0-1 s14-1)
    
    343
    -		s0-0 (logxor s0-0 s14-0)))
    
    344
    -
    
    345
    -	(multiple-value-bind (r1 r0)
    
    346
    -	    (rotl-36 s1-1 s1-0)
    
    347
    -	  (setf (aref state 0) (make-double s0-1 s0-0)
    
    348
    -		(aref state 1) (make-double r1 r0)))))))
    
    330
    +      ;; [result-1 result-0] = s0 * 5
    
    331
    +      (multiple-value-setq (result-1 result-0)
    
    332
    +	(mul-shift s0-1 s0-0 2))
    
    333
    +
    
    334
    +      ;; [result-1 result-0] = rotl(result, 7)
    
    335
    +      (multiple-value-setq (result-1 result-0)
    
    336
    +	(rotl result-1 result-0 7))
    
    337
    +
    
    338
    +      ;; [result-1 result-0] = result*9
    
    339
    +      (multiple-value-setq (result-1 result-0)
    
    340
    +	(mul-shift result-1 result-0 3))
    
    341
    +
    
    342
    +      ;; s1 ^= s0
    
    343
    +      (setf s1-1 (logxor s1-1 s0-1)
    
    344
    +	    s1-0 (logxor s1-0 s0-0))
    
    345
    +      (multiple-value-bind (s24-hi s24-lo)
    
    346
    +	  (rotl s0-1 s0-0 24)
    
    347
    +	(declare (type (unsigned-byte 32) s24-hi s24-lo))
    
    348
    +	;; [s24-hi s24-lo] = rotl(s0, 24) ^ s1
    
    349
    +	(setf s24-hi (logxor s24-hi s1-1)
    
    350
    +	      s24-lo (logxor s24-lo s1-0))
    
    351
    +	(multiple-value-bind (s16-hi s16-lo)
    
    352
    +	    (shl s1-1 s1-0 16)
    
    353
    +	  ;; [s24-hi s24-lo] = s24 ^ (s1 << 16) = state[0]
    
    354
    +	  (setf s24-hi (logxor s24-hi s16-hi)
    
    355
    +		s24-lo (logxor s24-lo s16-lo))
    
    356
    +	  (setf (aref state 0) (make-double s24-hi s24-lo))))
    
    357
    +      (multiple-value-bind (s37-hi s37-lo)
    
    358
    +	  (rotr s1-1 s1-0 (- 64 37))
    
    359
    +	;; [s37-hi s37-lo] = rotl(s1, 37) = state[1].  But rotating
    
    360
    +	;; left by 37 is the same as rotating right by 64-37.
    
    361
    +	(setf (aref state 1) (make-double s37-hi s37-lo)))
    
    362
    +      ;; Return result.
    
    363
    +      (values result-1
    
    364
    +	      result-0))))
    
    349 365
     
    
    350 366
     ;;; Size of the chunks returned by random-chunk.
    
    351 367
     ;;;
    
    ... ... @@ -495,10 +511,10 @@
    495 511
     	    :format-arguments (list arg)))))
    
    496 512
     
    
    497 513
     ;; Jump function for the generator.  See the jump function in
    
    498
    -;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c
    
    514
    +;; https://prng.di.unimi.it/xoroshiro128starstar.c
    
    499 515
     (defun random-state-jump (&optional (rng-state *random-state*))
    
    500 516
       _N"Jump the RNG-STATE.  This is equivalent to 2^64 calls to the
    
    501
    -  xoroshiro128+ generator.  It can be used to generate 2^64
    
    517
    +  xoroshiro128** generator.  It can be used to generate 2^64
    
    502 518
       non-overlapping subsequences for parallel computations."
    
    503 519
       (declare (type random-state rng-state))
    
    504 520
       (let ((state (random-state-state rng-state))
    
    ... ... @@ -508,12 +524,12 @@
    508 524
     	(s1-1 0))
    
    509 525
         (declare (type (unsigned-byte 32) s0-0 s0-1 s1-0 s1-1)
    
    510 526
     	     (optimize (speed 3) (safety 0)))
    
    511
    -    ;; The constants are #xbeac0467eba5facb and #xd86b048b86aa9922,
    
    527
    +    ;; The constants are #xdf900294d8f554a5 and #x170865df4b3201fc,
    
    512 528
         ;; and we process these numbers starting from the LSB.  We want ot
    
    513 529
         ;; process these in 32-bit chunks, so word-reverse the constants.
    
    514
    -    (dolist (jump '(#xeba5facb #xbeac0467 #x86aa9922 #xd86b048b))
    
    515
    -      (declare (type (unsigned-byte 32) jump))
    
    516
    -      (dotimes (b 32)
    
    530
    +    (dolist (jump '(#xdf900294d8f554a5 #x170865df4b3201fc))
    
    531
    +      (declare (type (unsigned-byte 64) jump))
    
    532
    +      (dotimes (b 64)
    
    517 533
     	(declare (fixnum b))
    
    518 534
     	(when (logbitp b jump)
    
    519 535
     	  (multiple-value-bind (x1 x0)
    
    ... ... @@ -534,4 +550,4 @@
    534 550
     	      x0)))
    
    535 551
           (setf (aref state 0) (convert s0-1 s0-0))
    
    536 552
           (setf (aref state 1) (convert s1-1 s1-0)))
    
    537
    -      rng-state))
    553
    +    rng-state))

  • src/compiler/x86/arith.lisp
    ... ... @@ -1695,58 +1695,101 @@
    1695 1695
       (:temporary (:sc double-reg) s0)
    
    1696 1696
       (:temporary (:sc double-reg) s1)
    
    1697 1697
       (:temporary (:sc double-reg) t0)
    
    1698
    +  (:temporary (:sc double-reg) t1)
    
    1698 1699
       (:generator 10
    
    1700
    +    ;; See https://prng.di.unimi.it/xoroshiro128starstar.c for the official code.
    
    1701
    +    ;;
    
    1702
    +    ;; This is what we're implementing, where s[] is our state vector.
    
    1703
    +    ;;
    
    1704
    +    ;; static uint64_t s[2];
    
    1705
    +    ;; static inline uint64_t rotl(const uint64_t x, int k) {
    
    1706
    +    ;;   return (x << k) | (x >> (64 - k));
    
    1707
    +    ;; }
    
    1708
    +    ;;
    
    1709
    +    ;; uint64_t next(void) {
    
    1710
    +    ;;   const uint64_t s0 = s[0];
    
    1711
    +    ;; 	 uint64_t s1 = s[1];
    
    1712
    +    ;; 	 const uint64_t result = rotl(s0 * 5, 7) * 9;
    
    1713
    +    ;; 
    
    1714
    +    ;; 	 s1 ^= s0;
    
    1715
    +    ;; 	 s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
    
    1716
    +    ;; 	 s[1] = rotl(s1, 37); // c
    
    1717
    +    ;; 
    
    1718
    +    ;; 	 return result;
    
    1719
    +    ;; }
    
    1720
    +
    
    1699 1721
         ;; s0 = state[0]
    
    1700 1722
         (inst movsd s0 (make-ea :dword :base state
    
    1701
    -			 :disp (- (+ (* vm:vector-data-offset
    
    1702
    -					vm:word-bytes)
    
    1703
    -				     (* 8 0))
    
    1704
    -				  vm:other-pointer-type)))
    
    1705
    -    ;; s1 = state[1]
    
    1706
    -    (inst movsd s1 (make-ea :dword :base state
    
    1707
    -			 :disp (- (+ (* vm:vector-data-offset
    
    1708
    -					vm:word-bytes)
    
    1709
    -				     (* 8 1))
    
    1710
    -				  vm:other-pointer-type)))
    
    1711
    -    ;; Compute result = s0 + s1
    
    1712
    -    (inst movapd t0 s0)
    
    1713
    -    (inst paddq t0 s1)
    
    1714
    -    ;; Save the 64-bit result as two 32-bit results
    
    1723
    +                            :disp (- (+ (* vm:vector-data-offset
    
    1724
    +					   vm:word-bytes)
    
    1725
    +				        (* 8 0))
    
    1726
    +				     vm:other-pointer-type)))
    
    1727
    +    ;; t0 = s0 * 5 = s0 << 2 + s0
    
    1728
    +    (inst movapd t0 s0)                 ; t0 = s0
    
    1729
    +    (inst psllq t0 2)                   ; t0 = t0 << 2 = 4*t0
    
    1730
    +    (inst paddq t0 s0)                  ; t0 = t0 + s0 = 5*t0
    
    1731
    +
    
    1732
    +    ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7)
    
    1733
    +    ;;    = rotl(s0*5, 7)
    
    1734
    +    (inst movapd t1 t0)        ; t1 = t0
    
    1735
    +    (inst psllq t1 7)          ; t1 = t0 << 7
    
    1736
    +    (inst psrlq t0 (- 64 7))   ; t0 = t0 >> 57
    
    1737
    +    (inst orpd t0 t1)          ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7)
    
    1738
    +
    
    1739
    +    ;; t0 = t0 * 9 = t0 << 3 + t0
    
    1740
    +    ;;    = rotl(s0*5, 7) * 9
    
    1741
    +    (inst movapd t1 t0)                 ; t1 = t0
    
    1742
    +    (inst psllq t1 3)                   ; t1 = t0 << 3
    
    1743
    +    (inst paddq t0 t1)                  ; t0 = t0 << 3 + t0 = 9*t0
    
    1744
    +
    
    1745
    +    ;; Save the result as two 32-bit results.  r1 is the high 32 bits
    
    1746
    +    ;; and r0 is the low 32.
    
    1715 1747
         (inst movd r0 t0)
    
    1716 1748
         (inst psrlq t0 32)
    
    1717 1749
         (inst movd r1 t0)
    
    1718 1750
     
    
    1719
    -    ;; s1 = s1 ^ s0
    
    1720
    -    (inst xorpd s1 s0)
    
    1721
    -
    
    1722
    -    ;; s0 = rotl(s0,55) = s0 << 55 | s0 >> 9
    
    1723
    -    (inst movapd t0 s0)
    
    1724
    -    (inst psllq s0 55)			; s0 = s0 << 55
    
    1725
    -    (inst psrlq t0 9)			; t0 = s0 >> 9
    
    1726
    -    (inst orpd s0 t0)			; s0 = rotl(s0, 55)
    
    1727
    -
    
    1728
    -    (inst movapd t0 s1)
    
    1729
    -    (inst xorpd s0 s1)			; s0 = s0 ^ s1
    
    1730
    -    (inst psllq t0 14)			; t0 = s1 << 14
    
    1731
    -    (inst xorpd s0 t0)			; s0 = s0 ^ t0
    
    1751
    +    ;; s1 = state[1]
    
    1752
    +    (inst movsd s1 (make-ea :dword :base state
    
    1753
    +			    :disp (- (+ (* vm:vector-data-offset
    
    1754
    +					   vm:word-bytes)
    
    1755
    +				        (* 8 1))
    
    1756
    +				     vm:other-pointer-type)))
    
    1757
    +    (inst xorpd s1 s0)                  ; s1 = s1 ^ s0
    
    1758
    +
    
    1759
    +    ;; s0 can now be reused as a temp.
    
    1760
    +    ;; s0 = rotl(s0, 24)
    
    1761
    +    (inst movapd t0 s0)                 ; t0 = s0
    
    1762
    +    (inst psllq t0 24)                  ; t0 = s0 << 24
    
    1763
    +    (inst psrlq s0 (- 64 24))           ; s0 = s0 >> 40
    
    1764
    +    (inst orpd s0 t0)                   ; s0 = s0 | t0 = rotl(s0, 24)
    
    1765
    +
    
    1766
    +    ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1
    
    1767
    +    (inst xorpd s0 s1)
    
    1768
    +
    
    1769
    +    ;; s0 = s0 ^ (s1 << 16)
    
    1770
    +    (inst movapd t0 s1)          ; t0 = s1
    
    1771
    +    (inst psllq t0 16)           ; t0 = s1 << 16
    
    1772
    +    (inst xorpd s0 t0)           ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16)
    
    1773
    +
    
    1774
    +    ;; Save s0 to state[0]
    
    1732 1775
         (inst movsd (make-ea :dword :base state
    
    1733 1776
     			 :disp (- (+ (* vm:vector-data-offset
    
    1734 1777
     					vm:word-bytes)
    
    1735 1778
     				     (* 8 0))
    
    1736 1779
     				  vm:other-pointer-type))
    
    1737
    -	  s0)
    
    1780
    +          s0)
    
    1738 1781
     
    
    1739
    -    ;; s1 = rotl(s1, 36) = s1 << 36 | s1 >> 28, using t0 as temp
    
    1740
    -    (inst movapd t0 s1)
    
    1741
    -    (inst psllq s1 36)
    
    1742
    -    (inst psrlq t0 28)
    
    1743
    -    (inst orpd s1 t0)
    
    1782
    +    ;; s1 = rotl(s1, 37)
    
    1783
    +    (inst movapd t0 s1)                 ; t0 = s1
    
    1784
    +    (inst psllq t0 37)                  ; t0 = s1 << 37
    
    1785
    +    (inst psrlq s1 (- 64 37))           ; s1 = s1 >> 27
    
    1786
    +    (inst orpd s1 t0)                   ; s1 = t0 | s1 = rotl(s1, 37)
    
    1744 1787
     
    
    1788
    +    ;; Save s1 to state[1]
    
    1745 1789
         (inst movsd (make-ea :dword :base state
    
    1746 1790
     			 :disp (- (+ (* vm:vector-data-offset
    
    1747 1791
     					vm:word-bytes)
    
    1748 1792
     				     (* 8 1))
    
    1749 1793
     				  vm:other-pointer-type))
    
    1750
    -	  s1)))
    
    1751
    -)    
    
    1752
    -    
    1794
    +          s1)))
    
    1795
    +)

  • src/general-info/release-21f.md
    ... ... @@ -24,6 +24,9 @@ public domain.
    24 24
           `ext:stream-file-length` generic function.
    
    25 25
       * Changes:
    
    26 26
         * Update to ASDF 3.3.7
    
    27
    +    * The RNG has changed from an old version of xoroshiro128+ to
    
    28
    +      xoroshiro128**.  This means sequences of random numbers will be
    
    29
    +      different from before.  See ~~#276~~.
    
    27 30
       * ANSI compliance fixes:
    
    28 31
       * Bug fixes:
    
    29 32
       * Gitlab tickets:
    
    ... ... @@ -44,8 +47,7 @@ public domain.
    44 47
         * ~~#266~~ Support "~user" in namestrings
    
    45 48
         * ~~#271~~ Update ASDF to 3.3.7
    
    46 49
         * ~~#272~~ Move scavenge code for static vectors to its own function
    
    47
    -    * ~~#277~~ `float-ratio-float` returns least postive float for
    
    48
    -      ratios closer to that than zero.
    
    50
    +    * ~~#276~~ Implement xoroshiro128** generator for x86
    
    49 51
       * Other changes:
    
    50 52
       * Improvements to the PCL implementation of CLOS:
    
    51 53
       * Changes to building procedure:
    

  • src/i18n/locale/cmucl.pot
    ... ... @@ -12220,6 +12220,14 @@ msgstr ""
    12220 12220
     msgid "Argument is not a RANDOM-STATE, T, or NIL: ~S"
    
    12221 12221
     msgstr ""
    
    12222 12222
     
    
    12223
    +#: src/code/rand-xoroshiro.lisp
    
    12224
    +msgid ""
    
    12225
    +"Generate the next 64-bit result from the xoroshiro128** generator\n"
    
    12226
    +"  using the state in STATE, a simple-array of 2 double-floats.  The\n"
    
    12227
    +"  64-bit result is returned as 2 32-bit values, with the high 32-bits\n"
    
    12228
    +"  being the first value."
    
    12229
    +msgstr ""
    
    12230
    +
    
    12223 12231
     #: src/code/rand-xoroshiro.lisp
    
    12224 12232
     msgid ""
    
    12225 12233
     "Generate a uniformly distributed pseudo-random number between zero\n"
    
    ... ... @@ -12233,7 +12241,7 @@ msgstr ""
    12233 12241
     #: src/code/rand-xoroshiro.lisp
    
    12234 12242
     msgid ""
    
    12235 12243
     "Jump the RNG-STATE.  This is equivalent to 2^64 calls to the\n"
    
    12236
    -"  xoroshiro128+ generator.  It can be used to generate 2^64\n"
    
    12244
    +"  xoroshiro128** generator.  It can be used to generate 2^64\n"
    
    12237 12245
     "  non-overlapping subsequences for parallel computations."
    
    12238 12246
     msgstr ""
    
    12239 12247
     
    

  • tests/rng.lisp
    ... ... @@ -48,16 +48,18 @@
    48 48
       (assert-equal 0 (kernel::random-state-rand *test-state*))
    
    49 49
       (assert-equal nil (kernel::random-state-cached-p *test-state*))
    
    50 50
     
    
    51
    -  (dolist (item '((#x18d5f05c086e0086 (#x228f4926843b364d #x74dfe78e715c81be))
    
    52
    -		  (#x976f30b4f597b80b (#x5b6bd4558bd96a68 #x567b7f35650aea8f))
    
    53
    -		  (#xb1e7538af0e454f7 (#x13e5253e242fac52 #xed380e70d10ab60e))
    
    54
    -		  (#x011d33aef53a6260 (#x9d0764952ca00d8a #x5251a5cfedd2b4ef))
    
    55
    -		  (#xef590a651a72c279 (#xba4ef2b425bda963 #x172b965cf56c15ac))
    
    56
    -		  (#xd17a89111b29bf0f (#x458277a5e5f0a21b #xd1bccfad6564e8d))
    
    57
    -		  (#x529e44a0bc46f0a8 (#x2becb68d5a7194c7 #x3a6ec964899bb5f3))
    
    58
    -		  (#x665b7ff1e40d4aba (#xededfd481d0a19fe #x3ea213411827fe9d))
    
    59
    -		  (#x2c9010893532189b (#xd7bb59bcd8fba26f #x52de763d34fee090))
    
    60
    -		  (#x2a99cffa0dfa82ff (#xf96e892c62d6ff2e #xc0542ff85652f81e))))
    
    51
    +  (dolist (item
    
    52
    +           ;; Results for xoroshiro128**
    
    53
    +           '((#x41db14eb317141fe (#x16dfbf3d760d0fa4 #xe9bfcf1ce2b9037c))
    
    54
    +             (#xaa4ee6e025dfec01 (#xb237e99a3c7ad367 #x96819b1fec0e0432))
    
    55
    +             (#xea080e50cb948fa5 (#xcc0fd8226093e0bc #x0e9aeaa496ce50ba))
    
    56
    +             (#x647f057cff408a6e (#xd273573bfa97bfde #xcbb600d852a650de))
    
    57
    +             (#x232ac586565d037e (#x75dc686d99e39c57 #x063de00338aafc75))
    
    58
    +             (#xdf2da206813da6d6 (#x9616cabb961ebc4a #x292c044e7c310dd4))
    
    59
    +             (#x00d17cb1b38c852f (#xca593a661127a754 #x45f633d7e759debd))
    
    60
    +             (#xd7a1f881fc34e641 (#xe00fd868db5d20d3 #xcfcf3d31f5e1363e))
    
    61
    +             (#x64853747af628d30 (#xa24296c5ebb11935 #xd782dda5f81cab25))
    
    62
    +             (#xda40653710b7293d (#xfb4be9d4941ff086 #x75b6420eb8096c02))))
    
    61 63
         (destructuring-bind (value state)
    
    62 64
     	item
    
    63 65
           (assert-equal value (64-bit-value *test-state*))
    
    ... ... @@ -69,10 +71,13 @@
    69 71
     	(kernel::make-random-object :state (kernel::init-random-state #x12345678)
    
    70 72
     				    :rand 0
    
    71 73
     				    :cached-p nil))
    
    72
    -  (dolist (result '((#x291ddf8e6f6a7b67 #x1f9018a12f9e031f)
    
    73
    -		    (#x88a7aa12158558d0 #xe264d785ab1472d9)
    
    74
    -		    (#x207e16f73c51e7ba #x999c8a0a9a8d87c0)
    
    75
    -		    (#x28f8959d3bcf5ff1 #x38091e563ab6eb98)))
    
    74
    +  (dolist (result
    
    75
    +           ;; Results for xoroshiro128** jump function
    
    76
    +           '((#x19a22191480b0a4e #x43b3d7ee592dd4cf)
    
    77
    +             (#x76cb87035d0b6e99 #xb6827bcf2ef8267c)
    
    78
    +             (#x5125201dbdf76860 #x8984c075043869e2)
    
    79
    +             (#x2c06f0667255309f #xa48cbe2e60fc1d65)
    
    80
    +             ))
    
    76 81
         (kernel:random-state-jump *test-state*)
    
    77 82
         (assert-equal result (multiple-value-list
    
    78 83
     			  (64-bit-rng-state *test-state*)))))
    

  • tests/rng/test-128-star-star.c
    1
    +/*  Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
    
    2
    +
    
    3
    +To the extent possible under law, the author has dedicated all copyright
    
    4
    +and related and neighboring rights to this software to the public domain
    
    5
    +worldwide. This software is distributed without any warranty.
    
    6
    +
    
    7
    +See <http://creativecommons.org/publicdomain/zero/1.0/>. */
    
    8
    +
    
    9
    +#include <stdint.h>
    
    10
    +
    
    11
    +/* This is xoroshiro128** 1.0, one of our all-purpose, rock-solid,
    
    12
    +   small-state generators. It is extremely (sub-ns) fast and it passes all
    
    13
    +   tests we are aware of, but its state space is large enough only for
    
    14
    +   mild parallelism.
    
    15
    +
    
    16
    +   For generating just floating-point numbers, xoroshiro128+ is even
    
    17
    +   faster (but it has a very mild bias, see notes in the comments).
    
    18
    +
    
    19
    +   The state must be seeded so that it is not everywhere zero. If you have
    
    20
    +   a 64-bit seed, we suggest to seed a splitmix64 generator and use its
    
    21
    +   output to fill s. */
    
    22
    +
    
    23
    +
    
    24
    +static inline uint64_t rotl(const uint64_t x, int k) {
    
    25
    +	return (x << k) | (x >> (64 - k));
    
    26
    +}
    
    27
    +
    
    28
    +
    
    29
    +static uint64_t s[2];
    
    30
    +
    
    31
    +uint64_t next(void) {
    
    32
    +	const uint64_t s0 = s[0];
    
    33
    +	uint64_t s1 = s[1];
    
    34
    +	const uint64_t result = rotl(s0 * 5, 7) * 9;
    
    35
    +
    
    36
    +	s1 ^= s0;
    
    37
    +	s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
    
    38
    +	s[1] = rotl(s1, 37); // c
    
    39
    +
    
    40
    +	return result;
    
    41
    +}
    
    42
    +
    
    43
    +
    
    44
    +/* This is the jump function for the generator. It is equivalent
    
    45
    +   to 2^64 calls to next(); it can be used to generate 2^64
    
    46
    +   non-overlapping subsequences for parallel computations. */
    
    47
    +
    
    48
    +void jump(void) {
    
    49
    +	static const uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc };
    
    50
    +
    
    51
    +	uint64_t s0 = 0;
    
    52
    +	uint64_t s1 = 0;
    
    53
    +	for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
    
    54
    +		for(int b = 0; b < 64; b++) {
    
    55
    +			if (JUMP[i] & UINT64_C(1) << b) {
    
    56
    +				s0 ^= s[0];
    
    57
    +				s1 ^= s[1];
    
    58
    +			}
    
    59
    +			next();
    
    60
    +		}
    
    61
    +
    
    62
    +	s[0] = s0;
    
    63
    +	s[1] = s1;
    
    64
    +}
    
    65
    +
    
    66
    +
    
    67
    +/* This is the long-jump function for the generator. It is equivalent to
    
    68
    +   2^96 calls to next(); it can be used to generate 2^32 starting points,
    
    69
    +   from each of which jump() will generate 2^32 non-overlapping
    
    70
    +   subsequences for parallel distributed computations. */
    
    71
    +
    
    72
    +void long_jump(void) {
    
    73
    +	static const uint64_t LONG_JUMP[] = { 0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1 };
    
    74
    +
    
    75
    +	uint64_t s0 = 0;
    
    76
    +	uint64_t s1 = 0;
    
    77
    +	for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
    
    78
    +		for(int b = 0; b < 64; b++) {
    
    79
    +			if (LONG_JUMP[i] & UINT64_C(1) << b) {
    
    80
    +				s0 ^= s[0];
    
    81
    +				s1 ^= s[1];
    
    82
    +			}
    
    83
    +			next();
    
    84
    +		}
    
    85
    +
    
    86
    +	s[0] = s0;
    
    87
    +	s[1] = s1;
    
    88
    +}
    
    89
    +
    
    90
    +/*********************************************************************/
    
    91
    +
    
    92
    +#include <stdio.h>
    
    93
    +
    
    94
    +/*
    
    95
    + * Print out the first 10 random numbers from the generator.  This is
    
    96
    + * used for the expected results in the rng unit test.
    
    97
    + */
    
    98
    +void
    
    99
    +test_rng()
    
    100
    +{
    
    101
    +    int k;
    
    102
    +    uint64_t r;
    
    103
    +    
    
    104
    +    
    
    105
    +    s[0] = 0x38f1dc39d1906b6full;
    
    106
    +    s[1] = 0xdfe4142236dd9517ull;
    
    107
    +
    
    108
    +    printf(";; First 10 outputs\n");
    
    109
    +
    
    110
    +    for (k = 0; k < 10; ++k) {
    
    111
    +        r = next();
    
    112
    +        printf("%2d: #x%016llx (#x%016llx #x%016llx)\n", k, r, s[0], s[1]);
    
    113
    +    }
    
    114
    +}
    
    115
    +
    
    116
    +/*
    
    117
    + * Print out the first 4 states from jumping generator by 2^64 steps.
    
    118
    + * This is used for the expected results in the rng unit test.
    
    119
    + */
    
    120
    +void
    
    121
    +test_jump()
    
    122
    +{
    
    123
    +    int k;
    
    124
    +
    
    125
    +    s[0] = 0x38f1dc39d1906b6full;
    
    126
    +    s[1] = 0xdfe4142236dd9517ull;
    
    127
    +
    
    128
    +    printf(";; First 4 jumps\n");
    
    129
    +    for (k = 0; k < 4; ++k) {
    
    130
    +        jump();
    
    131
    +        printf("%2d: #x%016llx #x%016llx\n", k, s[0], s[1]);
    
    132
    +    }
    
    133
    +}
    
    134
    +
    
    135
    +
    
    136
    +int
    
    137
    +main()
    
    138
    +{
    
    139
    +    test_rng();
    
    140
    +    test_jump();
    
    141
    +    
    
    142
    +    return 0;
    
    143
    +}