Raymond Toy pushed to branch issue-276-xoroshiro128starstar at cmucl / cmucl

Commits:

1 changed file:

Changes:

  • src/code/rand-xoroshiro.lisp
    ... ... @@ -229,123 +229,130 @@
    229 229
     ;; faster than the portable lisp version.  Use them.
    
    230 230
     #+(or x86 sparc)
    
    231 231
     (declaim (inline xoroshiro-gen))
    
    232
    -#+(or x86 sparc)
    
    232
    +#+(or x86)
    
    233 233
     (defun xoroshiro-gen (state)
    
    234 234
       (declare (type (simple-array double-float (2)) state)
    
    235 235
     	   (optimize (speed 3) (safety 0)))
    
    236 236
       (vm::xoroshiro-next state))
    
    237 237
     
    
    238
    -#-(or x86 sparc)
    
    238
    +#+(or sparc)
    
    239 239
     (defun xoroshiro-gen (state)
    
    240 240
       (declare (type (simple-array double-float (2)) state)
    
    241 241
     	   (optimize (speed 3) (safety 0)))
    
    242
    -  ;; Portable implementation of the xoroshiro128+ generator. See
    
    243
    -  ;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c for the
    
    244
    -  ;; definitive definition.
    
    245
    -  ;;
    
    246
    -  ;; uint64_t s[2];
    
    247
    -  ;;
    
    248
    -  ;; static inline uint64_t rotl(const uint64_t x, int k) {
    
    249
    -  ;; 	return (x << k) | (x >> (64 - k));
    
    250
    -  ;; }
    
    251
    -  ;;
    
    252
    -  ;; uint64_t next(void) {
    
    253
    -  ;; 	const uint64_t s0 = s[0];
    
    254
    -  ;; 	uint64_t s1 = s[1];
    
    255
    -  ;; 	const uint64_t result = s0 + s1;
    
    256
    -  ;;
    
    257
    -  ;; 	s1 ^= s0;
    
    258
    -  ;; 	s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
    
    259
    -  ;; 	s[1] = rotl(s1, 36); // c
    
    260
    -  ;;
    
    261
    -  ;; 	return result;
    
    262
    -  ;; }
    
    263
    -  ;;
    
    264
    -  (flet ((rotl-55 (x1 x0)
    
    265
    -	   ;; Rotate [x1|x0] left 55 bits, returning the result as two
    
    266
    -	   ;; values.
    
    267
    -	   (declare (type (unsigned-byte 32) x0 x1)
    
    268
    -		    (optimize (speed 3) (safety 0)))
    
    269
    -	   ;; x << 55
    
    270
    -	   (let ((sl55-h (ldb (byte 32 0) (ash x0 (- 55 32))))
    
    271
    -		 (sl55-l 0))
    
    272
    -	     ;; x >> 9
    
    273
    -	     (let ((sr9-h (ash x1 -9))
    
    274
    -		   (sr9-l (ldb (byte 32 0)
    
    275
    -			       (logior (ash x0 -9)
    
    276
    -				       (ash x1 23)))))
    
    277
    -	       (values (logior sl55-h sr9-h)
    
    278
    -		       (logior sl55-l sr9-l)))))
    
    279
    -	 (rotl-36 (x1 x0)
    
    280
    -	   ;; Rotate [x1|x0] left 36 bits, returning the result as two
    
    281
    -	   ;; values.
    
    282
    -	   (declare (type (unsigned-byte 32) x0 x1)
    
    283
    -		    (optimize (speed 3) (safety 0)))
    
    284
    -	   ;; x << 36
    
    285
    -	   (let ((sl36-h (ldb (byte 32 0) (ash x0 4))))
    
    286
    -	     ;; x >> 28
    
    287
    -	     (let ((sr28-l (ldb (byte 32 0)
    
    288
    -				(logior (ash x0 -28)
    
    289
    -					(ash x1 4))))
    
    290
    -		   (sr28-h (ash x1 -28)))
    
    291
    -	       (values (logior sl36-h sr28-h)
    
    292
    -		       sr28-l))))
    
    293
    -	 (shl-14 (x1 x0)
    
    294
    -	   ;; Shift [x1|x0] left by 14 bits, returning the result as
    
    295
    -	   ;; two values.
    
    296
    -	   (declare (type (unsigned-byte 32) x1 x0)
    
    297
    -		    (optimize (speed 3) (safety 0)))
    
    298
    -	   (values (ldb (byte 32 0)
    
    299
    -			(logior (ash x1 14)
    
    300
    -				(ash x0 (- 14 32))))
    
    301
    -		   (ldb (byte 32 0)
    
    302
    -			(ash x0 14))))
    
    303
    -	 (make-double (hi lo)
    
    304
    -	   (kernel:make-double-float
    
    305
    -	    (if (< hi #x80000000)
    
    306
    -		hi
    
    307
    -		(- hi #x100000000))
    
    308
    -	    lo)))
    
    242
    +  (flet
    
    243
    +      ((make-double (hi lo)
    
    244
    +	 ;; Convert [hi lo] to a double-float, where hi and lo are the
    
    245
    +	 ;; high and low 32 bits of a 64-bit double-float.
    
    246
    +	 (declare (type (unsigned-byte 32) hi lo))
    
    247
    +	 (kernel:make-double-float
    
    248
    +	  (if (< hi #x80000000)
    
    249
    +	      hi
    
    250
    +	      (- hi #x100000000))
    
    251
    +	  lo))
    
    252
    +       (mul-shift (x1 x0 shift)
    
    253
    +	 ;; Perform multiplication by 2^shift+1, by doing a shift and
    
    254
    +	 ;; an add.
    
    255
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    256
    +		  (optimize (speed 3)))
    
    257
    +	 (let* ((overflow (ash x0 (- (- 32 shift))))
    
    258
    +		(s0 (ldb (byte 32 0) (ash x0 shift)))
    
    259
    +		(s1 (logior (ldb (byte 32 0) (ash x1 shift))
    
    260
    +			    overflow)))
    
    261
    +	   (multiple-value-bind (sum0 carry)
    
    262
    +	       (bignum:%add-with-carry s0 x0 0)
    
    263
    +	     (multiple-value-bind (sum1)
    
    264
    +		 (bignum:%add-with-carry s1 x1 carry)
    
    265
    +	       (values sum1 sum0)))))
    
    266
    +       (shl (x1 x0 n)
    
    267
    +	 ;; Shift left [x1 x0] by n bits, where 0 <= n < 31.
    
    268
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    269
    +		  (type (integer 0 31) n)
    
    270
    +		  (optimize (speed 3)))
    
    271
    +	 (let ((out (ldb (byte n (- 32 n)) x0)))
    
    272
    +	   (values (logior (ldb (byte 32 0) (ash x1 n)) out)
    
    273
    +		   (ldb (byte 32 0) (ash x0 n)))))
    
    274
    +       (rotl (x1 x0 n)
    
    275
    +	 ;; Rotate left [x1 x0] by n bits where n < 32.
    
    276
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    277
    +		  (type (integer 0 31) n)
    
    278
    +		  (optimize (speed 3)))
    
    279
    +	 (let ((x1-hi (ldb (byte n (- 32 n)) x1))
    
    280
    +	       (x0-hi (ldb (byte n (- 32 n)) x0)))
    
    281
    +	   ;; x1-hi and x0-hi are the bits that are shifted out of x1
    
    282
    +	   ;; and x0.
    
    283
    +	   (values (logior (ldb (byte 32 0) (ash x1 n))
    
    284
    +			   x0-hi)
    
    285
    +		   (logior (ldb (byte 32 0) (ash x0 n))
    
    286
    +			   x1-hi))))
    
    287
    +       (rotr (x1 x0 n)
    
    288
    +	 ;; Rotate right [x1 x0] by n bits where n < 32.
    
    289
    +	 (declare (type (unsigned-byte 32) x1 x0)
    
    290
    +		  (type (integer 0 31) n)
    
    291
    +		  (optimize (speed 3)))
    
    292
    +	 (let ((x1-lo (ldb (byte n 0) x1))
    
    293
    +	       (x0-lo (ldb (byte n 0) x0))
    
    294
    +	       (-n (- n))
    
    295
    +	       (32-n (- 32 n)))
    
    296
    +	   ;; x1-lo and x0-lo are the bits of x1/x0 that would be
    
    297
    +	   ;; shifted out.
    
    298
    +	   (values (logior (ldb (byte 32 0) (ash x1 -n))
    
    299
    +			   (ldb (byte 32 0) (ash x0-lo 32-n)))
    
    300
    +		   (logior (ldb (byte 32 0) (ash x0 -n))
    
    301
    +			   (ldb (byte 32 0) (ash x1-lo 32-n)))))))
    
    302
    +    (declare (inline mul-shift shl rotl rotr))
    
    309 303
         (let ((s0-1 0)
    
    310 304
     	  (s0-0 0)
    
    311 305
     	  (s1-1 0)
    
    312
    -	  (s1-0 0))
    
    313
    -      (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0))
    
    314
    -      ;; Load the state to s0 and s1. s0-1 is the high 32-bit part and
    
    315
    -      ;; s0-0 is the low 32-bit part of the 64-bit value.  Similarly
    
    316
    -      ;; for s1.
    
    306
    +	  (s1-0 0)
    
    307
    +	  (result-1 0)
    
    308
    +	  (result-0 0))
    
    309
    +      (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0 result-1 result-0))
    
    310
    +      ;; [s0-1 s0-0] is the first 64-bit word of the state.
    
    317 311
           (multiple-value-bind (x1 x0)
    
    318 312
     	  (kernel:double-float-bits (aref state 0))
    
    319 313
     	(setf s0-1 (ldb (byte 32 0) x1)
    
    320 314
     	      s0-0 x0))
    
    315
    +      ;; [s1-1 s1-0] is the second 64-bit word of the state.
    
    321 316
           (multiple-value-bind (x1 x0)
    
    322 317
     	  (kernel:double-float-bits (aref state 1))
    
    323 318
     	(setf s1-1 (ldb (byte 32 0) x1)
    
    324 319
     	      s1-0 x0))
    
    325 320
     
    
    326
    -      ;; Compute the 64-bit random value: s0 + s1
    
    327
    -      (multiple-value-prog1
    
    328
    -	  (multiple-value-bind (sum-0 c)
    
    329
    -	      (bignum::%add-with-carry s0-0 s1-0 0)
    
    330
    -	    (values (bignum::%add-with-carry s0-1 s1-1 c)
    
    331
    -		    sum-0))
    
    332
    -	;; s1 ^= s0
    
    333
    -	(setf s1-1 (logxor s1-1 s0-1)
    
    334
    -	      s1-0 (logxor s1-0 s0-0))
    
    335
    -	;; s[0] = rotl(s0,55) ^ s1 ^ (s1 << 14)
    
    336
    -	(multiple-value-setq (s0-1 s0-0)
    
    337
    -	  (rotl-55 s0-1 s0-0))
    
    338
    -	(setf s0-1 (logxor s0-1 s1-1)
    
    339
    -	      s0-0 (logxor s0-0 s1-0))
    
    340
    -	(multiple-value-bind (s14-1 s14-0)
    
    341
    -	    (shl-14 s1-1 s1-0)
    
    342
    -	  (setf s0-1 (logxor s0-1 s14-1)
    
    343
    -		s0-0 (logxor s0-0 s14-0)))
    
    344
    -
    
    345
    -	(multiple-value-bind (r1 r0)
    
    346
    -	    (rotl-36 s1-1 s1-0)
    
    347
    -	  (setf (aref state 0) (make-double s0-1 s0-0)
    
    348
    -		(aref state 1) (make-double r1 r0)))))))
    
    321
    +      ;; [result-1 result-0] = s0 * 5
    
    322
    +      (multiple-value-setq (result-1 result-0)
    
    323
    +	(mul-shift s0-1 s0-0 2))
    
    324
    +
    
    325
    +      ;; [result-1 result-0] = rotl(result, 7)
    
    326
    +      (multiple-value-setq (result-1 result-0)
    
    327
    +	(rotl result-1 result-0 7))
    
    328
    +
    
    329
    +      ;; [result-1 result-0] = result*9
    
    330
    +      (multiple-value-setq (result-1 result-0)
    
    331
    +	(mul-shift result-1 result-0 3))
    
    332
    +
    
    333
    +      ;; s1 ^= s0
    
    334
    +      (setf s1-1 (logxor s1-1 s0-1)
    
    335
    +	    s1-0 (logxor s1-0 s0-0))
    
    336
    +      (multiple-value-bind (s24-hi s24-lo)
    
    337
    +	  (rotl s0-1 s0-0 24)
    
    338
    +	(declare (type (unsigned-byte 32) s24-hi s24-lo))
    
    339
    +	;; [s24-hi s24-lo] = rotl(s0, 24) ^ s1
    
    340
    +	(setf s24-hi (logxor s24-hi s1-1)
    
    341
    +	      s24-lo (logxor s24-lo s1-0))
    
    342
    +	(multiple-value-bind (s16-hi s16-lo)
    
    343
    +	    (shl s1-1 s1-0 16)
    
    344
    +	  ;; [s24-hi s24-lo] = s24 ^ (s1 << 16) = state[0]
    
    345
    +	  (setf s24-hi (logxor s24-hi s16-hi)
    
    346
    +		s24-lo (logxor s24-lo s16-lo))
    
    347
    +	  (setf (aref state 0) (make-double s24-hi s24-lo))))
    
    348
    +      (multiple-value-bind (s37-hi s37-lo)
    
    349
    +	  (rotr s1-1 s1-0 (- 64 37))
    
    350
    +	;; [s37-hi s37-lo] = rotl(s1, 37) = state[1].  But rotating
    
    351
    +	;; left by 37 is the same as rotating right by 64-37.
    
    352
    +	(setf (aref state 1) (make-double s37-hi s37-lo)))
    
    353
    +      ;; Return result.
    
    354
    +      (values result-1
    
    355
    +	      result-0))))
    
    349 356
     
    
    350 357
     ;;; Size of the chunks returned by random-chunk.
    
    351 358
     ;;;