... |
... |
@@ -225,18 +225,27 @@ |
225
|
225
|
|
226
|
226
|
;;;; Random entries:
|
227
|
227
|
|
228
|
|
-;; Sparc and x86 have vops to implement xoroshiro-gen that are much
|
229
|
|
-;; faster than the portable lisp version. Use them.
|
230
|
|
-#+(or x86 sparc)
|
|
228
|
+;; X86 has a vop to implement xoroshiro-gen that is about 4.5 times
|
|
229
|
+;; faster than the portable lisp version below. For other
|
|
230
|
+;; architectures, we use the portable version until a vop is written.
|
|
231
|
+#+x86
|
231
|
232
|
(declaim (inline xoroshiro-gen))
|
232
|
|
-#+(or x86)
|
|
233
|
+#+x86
|
233
|
234
|
(defun xoroshiro-gen (state)
|
|
235
|
+ _N"Generate the next 64-bit result from the xoroshiro128** generator
|
|
236
|
+ using the state in STATE, a simple-array of 2 double-floats. The
|
|
237
|
+ 64-bit result is returned as 2 32-bit values, with the high 32 bits
|
|
238
|
+ being the first value."
|
234
|
239
|
(declare (type (simple-array double-float (2)) state)
|
235
|
240
|
(optimize (speed 3) (safety 0)))
|
236
|
241
|
(vm::xoroshiro-next state))
|
237
|
242
|
|
238
|
|
-#+(or sparc)
|
|
243
|
+#-x86
|
239
|
244
|
(defun xoroshiro-gen (state)
|
|
245
|
+ _N"Generate the next 64-bit result from the xoroshiro128** generator
|
|
246
|
+ using the state in STATE, a simple-array of 2 double-floats. The
|
|
247
|
+ 64-bit result is returned as 2 32-bit values, with the high 32 bits
|
|
248
|
+ being the first value."
|
240
|
249
|
(declare (type (simple-array double-float (2)) state)
|
241
|
250
|
(optimize (speed 3) (safety 0)))
|
242
|
251
|
(flet
|
... |
... |
@@ -502,49 +511,7 @@ |
502
|
511
|
:format-arguments (list arg)))))
|
503
|
512
|
|
504
|
513
|
;; Jump function for the generator. See the jump function in
|
505
|
|
-;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c
|
506
|
|
-#-x86
|
507
|
|
-(defun random-state-jump (&optional (rng-state *random-state*))
|
508
|
|
- _N"Jump the RNG-STATE. This is equivalent to 2^64 calls to the
|
509
|
|
- xoroshiro128+ generator. It can be used to generate 2^64
|
510
|
|
- non-overlapping subsequences for parallel computations."
|
511
|
|
- (declare (type random-state rng-state))
|
512
|
|
- (let ((state (random-state-state rng-state))
|
513
|
|
- (s0-0 0)
|
514
|
|
- (s0-1 0)
|
515
|
|
- (s1-0 0)
|
516
|
|
- (s1-1 0))
|
517
|
|
- (declare (type (unsigned-byte 32) s0-0 s0-1 s1-0 s1-1)
|
518
|
|
- (optimize (speed 3) (safety 0)))
|
519
|
|
- ;; The constants are #xbeac0467eba5facb and #xd86b048b86aa9922,
|
520
|
|
- ;; and we process these numbers starting from the LSB. We want to
|
521
|
|
- ;; process these in 32-bit chunks, so word-reverse the constants.
|
522
|
|
- (dolist (jump '(#xeba5facb #xbeac0467 #x86aa9922 #xd86b048b))
|
523
|
|
- (declare (type (unsigned-byte 32) jump))
|
524
|
|
- (dotimes (b 32)
|
525
|
|
- (declare (fixnum b))
|
526
|
|
- (when (logbitp b jump)
|
527
|
|
- (multiple-value-bind (x1 x0)
|
528
|
|
- (kernel:double-float-bits (aref state 0))
|
529
|
|
- (setf s0-1 (logxor s0-1 (ldb (byte 32 0) x1))
|
530
|
|
- s0-0 (logxor s0-0 x0)))
|
531
|
|
-
|
532
|
|
- (multiple-value-bind (x1 x0)
|
533
|
|
- (kernel:double-float-bits (aref state 1))
|
534
|
|
- (setf s1-1 (logxor s1-1 (ldb (byte 32 0) x1))
|
535
|
|
- s1-0 (logxor s1-0 x0))))
|
536
|
|
- (xoroshiro-gen state)))
|
537
|
|
-
|
538
|
|
- (flet ((convert (x1 x0)
|
539
|
|
- (declare (type (unsigned-byte 32) x1 x0))
|
540
|
|
- (kernel:make-double-float
|
541
|
|
- (if (< x1 #x80000000) x1 (- x1 #x100000000))
|
542
|
|
- x0)))
|
543
|
|
- (setf (aref state 0) (convert s0-1 s0-0))
|
544
|
|
- (setf (aref state 1) (convert s1-1 s1-0)))
|
545
|
|
- rng-state))
|
546
|
|
-
|
547
|
|
-#+x86
|
|
514
|
+;; https://prng.di.unimi.it/xoroshiro128starstar.c
|
548
|
515
|
(defun random-state-jump (&optional (rng-state *random-state*))
|
549
|
516
|
_N"Jump the RNG-STATE. This is equivalent to 2^64 calls to the
|
550
|
517
|
xoroshiro128** generator. It can be used to generate 2^64
|