... |
... |
@@ -229,123 +229,130 @@ |
229
|
229
|
;; faster than the portable lisp version. Use them.
|
230
|
230
|
#+(or x86 sparc)
|
231
|
231
|
(declaim (inline xoroshiro-gen))
|
232
|
|
-#+(or x86 sparc)
|
|
232
|
+#+(or x86)
|
233
|
233
|
(defun xoroshiro-gen (state)
|
234
|
234
|
(declare (type (simple-array double-float (2)) state)
|
235
|
235
|
(optimize (speed 3) (safety 0)))
|
236
|
236
|
(vm::xoroshiro-next state))
|
237
|
237
|
|
238
|
|
-#-(or x86 sparc)
|
|
238
|
+#+(or sparc)
|
239
|
239
|
(defun xoroshiro-gen (state)
|
240
|
240
|
(declare (type (simple-array double-float (2)) state)
|
241
|
241
|
(optimize (speed 3) (safety 0)))
|
242
|
|
- ;; Portable implementation of the xoroshiro128+ generator. See
|
243
|
|
- ;; http://xoroshiro.di.unimi.it/xoroshiro128plus.c for the
|
244
|
|
- ;; definitive definition.
|
245
|
|
- ;;
|
246
|
|
- ;; uint64_t s[2];
|
247
|
|
- ;;
|
248
|
|
- ;; static inline uint64_t rotl(const uint64_t x, int k) {
|
249
|
|
- ;; return (x << k) | (x >> (64 - k));
|
250
|
|
- ;; }
|
251
|
|
- ;;
|
252
|
|
- ;; uint64_t next(void) {
|
253
|
|
- ;; const uint64_t s0 = s[0];
|
254
|
|
- ;; uint64_t s1 = s[1];
|
255
|
|
- ;; const uint64_t result = s0 + s1;
|
256
|
|
- ;;
|
257
|
|
- ;; s1 ^= s0;
|
258
|
|
- ;; s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
|
259
|
|
- ;; s[1] = rotl(s1, 36); // c
|
260
|
|
- ;;
|
261
|
|
- ;; return result;
|
262
|
|
- ;; }
|
263
|
|
- ;;
|
264
|
|
- (flet ((rotl-55 (x1 x0)
|
265
|
|
- ;; Rotate [x1|x0] left 55 bits, returning the result as two
|
266
|
|
- ;; values.
|
267
|
|
- (declare (type (unsigned-byte 32) x0 x1)
|
268
|
|
- (optimize (speed 3) (safety 0)))
|
269
|
|
- ;; x << 55
|
270
|
|
- (let ((sl55-h (ldb (byte 32 0) (ash x0 (- 55 32))))
|
271
|
|
- (sl55-l 0))
|
272
|
|
- ;; x >> 9
|
273
|
|
- (let ((sr9-h (ash x1 -9))
|
274
|
|
- (sr9-l (ldb (byte 32 0)
|
275
|
|
- (logior (ash x0 -9)
|
276
|
|
- (ash x1 23)))))
|
277
|
|
- (values (logior sl55-h sr9-h)
|
278
|
|
- (logior sl55-l sr9-l)))))
|
279
|
|
- (rotl-36 (x1 x0)
|
280
|
|
- ;; Rotate [x1|x0] left 36 bits, returning the result as two
|
281
|
|
- ;; values.
|
282
|
|
- (declare (type (unsigned-byte 32) x0 x1)
|
283
|
|
- (optimize (speed 3) (safety 0)))
|
284
|
|
- ;; x << 36
|
285
|
|
- (let ((sl36-h (ldb (byte 32 0) (ash x0 4))))
|
286
|
|
- ;; x >> 28
|
287
|
|
- (let ((sr28-l (ldb (byte 32 0)
|
288
|
|
- (logior (ash x0 -28)
|
289
|
|
- (ash x1 4))))
|
290
|
|
- (sr28-h (ash x1 -28)))
|
291
|
|
- (values (logior sl36-h sr28-h)
|
292
|
|
- sr28-l))))
|
293
|
|
- (shl-14 (x1 x0)
|
294
|
|
- ;; Shift [x1|x0] left by 14 bits, returning the result as
|
295
|
|
- ;; two values.
|
296
|
|
- (declare (type (unsigned-byte 32) x1 x0)
|
297
|
|
- (optimize (speed 3) (safety 0)))
|
298
|
|
- (values (ldb (byte 32 0)
|
299
|
|
- (logior (ash x1 14)
|
300
|
|
- (ash x0 (- 14 32))))
|
301
|
|
- (ldb (byte 32 0)
|
302
|
|
- (ash x0 14))))
|
303
|
|
- (make-double (hi lo)
|
304
|
|
- (kernel:make-double-float
|
305
|
|
- (if (< hi #x80000000)
|
306
|
|
- hi
|
307
|
|
- (- hi #x100000000))
|
308
|
|
- lo)))
|
|
242
|
+ (flet
|
|
243
|
+ ((make-double (hi lo)
|
|
244
|
+ ;; Convert [hi lo] to a double-float, where hi and lo are the
|
|
245
|
+ ;; high and low 32 bits of a 64-bit double-float.
|
|
246
|
+ (declare (type (unsigned-byte 32) hi lo))
|
|
247
|
+ (kernel:make-double-float
|
|
248
|
+ (if (< hi #x80000000)
|
|
249
|
+ hi
|
|
250
|
+ (- hi #x100000000))
|
|
251
|
+ lo))
|
|
252
|
+ (mul-shift (x1 x0 shift)
|
|
253
|
+ ;; Perform multiplication by 2^shift+1, by doing a shift and
|
|
254
|
+ ;; an add.
|
|
255
|
+ (declare (type (unsigned-byte 32) x1 x0)
|
|
256
|
+ (optimize (speed 3)))
|
|
257
|
+ (let* ((overflow (ash x0 (- (- 32 shift))))
|
|
258
|
+ (s0 (ldb (byte 32 0) (ash x0 shift)))
|
|
259
|
+ (s1 (logior (ldb (byte 32 0) (ash x1 shift))
|
|
260
|
+ overflow)))
|
|
261
|
+ (multiple-value-bind (sum0 carry)
|
|
262
|
+ (bignum:%add-with-carry s0 x0 0)
|
|
263
|
+ (multiple-value-bind (sum1)
|
|
264
|
+ (bignum:%add-with-carry s1 x1 carry)
|
|
265
|
+ (values sum1 sum0)))))
|
|
266
|
+ (shl (x1 x0 n)
|
|
267
|
+ ;; Shift left [x1 x0] by n bits, where 0 <= n < 31.
|
|
268
|
+ (declare (type (unsigned-byte 32) x1 x0)
|
|
269
|
+ (type (integer 0 31) n)
|
|
270
|
+ (optimize (speed 3)))
|
|
271
|
+ (let ((out (ldb (byte n (- 32 n)) x0)))
|
|
272
|
+ (values (logior (ldb (byte 32 0) (ash x1 n)) out)
|
|
273
|
+ (ldb (byte 32 0) (ash x0 n)))))
|
|
274
|
+ (rotl (x1 x0 n)
|
|
275
|
+ ;; Rotate left [x1 x0] by n bits where n < 32.
|
|
276
|
+ (declare (type (unsigned-byte 32) x1 x0)
|
|
277
|
+ (type (integer 0 31) n)
|
|
278
|
+ (optimize (speed 3)))
|
|
279
|
+ (let ((x1-hi (ldb (byte n (- 32 n)) x1))
|
|
280
|
+ (x0-hi (ldb (byte n (- 32 n)) x0)))
|
|
281
|
+ ;; x1-hi and x0-hi are the bits that are shifted out of x1
|
|
282
|
+ ;; and x0.
|
|
283
|
+ (values (logior (ldb (byte 32 0) (ash x1 n))
|
|
284
|
+ x0-hi)
|
|
285
|
+ (logior (ldb (byte 32 0) (ash x0 n))
|
|
286
|
+ x1-hi))))
|
|
287
|
+ (rotr (x1 x0 n)
|
|
288
|
+ ;; Rotate right [x1 x0] by n bits where n < 32.
|
|
289
|
+ (declare (type (unsigned-byte 32) x1 x0)
|
|
290
|
+ (type (integer 0 31) n)
|
|
291
|
+ (optimize (speed 3)))
|
|
292
|
+ (let ((x1-lo (ldb (byte n 0) x1))
|
|
293
|
+ (x0-lo (ldb (byte n 0) x0))
|
|
294
|
+ (-n (- n))
|
|
295
|
+ (32-n (- 32 n)))
|
|
296
|
+ ;; x1-lo and x0-lo are the bits of x1/x0 that would be
|
|
297
|
+ ;; shifted out.
|
|
298
|
+ (values (logior (ldb (byte 32 0) (ash x1 -n))
|
|
299
|
+ (ldb (byte 32 0) (ash x0-lo 32-n)))
|
|
300
|
+ (logior (ldb (byte 32 0) (ash x0 -n))
|
|
301
|
+ (ldb (byte 32 0) (ash x1-lo 32-n)))))))
|
|
302
|
+ (declare (inline mul-shift shl rotl rotr))
|
309
|
303
|
(let ((s0-1 0)
|
310
|
304
|
(s0-0 0)
|
311
|
305
|
(s1-1 0)
|
312
|
|
- (s1-0 0))
|
313
|
|
- (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0))
|
314
|
|
- ;; Load the state to s0 and s1. s0-1 is the high 32-bit part and
|
315
|
|
- ;; s0-0 is the low 32-bit part of the 64-bit value. Similarly
|
316
|
|
- ;; for s1.
|
|
306
|
+ (s1-0 0)
|
|
307
|
+ (result-1 0)
|
|
308
|
+ (result-0 0))
|
|
309
|
+ (declare (type (unsigned-byte 32) s0-1 s0-0 s1-1 s1-0 result-1 result-0))
|
|
310
|
+ ;; [s0-1 s0-0] is the first 64-bit word of the state.
|
317
|
311
|
(multiple-value-bind (x1 x0)
|
318
|
312
|
(kernel:double-float-bits (aref state 0))
|
319
|
313
|
(setf s0-1 (ldb (byte 32 0) x1)
|
320
|
314
|
s0-0 x0))
|
|
315
|
+ ;; [s1-1 s1-0] is the second 64-bit word of the state.
|
321
|
316
|
(multiple-value-bind (x1 x0)
|
322
|
317
|
(kernel:double-float-bits (aref state 1))
|
323
|
318
|
(setf s1-1 (ldb (byte 32 0) x1)
|
324
|
319
|
s1-0 x0))
|
325
|
320
|
|
326
|
|
- ;; Compute the 64-bit random value: s0 + s1
|
327
|
|
- (multiple-value-prog1
|
328
|
|
- (multiple-value-bind (sum-0 c)
|
329
|
|
- (bignum::%add-with-carry s0-0 s1-0 0)
|
330
|
|
- (values (bignum::%add-with-carry s0-1 s1-1 c)
|
331
|
|
- sum-0))
|
332
|
|
- ;; s1 ^= s0
|
333
|
|
- (setf s1-1 (logxor s1-1 s0-1)
|
334
|
|
- s1-0 (logxor s1-0 s0-0))
|
335
|
|
- ;; s[0] = rotl(s0,55) ^ s1 ^ (s1 << 14)
|
336
|
|
- (multiple-value-setq (s0-1 s0-0)
|
337
|
|
- (rotl-55 s0-1 s0-0))
|
338
|
|
- (setf s0-1 (logxor s0-1 s1-1)
|
339
|
|
- s0-0 (logxor s0-0 s1-0))
|
340
|
|
- (multiple-value-bind (s14-1 s14-0)
|
341
|
|
- (shl-14 s1-1 s1-0)
|
342
|
|
- (setf s0-1 (logxor s0-1 s14-1)
|
343
|
|
- s0-0 (logxor s0-0 s14-0)))
|
344
|
|
-
|
345
|
|
- (multiple-value-bind (r1 r0)
|
346
|
|
- (rotl-36 s1-1 s1-0)
|
347
|
|
- (setf (aref state 0) (make-double s0-1 s0-0)
|
348
|
|
- (aref state 1) (make-double r1 r0)))))))
|
|
321
|
+ ;; [result-1 result-0] = s0 * 5
|
|
322
|
+ (multiple-value-setq (result-1 result-0)
|
|
323
|
+ (mul-shift s0-1 s0-0 2))
|
|
324
|
+
|
|
325
|
+ ;; [result-1 result-0] = rotl(result, 7)
|
|
326
|
+ (multiple-value-setq (result-1 result-0)
|
|
327
|
+ (rotl result-1 result-0 7))
|
|
328
|
+
|
|
329
|
+ ;; [result-1 result-0] = result*9
|
|
330
|
+ (multiple-value-setq (result-1 result-0)
|
|
331
|
+ (mul-shift result-1 result-0 3))
|
|
332
|
+
|
|
333
|
+ ;; s1 ^= s0
|
|
334
|
+ (setf s1-1 (logxor s1-1 s0-1)
|
|
335
|
+ s1-0 (logxor s1-0 s0-0))
|
|
336
|
+ (multiple-value-bind (s24-hi s24-lo)
|
|
337
|
+ (rotl s0-1 s0-0 24)
|
|
338
|
+ (declare (type (unsigned-byte 32) s24-hi s24-lo))
|
|
339
|
+ ;; [s24-hi s24-lo] = rotl(s0, 24) ^ s1
|
|
340
|
+ (setf s24-hi (logxor s24-hi s1-1)
|
|
341
|
+ s24-lo (logxor s24-lo s1-0))
|
|
342
|
+ (multiple-value-bind (s16-hi s16-lo)
|
|
343
|
+ (shl s1-1 s1-0 16)
|
|
344
|
+ ;; [s24-hi s24-lo] = s24 ^ (s1 << 16) = state[0]
|
|
345
|
+ (setf s24-hi (logxor s24-hi s16-hi)
|
|
346
|
+ s24-lo (logxor s24-lo s16-lo))
|
|
347
|
+ (setf (aref state 0) (make-double s24-hi s24-lo))))
|
|
348
|
+ (multiple-value-bind (s37-hi s37-lo)
|
|
349
|
+ (rotr s1-1 s1-0 (- 64 37))
|
|
350
|
+ ;; [s37-hi s37-lo] = rotl(s1, 37) = state[1]. But rotating
|
|
351
|
+ ;; left by 37 is the same as rotating right by 64-37.
|
|
352
|
+ (setf (aref state 1) (make-double s37-hi s37-lo)))
|
|
353
|
+ ;; Return result.
|
|
354
|
+ (values result-1
|
|
355
|
+ result-0))))
|
349
|
356
|
|
350
|
357
|
;;; Size of the chunks returned by random-chunk.
|
351
|
358
|
;;;
|