... |
... |
@@ -411,3 +411,121 @@ |
411
|
411
|
(inst pop y)
|
412
|
412
|
(inst pop k)
|
413
|
413
|
(inst ret))
|
|
414
|
+
|
|
415
|
+;;; Support for the xoroshiro128** generator. See
|
|
416
|
+;;; https://prng.di.unimi.it/xoroshiro128starstar.c for the official
|
|
417
|
+;;; code.
|
|
418
|
+;;;
|
|
419
|
+;;; This is what we're implementing, where s[] is our state vector.
|
|
420
|
+;;;
|
|
421
|
+;;; static uint64_t s[2];
|
|
422
|
+;;; static inline uint64_t rotl(const uint64_t x, int k) {
|
|
423
|
+;;; return (x << k) | (x >> (64 - k));
|
|
424
|
+;;; }
|
|
425
|
+;;;
|
|
426
|
+;;; uint64_t next(void) {
|
|
427
|
+;;; const uint64_t s0 = s[0];
|
|
428
|
+;;; uint64_t s1 = s[1];
|
|
429
|
+;;; const uint64_t result = rotl(s0 * 5, 7) * 9;
|
|
430
|
+;;;
|
|
431
|
+;;; s1 ^= s0;
|
|
432
|
+;;; s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
|
|
433
|
+;;; s[1] = rotl(s1, 37); // c
|
|
434
|
+;;;
|
|
435
|
+;;; return result;
|
|
436
|
+;;; }
|
|
437
|
+;;;
|
|
438
|
+;;; A VOP is also generated to call this assembly routine. This
|
|
439
|
+;;; routine computes a new 64-bit random number and also updates the
|
|
440
|
+;;; state, which is (simple-array (double-float) (2)).
|
|
441
|
+#+random-xoroshiro
|
|
442
|
+(define-assembly-routine
|
|
443
|
+ (xoroshiro-update
|
|
444
|
+ (:translate kernel::random-xoroshiro-update)
|
|
445
|
+ (:return-style :raw)
|
|
446
|
+ (:cost 30)
|
|
447
|
+ (:policy :fast-safe)
|
|
448
|
+ (:arg-types simple-array-double-float)
|
|
449
|
+ (:result-types unsigned-num unsigned-num))
|
|
450
|
+ ((:arg state descriptor-reg edx-offset)
|
|
451
|
+ (:res r1 unsigned-reg ecx-offset)
|
|
452
|
+ (:res r0 unsigned-reg ebx-offset)
|
|
453
|
+ (:temp s0 double-reg xmm0-offset)
|
|
454
|
+ (:temp s1 double-reg xmm1-offset)
|
|
455
|
+ (:temp t0 double-reg xmm2-offset)
|
|
456
|
+ (:temp t1 double-reg xmm3-offset))
|
|
457
|
+
|
|
458
|
+ ;; s0 = state[0]
|
|
459
|
+ (inst movsd s0 (make-ea :dword :base state
|
|
460
|
+ :disp (- (+ (* vm:vector-data-offset
|
|
461
|
+ vm:word-bytes)
|
|
462
|
+ (* 8 0))
|
|
463
|
+ vm:other-pointer-type)))
|
|
464
|
+ ;; t0 = s0 * 5 = s0 << 2 + s0
|
|
465
|
+ (inst movapd t0 s0) ; t0 = s0
|
|
466
|
+ (inst psllq t0 2) ; t0 = t0 << 2 = 4*t0
|
|
467
|
+ (inst paddq t0 s0) ; t0 = t0 + s0 = 5*t0
|
|
468
|
+
|
|
469
|
+ ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7)
|
|
470
|
+ ;; = rotl(s0*5, 7)
|
|
471
|
+ (inst movapd t1 t0) ; t1 = t0
|
|
472
|
+ (inst psllq t1 7) ; t1 = t0 << 7
|
|
473
|
+ (inst psrlq t0 (- 64 7)) ; t0 = t0 >> 57
|
|
474
|
+ (inst orpd t0 t1) ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7)
|
|
475
|
+
|
|
476
|
+ ;; t0 = t0 * 9 = t0 << 3 + t0
|
|
477
|
+ ;; = rotl(s0*5, 7) * 9
|
|
478
|
+ (inst movapd t1 t0) ; t1 = t0
|
|
479
|
+ (inst psllq t1 3) ; t1 = t0 << 3
|
|
480
|
+ (inst paddq t0 t1) ; t0 = t0 << 3 + t0 = 9*t0
|
|
481
|
+
|
|
482
|
+ ;; Save the result as two 32-bit results. r1 is the high 32 bits
|
|
483
|
+ ;; and r0 is the low 32.
|
|
484
|
+ (inst movd r0 t0)
|
|
485
|
+ (inst psrlq t0 32)
|
|
486
|
+ (inst movd r1 t0)
|
|
487
|
+
|
|
488
|
+ ;; s1 = state[1]
|
|
489
|
+ (inst movsd s1 (make-ea :dword :base state
|
|
490
|
+ :disp (- (+ (* vm:vector-data-offset
|
|
491
|
+ vm:word-bytes)
|
|
492
|
+ (* 8 1))
|
|
493
|
+ vm:other-pointer-type)))
|
|
494
|
+ (inst xorpd s1 s0) ; s1 = s1 ^ s0
|
|
495
|
+
|
|
496
|
+ ;; s0 can now be reused as a temp.
|
|
497
|
+ ;; s0 = rotl(s0, 24)
|
|
498
|
+ (inst movapd t0 s0) ; t0 = s0
|
|
499
|
+ (inst psllq t0 24) ; t0 = s0 << 24
|
|
500
|
+ (inst psrlq s0 (- 64 24)) ; s0 = s0 >> 40
|
|
501
|
+ (inst orpd s0 t0) ; s0 = s0 | t0 = rotl(s0, 24)
|
|
502
|
+
|
|
503
|
+ ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1
|
|
504
|
+ (inst xorpd s0 s1)
|
|
505
|
+
|
|
506
|
+ ;; s0 = s0 ^ (s1 << 16)
|
|
507
|
+ (inst movapd t0 s1) ; t0 = s1
|
|
508
|
+ (inst psllq t0 16) ; t0 = s1 << 16
|
|
509
|
+ (inst xorpd s0 t0) ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16)
|
|
510
|
+
|
|
511
|
+ ;; Save s0 to state[0]
|
|
512
|
+ (inst movsd (make-ea :dword :base state
|
|
513
|
+ :disp (- (+ (* vm:vector-data-offset
|
|
514
|
+ vm:word-bytes)
|
|
515
|
+ (* 8 0))
|
|
516
|
+ vm:other-pointer-type))
|
|
517
|
+ s0)
|
|
518
|
+
|
|
519
|
+ ;; s1 = rotl(s1, 37)
|
|
520
|
+ (inst movapd t0 s1) ; t0 = s1
|
|
521
|
+ (inst psllq t0 37) ; t0 = s1 << 37
|
|
522
|
+ (inst psrlq s1 (- 64 37)) ; s1 = s1 >> 27
|
|
523
|
+ (inst orpd s1 t0) ; s1 = t0 | s1 = rotl(s1, 37)
|
|
524
|
+
|
|
525
|
+ ;; Save s1 to state[1]
|
|
526
|
+ (inst movsd (make-ea :dword :base state
|
|
527
|
+ :disp (- (+ (* vm:vector-data-offset
|
|
528
|
+ vm:word-bytes)
|
|
529
|
+ (* 8 1))
|
|
530
|
+ vm:other-pointer-type))
|
|
531
|
+ s1)) |