Raymond Toy pushed to branch master at cmucl / cmucl

Commits:

3 changed files:

Changes:

  • src/assembly/x86/arith.lisp
    ... ... @@ -411,3 +411,121 @@
    411 411
       (inst pop y)
    
    412 412
       (inst pop k)
    
    413 413
       (inst ret))
    
    414
    +
    
    415
    +;;; Support for the xoroshiro128** generator.  See
    
    416
    +;;; https://prng.di.unimi.it/xoroshiro128starstar.c for the official
    
    417
    +;;; code.
    
    418
    +;;;
    
    419
    +;;; This is what we're implementing, where s[] is our state vector.
    
    420
    +;;;
    
    421
    +;;; static uint64_t s[2];
    
    422
    +;;; static inline uint64_t rotl(const uint64_t x, int k) {
    
    423
    +;;;   return (x << k) | (x >> (64 - k));
    
    424
    +;;; }
    
    425
    +;;;
    
    426
    +;;; uint64_t next(void) {
    
    427
    +;;;   const uint64_t s0 = s[0];
    
    428
    +;;; 	 uint64_t s1 = s[1];
    
    429
    +;;; 	 const uint64_t result = rotl(s0 * 5, 7) * 9;
    
    430
    +;;; 
    
    431
    +;;; 	 s1 ^= s0;
    
    432
    +;;; 	 s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
    
    433
    +;;; 	 s[1] = rotl(s1, 37); // c
    
    434
    +;;; 
    
    435
    +;;; 	 return result;
    
    436
    +;;; }
    
    437
    +;;;
    
    438
    +;;; A VOP is also generated to call this assembly routine.  This
    
    439
    +;;; routine computes a new 64-bit random number and also updates the
    
    440
    +;;; state, which is (simple-array (double-float) (2)).
    
    441
    +#+random-xoroshiro
    
    442
    +(define-assembly-routine
    
    443
    +  (xoroshiro-update
    
    444
    +   (:translate kernel::random-xoroshiro-update)
    
    445
    +   (:return-style :raw)
    
    446
    +   (:cost 30)
    
    447
    +   (:policy :fast-safe)
    
    448
    +   (:arg-types simple-array-double-float)
    
    449
    +   (:result-types unsigned-num unsigned-num))
    
    450
    +  ((:arg state descriptor-reg edx-offset)
    
    451
    +   (:res r1 unsigned-reg ecx-offset)
    
    452
    +   (:res r0 unsigned-reg ebx-offset)
    
    453
    +   (:temp s0 double-reg xmm0-offset)
    
    454
    +   (:temp s1 double-reg xmm1-offset)
    
    455
    +   (:temp t0 double-reg xmm2-offset)
    
    456
    +   (:temp t1 double-reg xmm3-offset))
    
    457
    +
    
    458
    +  ;; s0 = state[0]
    
    459
    +  (inst movsd s0 (make-ea :dword :base state
    
    460
    +                                 :disp (- (+ (* vm:vector-data-offset
    
    461
    +					        vm:word-bytes)
    
    462
    +				             (* 8 0))
    
    463
    +				          vm:other-pointer-type)))
    
    464
    +  ;; t0 = s0 * 5 = s0 << 2 + s0
    
    465
    +  (inst movapd t0 s0)                   ; t0 = s0
    
    466
    +  (inst psllq t0 2)                     ; t0 = t0 << 2 = 4*t0
    
    467
    +  (inst paddq t0 s0)                    ; t0 = t0 + s0 = 5*t0
    
    468
    +
    
    469
    +  ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7)
    
    470
    +  ;;    = rotl(s0*5, 7)
    
    471
    +  (inst movapd t1 t0)          ; t1 = t0
    
    472
    +  (inst psllq t1 7)            ; t1 = t0 << 7
    
    473
    +  (inst psrlq t0 (- 64 7))     ; t0 = t0 >> 57
    
    474
    +  (inst orpd t0 t1)            ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7)
    
    475
    +
    
    476
    +  ;; t0 = t0 * 9 = t0 << 3 + t0
    
    477
    +  ;;    = rotl(s0*5, 7) * 9
    
    478
    +  (inst movapd t1 t0)                   ; t1 = t0
    
    479
    +  (inst psllq t1 3)                     ; t1 = t0 << 3
    
    480
    +  (inst paddq t0 t1)                    ; t0 = t0 << 3 + t0 = 9*t0
    
    481
    +
    
    482
    +  ;; Save the result as two 32-bit results.  r1 is the high 32 bits
    
    483
    +  ;; and r0 is the low 32.
    
    484
    +  (inst movd r0 t0)
    
    485
    +  (inst psrlq t0 32)
    
    486
    +  (inst movd r1 t0)
    
    487
    +
    
    488
    +  ;; s1 = state[1]
    
    489
    +  (inst movsd s1 (make-ea :dword :base state
    
    490
    +			         :disp (- (+ (* vm:vector-data-offset
    
    491
    +					        vm:word-bytes)
    
    492
    +				             (* 8 1))
    
    493
    +				          vm:other-pointer-type)))
    
    494
    +  (inst xorpd s1 s0)                    ; s1 = s1 ^ s0
    
    495
    +
    
    496
    +  ;; s0 can now be reused as a temp.
    
    497
    +  ;; s0 = rotl(s0, 24)
    
    498
    +  (inst movapd t0 s0)                   ; t0 = s0
    
    499
    +  (inst psllq t0 24)                    ; t0 = s0 << 24
    
    500
    +  (inst psrlq s0 (- 64 24))             ; s0 = s0 >> 40
    
    501
    +  (inst orpd s0 t0)                     ; s0 = s0 | t0 = rotl(s0, 24)
    
    502
    +
    
    503
    +  ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1
    
    504
    +  (inst xorpd s0 s1)
    
    505
    +
    
    506
    +  ;; s0 = s0 ^ (s1 << 16)
    
    507
    +  (inst movapd t0 s1)            ; t0 = s1
    
    508
    +  (inst psllq t0 16)             ; t0 = s1 << 16
    
    509
    +  (inst xorpd s0 t0)             ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16)
    
    510
    +
    
    511
    +  ;; Save s0 to state[0]
    
    512
    +  (inst movsd (make-ea :dword :base state
    
    513
    +			      :disp (- (+ (* vm:vector-data-offset
    
    514
    +					     vm:word-bytes)
    
    515
    +				          (* 8 0))
    
    516
    +				       vm:other-pointer-type))
    
    517
    +        s0)
    
    518
    +
    
    519
    +  ;; s1 = rotl(s1, 37)
    
    520
    +  (inst movapd t0 s1)                   ; t0 = s1
    
    521
    +  (inst psllq t0 37)                    ; t0 = s1 << 37
    
    522
    +  (inst psrlq s1 (- 64 37))             ; s1 = s1 >> 27
    
    523
    +  (inst orpd s1 t0)                     ; s1 = t0 | s1 = rotl(s1, 37)
    
    524
    +
    
    525
    +  ;; Save s1 to state[1]
    
    526
    +  (inst movsd (make-ea :dword :base state
    
    527
    +			      :disp (- (+ (* vm:vector-data-offset
    
    528
    +					     vm:word-bytes)
    
    529
    +				          (* 8 1))
    
    530
    +				       vm:other-pointer-type))
    
    531
    +        s1))

  • src/assembly/x86/support.lisp
    ... ... @@ -39,7 +39,7 @@
    39 39
     (def-vm-support-routine generate-return-sequence (style)
    
    40 40
       (ecase style
    
    41 41
         (:raw
    
    42
    -     `(inst ret))
    
    42
    +     `((inst ret)))
    
    43 43
         (:full-call
    
    44 44
          `(
    
    45 45
            (inst pop eax-tn)
    

  • src/compiler/x86/arith.lisp
    ... ... @@ -1810,3 +1810,8 @@
    1810 1810
     				  vm:other-pointer-type))
    
    1811 1811
               s1)))
    
    1812 1812
     )
    
    1813
    +
    
    1814
    +#+random-xoroshiro
    
    1815
    +(defknown kernel::random-xoroshiro-update ((simple-array double-float (2)))
    
    1816
    +  (values (unsigned-byte 32) (unsigned-byte 32))
    
    1817
    +  (movable))