Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 1cb2cb14 by Raymond Toy at 2024-04-08T13:00:08+00:00 Fix #294: Implement assembly routine for xoroshiro update
- - - - - 574eef63 by Raymond Toy at 2024-04-08T13:00:12+00:00 Merge branch 'issue-294-xoroshiro-lisp-assem-routine' into 'master'
Fix #294: Implement assembly routine for xoroshiro update
Closes #294
See merge request cmucl/cmucl!202 - - - - -
3 changed files:
- src/assembly/x86/arith.lisp - src/assembly/x86/support.lisp - src/compiler/x86/arith.lisp
Changes:
===================================== src/assembly/x86/arith.lisp ===================================== @@ -411,3 +411,121 @@ (inst pop y) (inst pop k) (inst ret)) + +;;; Support for the xoroshiro128** generator. See +;;; https://prng.di.unimi.it/xoroshiro128starstar.c for the official +;;; code. +;;; +;;; This is what we're implementing, where s[] is our state vector. +;;; +;;; static uint64_t s[2]; +;;; static inline uint64_t rotl(const uint64_t x, int k) { +;;; return (x << k) | (x >> (64 - k)); +;;; } +;;; +;;; uint64_t next(void) { +;;; const uint64_t s0 = s[0]; +;;; uint64_t s1 = s[1]; +;;; const uint64_t result = rotl(s0 * 5, 7) * 9; +;;; +;;; s1 ^= s0; +;;; s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b +;;; s[1] = rotl(s1, 37); // c +;;; +;;; return result; +;;; } +;;; +;;; A VOP is also generated to call this assembly routine. This +;;; routine computes a new 64-bit random number and also updates the +;;; state, which is (simple-array (double-float) (2)). +#+random-xoroshiro +(define-assembly-routine + (xoroshiro-update + (:translate kernel::random-xoroshiro-update) + (:return-style :raw) + (:cost 30) + (:policy :fast-safe) + (:arg-types simple-array-double-float) + (:result-types unsigned-num unsigned-num)) + ((:arg state descriptor-reg edx-offset) + (:res r1 unsigned-reg ecx-offset) + (:res r0 unsigned-reg ebx-offset) + (:temp s0 double-reg xmm0-offset) + (:temp s1 double-reg xmm1-offset) + (:temp t0 double-reg xmm2-offset) + (:temp t1 double-reg xmm3-offset)) + + ;; s0 = state[0] + (inst movsd s0 (make-ea :dword :base state + :disp (- (+ (* vm:vector-data-offset + vm:word-bytes) + (* 8 0)) + vm:other-pointer-type))) + ;; t0 = s0 * 5 = s0 << 2 + s0 + (inst movapd t0 s0) ; t0 = s0 + (inst psllq t0 2) ; t0 = t0 << 2 = 4*s0 + (inst paddq t0 s0) ; t0 = t0 + s0 = 5*s0 + + ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7) + ;; = rotl(s0*5, 7) + (inst movapd t1 t0) ; t1 = t0 + (inst psllq t1 7) ; t1 = t0 << 7 + (inst psrlq t0 (- 64 7)) ; t0 = t0 >> 57 + (inst orpd 
t0 t1) ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7) + + ;; t0 = t0 * 9 = t0 << 3 + t0 + ;; = rotl(s0*5, 7) * 9 + (inst movapd t1 t0) ; t1 = t0 + (inst psllq t1 3) ; t1 = t0 << 3 + (inst paddq t0 t1) ; t0 = t0 << 3 + t0 = 9*t0 + + ;; Save the result as two 32-bit results. r1 is the high 32 bits + ;; and r0 is the low 32. + (inst movd r0 t0) + (inst psrlq t0 32) + (inst movd r1 t0) + + ;; s1 = state[1] + (inst movsd s1 (make-ea :dword :base state + :disp (- (+ (* vm:vector-data-offset + vm:word-bytes) + (* 8 1)) + vm:other-pointer-type))) + (inst xorpd s1 s0) ; s1 = s1 ^ s0 + + ;; s0 can now be reused as a temp. + ;; s0 = rotl(s0, 24) + (inst movapd t0 s0) ; t0 = s0 + (inst psllq t0 24) ; t0 = s0 << 24 + (inst psrlq s0 (- 64 24)) ; s0 = s0 >> 40 + (inst orpd s0 t0) ; s0 = s0 | t0 = rotl(s0, 24) + + ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1 + (inst xorpd s0 s1) + + ;; s0 = s0 ^ (s1 << 16) + (inst movapd t0 s1) ; t0 = s1 + (inst psllq t0 16) ; t0 = s1 << 16 + (inst xorpd s0 t0) ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16) + + ;; Save s0 to state[0] + (inst movsd (make-ea :dword :base state + :disp (- (+ (* vm:vector-data-offset + vm:word-bytes) + (* 8 0)) + vm:other-pointer-type)) + s0) + + ;; s1 = rotl(s1, 37) + (inst movapd t0 s1) ; t0 = s1 + (inst psllq t0 37) ; t0 = s1 << 37 + (inst psrlq s1 (- 64 37)) ; s1 = s1 >> 27 + (inst orpd s1 t0) ; s1 = t0 | s1 = rotl(s1, 37) + + ;; Save s1 to state[1] + (inst movsd (make-ea :dword :base state + :disp (- (+ (* vm:vector-data-offset + vm:word-bytes) + (* 8 1)) + vm:other-pointer-type)) + s1))
===================================== src/assembly/x86/support.lisp ===================================== @@ -39,7 +39,7 @@ (def-vm-support-routine generate-return-sequence (style) (ecase style (:raw - `(inst ret)) + `((inst ret))) (:full-call `( (inst pop eax-tn)
===================================== src/compiler/x86/arith.lisp ===================================== @@ -1810,3 +1810,8 @@ vm:other-pointer-type)) s1))) ) + +#+random-xoroshiro +(defknown kernel::random-xoroshiro-update ((simple-array double-float (2))) + (values (unsigned-byte 32) (unsigned-byte 32)) + (movable))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/e77ded50bfd6c663157047e...