Raymond Toy pushed to branch master at cmucl / cmucl
Commits: aa42e51a by Raymond Toy at 2024-04-09T16:19:13+00:00 Fix #299: Use xoroshiro assembly routine for x86
- - - - - 149c45e1 by Raymond Toy at 2024-04-09T16:19:41+00:00 Merge branch 'issue-299-enable-xoroshiro-assem-routine' into 'master'
Fix #299: Use xoroshiro assembly routine for x86
Closes #299, #295, #300, #297, and #294
See merge request cmucl/cmucl!208 - - - - -
4 changed files:
- .gitlab-ci.yml - bin/make-dist.sh - src/code/rand-xoroshiro.lisp - src/compiler/x86/arith.lisp
Changes:
===================================== .gitlab-ci.yml ===================================== @@ -1,7 +1,7 @@ variables: download_url: "https://common-lisp.net/project/cmucl/downloads/snapshots/2023/08" - version: "2023-08-x86" - bootstrap: "-B boot-2023-08" + version: "xoroshiro-assembly-x86" + bootstrap: ""
stages: @@ -48,7 +48,7 @@ linux:build: # Regular build using the cross-compiled result or snapshot - bin/build.sh $bootstrap -R -C "x86_linux_clang" -o snapshot/bin/lisp # - bin/build.sh $bootstrap -R -C "x86_linux" -o snapshot/bin/lisp - - bin/make-dist.sh -I dist linux-4 + - bin/make-dist.sh -V ci-build -I dist linux-4
linux:test: stage: test @@ -131,7 +131,7 @@ osx:build: # Regular build using the cross-compiled result or snapshot. # Need /opt/local/bin to get msgmerge and msgfmt programs. - PATH=/opt/local/bin:$PATH bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp - - bin/make-dist.sh -I dist darwin-4 + - bin/make-dist.sh -V ci-build -I dist darwin-4
osx:test: stage: test
===================================== bin/make-dist.sh ===================================== @@ -98,6 +98,8 @@ def_arch_os # ("snapshot-yyyy-mm") or a release number.. GIT_HASH="`(cd src; git describe --dirty 2>/dev/null)`"
+echo GIT_HASH = ${GIT_HASH} + if expr "X${GIT_HASH}" : 'Xsnapshot-[0-9][0-9][0-9][0-9]-[01][0-9]' > /dev/null; then DEFAULT_VERSION=`expr "${GIT_HASH}" : "snapshot-\(.*\)"` fi
===================================== src/code/rand-xoroshiro.lisp ===================================== @@ -238,7 +238,7 @@ being the first value." (declare (type (simple-array double-float (2)) state) (optimize (speed 3) (safety 0))) - (vm::xoroshiro-next state)) + (kernel::random-xoroshiro-update state))
#-x86 (defun xoroshiro-gen (state)
===================================== src/compiler/x86/arith.lisp ===================================== @@ -1695,122 +1695,8 @@
(in-package "VM")
-#+random-xoroshiro -(progn -(defknown xoroshiro-next ((simple-array double-float (2))) - (values (unsigned-byte 32) (unsigned-byte 32)) - (movable)) - -(define-vop (xoroshiro-next) - (:policy :fast-safe) - (:translate xoroshiro-next) - (:args (state :scs (descriptor-reg) :to (:result 3))) - (:arg-types simple-array-double-float) - (:results (r1 :scs (unsigned-reg)) - (r0 :scs (unsigned-reg))) - (:result-types unsigned-num unsigned-num) - (:temporary (:sc double-reg) s0) - (:temporary (:sc double-reg) s1) - (:temporary (:sc double-reg) t0) - (:temporary (:sc double-reg) t1) - (:generator 10 - ;; See https://prng.di.unimi.it/xoroshiro128starstar.c for the official code. - ;; - ;; This is what we're implementing, where s[] is our state vector. - ;; - ;; static uint64_t s[2]; - ;; static inline uint64_t rotl(const uint64_t x, int k) { - ;; return (x << k) | (x >> (64 - k)); - ;; } - ;; - ;; uint64_t next(void) { - ;; const uint64_t s0 = s[0]; - ;; uint64_t s1 = s[1]; - ;; const uint64_t result = rotl(s0 * 5, 7) * 9; - ;; - ;; s1 ^= s0; - ;; s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b - ;; s[1] = rotl(s1, 37); // c - ;; - ;; return result; - ;; } - - ;; s0 = state[0] - (inst movsd s0 (make-ea :dword :base state - :disp (- (+ (* vm:vector-data-offset - vm:word-bytes) - (* 8 0)) - vm:other-pointer-type))) - ;; t0 = s0 * 5 = s0 << 2 + s0 - (inst movapd t0 s0) ; t0 = s0 - (inst psllq t0 2) ; t0 = t0 << 2 = 4*t0 - (inst paddq t0 s0) ; t0 = t0 + s0 = 5*t0 - - ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7) - ;; = rotl(s0*5, 7) - (inst movapd t1 t0) ; t1 = t0 - (inst psllq t1 7) ; t1 = t0 << 7 - (inst psrlq t0 (- 64 7)) ; t0 = t0 >> 57 - (inst orpd t0 t1) ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7) - - ;; t0 = t0 * 9 = t0 << 3 + t0 - ;; = rotl(s0*5, 7) * 9 - (inst movapd t1 t0) ; t1 = t0 - (inst psllq t1 3) ; t1 = t0 << 3 - (inst paddq t0 t1) ; t0 = t0 << 3 + t0 = 9*t0 - - ;; Save the result as two 32-bit results. r1 is the high 32 bits - ;; and r0 is the low 32. 
- (inst movd r0 t0) - (inst psrlq t0 32) - (inst movd r1 t0) - - ;; s1 = state[1] - (inst movsd s1 (make-ea :dword :base state - :disp (- (+ (* vm:vector-data-offset - vm:word-bytes) - (* 8 1)) - vm:other-pointer-type))) - (inst xorpd s1 s0) ; s1 = s1 ^ s0 - - ;; s0 can now be reused as a temp. - ;; s0 = rotl(s0, 24) - (inst movapd t0 s0) ; t0 = s0 - (inst psllq t0 24) ; t0 = s0 << 24 - (inst psrlq s0 (- 64 24)) ; s0 = s0 >> 40 - (inst orpd s0 t0) ; s0 = s0 | t0 = rotl(s0, 24) - - ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1 - (inst xorpd s0 s1) - - ;; s0 = s0 ^ (s1 << 16) - (inst movapd t0 s1) ; t0 = s1 - (inst psllq t0 16) ; t0 = s1 << 16 - (inst xorpd s0 t0) ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16) - - ;; Save s0 to state[0] - (inst movsd (make-ea :dword :base state - :disp (- (+ (* vm:vector-data-offset - vm:word-bytes) - (* 8 0)) - vm:other-pointer-type)) - s0) - - ;; s1 = rotl(s1, 37) - (inst movapd t0 s1) ; t0 = s1 - (inst psllq t0 37) ; t0 = s1 << 37 - (inst psrlq s1 (- 64 37)) ; s1 = s1 >> 27 - (inst orpd s1 t0) ; s1 = t0 | s1 = rotl(s1, 37) - - ;; Save s1 to state[1] - (inst movsd (make-ea :dword :base state - :disp (- (+ (* vm:vector-data-offset - vm:word-bytes) - (* 8 1)) - vm:other-pointer-type)) - s1))) -) - +;; The update routine is a Lisp assembly routine with a corresponding +;; VOP. This lets the compiler know about the VOP so we can use it. #+random-xoroshiro (defknown kernel::random-xoroshiro-update ((simple-array double-float (2))) (values (unsigned-byte 32) (unsigned-byte 32))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/a46a530ea4d27a6b34a43e5...