... |
... |
@@ -1695,122 +1695,8 @@ |
1695
|
1695
|
|
1696
|
1696
|
(in-package "VM")
|
1697
|
1697
|
|
1698
|
|
-#+random-xoroshiro
|
1699
|
|
-(progn
|
1700
|
|
-(defknown xoroshiro-next ((simple-array double-float (2)))
|
1701
|
|
- (values (unsigned-byte 32) (unsigned-byte 32))
|
1702
|
|
- (movable))
|
1703
|
|
-
|
1704
|
|
-(define-vop (xoroshiro-next)
|
1705
|
|
- (:policy :fast-safe)
|
1706
|
|
- (:translate xoroshiro-next)
|
1707
|
|
- (:args (state :scs (descriptor-reg) :to (:result 3)))
|
1708
|
|
- (:arg-types simple-array-double-float)
|
1709
|
|
- (:results (r1 :scs (unsigned-reg))
|
1710
|
|
- (r0 :scs (unsigned-reg)))
|
1711
|
|
- (:result-types unsigned-num unsigned-num)
|
1712
|
|
- (:temporary (:sc double-reg) s0)
|
1713
|
|
- (:temporary (:sc double-reg) s1)
|
1714
|
|
- (:temporary (:sc double-reg) t0)
|
1715
|
|
- (:temporary (:sc double-reg) t1)
|
1716
|
|
- (:generator 10
|
1717
|
|
- ;; See https://prng.di.unimi.it/xoroshiro128starstar.c for the official code.
|
1718
|
|
- ;;
|
1719
|
|
- ;; This is what we're implementing, where s[] is our state vector.
|
1720
|
|
- ;;
|
1721
|
|
- ;; static uint64_t s[2];
|
1722
|
|
- ;; static inline uint64_t rotl(const uint64_t x, int k) {
|
1723
|
|
- ;; return (x << k) | (x >> (64 - k));
|
1724
|
|
- ;; }
|
1725
|
|
- ;;
|
1726
|
|
- ;; uint64_t next(void) {
|
1727
|
|
- ;; const uint64_t s0 = s[0];
|
1728
|
|
- ;; uint64_t s1 = s[1];
|
1729
|
|
- ;; const uint64_t result = rotl(s0 * 5, 7) * 9;
|
1730
|
|
- ;;
|
1731
|
|
- ;; s1 ^= s0;
|
1732
|
|
- ;; s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
|
1733
|
|
- ;; s[1] = rotl(s1, 37); // c
|
1734
|
|
- ;;
|
1735
|
|
- ;; return result;
|
1736
|
|
- ;; }
|
1737
|
|
-
|
1738
|
|
- ;; s0 = state[0]
|
1739
|
|
- (inst movsd s0 (make-ea :dword :base state
|
1740
|
|
- :disp (- (+ (* vm:vector-data-offset
|
1741
|
|
- vm:word-bytes)
|
1742
|
|
- (* 8 0))
|
1743
|
|
- vm:other-pointer-type)))
|
1744
|
|
- ;; t0 = s0 * 5 = s0 << 2 + s0
|
1745
|
|
- (inst movapd t0 s0) ; t0 = s0
|
1746
|
|
- (inst psllq t0 2) ; t0 = t0 << 2 = 4*t0
|
1747
|
|
- (inst paddq t0 s0) ; t0 = t0 + s0 = 5*t0
|
1748
|
|
-
|
1749
|
|
- ;; t0 = rotl(t0, 7) = t0 << 7 | t0 >> (64-7)
|
1750
|
|
- ;; = rotl(s0*5, 7)
|
1751
|
|
- (inst movapd t1 t0) ; t1 = t0
|
1752
|
|
- (inst psllq t1 7) ; t1 = t0 << 7
|
1753
|
|
- (inst psrlq t0 (- 64 7)) ; t0 = t0 >> 57
|
1754
|
|
- (inst orpd t0 t1) ; t0 = t0 << 7 | t0 >> 57 = rotl(t0, 7)
|
1755
|
|
-
|
1756
|
|
- ;; t0 = t0 * 9 = t0 << 3 + t0
|
1757
|
|
- ;; = rotl(s0*5, 7) * 9
|
1758
|
|
- (inst movapd t1 t0) ; t1 = t0
|
1759
|
|
- (inst psllq t1 3) ; t1 = t0 << 3
|
1760
|
|
- (inst paddq t0 t1) ; t0 = t0 << 3 + t0 = 9*t0
|
1761
|
|
-
|
1762
|
|
- ;; Save the result as two 32-bit results. r1 is the high 32 bits
|
1763
|
|
- ;; and r0 is the low 32.
|
1764
|
|
- (inst movd r0 t0)
|
1765
|
|
- (inst psrlq t0 32)
|
1766
|
|
- (inst movd r1 t0)
|
1767
|
|
-
|
1768
|
|
- ;; s1 = state[1]
|
1769
|
|
- (inst movsd s1 (make-ea :dword :base state
|
1770
|
|
- :disp (- (+ (* vm:vector-data-offset
|
1771
|
|
- vm:word-bytes)
|
1772
|
|
- (* 8 1))
|
1773
|
|
- vm:other-pointer-type)))
|
1774
|
|
- (inst xorpd s1 s0) ; s1 = s1 ^ s0
|
1775
|
|
-
|
1776
|
|
- ;; s0 can now be reused as a temp.
|
1777
|
|
- ;; s0 = rotl(s0, 24)
|
1778
|
|
- (inst movapd t0 s0) ; t0 = s0
|
1779
|
|
- (inst psllq t0 24) ; t0 = s0 << 24
|
1780
|
|
- (inst psrlq s0 (- 64 24)) ; s0 = s0 >> 40
|
1781
|
|
- (inst orpd s0 t0) ; s0 = s0 | t0 = rotl(s0, 24)
|
1782
|
|
-
|
1783
|
|
- ;; s0 = s0 ^ s1 = rotl(s0, 24) ^ s1
|
1784
|
|
- (inst xorpd s0 s1)
|
1785
|
|
-
|
1786
|
|
- ;; s0 = s0 ^ (s1 << 16)
|
1787
|
|
- (inst movapd t0 s1) ; t0 = s1
|
1788
|
|
- (inst psllq t0 16) ; t0 = s1 << 16
|
1789
|
|
- (inst xorpd s0 t0) ; s0 = rotl(s0, 24) ^ s1 ^ (s1 << 16)
|
1790
|
|
-
|
1791
|
|
- ;; Save s0 to state[0]
|
1792
|
|
- (inst movsd (make-ea :dword :base state
|
1793
|
|
- :disp (- (+ (* vm:vector-data-offset
|
1794
|
|
- vm:word-bytes)
|
1795
|
|
- (* 8 0))
|
1796
|
|
- vm:other-pointer-type))
|
1797
|
|
- s0)
|
1798
|
|
-
|
1799
|
|
- ;; s1 = rotl(s1, 37)
|
1800
|
|
- (inst movapd t0 s1) ; t0 = s1
|
1801
|
|
- (inst psllq t0 37) ; t0 = s1 << 37
|
1802
|
|
- (inst psrlq s1 (- 64 37)) ; s1 = s1 >> 27
|
1803
|
|
- (inst orpd s1 t0) ; s1 = t0 | s1 = rotl(s1, 37)
|
1804
|
|
-
|
1805
|
|
- ;; Save s1 to state[1]
|
1806
|
|
- (inst movsd (make-ea :dword :base state
|
1807
|
|
- :disp (- (+ (* vm:vector-data-offset
|
1808
|
|
- vm:word-bytes)
|
1809
|
|
- (* 8 1))
|
1810
|
|
- vm:other-pointer-type))
|
1811
|
|
- s1)))
|
1812
|
|
-)
|
1813
|
|
-
|
|
1698
|
+;; The update routine is a Lisp assembly routine with a corresponding
|
|
1699
|
+;; VOP. This lets the compiler know about the VOP so we can use it.
|
1814
|
1700
|
#+random-xoroshiro
|
1815
|
1701
|
(defknown kernel::random-xoroshiro-update ((simple-array double-float (2)))
|
1816
|
1702
|
(values (unsigned-byte 32) (unsigned-byte 32))
|