... |
... |
@@ -2611,39 +2611,37 @@ |
2611
|
2611
|
(:temporary (:sc unsigned-reg :offset nl4-offset) s1)
|
2612
|
2612
|
(:temporary (:sc unsigned-reg :offset nl3-offset) t0)
|
2613
|
2613
|
(:generator 10
|
2614
|
|
- (inst ldx s0 state (+ (* 0 double-float-bytes)
|
2615
|
|
- (- (* vm:vector-data-offset vm:word-bytes)
|
2616
|
|
- vm:other-pointer-type)))
|
2617
|
|
- (inst ldx s1 state (+ (* 1 double-float-bytes)
|
2618
|
|
- (- (* vm:vector-data-offset vm:word-bytes)
|
2619
|
|
- vm:other-pointer-type)))
|
2620
|
|
- ;; result = s0 + s1, split into low 32-bits in r0 and high 32-bits
|
2621
|
|
- ;; in r1
|
2622
|
|
- (inst add r0 s0 s1)
|
2623
|
|
- (inst srlx r1 r0 32)
|
2624
|
|
-
|
2625
|
|
- ;; s1 = s1 ^ s0
|
2626
|
|
- (inst xor s1 s1 s0)
|
2627
|
|
-
|
2628
|
|
- ;; s0 = rotl(s0,55) = s0 << 55 | s0 >> 9
|
2629
|
|
- (inst sllx s0 s0 55)
|
2630
|
|
- (inst srlx t0 s0 9)
|
2631
|
|
- (inst or s0 t0)
|
2632
|
|
-
|
2633
|
|
- (inst xor s0 s1) ; s0 = s0 ^ s1
|
2634
|
|
- (inst sllx t0 s1 14) ; t0 = s1 << 14
|
2635
|
|
- (inst xor s0 t0) ; s0 = s0 ^ t0
|
2636
|
|
-
|
2637
|
|
- (inst stx s0 state (+ (* 0 double-float-bytes)
|
2638
|
|
- (- (* vm:vector-data-offset vm:word-bytes)
|
2639
|
|
- vm:other-pointer-type)))
|
2640
|
|
-
|
2641
|
|
- ;; s1 = rotl(s1, 36) = s1 << 36 | s1 >> 28, using t0 as temp
|
2642
|
|
- (inst sllx s1 36)
|
2643
|
|
- (inst srlx t0 s1 28)
|
2644
|
|
- (inst or s1 t0)
|
2645
|
|
-
|
2646
|
|
- (inst stx s1 state (+ (* 1 double-float-bytes)
|
2647
|
|
- (- (* vm:vector-data-offset vm:word-bytes)
|
2648
|
|
- vm:other-pointer-type)))))
|
|
2614
|
+ (let ((s0-offset (+ (* 0 double-float-bytes)
|
|
2615
|
+ (- (* vm:vector-data-offset vm:word-bytes)
|
|
2616
|
+ vm:other-pointer-type)))
|
|
2617
|
+ (s1-offset (+ (* 1 double-float-bytes)
|
|
2618
|
+ (- (* vm:vector-data-offset vm:word-bytes)
|
|
2619
|
+ vm:other-pointer-type))))
|
|
2620
|
+ (inst ldx s0 state s0-offset)
|
|
2621
|
+ (inst ldx s1 state s1-offset)
|
|
2622
|
+ ;; result = s0 + s1, split into low 32-bits in r0 and high 32-bits
|
|
2623
|
+ ;; in r1
|
|
2624
|
+ (inst add r0 s0 s1)
|
|
2625
|
+ (inst srlx r1 r0 32)
|
|
2626
|
+
|
|
2627
|
+ ;; s1 = s1 ^ s0
|
|
2628
|
+ (inst xor s1 s0)
|
|
2629
|
+
|
|
2630
|
+ ;; s0 = rotl(s0,55) = s0 << 55 | s0 >> 9
|
|
2631
|
+ (inst sllx t0 s0 55)
|
|
2632
|
+ (inst srlx s0 s0 9)
|
|
2633
|
+ (inst or s0 t0)
|
|
2634
|
+
|
|
2635
|
+ (inst xor s0 s1) ; s0 = s0 ^ s1
|
|
2636
|
+ (inst sllx t0 s1 14) ; t0 = s1 << 14
|
|
2637
|
+ (inst xor s0 t0) ; s0 = s0 ^ t0
|
|
2638
|
+
|
|
2639
|
+ (inst stx s0 state s0-offset)
|
|
2640
|
+
|
|
2641
|
+ ;; s1 = rotl(s1, 36) = s1 << 36 | s1 >> 28, using t0 as temp
|
|
2642
|
+ (inst sllx t0 s1 36)
|
|
2643
|
+ (inst srlx s1 28)
|
|
2644
|
+ (inst or s1 t0)
|
|
2645
|
+
|
|
2646
|
+ (inst stx s1 state s1-offset))))
|
2649
|
2647
|
)
|