|
|
1
|
+;;; -*- Package: ARM64 -*-
|
|
|
2
|
+;;;
|
|
|
3
|
+;;; **********************************************************************
|
|
|
4
|
+;;; This code was written as part of the CMU Common Lisp project at
|
|
|
5
|
+;;; Carnegie Mellon University, and has been placed in the public domain.
|
|
|
6
|
+;;;
|
|
|
7
|
+(ext:file-comment
|
|
|
8
|
+ "$Header: src/compiler/arm64/system.lisp $")
|
|
|
9
|
+;;;
|
|
|
10
|
+;;; **********************************************************************
|
|
|
11
|
+;;;
|
|
|
12
|
+;;; ARM64 VM definitions of various system hacking operations.
|
|
|
13
|
+;;;
|
|
|
14
|
+;;; Written by Rob MacLachlan
|
|
|
15
|
+;;;
|
|
|
16
|
+;;; SPARC conversion by William Lott and Christopher Hoover.
|
|
|
17
|
+;;; ARM64 conversion derived from the SPARC port.
|
|
|
18
|
+;;;
|
|
|
19
|
+(in-package "ARM64")
|
|
|
20
|
+(intl:textdomain "cmucl-arm64-vm")
|
|
|
21
|
+
|
|
|
22
|
+
|
|
|
23
|
+;;;; Type frobbing VOPs
|
|
|
24
|
+
|
|
|
25
|
+(define-vop (get-lowtag)
|
|
|
26
|
+ (:translate get-lowtag)
|
|
|
27
|
+ (:policy :fast-safe)
|
|
|
28
|
+ (:args (object :scs (any-reg descriptor-reg)))
|
|
|
29
|
+ (:results (result :scs (unsigned-reg)))
|
|
|
30
|
+ (:result-types positive-fixnum)
|
|
|
31
|
+ (:generator 1
|
|
|
32
|
+ (emit-not-implemented)
|
|
|
33
|
+ (inst and result object vm:lowtag-mask)))
|
|
|
34
|
+
|
|
|
35
|
+(define-vop (get-type)
|
|
|
36
|
+ (:translate get-type)
|
|
|
37
|
+ (:policy :fast-safe)
|
|
|
38
|
+ (:args (object :scs (descriptor-reg) :to (:eval 1)))
|
|
|
39
|
+ (:results (result :scs (unsigned-reg) :from (:eval 0)))
|
|
|
40
|
+ (:result-types positive-fixnum)
|
|
|
41
|
+ (:generator 6
|
|
|
42
|
+ (emit-not-implemented)
|
|
|
43
|
+ ;; Grab the lowtag.
|
|
|
44
|
+ (inst and result object lowtag-mask)
|
|
|
45
|
+ ;; Check for various pointer types.
|
|
|
46
|
+ (inst cmp result list-pointer-type)
|
|
|
47
|
+ (inst b :eq done)
|
|
|
48
|
+ (inst cmp result other-pointer-type)
|
|
|
49
|
+ (inst b :eq other-pointer)
|
|
|
50
|
+ (inst cmp result function-pointer-type)
|
|
|
51
|
+ (inst b :eq function-pointer)
|
|
|
52
|
+ (inst cmp result instance-pointer-type)
|
|
|
53
|
+ (inst b :eq done)
|
|
|
54
|
+ ;; Okay, it is an immediate. If fixnum, we want zero. Otherwise,
|
|
|
55
|
+ ;; we want the low 8 bits.
|
|
|
56
|
+ ;;
|
|
|
57
|
+ ;; AArch64: TST sets flags without writing a result (ANDS Xd=XZR).
|
|
|
58
|
+ ;; No delay slot exists on ARM64, so the fixnum zero materialisation
|
|
|
59
|
+ ;; must be done explicitly before the branch.
|
|
|
60
|
+ (inst tst object vm:fixnum-tag-mask)
|
|
|
61
|
+ (inst movz result 0) ; pre-load zero; harmless if not taken
|
|
|
62
|
+ (inst b :eq done)
|
|
|
63
|
+ ;; Not a fixnum: fetch the low 8 type bits.
|
|
|
64
|
+ (inst and result object type-mask)
|
|
|
65
|
+ (inst b done)
|
|
|
66
|
+
|
|
|
67
|
+ FUNCTION-POINTER
|
|
|
68
|
+ (load-type result object (- function-pointer-type))
|
|
|
69
|
+ (inst b done)
|
|
|
70
|
+
|
|
|
71
|
+ OTHER-POINTER
|
|
|
72
|
+ (load-type result object (- other-pointer-type))
|
|
|
73
|
+
|
|
|
74
|
+ DONE))
|
|
|
75
|
+
|
|
|
76
|
+
|
|
|
77
|
+(define-vop (function-subtype)
|
|
|
78
|
+ (:translate function-subtype)
|
|
|
79
|
+ (:policy :fast-safe)
|
|
|
80
|
+ (:args (function :scs (descriptor-reg)))
|
|
|
81
|
+ (:results (result :scs (unsigned-reg)))
|
|
|
82
|
+ (:result-types positive-fixnum)
|
|
|
83
|
+ (:generator 6
|
|
|
84
|
+ (emit-not-implemented)
|
|
|
85
|
+ (load-type result function (- vm:function-pointer-type))))
|
|
|
86
|
+
|
|
|
87
|
+(define-vop (set-function-subtype)
|
|
|
88
|
+ (:translate (setf function-subtype))
|
|
|
89
|
+ (:policy :fast-safe)
|
|
|
90
|
+ (:args (type :scs (unsigned-reg) :target result)
|
|
|
91
|
+ (function :scs (descriptor-reg)))
|
|
|
92
|
+ (:arg-types positive-fixnum *)
|
|
|
93
|
+ (:results (result :scs (unsigned-reg)))
|
|
|
94
|
+ (:result-types positive-fixnum)
|
|
|
95
|
+ (:generator 6
|
|
|
96
|
+ (emit-not-implemented)
|
|
|
97
|
+ ;; AArch64 is little-endian: the type byte is at byte offset 0 of
|
|
|
98
|
+ ;; the header word (i.e. the lowest-address byte), adjusted for the
|
|
|
99
|
+ ;; function-pointer tag. We use STURB (unscaled store byte).
|
|
|
100
|
+ (inst sturb type function (- vm:function-pointer-type))
|
|
|
101
|
+ (move result type)))
|
|
|
102
|
+
|
|
|
103
|
+(define-vop (get-header-data)
|
|
|
104
|
+ (:translate get-header-data)
|
|
|
105
|
+ (:policy :fast-safe)
|
|
|
106
|
+ (:args (x :scs (descriptor-reg)))
|
|
|
107
|
+ (:results (res :scs (unsigned-reg)))
|
|
|
108
|
+ (:result-types positive-fixnum)
|
|
|
109
|
+ (:generator 6
|
|
|
110
|
+ (emit-not-implemented)
|
|
|
111
|
+ (loadw res x 0 vm:other-pointer-type)
|
|
|
112
|
+ (inst lsr res res vm:type-bits)))
|
|
|
113
|
+
|
|
|
114
|
+(define-vop (get-closure-length)
|
|
|
115
|
+ (:translate get-closure-length)
|
|
|
116
|
+ (:policy :fast-safe)
|
|
|
117
|
+ (:args (x :scs (descriptor-reg)))
|
|
|
118
|
+ (:results (res :scs (unsigned-reg)))
|
|
|
119
|
+ (:result-types positive-fixnum)
|
|
|
120
|
+ (:generator 6
|
|
|
121
|
+ (emit-not-implemented)
|
|
|
122
|
+ (loadw res x 0 vm:function-pointer-type)
|
|
|
123
|
+ (inst lsr res res vm:type-bits)))
|
|
|
124
|
+
|
|
|
125
|
+(define-vop (set-header-data)
|
|
|
126
|
+ (:translate set-header-data)
|
|
|
127
|
+ (:policy :fast-safe)
|
|
|
128
|
+ (:args (x :scs (descriptor-reg) :target res)
|
|
|
129
|
+ (data :scs (any-reg immediate zero)))
|
|
|
130
|
+ (:arg-types * positive-fixnum)
|
|
|
131
|
+ (:results (res :scs (descriptor-reg)))
|
|
|
132
|
+ (:temporary (:scs (non-descriptor-reg)) t1 t2)
|
|
|
133
|
+ (:generator 6
|
|
|
134
|
+ (emit-not-implemented)
|
|
|
135
|
+ (loadw t1 x 0 vm:other-pointer-type)
|
|
|
136
|
+ (inst and t1 t1 vm:type-mask)
|
|
|
137
|
+ ;; Load DATA into t2 (untagging fixnums from any-reg/immediate, or
|
|
|
138
|
+ ;; zero from the zero SC), then shift into the header data field and
|
|
|
139
|
+ ;; add. Using ADD rather than ORR avoids the bitmask-immediate
|
|
|
140
|
+ ;; encoding restriction, and is safe here because t1 holds only the
|
|
|
141
|
+ ;; low type-bits with the data field already cleared.
|
|
|
142
|
+ (sc-case data
|
|
|
143
|
+ (any-reg
|
|
|
144
|
+ ;; DATA is a fixnum-tagged integer; remove the fixnum tag first.
|
|
|
145
|
+ (inst asr t2 data vm:fixnum-tag-bits))
|
|
|
146
|
+ (immediate
|
|
|
147
|
+ (inst li t2 (tn-value data)))
|
|
|
148
|
+ (zero
|
|
|
149
|
+ (inst li t2 0)))
|
|
|
150
|
+ (inst add t1 t1 (shift t2 :lsl vm:type-bits))
|
|
|
151
|
+ (storew t1 x 0 vm:other-pointer-type)
|
|
|
152
|
+ (move res x)))
|
|
|
153
|
+
|
|
|
154
|
+
|
|
|
155
|
+(define-vop (make-fixnum)
|
|
|
156
|
+ (:args (ptr :scs (any-reg descriptor-reg)))
|
|
|
157
|
+ (:results (res :scs (any-reg descriptor-reg)))
|
|
|
158
|
+ (:generator 1
|
|
|
159
|
+ (emit-not-implemented)
|
|
|
160
|
+ ;;
|
|
|
161
|
+ ;; Some code (the hash table code) depends on this returning a
|
|
|
162
|
+ ;; positive number so make sure it does.
|
|
|
163
|
+ ;;
|
|
|
164
|
+ ;; LSL by lowtag-bits then LSR by 1 gives a net left shift of
|
|
|
165
|
+ ;; (lowtag-bits - 1), converting a tagged pointer into a positive
|
|
|
166
|
+ ;; fixnum value. AND with a mask cannot replicate this because the
|
|
|
167
|
+ ;; two low fixnum-tag bits must also be cleared -- BIC/AND would
|
|
|
168
|
+ ;; only clear the lowtag bits, leaving bits that should be zero.
|
|
|
169
|
+ (inst lsl res ptr vm:lowtag-bits)
|
|
|
170
|
+ (inst lsr res res 1)))
|
|
|
171
|
+
|
|
|
172
|
+(define-vop (make-other-immediate-type)
|
|
|
173
|
+ (:args (val :scs (any-reg descriptor-reg))
|
|
|
174
|
+ (type :scs (any-reg descriptor-reg immediate)
|
|
|
175
|
+ :target temp))
|
|
|
176
|
+ (:results (res :scs (any-reg descriptor-reg)))
|
|
|
177
|
+ (:temporary (:scs (non-descriptor-reg)) temp)
|
|
|
178
|
+ (:generator 2
|
|
|
179
|
+ (emit-not-implemented)
|
|
|
180
|
+ (sc-case type
|
|
|
181
|
+ (immediate
|
|
|
182
|
+ (inst lsl temp val vm:type-bits)
|
|
|
183
|
+ (inst orr res temp (tn-value type)))
|
|
|
184
|
+ (t
|
|
|
185
|
+ ;; TYPE is a fixnum-tagged integer; un-tag it with ASR, then
|
|
|
186
|
+ ;; shift VAL up and OR the pieces together.
|
|
|
187
|
+ (inst asr temp type vm:fixnum-tag-bits)
|
|
|
188
|
+ (inst lsl res val (- vm:type-bits vm:fixnum-tag-bits))
|
|
|
189
|
+ (inst orr res res temp)))))
|
|
|
190
|
+
|
|
|
191
|
+
|
|
|
192
|
+;;;; Allocation
|
|
|
193
|
+
|
|
|
194
|
+(define-vop (dynamic-space-free-pointer)
|
|
|
195
|
+ (:results (int :scs (sap-reg)))
|
|
|
196
|
+ (:result-types system-area-pointer)
|
|
|
197
|
+ (:translate dynamic-space-free-pointer)
|
|
|
198
|
+ (:policy :fast-safe)
|
|
|
199
|
+ (:generator 1
|
|
|
200
|
+ (emit-not-implemented)
|
|
|
201
|
+ (move int alloc-tn)))
|
|
|
202
|
+
|
|
|
203
|
+(define-vop (binding-stack-pointer-sap)
|
|
|
204
|
+ (:results (int :scs (sap-reg)))
|
|
|
205
|
+ (:result-types system-area-pointer)
|
|
|
206
|
+ (:translate binding-stack-pointer-sap)
|
|
|
207
|
+ (:policy :fast-safe)
|
|
|
208
|
+ (:generator 1
|
|
|
209
|
+ (emit-not-implemented)
|
|
|
210
|
+ (move int bsp-tn)))
|
|
|
211
|
+
|
|
|
212
|
+(define-vop (control-stack-pointer-sap)
|
|
|
213
|
+ (:results (int :scs (sap-reg)))
|
|
|
214
|
+ (:result-types system-area-pointer)
|
|
|
215
|
+ (:translate control-stack-pointer-sap)
|
|
|
216
|
+ (:policy :fast-safe)
|
|
|
217
|
+ (:generator 1
|
|
|
218
|
+ (emit-not-implemented)
|
|
|
219
|
+ (move int csp-tn)))
|
|
|
220
|
+
|
|
|
221
|
+
|
|
|
222
|
+;;;; Code object frobbing.
|
|
|
223
|
+
|
|
|
224
|
+(define-vop (code-instructions)
|
|
|
225
|
+ (:translate code-instructions)
|
|
|
226
|
+ (:policy :fast-safe)
|
|
|
227
|
+ (:args (code :scs (descriptor-reg)))
|
|
|
228
|
+ (:temporary (:scs (non-descriptor-reg)) ndescr)
|
|
|
229
|
+ (:results (sap :scs (sap-reg)))
|
|
|
230
|
+ (:result-types system-area-pointer)
|
|
|
231
|
+ (:generator 10
|
|
|
232
|
+ (emit-not-implemented)
|
|
|
233
|
+ ;; Read the header word, extract the word count (top bits above
|
|
|
234
|
+ ;; type-bits), scale to bytes, then subtract the other-pointer tag
|
|
|
235
|
+ ;; to get the byte displacement from CODE to the first instruction.
|
|
|
236
|
+ (loadw ndescr code 0 vm:other-pointer-type)
|
|
|
237
|
+ ;; Extract the word count and scale to bytes in one shift:
|
|
|
238
|
+ ;; LSR by (type-bits - word-shift) = LSR by 5.
|
|
|
239
|
+ (inst lsr ndescr ndescr (- vm:type-bits vm:word-shift))
|
|
|
240
|
+ (inst sub ndescr ndescr vm:other-pointer-type)
|
|
|
241
|
+ (inst add sap code ndescr)))
|
|
|
242
|
+
|
|
|
243
|
+(define-vop (compute-function)
|
|
|
244
|
+ (:args (code :scs (descriptor-reg))
|
|
|
245
|
+ (offset :scs (signed-reg unsigned-reg)))
|
|
|
246
|
+ (:arg-types * positive-fixnum)
|
|
|
247
|
+ (:results (func :scs (descriptor-reg)))
|
|
|
248
|
+ (:temporary (:scs (non-descriptor-reg)) ndescr)
|
|
|
249
|
+ (:generator 10
|
|
|
250
|
+ (emit-not-implemented)
|
|
|
251
|
+ ;; Compute the byte offset from CODE to the start of the code vector,
|
|
|
252
|
+ ;; add the caller-supplied byte OFFSET, adjust for the tag difference
|
|
|
253
|
+ ;; between other-pointer and function-pointer, then add to CODE.
|
|
|
254
|
+ (loadw ndescr code 0 vm:other-pointer-type)
|
|
|
255
|
+ ;; Extract the word count and scale to bytes in one shift:
|
|
|
256
|
+ ;; LSR by (type-bits - word-shift) = LSR by 5.
|
|
|
257
|
+ (inst lsr ndescr ndescr (- vm:type-bits vm:word-shift))
|
|
|
258
|
+ (inst add ndescr ndescr offset)
|
|
|
259
|
+ (inst sub ndescr ndescr (- vm:other-pointer-type vm:function-pointer-type))
|
|
|
260
|
+ (inst add func code ndescr)))
|
|
|
261
|
+
|
|
|
262
|
+
|
|
|
263
|
+;;;; Other random VOPs.
|
|
|
264
|
+
|
|
|
265
|
+(defknown unix::do-pending-interrupt () (values))
|
|
|
266
|
+(define-vop (unix::do-pending-interrupt)
|
|
|
267
|
+ (:policy :fast-safe)
|
|
|
268
|
+ (:translate unix::do-pending-interrupt)
|
|
|
269
|
+ (:generator 1
|
|
|
270
|
+ (emit-not-implemented)
|
|
|
271
|
+ ;; AArch64 uses UDF (permanently-undefined instruction) as the trap
|
|
|
272
|
+ ;; mechanism; the signal handler decodes the immediate from the
|
|
|
273
|
+ ;; instruction word. This replaces SPARC's UNIMP instruction.
|
|
|
274
|
+ (inst udf pending-interrupt-trap)))
|
|
|
275
|
+
|
|
|
276
|
+
|
|
|
277
|
+(define-vop (halt)
|
|
|
278
|
+ (:generator 1
|
|
|
279
|
+ (emit-not-implemented)
|
|
|
280
|
+ (inst udf halt-trap)))
|
|
|
281
|
+
|
|
|
282
|
+
|
|
|
283
|
+;;;; Dynamic vop count collection support
|
|
|
284
|
+
|
|
|
285
|
+(define-vop (count-me)
|
|
|
286
|
+ (:args (count-vector :scs (descriptor-reg)))
|
|
|
287
|
+ (:info index)
|
|
|
288
|
+ (:temporary (:scs (non-descriptor-reg)) count)
|
|
|
289
|
+ (:generator 1
|
|
|
290
|
+ (emit-not-implemented)
|
|
|
291
|
+ ;; Compute the byte offset of element INDEX in the vector's data
|
|
|
292
|
+ ;; area, accounting for the other-pointer tag.
|
|
|
293
|
+ (let ((offset
|
|
|
294
|
+ (- (* (+ index vector-data-offset) word-bytes) other-pointer-type)))
|
|
|
295
|
+ ;; On AArch64 the unscaled (LDUR/STUR) immediate is a signed 9-bit
|
|
|
296
|
+ ;; value (-256..255). For larger offsets the loadw/storew macros
|
|
|
297
|
+ ;; will use a register-offset form when given a TEMP argument; here
|
|
|
298
|
+ ;; we simply assert the offset fits, matching the SPARC port's
|
|
|
299
|
+ ;; (signed-byte 13) check for its own immediate range.
|
|
|
300
|
+ (assert (typep offset '(signed-byte 9)))
|
|
|
301
|
+ (inst ldur count count-vector offset)
|
|
|
302
|
+ (inst add count count 1)
|
|
|
303
|
+ (inst stur count count-vector offset))))
|
|
|
304
|
+
|
|
|
305
|
+
|
|
|
306
|
+;;;; Cycle counter support.
|
|
|
307
|
+;;;
|
|
|
308
|
+;;; AArch64 exposes a 64-bit virtual counter via the CNTVCT_EL0 system
|
|
|
309
|
+;;; register (accessible from EL0 when CNTKCTL_EL1.EL0VCTEN = 1, which
|
|
|
310
|
+;;; is normally set by the OS). This replaces the SPARC RDTICK
|
|
|
311
|
+;;; instruction and provides a similar monotonically-increasing cycle /
|
|
|
312
|
+;;; time-base counter.
|
|
|
313
|
+;;;
|
|
|
314
|
+;;; The counter is a single 64-bit value; we split it into two 32-bit
|
|
|
315
|
+;;; halves (low, high) to match the SPARC port's two-value interface and
|
|
|
316
|
+;;; keep WITH-CYCLE-COUNTER source-compatible.
|
|
|
317
|
+
|
|
|
318
|
+(defknown read-cycle-counter ()
|
|
|
319
|
+ (values (unsigned-byte 32) (unsigned-byte 32)))
|
|
|
320
|
+
|
|
|
321
|
+(define-vop (read-cycle-counter)
|
|
|
322
|
+ (:translate read-cycle-counter)
|
|
|
323
|
+ (:args)
|
|
|
324
|
+ (:policy :fast-safe)
|
|
|
325
|
+ (:results (lo :scs (unsigned-reg))
|
|
|
326
|
+ (hi :scs (unsigned-reg)))
|
|
|
327
|
+ (:result-types unsigned-num unsigned-num)
|
|
|
328
|
+ (:temporary (:sc unsigned-reg) tick)
|
|
|
329
|
+ (:generator 3
|
|
|
330
|
+ (emit-not-implemented)
|
|
|
331
|
+ ;; Read the virtual count register into a 64-bit temp.
|
|
|
332
|
+ (inst mrs tick :cntvct-el0)
|
|
|
333
|
+ ;; High 32 bits.
|
|
|
334
|
+ (inst lsr hi tick 32)
|
|
|
335
|
+ ;; Low 32 bits: zero-extend by masking the upper half.
|
|
|
336
|
+ (inst and lo tick #xffffffff)))
|
|
|
337
|
+
|
|
|
338
|
+(defun read-cycle-counter ()
|
|
|
339
|
+ "Read the virtual instruction cycle counter available on AArch64.
|
|
|
340
|
+The 64-bit counter is returned as two 32-bit unsigned integers.
|
|
|
341
|
+The low 32-bit result is the first value."
|
|
|
342
|
+ (read-cycle-counter))
|
|
|
343
|
+
|
|
|
344
|
+(defmacro with-cycle-counter (&body body)
|
|
|
345
|
+ "Returns the primary value of BODY as the primary value, and the
|
|
|
346
|
+ number of CPU cycles elapsed as secondary value."
|
|
|
347
|
+ (let ((hi0 (gensym))
|
|
|
348
|
+ (hi1 (gensym))
|
|
|
349
|
+ (lo0 (gensym))
|
|
|
350
|
+ (lo1 (gensym)))
|
|
|
351
|
+ `(multiple-value-bind (,lo0 ,hi0)
|
|
|
352
|
+ (read-cycle-counter)
|
|
|
353
|
+ (values (locally ,@body)
|
|
|
354
|
+ (multiple-value-bind (,lo1 ,hi1)
|
|
|
355
|
+ (read-cycle-counter)
|
|
|
356
|
+ ;; Can't do anything about the notes about generic
|
|
|
357
|
+ ;; arithmetic, so silence the notes.
|
|
|
358
|
+ (declare (optimize (inhibit-warnings 3)))
|
|
|
359
|
+ (+ (ash (- ,hi1 ,hi0) 32)
|
|
|
360
|
+ (- ,lo1 ,lo0))))))) |