... |
... |
@@ -404,22 +404,39 @@ FUNCDEF(alloc_overflow_sse2) |
404
|
404
|
ENDFUNC(alloc_overflow_sse2)
|
405
|
405
|
#else
|
406
|
406
|
FUNCDEF(alloc_overflow_sse2)
|
407
|
|
- # Need 512 bytes for the fpu save area, space to save ecx and edx,
|
408
|
|
- # space for mxcsr, a temp, and one arg to pass to alloc. That's
|
409
|
|
- # 512 + 20. But the save area needs to be 16-byte aligned, so
|
410
|
|
- # allocate 512 + 32 bytes. The fpu area will be at offset 32.
|
|
407
|
+ # Need 8*16 bytes for the xmm registers, and space to save ecx
|
|
408
|
+ # and edx, space for mxcsr, a temp, and one arg to pass to alloc.
|
|
409
|
+ # That's 8*16 + 5*4 = 148 bytes. movapd needs a 16-byte aligned
|
|
410
|
+ # operand, so the xmm0 area starts at offset 32, not 20. That
|
|
411
|
+ # makes it 32 + 8*16 = 160 bytes.
|
411
|
412
|
#
|
412
|
413
|
# Stack looks like:
|
413
|
414
|
#
|
414
|
|
- # +544 -> end
|
415
|
|
- # +32 -> fpu save
|
416
|
|
- # +20 -> unused
|
|
415
|
+ # +160 -> end
|
|
416
|
+ # +144 -> xmm7
|
|
417
|
+ # +128 -> xmm6
|
|
418
|
+ # +112 -> xmm5
|
|
419
|
+ # +96 -> xmm4
|
|
420
|
+ # +80 -> xmm3
|
|
421
|
+ # +64 -> xmm2
|
|
422
|
+ # +48 -> xmm1
|
|
423
|
+ # +32 -> xmm0
|
|
424
|
+ # +20 -> unused
|
417
|
425
|
# +16 -> temp
|
418
|
426
|
# +12 -> mxcsr
|
419
|
427
|
# + 8 -> save ecx
|
420
|
428
|
# + 4 -> save edx
|
421
|
429
|
# esp + 0 -> arg for alloc
|
422
|
|
- STACK_PROLOGUE(32+512)
|
|
430
|
+ STACK_PROLOGUE(160)
|
|
431
|
+ movapd %xmm0, (32 + 0*16)(%esp)
|
|
432
|
+ movapd %xmm1, (32 + 1*16)(%esp)
|
|
433
|
+ movapd %xmm2, (32 + 2*16)(%esp)
|
|
434
|
+ movapd %xmm3, (32 + 3*16)(%esp)
|
|
435
|
+ movapd %xmm4, (32 + 4*16)(%esp)
|
|
436
|
+ movapd %xmm5, (32 + 5*16)(%esp)
|
|
437
|
+ movapd %xmm6, (32 + 6*16)(%esp)
|
|
438
|
+ movapd %xmm7, (32 + 7*16)(%esp)
|
|
439
|
+
|
423
|
440
|
movl %ecx, 8(%esp) # Save ecx and edx registers
|
424
|
441
|
movl %edx, 4(%esp)
|
425
|
442
|
stmxcsr 12(%esp) # Save MXCSR
|
... |
... |
@@ -430,16 +447,21 @@ FUNCDEF(alloc_overflow_sse2) |
430
|
447
|
ldmxcsr 16(%esp) # Get new mxcsr value
|
431
|
448
|
movl %eax, (%esp) # Put size on stack for first arg to alloc()
|
432
|
449
|
|
433
|
|
- # Save all FPU regs because we don't know what's in use by lisp.
|
434
|
|
- fxsave 32(%esp)
|
435
|
|
-
|
436
|
450
|
call GNAME(alloc)
|
437
|
451
|
|
438
|
|
- fxrstor 32(%esp)
|
439
|
|
-
|
440
|
452
|
movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
|
441
|
453
|
movl 8(%esp), %ecx
|
442
|
454
|
ldmxcsr 12(%esp)
|
|
455
|
+
|
|
456
|
+ movapd (32 + 0*16)(%esp), %xmm0
|
|
457
|
+ movapd (32 + 1*16)(%esp), %xmm1
|
|
458
|
+ movapd (32 + 2*16)(%esp), %xmm2
|
|
459
|
+ movapd (32 + 3*16)(%esp), %xmm3
|
|
460
|
+ movapd (32 + 4*16)(%esp), %xmm4
|
|
461
|
+ movapd (32 + 5*16)(%esp), %xmm5
|
|
462
|
+ movapd (32 + 6*16)(%esp), %xmm6
|
|
463
|
+ movapd (32 + 7*16)(%esp), %xmm7
|
|
464
|
+
|
443
|
465
|
STACK_EPILOGUE
|
444
|
466
|
ret
|
445
|
467
|
ENDFUNC(alloc_overflow_sse2)
|