Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
17144e16 by Raymond Toy at 2020-08-28T16:23:59-07:00
Save just the xmm registers
Instead of saving the entire FPU state, we really only need to save the xmm registers.
- - - - -
1 changed file:
- src/lisp/x86-assem.S
Changes:
=====================================
src/lisp/x86-assem.S
=====================================
@@ -404,22 +404,39 @@ FUNCDEF(alloc_overflow_sse2)
 ENDFUNC(alloc_overflow_sse2)
 #else
 FUNCDEF(alloc_overflow_sse2)
-        # Need 512 bytes for the fpu save area, space to save ecx and edx,
-        # space for mxcsr, a temp, and one arg to pass to alloc. That's
-        # 512 + 20. But the save area needs to be 16-byte aligned, so
-        # allocate 512 + 32 bytes. The fpu area will be at offset 32.
+        # Need 8*16 bytes for the xmm registers, and space to save ecx
+        # and edx, space for mxcsr, a temp, and one arg to pass to alloc.
+        # That's 8*16 + 5*4 = 148 bytes. Might as well have a few
+        # more so the xmm0 area is 16-byte aligned. That makes it 160
+        # bytes.
         #
         # Stack looks like:
         #
-        # +544 -> end
-        # +32 -> fpu save
-        # +20 -> unused
+        # +160
+        # +144 -> xmm7
+        # +128 -> xmm6
+        # +112 -> xmm5
+        # +96 -> xmm4
+        # +80 -> xmm3
+        # +64 -> xmm2
+        # +48 -> xmm1
+        # +32 -> xmm0
+        # +20 -> unused
         # +16 -> temp
         # +12 -> mxcsr
         # + 8 -> save ecx
         # + 4 -> save edx
         # esp + 0 -> arg for alloc
-        STACK_PROLOGUE(32+512)
+        STACK_PROLOGUE(160)
+        movapd %xmm0, (32 + 0*16)(%esp)
+        movapd %xmm1, (32 + 1*16)(%esp)
+        movapd %xmm2, (32 + 2*16)(%esp)
+        movapd %xmm3, (32 + 3*16)(%esp)
+        movapd %xmm4, (32 + 4*16)(%esp)
+        movapd %xmm5, (32 + 5*16)(%esp)
+        movapd %xmm6, (32 + 6*16)(%esp)
+        movapd %xmm7, (32 + 7*16)(%esp)
+
         movl %ecx, 8(%esp) # Save ecx and edx registers
         movl %edx, 4(%esp)
         stmxcsr 12(%esp) # Save MXCSR
@@ -430,16 +447,21 @@ FUNCDEF(alloc_overflow_sse2)
         ldmxcsr 16(%esp) # Get new mxcsr value
         movl %eax, (%esp) # Put size on stack for first arg to alloc()
 
-        # Save all FPU regs because we don't know what's in use by lisp.
-        fxsave 32(%esp)
-
         call GNAME(alloc)
 
-        fxrstor 32(%esp)
-
         movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
         movl 8(%esp), %ecx
         ldmxcsr 12(%esp)
+
+        movapd (32 + 0*16)(%esp), %xmm0
+        movapd (32 + 1*16)(%esp), %xmm1
+        movapd (32 + 2*16)(%esp), %xmm2
+        movapd (32 + 3*16)(%esp), %xmm3
+        movapd (32 + 4*16)(%esp), %xmm4
+        movapd (32 + 5*16)(%esp), %xmm5
+        movapd (32 + 6*16)(%esp), %xmm6
+        movapd (32 + 7*16)(%esp), %xmm7
+
         STACK_EPILOGUE
         ret
 ENDFUNC(alloc_overflow_sse2)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/17144e16d4f7578644fac575...