Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits: 8b08b800 by Raymond Toy at 2020-08-27T20:39:07-07:00 Save FPU state in alloc_overflow_sse2
It's best to save the FPU state here, in the assembly routine alloc_overflow_sse2, instead of inside alloc(), because we can't know what code the compiler might generate for alloc(). Remove the FPU save/restore code from alloc().
gcc 9.3.1 builds lisp successfully.
- - - - -
2 changed files:
- src/lisp/gencgc.c - src/lisp/x86-assem.S
Changes:
===================================== src/lisp/gencgc.c ===================================== @@ -8412,28 +8412,11 @@ gencgc_pickup_dynamic(void)
void do_pending_interrupt(void);
-//#pragma GCC optimize ("-O1") char * alloc(int nbytes) { -#if (defined(i386) || defined(__x86_64)) - /* - * Need to save and restore the FPU registers on x86, but only for - * sse2. See Trac ticket #61 - * (https://trac.common-lisp.net/cmucl/ticket/61) and gitlab - * ticket #86 - * (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/86). - * - * Not needed by sparc or ppc because we never call alloc from - * Lisp directly to do allocation. - */ - FPU_STATE(fpu_state); - - if (fpu_mode == SSE2) { - save_fpu_state(fpu_state); - } -#endif void *new_obj; + #if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__))) /* * *current-region-free-pointer* is the same as alloc-tn (= @@ -8483,14 +8466,8 @@ alloc(int nbytes) } }
-#if (defined(i386) || defined(__x86_64)) - if (fpu_mode == SSE2) { - restore_fpu_state(fpu_state); - } -#endif return new_obj; } -#pragma GCC optimize ("-O2")
char * alloc_pseudo_atomic(int nbytes)
===================================== src/lisp/x86-assem.S ===================================== @@ -381,6 +381,7 @@ ENDFUNC(fastcopy16) * On exit: * %eax = address */ +#if 0 FUNCDEF(alloc_overflow_sse2) STACK_PROLOGUE(20) movl %ecx, 8(%esp) # Save ecx and edx registers @@ -401,7 +402,49 @@ FUNCDEF(alloc_overflow_sse2) STACK_EPILOGUE ret ENDFUNC(alloc_overflow_sse2) - +#else +FUNCDEF(alloc_overflow_sse2) + # Need 512 bytes for the fpu save area, space to save ecx and edx, + # space for mxcsr, a temp, and one arg to pass to alloc. That's + # 512 + 20. But the save area needs to be 16-byte aligned, so + # allocate 512 + 32 bytes. The fpu area will be at offset 32. + # + # Stack looks like: + # + # +544 -> end + # +32 -> fpu save + # +20 -> unused + # +16 -> temp + # +12 -> mxcsr + # + 8 -> save ecx + # + 4 -> save edx + # esp + 0 -> arg for alloc + STACK_PROLOGUE(32+512) + movl %ecx, 8(%esp) # Save ecx and edx registers + movl %edx, 4(%esp) + stmxcsr 12(%esp) # Save MXCSR + /* Clear the exceptions that might have occurred */ + movl 12(%esp), %edx + and $-64, %edx # Clear the exceptions + movl %edx, 16(%esp) + ldmxcsr 16(%esp) # Get new mxcsr value + movl %eax, (%esp) # Put size on stack for first arg to alloc() + + # Save all FPU regs because we don't know what's in use by lisp. + fxsave 32(%esp) + + call GNAME(alloc) + + fxrstor 32(%esp) + + movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value. + movl 8(%esp), %ecx + ldmxcsr 12(%esp) + STACK_EPILOGUE + ret +ENDFUNC(alloc_overflow_sse2) +#endif + #ifdef LINKAGE_TABLE
/* Call into C code to resolve a linkage entry. The initial code in the
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/8b08b800dc1c26d498fbc405...