Raymond Toy pushed to branch master at cmucl / cmucl
Commits: 449f8ec1 by Raymond Toy at 2020-08-26T17:09:13-07:00 Fix #85: Let each x86 config set its own optimization level
Add `COPT` variable in `Config.x86_common` to set the optimization level (defaulting to `-O2`). Then each `Config.x86` file can set `COPT` as desired if the default doesn't work.
Thus, `Config.x86_linux` sets `COPT` to `-O1`, but others can use the default value. See issue #68.
- - - - - 38372fd9 by Raymond Toy at 2020-08-26T17:14:30-07:00 Fix typo
- - - - - d51dabf0 by Raymond Toy at 2020-08-26T23:21:23-07:00 Fix #86: Make cmucl work with gcc 8.1.1 and later
In alloc(), save the fpu state on entry to the function and restore it just before returning.
While we're at it, use the __attribute__ option to get a 16-byte aligned area where we can save the fpu state.
Also set the optimization level to -O2 for Linux.
- - - - - 4b80a6e5 by Raymond Toy at 2020-08-26T23:26:12-07:00 Merge branch 'master' into issue-86-save-fpu-state-on-entry-to-alloc
- - - - - a95db7ba by Raymond Toy at 2020-08-26T23:30:54-07:00 Update comments
- - - - - ad3862c9 by Raymond Toy at 2020-08-26T23:34:05-07:00 Clean up code
- - - - - 01f8217b by Raymond Toy at 2020-08-26T23:41:36-07:00 Add -R flag to recompile lisp
- - - - - 8b08b800 by Raymond Toy at 2020-08-27T20:39:07-07:00 Save FPU state in alloc_overflow_sse2
It's best to save the FPU state here instead of in alloc() because we can't know what the compiler might do. Remove the fpu save stuff from alloc().
gcc 9.3.1 builds lisp successfully.
- - - - - e3aa51f3 by Raymond Toy at 2020-08-27T20:58:52-07:00 Remove stray #pragma
Forgot to remove this; it's not needed anymore.
- - - - - 17144e16 by Raymond Toy at 2020-08-28T16:23:59-07:00 Save just the xmm registers
Instead of saving the entire FPU state, we really only need to save the xmm registers.
- - - - - f923302e by Raymond Toy at 2020-08-28T16:32:49-07:00 Remove old version of alloc_overflow_sse2
- - - - - 9b7c0185 by Raymond Toy at 2020-08-29T02:27:00+00:00 Merge branch 'issue-86-save-fpu-state-on-entry-to-alloc' into 'master'
Fix #86: save fpu state on entry to alloc
Closes #86 and #85
See merge request cmucl/cmucl!53 - - - - -
5 changed files:
- .gitlab-ci.yml - src/lisp/Config.x86_linux - src/lisp/gencgc.c - src/lisp/x86-arch.h - src/lisp/x86-assem.S
Changes:
===================================== .gitlab-ci.yml ===================================== @@ -12,7 +12,7 @@ linux-runner: - mkdir snapshot - (cd snapshot; tar xjf ../cmucl-$version-linux.tar.bz2; tar xjf ../cmucl-$version-linux.extra.tar.bz2) script: - - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp + - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp - bin/make-dist.sh -I dist linux-4 - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
@@ -24,6 +24,6 @@ osx-runner: - mkdir snapshot - (cd snapshot; tar xjf ../cmucl-$version-darwin.tar.bz2) script: - - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp + - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp - bin/make-dist.sh -I dist darwin-4 - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
===================================== src/lisp/Config.x86_linux ===================================== @@ -3,7 +3,7 @@ include Config.x86_common
# gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't # produce a working lisp with -O2. Just use -O1. -COPT = -O1 +COPT = -O2 CFLAGS += $(COPT) CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE CFLAGS += -rdynamic -march=pentium4 -mfpmath=sse -mtune=generic
===================================== src/lisp/gencgc.c ===================================== @@ -8416,6 +8416,7 @@ char * alloc(int nbytes) { void *new_obj; + #if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__))) /* * *current-region-free-pointer* is the same as alloc-tn (= @@ -8442,20 +8443,6 @@ alloc(int nbytes) set_current_region_free((lispobj) new_free_pointer); break; } else if (bytes_allocated <= auto_gc_trigger) { -#if defined(i386) || defined(__x86_64) - /* - * Need to save and restore the FPU registers on x86, but only for - * sse2. See Ticket #61. - * - * Not needed by sparc or ppc because we never call alloc from - * Lisp directly to do allocation. - */ - FPU_STATE(fpu_state); - - if (fpu_mode == SSE2) { - save_fpu_state(fpu_state); - } -#endif /* Call gc_alloc. */ boxed_region.free_pointer = (void *) get_current_region_free(); boxed_region.end_addr = @@ -8466,11 +8453,6 @@ alloc(int nbytes) set_current_region_free((lispobj) boxed_region.free_pointer); set_current_region_end((lispobj) boxed_region.end_addr);
-#if defined(i386) || defined(__x86_64) - if (fpu_mode == SSE2) { - restore_fpu_state(fpu_state); - } -#endif break; } else { /* Run GC and try again. */
===================================== src/lisp/x86-arch.h ===================================== @@ -17,16 +17,14 @@ extern boolean os_support_sse2(void); #define FPU_STATE_SIZE 27
/* - * Need 512 byte area, aligned on a 16-byte boundary. So allocate - * 512+16 bytes of space and let the routine adjust the appropriate - * alignment. + * Need 512 byte area, aligned on a 16-byte boundary. */ -#define SSE_STATE_SIZE ((512+16)/4) +#define SSE_STATE_SIZE 512
/* * Just use the SSE size for both x87 and sse2 since the SSE size is - * enough for either. + * enough for either. Make sure it's on a 16-byte boundary. */ -#define FPU_STATE(name) int name[SSE_STATE_SIZE]; +#define FPU_STATE(name) u_int8_t name[SSE_STATE_SIZE] __attribute__((aligned(16)))
#endif
===================================== src/lisp/x86-assem.S ===================================== @@ -382,7 +382,39 @@ ENDFUNC(fastcopy16) * %eax = address */ FUNCDEF(alloc_overflow_sse2) - STACK_PROLOGUE(20) + # Need 8*16 bytes for the xmm registers, and space to save ecx + # and edx, space for mxcsr, a temp, and one arg to pass to alloc. + # That's 8*16 + 5*4 = 148 bytes. Might as well have a few + # more so the xmm0 area is 16-byte aligned. That makes it 160 + # bytes. + # + # Stack looks like: + # + # +160 + # +144 -> xmm7 + # +128 -> xmm6 + # +112 -> xmm5 + # +96 -> xmm4 + # +80 -> xmm3 + # +64 -> xmm2 + # +48 -> xmm1 + # +32 -> xmm0 + # +20 -> unused + # +16 -> temp + # +12 -> mxcsr + # + 8 -> save ecx + # + 4 -> save edx + # esp + 0 -> arg for alloc + STACK_PROLOGUE(160) + movapd %xmm0, (32 + 0*16)(%esp) + movapd %xmm1, (32 + 1*16)(%esp) + movapd %xmm2, (32 + 2*16)(%esp) + movapd %xmm3, (32 + 3*16)(%esp) + movapd %xmm4, (32 + 4*16)(%esp) + movapd %xmm5, (32 + 5*16)(%esp) + movapd %xmm6, (32 + 6*16)(%esp) + movapd %xmm7, (32 + 7*16)(%esp) + movl %ecx, 8(%esp) # Save ecx and edx registers movl %edx, 4(%esp) stmxcsr 12(%esp) # Save MXCSR @@ -398,10 +430,20 @@ FUNCDEF(alloc_overflow_sse2) movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value. movl 8(%esp), %ecx ldmxcsr 12(%esp) + + movapd (32 + 0*16)(%esp), %xmm0 + movapd (32 + 1*16)(%esp), %xmm1 + movapd (32 + 2*16)(%esp), %xmm2 + movapd (32 + 3*16)(%esp), %xmm3 + movapd (32 + 4*16)(%esp), %xmm4 + movapd (32 + 5*16)(%esp), %xmm5 + movapd (32 + 6*16)(%esp), %xmm6 + movapd (32 + 7*16)(%esp), %xmm7 + STACK_EPILOGUE ret ENDFUNC(alloc_overflow_sse2) - + #ifdef LINKAGE_TABLE
/* Call into C code to resolve a linkage entry. The initial code in the
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d0b192cd3cf63abb94ecc75...