Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
17144e16 by Raymond Toy at 2020-08-28T16:23:59-07:00
Save just the xmm registers
Instead of saving the entire FPU state, we really only need to save
the xmm registers.
- - - - -
1 changed file:
- src/lisp/x86-assem.S
Changes:
=====================================
src/lisp/x86-assem.S
=====================================
@@ -404,22 +404,39 @@ FUNCDEF(alloc_overflow_sse2)
ENDFUNC(alloc_overflow_sse2)
#else
FUNCDEF(alloc_overflow_sse2)
- # Need 512 bytes for the fpu save area, space to save ecx and edx,
- # space for mxcsr, a temp, and one arg to pass to alloc. That's
- # 512 + 20. But the save area needs to be 16-byte aligned, so
- # allocate 512 + 32 bytes. The fpu area will be at offset 32.
+ # Need 8*16 bytes for the xmm registers, and space to save ecx
+ # and edx, space for mxcsr, a temp, and one arg to pass to alloc.
+ # That's 8*16 + 5*4 = 148 bytes. Might as well have a few
+ # more so the xmm0 area is 16-byte aligned. That makes it 160
+ # bytes.
#
# Stack looks like:
#
- # +544 -> end
- # +32 -> fpu save
- # +20 -> unused
+ # +160
+ # +144 -> xmm7
+ # +128 -> xmm6
+ # +112 -> xmm5
+ # +96 -> xmm4
+ # +80 -> xmm3
+ # +64 -> xmm2
+ # +48 -> xmm1
+ # +32 -> xmm0
+ # +20 -> unused
# +16 -> temp
# +12 -> mxcsr
# + 8 -> save ecx
# + 4 -> save edx
# esp + 0 -> arg for alloc
- STACK_PROLOGUE(32+512)
+ STACK_PROLOGUE(160)
+ movapd %xmm0, (32 + 0*16)(%esp)
+ movapd %xmm1, (32 + 1*16)(%esp)
+ movapd %xmm2, (32 + 2*16)(%esp)
+ movapd %xmm3, (32 + 3*16)(%esp)
+ movapd %xmm4, (32 + 4*16)(%esp)
+ movapd %xmm5, (32 + 5*16)(%esp)
+ movapd %xmm6, (32 + 6*16)(%esp)
+ movapd %xmm7, (32 + 7*16)(%esp)
+
movl %ecx, 8(%esp) # Save ecx and edx registers
movl %edx, 4(%esp)
stmxcsr 12(%esp) # Save MXCSR
@@ -430,16 +447,21 @@ FUNCDEF(alloc_overflow_sse2)
ldmxcsr 16(%esp) # Get new mxcsr value
movl %eax, (%esp) # Put size on stack for first arg to alloc()
- # Save all FPU regs because we don't know what's in use by lisp.
- fxsave 32(%esp)
-
call GNAME(alloc)
- fxrstor 32(%esp)
-
movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
movl 8(%esp), %ecx
ldmxcsr 12(%esp)
+
+ movapd (32 + 0*16)(%esp), %xmm0
+ movapd (32 + 1*16)(%esp), %xmm1
+ movapd (32 + 2*16)(%esp), %xmm2
+ movapd (32 + 3*16)(%esp), %xmm3
+ movapd (32 + 4*16)(%esp), %xmm4
+ movapd (32 + 5*16)(%esp), %xmm5
+ movapd (32 + 6*16)(%esp), %xmm6
+ movapd (32 + 7*16)(%esp), %xmm7
+
STACK_EPILOGUE
ret
ENDFUNC(alloc_overflow_sse2)
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/17144e16d4f7578644fac57…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/17144e16d4f7578644fac57…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
e3aa51f3 by Raymond Toy at 2020-08-27T20:58:52-07:00
Remove stray #pragma
Forgot to remove this; it's not needed anymore.
- - - - -
1 changed file:
- src/lisp/gencgc.c
Changes:
=====================================
src/lisp/gencgc.c
=====================================
@@ -8527,7 +8527,6 @@ component_ptr_from_pc(lispobj * pc)
return NULL;
}
-#pragma GCC optimize ("-O1")
/*
* Get lower and upper(middle) 28 bits of total allocation
*/
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/e3aa51f30a04d6f299a4c36…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/e3aa51f30a04d6f299a4c36…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
8b08b800 by Raymond Toy at 2020-08-27T20:39:07-07:00
Save FPU state in alloc_overflow_sse2
It's best to save the FPU state here instead of in alloc() because we
can't know what the compiler might do. Remove the fpu save stuff from
alloc().
gcc 9.3.1 builds lisp successfully.
- - - - -
2 changed files:
- src/lisp/gencgc.c
- src/lisp/x86-assem.S
Changes:
=====================================
src/lisp/gencgc.c
=====================================
@@ -8412,28 +8412,11 @@ gencgc_pickup_dynamic(void)
void do_pending_interrupt(void);
-//#pragma GCC optimize ("-O1")
char *
alloc(int nbytes)
{
-#if (defined(i386) || defined(__x86_64))
- /*
- * Need to save and restore the FPU registers on x86, but only for
- * sse2. See Trac ticket #61
- * (https://trac.common-lisp.net/cmucl/ticket/61) and gitlab
- * ticket #86
- * (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/86).
- *
- * Not needed by sparc or ppc because we never call alloc from
- * Lisp directly to do allocation.
- */
- FPU_STATE(fpu_state);
-
- if (fpu_mode == SSE2) {
- save_fpu_state(fpu_state);
- }
-#endif
void *new_obj;
+
#if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
/*
* *current-region-free-pointer* is the same as alloc-tn (=
@@ -8483,14 +8466,8 @@ alloc(int nbytes)
}
}
-#if (defined(i386) || defined(__x86_64))
- if (fpu_mode == SSE2) {
- restore_fpu_state(fpu_state);
- }
-#endif
return new_obj;
}
-#pragma GCC optimize ("-O2")
char *
alloc_pseudo_atomic(int nbytes)
=====================================
src/lisp/x86-assem.S
=====================================
@@ -381,6 +381,7 @@ ENDFUNC(fastcopy16)
* On exit:
* %eax = address
*/
+#if 0
FUNCDEF(alloc_overflow_sse2)
STACK_PROLOGUE(20)
movl %ecx, 8(%esp) # Save ecx and edx registers
@@ -401,7 +402,49 @@ FUNCDEF(alloc_overflow_sse2)
STACK_EPILOGUE
ret
ENDFUNC(alloc_overflow_sse2)
-
+#else
+FUNCDEF(alloc_overflow_sse2)
+ # Need 512 bytes for the fpu save area, space to save ecx and edx,
+ # space for mxcsr, a temp, and one arg to pass to alloc. That's
+ # 512 + 20. But the save area needs to be 16-byte aligned, so
+ # allocate 512 + 32 bytes. The fpu area will be at offset 32.
+ #
+ # Stack looks like:
+ #
+ # +544 -> end
+ # +32 -> fpu save
+ # +20 -> unused
+ # +16 -> temp
+ # +12 -> mxcsr
+ # + 8 -> save ecx
+ # + 4 -> save edx
+ # esp + 0 -> arg for alloc
+ STACK_PROLOGUE(32+512)
+ movl %ecx, 8(%esp) # Save ecx and edx registers
+ movl %edx, 4(%esp)
+ stmxcsr 12(%esp) # Save MXCSR
+ /* Clear the exceptions that might have occurred */
+ movl 12(%esp), %edx
+ and $-64, %edx # Clear the exceptions
+ movl %edx, 16(%esp)
+ ldmxcsr 16(%esp) # Get new mxcsr value
+ movl %eax, (%esp) # Put size on stack for first arg to alloc()
+
+ # Save all FPU regs because we don't know what's in use by lisp.
+ fxsave 32(%esp)
+
+ call GNAME(alloc)
+
+ fxrstor 32(%esp)
+
+ movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
+ movl 8(%esp), %ecx
+ ldmxcsr 12(%esp)
+ STACK_EPILOGUE
+ ret
+ENDFUNC(alloc_overflow_sse2)
+#endif
+
#ifdef LINKAGE_TABLE
/* Call into C code to resolve a linkage entry. The initial code in the
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/8b08b800dc1c26d498fbc40…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/8b08b800dc1c26d498fbc40…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
a95db7ba by Raymond Toy at 2020-08-26T23:30:54-07:00
Update comments
- - - - -
ad3862c9 by Raymond Toy at 2020-08-26T23:34:05-07:00
Clean up code
- - - - -
01f8217b by Raymond Toy at 2020-08-26T23:41:36-07:00
Add -R flag to recompile lisp
- - - - -
3 changed files:
- .gitlab-ci.yml
- src/lisp/gencgc.c
- src/lisp/x86-arch.h
Changes:
=====================================
.gitlab-ci.yml
=====================================
@@ -12,7 +12,7 @@ linux-runner:
- mkdir snapshot
- (cd snapshot; tar xjf ../cmucl-$version-linux.tar.bz2; tar xjf ../cmucl-$version-linux.extra.tar.bz2)
script:
- - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
+ - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
- bin/make-dist.sh -I dist linux-4
- bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
@@ -24,6 +24,6 @@ osx-runner:
- mkdir snapshot
- (cd snapshot; tar xjf ../cmucl-$version-darwin.tar.bz2)
script:
- - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
+ - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
- bin/make-dist.sh -I dist darwin-4
- bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
=====================================
src/lisp/gencgc.c
=====================================
@@ -8416,10 +8416,13 @@ void do_pending_interrupt(void);
char *
alloc(int nbytes)
{
-#if 0 && (defined(i386) || defined(__x86_64))
+#if (defined(i386) || defined(__x86_64))
/*
* Need to save and restore the FPU registers on x86, but only for
- * sse2. See Ticket #61.
+ * sse2. See Trac ticket #61
+ * (https://trac.common-lisp.net/cmucl/ticket/61) and gitlab
+ * ticket #86
+ * (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/86).
*
* Not needed by sparc or ppc because we never call alloc from
* Lisp directly to do allocation.
@@ -8457,20 +8460,6 @@ alloc(int nbytes)
set_current_region_free((lispobj) new_free_pointer);
break;
} else if (bytes_allocated <= auto_gc_trigger) {
-#if 1 && (defined(i386) || defined(__x86_64))
- /*
- * Need to save and restore the FPU registers on x86, but only for
- * sse2. See Ticket #61.
- *
- * Not needed by sparc or ppc because we never call alloc from
- * Lisp directly to do allocation.
- */
- FPU_STATE(fpu_state);
-
- if (fpu_mode == SSE2) {
- save_fpu_state(fpu_state);
- }
-#endif
/* Call gc_alloc. */
boxed_region.free_pointer = (void *) get_current_region_free();
boxed_region.end_addr =
@@ -8481,11 +8470,6 @@ alloc(int nbytes)
set_current_region_free((lispobj) boxed_region.free_pointer);
set_current_region_end((lispobj) boxed_region.end_addr);
-#if 1 && (defined(i386) || defined(__x86_64))
- if (fpu_mode == SSE2) {
- restore_fpu_state(fpu_state);
- }
-#endif
break;
} else {
/* Run GC and try again. */
@@ -8499,7 +8483,7 @@ alloc(int nbytes)
}
}
-#if 0 && (defined(i386) || defined(__x86_64))
+#if (defined(i386) || defined(__x86_64))
if (fpu_mode == SSE2) {
restore_fpu_state(fpu_state);
}
=====================================
src/lisp/x86-arch.h
=====================================
@@ -17,15 +17,13 @@ extern boolean os_support_sse2(void);
#define FPU_STATE_SIZE 27
/*
- * Need 512 byte area, aligned on a 16-byte boundary. So allocate
- * 512+16 bytes of space and let the routine adjust the appropriate
- * alignment.
+ * Need 512 byte area, aligned on a 16-byte boundary.
*/
#define SSE_STATE_SIZE 512
/*
* Just use the SSE size for both x87 and sse2 since the SSE size is
- * enough for either.
+ * enough for either. Make sure it's on a 16-byte boundary.
*/
#define FPU_STATE(name) u_int8_t name[SSE_STATE_SIZE] __attribute__((aligned(16)))
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/4b80a6e595faa3e2343b62…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/4b80a6e595faa3e2343b62…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
29cac208 by Raymond Toy at 2020-08-27T00:36:27+00:00
Fix #85: Let each x86 configs set optimization level
Add `COPT` variable in `Config.x86_common` to set the optimization
level (defaulting to `-O2`). Then each `Config.x86` file can set
`COPT` as desired if the default doesn't work.
Thus, `Config.x86_linux` sets `COPT` to `-O1`, but others can use the
default value. See issue #68.
- - - - -
d0b192cd by Raymond Toy at 2020-08-27T00:36:28+00:00
Merge branch 'issue-85-opt-level-set-in-x86-config' into 'master'
Fix #85: Let each x86 configs set optimization level
Closes #85
See merge request cmucl/cmucl!52
- - - - -
4b80a6e5 by Raymond Toy at 2020-08-26T23:26:12-07:00
Merge branch 'master' into issue-86-save-fpu-state-on-entry-to-alloc
- - - - -
0 changed files:
Changes:
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d51dabf0f0c6868834ba5c…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d51dabf0f0c6868834ba5c…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
29cac208 by Raymond Toy at 2020-08-27T00:36:27+00:00
Fix #85: Let each x86 configs set optimization level
Add `COPT` variable in `Config.x86_common` to set the optimization
level (defaulting to `-O2`). Then each `Config.x86` file can set
`COPT` as desired if the default doesn't work.
Thus, `Config.x86_linux` sets `COPT` to `-O1`, but others can use the
default value. See issue #68.
- - - - -
d0b192cd by Raymond Toy at 2020-08-27T00:36:28+00:00
Merge branch 'issue-85-opt-level-set-in-x86-config' into 'master'
Fix #85: Let each x86 configs set optimization level
Closes #85
See merge request cmucl/cmucl!52
- - - - -
6 changed files:
- src/lisp/Config.x86_common
- src/lisp/Config.x86_darwin
- src/lisp/Config.x86_linux
- src/lisp/Config.x86_linux_clang
- src/lisp/Config.x86_netbsd
- src/lisp/Config.x86_solaris_sunc
Changes:
=====================================
src/lisp/Config.x86_common
=====================================
@@ -45,10 +45,11 @@ endif
CPPFLAGS := $(CPP_DEFINE_OPTIONS) $(CPP_INCLUDE_OPTIONS)
CFLAGS += -Wstrict-prototypes -Wall -g -fno-omit-frame-pointer
-# gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't
-# produce a working lisp with -O2. Just use -O1.
-CFLAGS += -O1
-ASFLAGS = -g
+# Default optimization level. This can be changed in the individual
+# configs.
+COPT = -O2
+
+ASFLAGS = -g
ASSEM_SRC = x86-assem.S
ARCH_SRC = x86-arch.c
=====================================
src/lisp/Config.x86_darwin
=====================================
@@ -6,6 +6,7 @@ include Config.x86_common
# you have the SDK available.
MIN_VER = -mmacosx-version-min=10.6
+CFLAGS += $(COPT)
CPPFLAGS += -DDARWIN $(MIN_VER) -m32
CFLAGS += -g3 -mtune=generic
ASFLAGS += -g3 $(MIN_VER)
=====================================
src/lisp/Config.x86_linux
=====================================
@@ -1,6 +1,10 @@
# -*- Mode: makefile -*-
include Config.x86_common
+# gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't
+# produce a working lisp with -O2. Just use -O1.
+COPT = -O1
+CFLAGS += $(COPT)
CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE
CFLAGS += -rdynamic -march=pentium4 -mfpmath=sse -mtune=generic
=====================================
src/lisp/Config.x86_linux_clang
=====================================
@@ -3,6 +3,7 @@ include Config.x86_common
CC = clang
CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE
+CFLAGS += $(COPT)
CFLAGS += -march=pentium4 -mfpmath=sse -mtune=generic
UNDEFSYMPATTERN = -Xlinker -u -Xlinker &
=====================================
src/lisp/Config.x86_netbsd
=====================================
@@ -1,6 +1,7 @@
# -*- Mode: makefile -*-
include Config.x86_common
+CFLAGS += $(COPT)
CPPFLAGS += -march=pentium4 -mfpmath=sse
UNDEFSYMPATTERN = -Xlinker -u -Xlinker &
=====================================
src/lisp/Config.x86_solaris_sunc
=====================================
@@ -2,6 +2,7 @@
include Config.sparc_common
CC = cc -xlibmieee -g
+CFLAGS += $(COPT)
CFLAGS += -Di386
CPP = cc -E
DEPEND_FLAGS = -xM1
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d1c5289eb069df2ecdbac3…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d1c5289eb069df2ecdbac3…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch issue-85-opt-level-set-in-x86-config at cmucl / cmucl
Commits:
d1c5289e by Raymond Toy at 2020-08-26T17:12:40-07:00
Fix typo
- - - - -
38372fd9 by Raymond Toy at 2020-08-26T17:14:30-07:00
Fix typo
- - - - -
1 changed file:
- src/lisp/Config.x86_freebsd
Changes:
=====================================
src/lisp/Config.x86_freebsd
=====================================
@@ -3,7 +3,7 @@ include Config.x86_common
# Set the path to your version of GCC here.
CC = gcc -m32
-CFLAGS += -O2
+CFLAGS += $(COPT)
CPPFLAGS += -march=pentium4 -mfpmath=sse
UNDEFSYMPATTERN = -Xlinker -u -Xlinker &
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/449f8ec10cd560b5a1deab…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/449f8ec10cd560b5a1deab…
You're receiving this email because of your account on gitlab.common-lisp.net.
Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
d1c5289e by Raymond Toy at 2020-08-26T17:12:40-07:00
Fix typo
- - - - -
1 changed file:
- src/lisp/Config.x86_freebsd
Changes:
=====================================
src/lisp/Config.x86_freebsd
=====================================
@@ -3,6 +3,7 @@ include Config.x86_common
# Set the path to your version of GCC here.
CC = gcc -m32
+CFLAGS += $(COPT)
CPPFLAGS += -march=pentium4 -mfpmath=sse
UNDEFSYMPATTERN = -Xlinker -u -Xlinker &
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/d1c5289eb069df2ecdbac38…
--
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/commit/d1c5289eb069df2ecdbac38…
You're receiving this email because of your account on gitlab.common-lisp.net.