Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
-
449f8ec1
by Raymond Toy at 2020-08-26T17:09:13-07:00
-
38372fd9
by Raymond Toy at 2020-08-26T17:14:30-07:00
-
d51dabf0
by Raymond Toy at 2020-08-26T23:21:23-07:00
-
4b80a6e5
by Raymond Toy at 2020-08-26T23:26:12-07:00
-
a95db7ba
by Raymond Toy at 2020-08-26T23:30:54-07:00
-
ad3862c9
by Raymond Toy at 2020-08-26T23:34:05-07:00
-
01f8217b
by Raymond Toy at 2020-08-26T23:41:36-07:00
-
8b08b800
by Raymond Toy at 2020-08-27T20:39:07-07:00
-
e3aa51f3
by Raymond Toy at 2020-08-27T20:58:52-07:00
-
17144e16
by Raymond Toy at 2020-08-28T16:23:59-07:00
-
f923302e
by Raymond Toy at 2020-08-28T16:32:49-07:00
-
9b7c0185
by Raymond Toy at 2020-08-29T02:27:00+00:00
5 changed files:
Changes:
| ... | ... | @@ -12,7 +12,7 @@ linux-runner: |
| 12 | 12 |
- mkdir snapshot
|
| 13 | 13 |
- (cd snapshot; tar xjf ../cmucl-$version-linux.tar.bz2; tar xjf ../cmucl-$version-linux.extra.tar.bz2)
|
| 14 | 14 |
script:
|
| 15 |
- - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
|
|
| 15 |
+ - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
|
|
| 16 | 16 |
- bin/make-dist.sh -I dist linux-4
|
| 17 | 17 |
- bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
|
| 18 | 18 |
|
| ... | ... | @@ -24,6 +24,6 @@ osx-runner: |
| 24 | 24 |
- mkdir snapshot
|
| 25 | 25 |
- (cd snapshot; tar xjf ../cmucl-$version-darwin.tar.bz2)
|
| 26 | 26 |
script:
|
| 27 |
- - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
|
|
| 27 |
+ - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
|
|
| 28 | 28 |
- bin/make-dist.sh -I dist darwin-4
|
| 29 | 29 |
- bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
|
| ... | ... | @@ -3,7 +3,7 @@ include Config.x86_common |
| 3 | 3 |
|
| 4 | 4 |
# gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't
|
| 5 | 5 |
# produce a working lisp with -O2. Just use -O1.
|
| 6 |
-COPT = -O1
|
|
| 6 |
+COPT = -O2
|
|
| 7 | 7 |
CFLAGS += $(COPT)
|
| 8 | 8 |
CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE
|
| 9 | 9 |
CFLAGS += -rdynamic -march=pentium4 -mfpmath=sse -mtune=generic
|
| ... | ... | @@ -8416,6 +8416,7 @@ char * |
| 8416 | 8416 |
alloc(int nbytes)
|
| 8417 | 8417 |
{
|
| 8418 | 8418 |
void *new_obj;
|
| 8419 |
+ |
|
| 8419 | 8420 |
#if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
|
| 8420 | 8421 |
/*
|
| 8421 | 8422 |
* *current-region-free-pointer* is the same as alloc-tn (=
|
| ... | ... | @@ -8442,20 +8443,6 @@ alloc(int nbytes) |
| 8442 | 8443 |
set_current_region_free((lispobj) new_free_pointer);
|
| 8443 | 8444 |
break;
|
| 8444 | 8445 |
} else if (bytes_allocated <= auto_gc_trigger) {
|
| 8445 |
-#if defined(i386) || defined(__x86_64)
|
|
| 8446 |
- /*
|
|
| 8447 |
- * Need to save and restore the FPU registers on x86, but only for
|
|
| 8448 |
- * sse2. See Ticket #61.
|
|
| 8449 |
- *
|
|
| 8450 |
- * Not needed by sparc or ppc because we never call alloc from
|
|
| 8451 |
- * Lisp directly to do allocation.
|
|
| 8452 |
- */
|
|
| 8453 |
- FPU_STATE(fpu_state);
|
|
| 8454 |
- |
|
| 8455 |
- if (fpu_mode == SSE2) {
|
|
| 8456 |
- save_fpu_state(fpu_state);
|
|
| 8457 |
- }
|
|
| 8458 |
-#endif
|
|
| 8459 | 8446 |
/* Call gc_alloc. */
|
| 8460 | 8447 |
boxed_region.free_pointer = (void *) get_current_region_free();
|
| 8461 | 8448 |
boxed_region.end_addr =
|
| ... | ... | @@ -8466,11 +8453,6 @@ alloc(int nbytes) |
| 8466 | 8453 |
set_current_region_free((lispobj) boxed_region.free_pointer);
|
| 8467 | 8454 |
set_current_region_end((lispobj) boxed_region.end_addr);
|
| 8468 | 8455 |
|
| 8469 |
-#if defined(i386) || defined(__x86_64)
|
|
| 8470 |
- if (fpu_mode == SSE2) {
|
|
| 8471 |
- restore_fpu_state(fpu_state);
|
|
| 8472 |
- }
|
|
| 8473 |
-#endif
|
|
| 8474 | 8456 |
break;
|
| 8475 | 8457 |
} else {
|
| 8476 | 8458 |
/* Run GC and try again. */
|
| ... | ... | @@ -17,16 +17,14 @@ extern boolean os_support_sse2(void); |
| 17 | 17 |
#define FPU_STATE_SIZE 27
|
| 18 | 18 |
|
| 19 | 19 |
/*
|
| 20 |
- * Need 512 byte area, aligned on a 16-byte boundary. So allocate
|
|
| 21 |
- * 512+16 bytes of space and let the routine adjust the appropriate
|
|
| 22 |
- * alignment.
|
|
| 20 |
+ * Need 512 byte area, aligned on a 16-byte boundary.
|
|
| 23 | 21 |
*/
|
| 24 |
-#define SSE_STATE_SIZE ((512+16)/4)
|
|
| 22 |
+#define SSE_STATE_SIZE 512
|
|
| 25 | 23 |
|
| 26 | 24 |
/*
|
| 27 | 25 |
* Just use the SSE size for both x87 and sse2 since the SSE size is
|
| 28 |
- * enough for either.
|
|
| 26 |
+ * enough for either. Make sure it's on a 16-byte boundary.
|
|
| 29 | 27 |
*/
|
| 30 |
-#define FPU_STATE(name) int name[SSE_STATE_SIZE];
|
|
| 28 |
+#define FPU_STATE(name) u_int8_t name[SSE_STATE_SIZE] __attribute__((aligned(16)))
|
|
| 31 | 29 |
|
| 32 | 30 |
#endif
|
| ... | ... | @@ -382,7 +382,39 @@ ENDFUNC(fastcopy16) |
| 382 | 382 |
* %eax = address
|
| 383 | 383 |
*/
|
| 384 | 384 |
FUNCDEF(alloc_overflow_sse2)
|
| 385 |
- STACK_PROLOGUE(20)
|
|
| 385 |
+ # Need 8*16 bytes for the xmm registers, and space to save ecx
|
|
| 386 |
+ # and edx, space for mxcsr, a temp, and one arg to pass to alloc.
|
|
| 387 |
+ # That's 8*16 + 5*4 = 148 bytes. Might as well have a few
|
|
| 388 |
+ # more so the xmm0 area is 16-byte aligned. That makes it 160
|
|
| 389 |
+ # bytes.
|
|
| 390 |
+ #
|
|
| 391 |
+ # Stack looks like:
|
|
| 392 |
+ #
|
|
| 393 |
+ # +160
|
|
| 394 |
+ # +144 -> xmm7
|
|
| 395 |
+ # +128 -> xmm6
|
|
| 396 |
+ # +112 -> xmm5
|
|
| 397 |
+ # +96 -> xmm4
|
|
| 398 |
+ # +80 -> xmm3
|
|
| 399 |
+ # +64 -> xmm2
|
|
| 400 |
+ # +48 -> xmm1
|
|
| 401 |
+ # +32 -> xmm0
|
|
| 402 |
+ # +20 -> unused
|
|
| 403 |
+ # +16 -> temp
|
|
| 404 |
+ # +12 -> mxcsr
|
|
| 405 |
+ # + 8 -> save ecx
|
|
| 406 |
+ # + 4 -> save edx
|
|
| 407 |
+ # esp + 0 -> arg for alloc
|
|
| 408 |
+ STACK_PROLOGUE(160)
|
|
| 409 |
+ movapd %xmm0, (32 + 0*16)(%esp)
|
|
| 410 |
+ movapd %xmm1, (32 + 1*16)(%esp)
|
|
| 411 |
+ movapd %xmm2, (32 + 2*16)(%esp)
|
|
| 412 |
+ movapd %xmm3, (32 + 3*16)(%esp)
|
|
| 413 |
+ movapd %xmm4, (32 + 4*16)(%esp)
|
|
| 414 |
+ movapd %xmm5, (32 + 5*16)(%esp)
|
|
| 415 |
+ movapd %xmm6, (32 + 6*16)(%esp)
|
|
| 416 |
+ movapd %xmm7, (32 + 7*16)(%esp)
|
|
| 417 |
+ |
|
| 386 | 418 |
movl %ecx, 8(%esp) # Save ecx and edx registers
|
| 387 | 419 |
movl %edx, 4(%esp)
|
| 388 | 420 |
stmxcsr 12(%esp) # Save MXCSR
|
| ... | ... | @@ -398,10 +430,20 @@ FUNCDEF(alloc_overflow_sse2) |
| 398 | 430 |
movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
|
| 399 | 431 |
movl 8(%esp), %ecx
|
| 400 | 432 |
ldmxcsr 12(%esp)
|
| 433 |
+ |
|
| 434 |
+ movapd (32 + 0*16)(%esp), %xmm0
|
|
| 435 |
+ movapd (32 + 1*16)(%esp), %xmm1
|
|
| 436 |
+ movapd (32 + 2*16)(%esp), %xmm2
|
|
| 437 |
+ movapd (32 + 3*16)(%esp), %xmm3
|
|
| 438 |
+ movapd (32 + 4*16)(%esp), %xmm4
|
|
| 439 |
+ movapd (32 + 5*16)(%esp), %xmm5
|
|
| 440 |
+ movapd (32 + 6*16)(%esp), %xmm6
|
|
| 441 |
+ movapd (32 + 7*16)(%esp), %xmm7
|
|
| 442 |
+ |
|
| 401 | 443 |
STACK_EPILOGUE
|
| 402 | 444 |
ret
|
| 403 | 445 |
ENDFUNC(alloc_overflow_sse2)
|
| 404 |
-
|
|
| 446 |
+ |
|
| 405 | 447 |
#ifdef LINKAGE_TABLE
|
| 406 | 448 |
|
| 407 | 449 |
/* Call into C code to resolve a linkage entry. The initial code in the
|