Raymond Toy pushed to branch master at cmucl / cmucl
Commits:
-
449f8ec1
by Raymond Toy at 2020-08-26T17:09:13-07:00
-
38372fd9
by Raymond Toy at 2020-08-26T17:14:30-07:00
-
d51dabf0
by Raymond Toy at 2020-08-26T23:21:23-07:00
-
4b80a6e5
by Raymond Toy at 2020-08-26T23:26:12-07:00
-
a95db7ba
by Raymond Toy at 2020-08-26T23:30:54-07:00
-
ad3862c9
by Raymond Toy at 2020-08-26T23:34:05-07:00
-
01f8217b
by Raymond Toy at 2020-08-26T23:41:36-07:00
-
8b08b800
by Raymond Toy at 2020-08-27T20:39:07-07:00
-
e3aa51f3
by Raymond Toy at 2020-08-27T20:58:52-07:00
-
17144e16
by Raymond Toy at 2020-08-28T16:23:59-07:00
-
f923302e
by Raymond Toy at 2020-08-28T16:32:49-07:00
-
9b7c0185
by Raymond Toy at 2020-08-29T02:27:00+00:00
5 changed files:
Changes:
... | ... | @@ -12,7 +12,7 @@ linux-runner: |
12 | 12 |
- mkdir snapshot
|
13 | 13 |
- (cd snapshot; tar xjf ../cmucl-$version-linux.tar.bz2; tar xjf ../cmucl-$version-linux.extra.tar.bz2)
|
14 | 14 |
script:
|
15 |
- - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
|
|
15 |
+ - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
|
|
16 | 16 |
- bin/make-dist.sh -I dist linux-4
|
17 | 17 |
- bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
|
18 | 18 |
|
... | ... | @@ -24,6 +24,6 @@ osx-runner: |
24 | 24 |
- mkdir snapshot
|
25 | 25 |
- (cd snapshot; tar xjf ../cmucl-$version-darwin.tar.bz2)
|
26 | 26 |
script:
|
27 |
- - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
|
|
27 |
+ - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
|
|
28 | 28 |
- bin/make-dist.sh -I dist darwin-4
|
29 | 29 |
- bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
|
... | ... | @@ -3,7 +3,7 @@ include Config.x86_common |
3 | 3 |
|
4 | 4 |
# gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't
|
5 | 5 |
# produce a working lisp with -O2. Just use -O1.
|
6 |
-COPT = -O1
|
|
6 |
+COPT = -O2
|
|
7 | 7 |
CFLAGS += $(COPT)
|
8 | 8 |
CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE
|
9 | 9 |
CFLAGS += -rdynamic -march=pentium4 -mfpmath=sse -mtune=generic
|
... | ... | @@ -8416,6 +8416,7 @@ char * |
8416 | 8416 |
alloc(int nbytes)
|
8417 | 8417 |
{
|
8418 | 8418 |
void *new_obj;
|
8419 |
+ |
|
8419 | 8420 |
#if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
|
8420 | 8421 |
/*
|
8421 | 8422 |
* *current-region-free-pointer* is the same as alloc-tn (=
|
... | ... | @@ -8442,20 +8443,6 @@ alloc(int nbytes) |
8442 | 8443 |
set_current_region_free((lispobj) new_free_pointer);
|
8443 | 8444 |
break;
|
8444 | 8445 |
} else if (bytes_allocated <= auto_gc_trigger) {
|
8445 |
-#if defined(i386) || defined(__x86_64)
|
|
8446 |
- /*
|
|
8447 |
- * Need to save and restore the FPU registers on x86, but only for
|
|
8448 |
- * sse2. See Ticket #61.
|
|
8449 |
- *
|
|
8450 |
- * Not needed by sparc or ppc because we never call alloc from
|
|
8451 |
- * Lisp directly to do allocation.
|
|
8452 |
- */
|
|
8453 |
- FPU_STATE(fpu_state);
|
|
8454 |
- |
|
8455 |
- if (fpu_mode == SSE2) {
|
|
8456 |
- save_fpu_state(fpu_state);
|
|
8457 |
- }
|
|
8458 |
-#endif
|
|
8459 | 8446 |
/* Call gc_alloc. */
|
8460 | 8447 |
boxed_region.free_pointer = (void *) get_current_region_free();
|
8461 | 8448 |
boxed_region.end_addr =
|
... | ... | @@ -8466,11 +8453,6 @@ alloc(int nbytes) |
8466 | 8453 |
set_current_region_free((lispobj) boxed_region.free_pointer);
|
8467 | 8454 |
set_current_region_end((lispobj) boxed_region.end_addr);
|
8468 | 8455 |
|
8469 |
-#if defined(i386) || defined(__x86_64)
|
|
8470 |
- if (fpu_mode == SSE2) {
|
|
8471 |
- restore_fpu_state(fpu_state);
|
|
8472 |
- }
|
|
8473 |
-#endif
|
|
8474 | 8456 |
break;
|
8475 | 8457 |
} else {
|
8476 | 8458 |
/* Run GC and try again. */
|
... | ... | @@ -17,16 +17,14 @@ extern boolean os_support_sse2(void); |
17 | 17 |
#define FPU_STATE_SIZE 27
|
18 | 18 |
|
19 | 19 |
/*
|
20 |
- * Need 512 byte area, aligned on a 16-byte boundary. So allocate
|
|
21 |
- * 512+16 bytes of space and let the routine adjust the appropriate
|
|
22 |
- * alignment.
|
|
20 |
+ * Need 512 byte area, aligned on a 16-byte boundary.
|
|
23 | 21 |
*/
|
24 |
-#define SSE_STATE_SIZE ((512+16)/4)
|
|
22 |
+#define SSE_STATE_SIZE 512
|
|
25 | 23 |
|
26 | 24 |
/*
|
27 | 25 |
* Just use the SSE size for both x87 and sse2 since the SSE size is
|
28 |
- * enough for either.
|
|
26 |
+ * enough for either. Make sure it's on a 16-byte boundary.
|
|
29 | 27 |
*/
|
30 |
-#define FPU_STATE(name) int name[SSE_STATE_SIZE];
|
|
28 |
+#define FPU_STATE(name) u_int8_t name[SSE_STATE_SIZE] __attribute__((aligned(16)))
|
|
31 | 29 |
|
32 | 30 |
#endif
|
... | ... | @@ -382,7 +382,39 @@ ENDFUNC(fastcopy16) |
382 | 382 |
* %eax = address
|
383 | 383 |
*/
|
384 | 384 |
FUNCDEF(alloc_overflow_sse2)
|
385 |
- STACK_PROLOGUE(20)
|
|
385 |
+ # Need 8*16 bytes for the xmm registers, and space to save ecx
|
|
386 |
+ # and edx, space for mxcsr, a temp, and one arg to pass to alloc.
|
|
387 |
+ # That's 8*16 + 5*4 = 148 bytes. Might as well have a few
|
|
388 |
+ # more so the xmm0 area is 16-byte aligned. That makes it 160
|
|
389 |
+ # bytes.
|
|
390 |
+ #
|
|
391 |
+ # Stack looks like:
|
|
392 |
+ #
|
|
393 |
+ # +160
|
|
394 |
+ # +144 -> xmm7
|
|
395 |
+ # +128 -> xmm6
|
|
396 |
+ # +112 -> xmm5
|
|
397 |
+ # +96 -> xmm4
|
|
398 |
+ # +80 -> xmm3
|
|
399 |
+ # +64 -> xmm2
|
|
400 |
+ # +48 -> xmm1
|
|
401 |
+ # +32 -> xmm0
|
|
402 |
+ # +20 -> unused
|
|
403 |
+ # +16 -> temp
|
|
404 |
+ # +12 -> mxcsr
|
|
405 |
+ # + 8 -> save ecx
|
|
406 |
+ # + 4 -> save edx
|
|
407 |
+ # esp + 0 -> arg for alloc
|
|
408 |
+ STACK_PROLOGUE(160)
|
|
409 |
+ movapd %xmm0, (32 + 0*16)(%esp)
|
|
410 |
+ movapd %xmm1, (32 + 1*16)(%esp)
|
|
411 |
+ movapd %xmm2, (32 + 2*16)(%esp)
|
|
412 |
+ movapd %xmm3, (32 + 3*16)(%esp)
|
|
413 |
+ movapd %xmm4, (32 + 4*16)(%esp)
|
|
414 |
+ movapd %xmm5, (32 + 5*16)(%esp)
|
|
415 |
+ movapd %xmm6, (32 + 6*16)(%esp)
|
|
416 |
+ movapd %xmm7, (32 + 7*16)(%esp)
|
|
417 |
+ |
|
386 | 418 |
movl %ecx, 8(%esp) # Save ecx and edx registers
|
387 | 419 |
movl %edx, 4(%esp)
|
388 | 420 |
stmxcsr 12(%esp) # Save MXCSR
|
... | ... | @@ -398,10 +430,20 @@ FUNCDEF(alloc_overflow_sse2) |
398 | 430 |
movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
|
399 | 431 |
movl 8(%esp), %ecx
|
400 | 432 |
ldmxcsr 12(%esp)
|
433 |
+ |
|
434 |
+ movapd (32 + 0*16)(%esp), %xmm0
|
|
435 |
+ movapd (32 + 1*16)(%esp), %xmm1
|
|
436 |
+ movapd (32 + 2*16)(%esp), %xmm2
|
|
437 |
+ movapd (32 + 3*16)(%esp), %xmm3
|
|
438 |
+ movapd (32 + 4*16)(%esp), %xmm4
|
|
439 |
+ movapd (32 + 5*16)(%esp), %xmm5
|
|
440 |
+ movapd (32 + 6*16)(%esp), %xmm6
|
|
441 |
+ movapd (32 + 7*16)(%esp), %xmm7
|
|
442 |
+ |
|
401 | 443 |
STACK_EPILOGUE
|
402 | 444 |
ret
|
403 | 445 |
ENDFUNC(alloc_overflow_sse2)
|
404 |
-
|
|
446 |
+ |
|
405 | 447 |
#ifdef LINKAGE_TABLE
|
406 | 448 |
|
407 | 449 |
/* Call into C code to resolve a linkage entry. The initial code in the
|