Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
-
8b08b800
by Raymond Toy at 2020-08-27T20:39:07-07:00
2 changed files:
Changes:
| ... | ... | @@ -8412,28 +8412,11 @@ gencgc_pickup_dynamic(void) |
| 8412 | 8412 |
|
| 8413 | 8413 |
void do_pending_interrupt(void);
|
| 8414 | 8414 |
|
| 8415 |
-//#pragma GCC optimize ("-O1")
|
|
| 8416 | 8415 |
char *
|
| 8417 | 8416 |
alloc(int nbytes)
|
| 8418 | 8417 |
{
|
| 8419 |
-#if (defined(i386) || defined(__x86_64))
|
|
| 8420 |
- /*
|
|
| 8421 |
- * Need to save and restore the FPU registers on x86, but only for
|
|
| 8422 |
- * sse2. See Trac ticket #61
|
|
| 8423 |
- * (https://trac.common-lisp.net/cmucl/ticket/61) and gitlab
|
|
| 8424 |
- * ticket #86
|
|
| 8425 |
- * (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/86).
|
|
| 8426 |
- *
|
|
| 8427 |
- * Not needed by sparc or ppc because we never call alloc from
|
|
| 8428 |
- * Lisp directly to do allocation.
|
|
| 8429 |
- */
|
|
| 8430 |
- FPU_STATE(fpu_state);
|
|
| 8431 |
- |
|
| 8432 |
- if (fpu_mode == SSE2) {
|
|
| 8433 |
- save_fpu_state(fpu_state);
|
|
| 8434 |
- }
|
|
| 8435 |
-#endif
|
|
| 8436 | 8418 |
void *new_obj;
|
| 8419 |
+ |
|
| 8437 | 8420 |
#if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
|
| 8438 | 8421 |
/*
|
| 8439 | 8422 |
* *current-region-free-pointer* is the same as alloc-tn (=
|
| ... | ... | @@ -8483,14 +8466,8 @@ alloc(int nbytes) |
| 8483 | 8466 |
}
|
| 8484 | 8467 |
}
|
| 8485 | 8468 |
|
| 8486 |
-#if (defined(i386) || defined(__x86_64))
|
|
| 8487 |
- if (fpu_mode == SSE2) {
|
|
| 8488 |
- restore_fpu_state(fpu_state);
|
|
| 8489 |
- }
|
|
| 8490 |
-#endif
|
|
| 8491 | 8469 |
return new_obj;
|
| 8492 | 8470 |
}
|
| 8493 |
-#pragma GCC optimize ("-O2")
|
|
| 8494 | 8471 |
|
| 8495 | 8472 |
char *
|
| 8496 | 8473 |
alloc_pseudo_atomic(int nbytes)
|
| ... | ... | @@ -381,6 +381,7 @@ ENDFUNC(fastcopy16) |
| 381 | 381 |
* On exit:
|
| 382 | 382 |
* %eax = address
|
| 383 | 383 |
*/
|
| 384 |
+#if 0
|
|
| 384 | 385 |
FUNCDEF(alloc_overflow_sse2)
|
| 385 | 386 |
STACK_PROLOGUE(20)
|
| 386 | 387 |
movl %ecx, 8(%esp) # Save ecx and edx registers
|
| ... | ... | @@ -401,7 +402,49 @@ FUNCDEF(alloc_overflow_sse2) |
| 401 | 402 |
STACK_EPILOGUE
|
| 402 | 403 |
ret
|
| 403 | 404 |
ENDFUNC(alloc_overflow_sse2)
|
| 404 |
-
|
|
| 405 |
+#else
|
|
| 406 |
+FUNCDEF(alloc_overflow_sse2)
|
|
| 407 |
+ # Need 512 bytes for the fpu save area, space to save ecx and edx,
|
|
| 408 |
+ # space for mxcsr, a temp, and one arg to pass to alloc. That's
|
|
| 409 |
+ # 512 + 20. But the save area needs to be 16-byte aligned, so
|
|
| 410 |
+ # allocate 512 + 32 bytes. The fpu area will be at offset 32 (16-byte aligned only if %esp is 16-byte aligned after STACK_PROLOGUE -- TODO confirm).
|
|
| 411 |
+ #
|
|
| 412 |
+ # Stack looks like:
|
|
| 413 |
+ #
|
|
| 414 |
+ # +544 -> end
|
|
| 415 |
+ # +32 -> fpu save
|
|
| 416 |
+ # +20 -> unused (padding so the fpu save area starts at +32)
|
|
| 417 |
+ # +16 -> temp
|
|
| 418 |
+ # +12 -> mxcsr
|
|
| 419 |
+ # + 8 -> save ecx
|
|
| 420 |
+ # + 4 -> save edx
|
|
| 421 |
+ # esp + 0 -> arg for alloc
|
|
| 422 |
+ STACK_PROLOGUE(32+512)
|
|
| 423 |
+ movl %ecx, 8(%esp) # Save ecx and edx registers
|
|
| 424 |
+ movl %edx, 4(%esp)
|
|
| 425 |
+ stmxcsr 12(%esp) # Save MXCSR
|
|
| 426 |
+ /* Clear any exception flags already set in MXCSR before calling alloc() */
|
|
| 427 |
+ movl 12(%esp), %edx
|
|
| 428 |
+ and $-64, %edx # Clear the exceptions
|
|
| 429 |
+ movl %edx, 16(%esp)
|
|
| 430 |
+ ldmxcsr 16(%esp) # Get new mxcsr value
|
|
| 431 |
+ movl %eax, (%esp) # Put size on stack for first arg to alloc()
|
|
| 432 |
+ |
|
| 433 |
+ # Save all FPU regs because we don't know what's in use by lisp.
|
|
| 434 |
+ fxsave 32(%esp)
|
|
| 435 |
+
|
|
| 436 |
+ call GNAME(alloc)
|
|
| 437 |
+ |
|
| 438 |
+ fxrstor 32(%esp)
|
|
| 439 |
+ |
|
| 440 |
+ movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
|
|
| 441 |
+ movl 8(%esp), %ecx
|
|
| 442 |
+ ldmxcsr 12(%esp)
|
|
| 443 |
+ STACK_EPILOGUE
|
|
| 444 |
+ ret
|
|
| 445 |
+ENDFUNC(alloc_overflow_sse2)
|
|
| 446 |
+#endif
|
|
| 447 |
+ |
|
| 405 | 448 |
#ifdef LINKAGE_TABLE
|
| 406 | 449 |
|
| 407 | 450 |
/* Call into C code to resolve a linkage entry. The initial code in the
|