Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl
Commits:
-
8b08b800
by Raymond Toy at 2020-08-27T20:39:07-07:00
2 changed files:
Changes:
... | ... | @@ -8412,28 +8412,11 @@ gencgc_pickup_dynamic(void) |
8412 | 8412 |
|
8413 | 8413 |
void do_pending_interrupt(void);
|
8414 | 8414 |
|
8415 |
-//#pragma GCC optimize ("-O1")
|
|
8416 | 8415 |
char *
|
8417 | 8416 |
alloc(int nbytes)
|
8418 | 8417 |
{
|
8419 |
-#if (defined(i386) || defined(__x86_64))
|
|
8420 |
- /*
|
|
8421 |
- * Need to save and restore the FPU registers on x86, but only for
|
|
8422 |
- * sse2. See Trac ticket #61
|
|
8423 |
- * (https://trac.common-lisp.net/cmucl/ticket/61) and gitlab
|
|
8424 |
- * ticket #86
|
|
8425 |
- * (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/86).
|
|
8426 |
- *
|
|
8427 |
- * Not needed by sparc or ppc because we never call alloc from
|
|
8428 |
- * Lisp directly to do allocation.
|
|
8429 |
- */
|
|
8430 |
- FPU_STATE(fpu_state);
|
|
8431 |
- |
|
8432 |
- if (fpu_mode == SSE2) {
|
|
8433 |
- save_fpu_state(fpu_state);
|
|
8434 |
- }
|
|
8435 |
-#endif
|
|
8436 | 8418 |
void *new_obj;
|
8419 |
+ |
|
8437 | 8420 |
#if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
|
8438 | 8421 |
/*
|
8439 | 8422 |
* *current-region-free-pointer* is the same as alloc-tn (=
|
... | ... | @@ -8483,14 +8466,8 @@ alloc(int nbytes) |
8483 | 8466 |
}
|
8484 | 8467 |
}
|
8485 | 8468 |
|
8486 |
-#if (defined(i386) || defined(__x86_64))
|
|
8487 |
- if (fpu_mode == SSE2) {
|
|
8488 |
- restore_fpu_state(fpu_state);
|
|
8489 |
- }
|
|
8490 |
-#endif
|
|
8491 | 8469 |
return new_obj;
|
8492 | 8470 |
}
|
8493 |
-#pragma GCC optimize ("-O2")
|
|
8494 | 8471 |
|
8495 | 8472 |
char *
|
8496 | 8473 |
alloc_pseudo_atomic(int nbytes)
|
... | ... | @@ -381,6 +381,7 @@ ENDFUNC(fastcopy16) |
381 | 381 |
* On exit:
|
382 | 382 |
* %eax = address
|
383 | 383 |
*/
|
384 |
+#if 0
|
|
384 | 385 |
FUNCDEF(alloc_overflow_sse2)
|
385 | 386 |
STACK_PROLOGUE(20)
|
386 | 387 |
movl %ecx, 8(%esp) # Save ecx and edx registers
|
... | ... | @@ -401,7 +402,49 @@ FUNCDEF(alloc_overflow_sse2) |
401 | 402 |
STACK_EPILOGUE
|
402 | 403 |
ret
|
403 | 404 |
ENDFUNC(alloc_overflow_sse2)
|
404 |
-
|
|
405 |
+#else
|
|
406 |
+FUNCDEF(alloc_overflow_sse2)
|
|
407 |
+ # Need 512 bytes for the fpu save area, space to save ecx and edx,
|
|
408 |
+ # space for mxcsr, a temp, and one arg to pass to alloc. That's
|
|
409 |
+ # 512 + 20. But the save area needs to be 16-byte aligned, so
|
|
410 |
+ # allocate 512 + 32 bytes (20 rounded up to a multiple of 16). The fpu area will be at offset 32.
|
|
411 |
+ #
|
|
412 |
+ # Stack looks like:
|
|
413 |
+ #
|
|
414 |
+ # +544 -> end
|
|
415 |
+ # +32 -> fpu save
|
|
416 |
+ # +20 -> unused
|
|
417 |
+ # +16 -> temp
|
|
418 |
+ # +12 -> mxcsr
|
|
419 |
+ # + 8 -> save ecx
|
|
420 |
+ # + 4 -> save edx
|
|
421 |
+ # esp + 0 -> arg for alloc
|
|
422 |
+ STACK_PROLOGUE(32+512)
|
|
423 |
+ movl %ecx, 8(%esp) # Save ecx and edx registers
|
|
424 |
+ movl %edx, 4(%esp)
|
|
425 |
+ stmxcsr 12(%esp) # Save MXCSR
|
|
426 |
+ /* Clear the exceptions that might have occurred */
|
|
427 |
+ movl 12(%esp), %edx
|
|
428 |
+ and $-64, %edx # Clear the exception flags (low 6 bits of MXCSR)
|
|
429 |
+ movl %edx, 16(%esp)
|
|
430 |
+ ldmxcsr 16(%esp) # Get new mxcsr value
|
|
431 |
+ movl %eax, (%esp) # Put size on stack for first arg to alloc()
|
|
432 |
+ |
|
433 |
+ # Save all FPU regs because we don't know what's in use by Lisp.
|
|
434 |
+ fxsave 32(%esp)
|
|
435 |
+
|
|
436 |
+ call GNAME(alloc)
|
|
437 |
+ |
|
438 |
+ fxrstor 32(%esp)
|
|
439 |
+ |
|
440 |
+ movl 4(%esp), %edx # Restore edx and ecx registers. eax has the return value.
|
|
441 |
+ movl 8(%esp), %ecx
|
|
442 |
+ ldmxcsr 12(%esp)
|
|
443 |
+ STACK_EPILOGUE
|
|
444 |
+ ret
|
|
445 |
+ENDFUNC(alloc_overflow_sse2)
|
|
446 |
+#endif
|
|
447 |
+ |
|
405 | 448 |
#ifdef LINKAGE_TABLE
|
406 | 449 |
|
407 | 450 |
/* Call into C code to resolve a linkage entry. The initial code in the
|