Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl

Commits:

2 changed files:

Changes:

  • src/lisp/gencgc.c
    ... ... @@ -8412,28 +8412,11 @@ gencgc_pickup_dynamic(void)
    8412 8412
     
    
    8413 8413
     void do_pending_interrupt(void);
    
    8414 8414
     
    
    8415
    -//#pragma GCC optimize ("-O1")
    
    8416 8415
     char *
    
    8417 8416
     alloc(int nbytes)
    
    8418 8417
     {
    
    8419
    -#if (defined(i386) || defined(__x86_64))
    
    8420
    -    /*
    
    8421
    -     * Need to save and restore the FPU registers on x86, but only for
    
    8422
    -     * sse2.  See Trac ticket #61
    
    8423
    -     * (https://trac.common-lisp.net/cmucl/ticket/61) and gitlab
    
    8424
    -     * ticket #86
    
    8425
    -     * (https://gitlab.common-lisp.net/cmucl/cmucl/-/issues/86).
    
    8426
    -     *
    
    8427
    -     * Not needed by sparc or ppc because we never call alloc from
    
    8428
    -     * Lisp directly to do allocation.
    
    8429
    -     */
    
    8430
    -    FPU_STATE(fpu_state);
    
    8431
    -
    
    8432
    -    if (fpu_mode == SSE2) {
    
    8433
    -        save_fpu_state(fpu_state);
    
    8434
    -    }
    
    8435
    -#endif
    
    8436 8418
         void *new_obj;
    
    8419
    +
    
    8437 8420
     #if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
    
    8438 8421
         /*
    
    8439 8422
          * *current-region-free-pointer* is the same as alloc-tn (=
    
    ... ... @@ -8483,14 +8466,8 @@ alloc(int nbytes)
    8483 8466
     	}
    
    8484 8467
         }
    
    8485 8468
     
    
    8486
    -#if (defined(i386) || defined(__x86_64))
    
    8487
    -    if (fpu_mode == SSE2) {
    
    8488
    -        restore_fpu_state(fpu_state);
    
    8489
    -    }
    
    8490
    -#endif
    
    8491 8469
         return new_obj;
    
    8492 8470
     }
    
    8493
    -#pragma GCC optimize ("-O2")
    
    8494 8471
     
    
    8495 8472
     char *
    
    8496 8473
     alloc_pseudo_atomic(int nbytes)
    

  • src/lisp/x86-assem.S
    ... ... @@ -381,6 +381,7 @@ ENDFUNC(fastcopy16)
    381 381
      * On exit:
    
    382 382
      * %eax = address
    
    383 383
      */
    
    384
    +#if 0
    
    384 385
     FUNCDEF(alloc_overflow_sse2)
    
    385 386
     	STACK_PROLOGUE(20)
    
    386 387
     	movl	%ecx, 8(%esp)	# Save ecx and edx registers
    
    ... ... @@ -401,7 +402,49 @@ FUNCDEF(alloc_overflow_sse2)
    401 402
     	STACK_EPILOGUE
    
    402 403
     	ret
    
    403 404
     ENDFUNC(alloc_overflow_sse2)	
    
    404
    -		
    
    405
    +#else
    
    406
    +FUNCDEF(alloc_overflow_sse2)
    
    407
    +	# Need 512 bytes for the fpu save area, space to save ecx and edx, 
    
    408
    +	# space for mxcsr, a temp, and one arg to pass to alloc.  That's 
    
    409
    +	# 512 + 20.  But the save area needs to be 16-byte aligned, so
    
    410
    +	# allocate 512 + 32 bytes.  The fpu area will be at offset 32.
    
    411
    +	#
    
    412
    +	# Stack looks like:
    
    413
    +	#
    
    414
    +	#      +544 -> end
    
    415
    +	#      +32  -> fpu save
    
    416
    +        #      +20  -> unused
    
    417
    +	#      +16  -> temp
    
    418
    +	#      +12  -> mxcsr
    
    419
    +	#      + 8  -> save ecx
    
    420
    +	#      + 4  -> save edx
    
    421
    +	#  esp + 0  -> arg for alloc
    
    422
    +	STACK_PROLOGUE(32+512)
    
    423
    +	movl	%ecx, 8(%esp)	# Save ecx and edx registers
    
    424
    +	movl	%edx, 4(%esp)
    
    425
    +	stmxcsr 12(%esp)	# Save MXCSR
    
    426
    +	/* Clear the exceptions that might have occurred */
    
    427
    +	movl	12(%esp), %edx
    
    428
    +	and	$-64, %edx	# Clear the exceptions
    
    429
    +	movl	%edx, 16(%esp)
    
    430
    +	ldmxcsr 16(%esp)	# Get new mxcsr value
    
    431
    +	movl	%eax, (%esp)	# Put size on stack for first arg to alloc()
    
    432
    +
    
    433
    +	# Save all FPU regs because we don't know what's in use by lisp.
    
    434
    +	fxsave	32(%esp)
    
    435
    +	
    
    436
    +	call	GNAME(alloc)
    
    437
    +
    
    438
    +	fxrstor 32(%esp)
    
    439
    +
    
    440
    +	movl	4(%esp), %edx	# Restore edx and ecx registers.  eax has the return value.
    
    441
    +	movl	8(%esp), %ecx
    
    442
    +	ldmxcsr	12(%esp)
    
    443
    +	STACK_EPILOGUE
    
    444
    +	ret
    
    445
    +ENDFUNC(alloc_overflow_sse2)	
    
    446
    +#endif		
    
    447
    +
    
    405 448
     #ifdef LINKAGE_TABLE
    
    406 449
     
    
    407 450
     /* Call into C code to resolve a linkage entry.  The initial code in the