Raymond Toy pushed to branch issue-86-save-fpu-state-on-entry-to-alloc at cmucl / cmucl

Commits:

1 changed file:

Changes:

  • src/lisp/x86-assem.S
    ... ... @@ -404,22 +404,39 @@ FUNCDEF(alloc_overflow_sse2)
    404 404
     ENDFUNC(alloc_overflow_sse2)	
    
    405 405
     #else
    
    406 406
     FUNCDEF(alloc_overflow_sse2)
    
    407
    -	# Need 512 bytes for the fpu save area, space to save ecx and edx, 
    
    408
    -	# space for mxcsr, a temp, and one arg to pass to alloc.  That's 
    
    409
    -	# 512 + 20.  But the save area needs to be 16-byte aligned, so
    
    410
    -	# allocate 512 + 32 bytes.  The fpu area will be at offset 32.
    
    407
    +	# Need 8*16 bytes for the xmm registers, and space to save ecx
    
    408
    +	# and edx, space for mxcsr, a temp, and one arg to pass to alloc.
    
    409
    +	# That's 8*16 + 5*4 = 148 bytes.  Might as well have a few
    
    410
    +	# more so the xmm0 area is 16-byte aligned. That makes it 160
    
    411
    +	# bytes.
    
    411 412
     	#
    
    412 413
     	# Stack looks like:
    
    413 414
     	#
    
    414
    -	#      +544 -> end
    
    415
    -	#      +32  -> fpu save
    
    416
    -        #      +20  -> unused
    
    415
    +	#      +160 -> end
    
    416
    +	#      +144 -> xmm7
    
    417
    +	#      +128 -> xmm6
    
    418
    +	#      +112 -> xmm5
    
    419
    +	#      +96  -> xmm4
    
    420
    +	#      +80  -> xmm3
    
    421
    +	#      +64  -> xmm2
    
    422
    +	#      +48  -> xmm1
    
    423
    +	#      +32  -> xmm0
    
    424
    +	#      +20  -> unused
    
    417 425
     	#      +16  -> temp
    
    418 426
     	#      +12  -> mxcsr
    
    419 427
     	#      + 8  -> save ecx
    
    420 428
     	#      + 4  -> save edx
    
    421 429
     	#  esp + 0  -> arg for alloc
    
    422
    -	STACK_PROLOGUE(32+512)
    
    430
    +	STACK_PROLOGUE(160)
    
    431
    +	movapd  %xmm0, (32 + 0*16)(%esp)
    
    432
    +	movapd  %xmm1, (32 + 1*16)(%esp)
    
    433
    +	movapd  %xmm2, (32 + 2*16)(%esp)
    
    434
    +	movapd  %xmm3, (32 + 3*16)(%esp)
    
    435
    +	movapd  %xmm4, (32 + 4*16)(%esp)
    
    436
    +	movapd  %xmm5, (32 + 5*16)(%esp)
    
    437
    +	movapd  %xmm6, (32 + 6*16)(%esp)
    
    438
    +	movapd  %xmm7, (32 + 7*16)(%esp)
    
    439
    +
    
    423 440
     	movl	%ecx, 8(%esp)	# Save ecx and edx registers
    
    424 441
     	movl	%edx, 4(%esp)
    
    425 442
     	stmxcsr 12(%esp)	# Save MXCSR
    
    ... ... @@ -430,16 +447,21 @@ FUNCDEF(alloc_overflow_sse2)
    430 447
     	ldmxcsr 16(%esp)	# Get new mxcsr value
    
    431 448
     	movl	%eax, (%esp)	# Put size on stack for first arg to alloc()
    
    432 449
     
    
    433
    -	# Save all FPU regs because we don't know what's in use by lisp.
    
    434
    -	fxsave	32(%esp)
    
    435
    -	
    
    436 450
     	call	GNAME(alloc)
    
    437 451
     
    
    438
    -	fxrstor 32(%esp)
    
    439
    -
    
    440 452
     	movl	4(%esp), %edx	# Restore edx and ecx registers.  eax has the return value.
    
    441 453
     	movl	8(%esp), %ecx
    
    442 454
     	ldmxcsr	12(%esp)
    
    455
    +
    
    456
    +	movapd  (32 + 0*16)(%esp), %xmm0
    
    457
    +	movapd  (32 + 1*16)(%esp), %xmm1
    
    458
    +	movapd  (32 + 2*16)(%esp), %xmm2
    
    459
    +	movapd  (32 + 3*16)(%esp), %xmm3
    
    460
    +	movapd  (32 + 4*16)(%esp), %xmm4
    
    461
    +	movapd  (32 + 5*16)(%esp), %xmm5
    
    462
    +	movapd  (32 + 6*16)(%esp), %xmm6
    
    463
    +	movapd  (32 + 7*16)(%esp), %xmm7
    
    464
    +
    
    443 465
     	STACK_EPILOGUE
    
    444 466
     	ret
    
    445 467
     ENDFUNC(alloc_overflow_sse2)