Raymond Toy pushed to branch master at cmucl / cmucl

Commits:

5 changed files:

Changes:

  • .gitlab-ci.yml
    ... ... @@ -12,7 +12,7 @@ linux-runner:
    12 12
         - mkdir snapshot
    
    13 13
         - (cd snapshot; tar xjf ../cmucl-$version-linux.tar.bz2; tar xjf ../cmucl-$version-linux.extra.tar.bz2)
    
    14 14
       script:
    
    15
    -    - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
    
    15
    +    - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
    
    16 16
         - bin/make-dist.sh -I dist linux-4
    
    17 17
         - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
    
    18 18
     
    
    ... ... @@ -24,6 +24,6 @@ osx-runner:
    24 24
         - mkdir snapshot
    
    25 25
         - (cd snapshot; tar xjf ../cmucl-$version-darwin.tar.bz2)
    
    26 26
       script:
    
    27
    -    - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
    
    27
    +    - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
    
    28 28
         - bin/make-dist.sh -I dist darwin-4
    
    29 29
         - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log

  • src/lisp/Config.x86_linux
    ... ... @@ -3,7 +3,7 @@ include Config.x86_common
    3 3
     
    
    4 4
     # gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't
    
    5 5
     # produce a working lisp with -O2.  Just use -O1.
    
    6
    -COPT = -O1
    
    6
    +COPT = -O2
    
    7 7
     CFLAGS += $(COPT)
    
    8 8
     CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE
    
    9 9
     CFLAGS += -rdynamic  -march=pentium4 -mfpmath=sse -mtune=generic
    

  • src/lisp/gencgc.c
    ... ... @@ -8416,6 +8416,7 @@ char *
    8416 8416
     alloc(int nbytes)
    
    8417 8417
     {
    
    8418 8418
         void *new_obj;
    
    8419
    +
    
    8419 8420
     #if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
    
    8420 8421
         /*
    
    8421 8422
          * *current-region-free-pointer* is the same as alloc-tn (=
    
    ... ... @@ -8442,20 +8443,6 @@ alloc(int nbytes)
    8442 8443
     	    set_current_region_free((lispobj) new_free_pointer);
    
    8443 8444
                 break;
    
    8444 8445
     	} else if (bytes_allocated <= auto_gc_trigger) {
    
    8445
    -#if defined(i386) || defined(__x86_64)
    
    8446
    -            /*
    
    8447
    -             * Need to save and restore the FPU registers on x86, but only for
    
    8448
    -             * sse2.  See Ticket #61.
    
    8449
    -             *
    
    8450
    -             * Not needed by sparc or ppc because we never call alloc from
    
    8451
    -             * Lisp directly to do allocation.
    
    8452
    -             */
    
    8453
    -            FPU_STATE(fpu_state);
    
    8454
    -
    
    8455
    -            if (fpu_mode == SSE2) {
    
    8456
    -                save_fpu_state(fpu_state);
    
    8457
    -            }
    
    8458
    -#endif
    
    8459 8446
     	    /* Call gc_alloc.  */
    
    8460 8447
     	    boxed_region.free_pointer = (void *) get_current_region_free();
    
    8461 8448
     	    boxed_region.end_addr =
    
    ... ... @@ -8466,11 +8453,6 @@ alloc(int nbytes)
    8466 8453
     	    set_current_region_free((lispobj) boxed_region.free_pointer);
    
    8467 8454
     	    set_current_region_end((lispobj) boxed_region.end_addr);
    
    8468 8455
     
    
    8469
    -#if defined(i386) || defined(__x86_64)
    
    8470
    -            if (fpu_mode == SSE2) {
    
    8471
    -                restore_fpu_state(fpu_state);
    
    8472
    -            }
    
    8473
    -#endif
    
    8474 8456
                 break;
    
    8475 8457
     	} else {
    
    8476 8458
     	    /* Run GC and try again.  */
    

  • src/lisp/x86-arch.h
    ... ... @@ -17,16 +17,14 @@ extern boolean os_support_sse2(void);
    17 17
     #define FPU_STATE_SIZE 27
    
    18 18
     
    
    19 19
     /* 
    
    20
    - * Need 512 byte area, aligned on a 16-byte boundary.  So allocate
    
    21
    - * 512+16 bytes of space and let the routine adjust the appropriate
    
    22
    - * alignment.
    
    20
    + * Need 512 byte area, aligned on a 16-byte boundary.
    
    23 21
      */
    
    24
    -#define SSE_STATE_SIZE ((512+16)/4)
    
    22
    +#define SSE_STATE_SIZE 512
    
    25 23
     
    
    26 24
     /*
    
    27 25
      * Just use the SSE size for both x87 and sse2 since the SSE size is
    
    28
    - * enough for either.
    
    26
    + * enough for either.  Make sure it's on a 16-byte boundary.
    
    29 27
      */
    
    30
    -#define FPU_STATE(name)    int name[SSE_STATE_SIZE];
    
    28
    +#define FPU_STATE(name)    u_int8_t name[SSE_STATE_SIZE] __attribute__((aligned(16)))
    
    31 29
     
    
    32 30
     #endif

  • src/lisp/x86-assem.S
    ... ... @@ -382,7 +382,39 @@ ENDFUNC(fastcopy16)
    382 382
      * %eax = address
    
    383 383
      */
    
    384 384
     FUNCDEF(alloc_overflow_sse2)
    
    385
    -	STACK_PROLOGUE(20)
    
    385
    +	# Need 8*16 bytes for the xmm registers, and space to save ecx
    
    386
    +	# and edx, space for mxcsr, a temp, and one arg to pass to alloc.
    
    387
    +	# That's 8*16 + 5*4 = 148 bytes.  Might as well have a few
    
    388
    +	# more so the xmm0 area is 16-byte aligned. That makes it 160
    
    389
    +	# bytes.
    
    390
    +	#
    
    391
    +	# Stack looks like:
    
    392
    +	#
    
    393
    +	#      +160
    
    394
    +	#      +144 -> xmm7
    
    395
    +	#      +128 -> xmm6
    
    396
    +	#      +112 -> xmm5
    
    397
    +	#      +96  -> xmm4
    
    398
    +	#      +80  -> xmm3
    
    399
    +	#      +64  -> xmm2
    
    400
    +	#      +48  -> xmm1
    
    401
    +	#      +32  -> xmm0
    
    402
    +	#      +20  -> unused
    
    403
    +	#      +16  -> temp
    
    404
    +	#      +12  -> mxcsr
    
    405
    +	#      + 8  -> save ecx
    
    406
    +	#      + 4  -> save edx
    
    407
    +	#  esp + 0  -> arg for alloc
    
    408
    +	STACK_PROLOGUE(160)
    
    409
    +	movapd  %xmm0, (32 + 0*16)(%esp)
    
    410
    +	movapd  %xmm1, (32 + 1*16)(%esp)
    
    411
    +	movapd  %xmm2, (32 + 2*16)(%esp)
    
    412
    +	movapd  %xmm3, (32 + 3*16)(%esp)
    
    413
    +	movapd  %xmm4, (32 + 4*16)(%esp)
    
    414
    +	movapd  %xmm5, (32 + 5*16)(%esp)
    
    415
    +	movapd  %xmm6, (32 + 6*16)(%esp)
    
    416
    +	movapd  %xmm7, (32 + 7*16)(%esp)
    
    417
    +
    
    386 418
     	movl	%ecx, 8(%esp)	# Save ecx and edx registers
    
    387 419
     	movl	%edx, 4(%esp)
    
    388 420
     	stmxcsr 12(%esp)	# Save MXCSR
    
    ... ... @@ -398,10 +430,20 @@ FUNCDEF(alloc_overflow_sse2)
    398 430
     	movl	4(%esp), %edx	# Restore edx and ecx registers.  eax has the return value.
    
    399 431
     	movl	8(%esp), %ecx
    
    400 432
     	ldmxcsr	12(%esp)
    
    433
    +
    
    434
    +	movapd  (32 + 0*16)(%esp), %xmm0
    
    435
    +	movapd  (32 + 1*16)(%esp), %xmm1
    
    436
    +	movapd  (32 + 2*16)(%esp), %xmm2
    
    437
    +	movapd  (32 + 3*16)(%esp), %xmm3
    
    438
    +	movapd  (32 + 4*16)(%esp), %xmm4
    
    439
    +	movapd  (32 + 5*16)(%esp), %xmm5
    
    440
    +	movapd  (32 + 6*16)(%esp), %xmm6
    
    441
    +	movapd  (32 + 7*16)(%esp), %xmm7
    
    442
    +
    
    401 443
     	STACK_EPILOGUE
    
    402 444
     	ret
    
    403 445
     ENDFUNC(alloc_overflow_sse2)	
    
    404
    -		
    
    446
    +
    
    405 447
     #ifdef LINKAGE_TABLE
    
    406 448
     
    
    407 449
     /* Call into C code to resolve a linkage entry.  The initial code in the