[Git][cmucl/cmucl][master] 12 commits: Fix #85: Let each x86 config set optimization level

Raymond Toy gitlab at common-lisp.net
Sat Aug 29 02:27:07 UTC 2020



Raymond Toy pushed to branch master at cmucl / cmucl


Commits:
449f8ec1 by Raymond Toy at 2020-08-26T17:09:13-07:00
Fix #85: Let each x86 config set optimization level

Add a `COPT` variable in `Config.x86_common` to set the optimization
level (defaulting to `-O2`).  Each `Config.x86_*` file can then set
`COPT` as desired if the default doesn't work.

Thus, `Config.x86_linux` sets `COPT` to `-O1`, but others can use the
default value.  See issue #68.

- - - - -
38372fd9 by Raymond Toy at 2020-08-26T17:14:30-07:00
Fix typo

- - - - -
d51dabf0 by Raymond Toy at 2020-08-26T23:21:23-07:00
Fix #86: Make cmucl work with gcc 8.1.1 and later

In alloc(), save the fpu state on entry to the function and restore it
just before returning.  

While we're at it, use the __attribute__ option to get a 16-byte
aligned area where we can save the fpu state.

Also set the optimization level to -O2 for Linux.

- - - - -
4b80a6e5 by Raymond Toy at 2020-08-26T23:26:12-07:00
Merge branch 'master' into issue-86-save-fpu-state-on-entry-to-alloc

- - - - -
a95db7ba by Raymond Toy at 2020-08-26T23:30:54-07:00
Update comments

- - - - -
ad3862c9 by Raymond Toy at 2020-08-26T23:34:05-07:00
Clean up code

- - - - -
01f8217b by Raymond Toy at 2020-08-26T23:41:36-07:00
Add -R flag to recompile lisp

- - - - -
8b08b800 by Raymond Toy at 2020-08-27T20:39:07-07:00
Save FPU state in alloc_overflow_sse2

It's best to save the FPU state here instead of in alloc() because we
can't know what the compiler might do.  Remove the FPU save/restore
code from alloc().

gcc 9.3.1 builds lisp successfully.

- - - - -
e3aa51f3 by Raymond Toy at 2020-08-27T20:58:52-07:00
Remove stray #pragma

Forgot to remove this; it's not needed anymore.

- - - - -
17144e16 by Raymond Toy at 2020-08-28T16:23:59-07:00
Save just the xmm registers

Instead of saving the entire FPU state, we really only need to save
the xmm registers.

- - - - -
f923302e by Raymond Toy at 2020-08-28T16:32:49-07:00
Remove old version of alloc_overflow_sse2

- - - - -
9b7c0185 by Raymond Toy at 2020-08-29T02:27:00+00:00
Merge branch 'issue-86-save-fpu-state-on-entry-to-alloc' into 'master'

Fix #86: save fpu state on entry to alloc

Closes #86 and #85

See merge request cmucl/cmucl!53
- - - - -


5 changed files:

- .gitlab-ci.yml
- src/lisp/Config.x86_linux
- src/lisp/gencgc.c
- src/lisp/x86-arch.h
- src/lisp/x86-assem.S


Changes:

=====================================
.gitlab-ci.yml
=====================================
@@ -12,7 +12,7 @@ linux-runner:
     - mkdir snapshot
     - (cd snapshot; tar xjf ../cmucl-$version-linux.tar.bz2; tar xjf ../cmucl-$version-linux.extra.tar.bz2)
   script:
-    - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
+    - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
     - bin/make-dist.sh -I dist linux-4
     - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log
 
@@ -24,6 +24,6 @@ osx-runner:
     - mkdir snapshot
     - (cd snapshot; tar xjf ../cmucl-$version-darwin.tar.bz2)
   script:
-    - bin/build.sh $bootstrap -C "" -o snapshot/bin/lisp
+    - bin/build.sh $bootstrap -R -C "" -o snapshot/bin/lisp
     - bin/make-dist.sh -I dist darwin-4
     - bin/run-tests.sh -l dist/bin/lisp 2>&1 | tee test.log


=====================================
src/lisp/Config.x86_linux
=====================================
@@ -3,7 +3,7 @@ include Config.x86_common
 
 # gcc 8.1.1 and 8.3.1 (and probably anything after 8.1.1?) won't
 # produce a working lisp with -O2.  Just use -O1.
-COPT = -O1
+COPT = -O2
 CFLAGS += $(COPT)
 CPPFLAGS += -m32 -D__NO_CTYPE -D_GNU_SOURCE
 CFLAGS += -rdynamic  -march=pentium4 -mfpmath=sse -mtune=generic


=====================================
src/lisp/gencgc.c
=====================================
@@ -8416,6 +8416,7 @@ char *
 alloc(int nbytes)
 {
     void *new_obj;
+
 #if !(defined(sparc) || (defined(DARWIN) && defined(__ppc__)))
     /*
      * *current-region-free-pointer* is the same as alloc-tn (=
@@ -8442,20 +8443,6 @@ alloc(int nbytes)
 	    set_current_region_free((lispobj) new_free_pointer);
             break;
 	} else if (bytes_allocated <= auto_gc_trigger) {
-#if defined(i386) || defined(__x86_64)
-            /*
-             * Need to save and restore the FPU registers on x86, but only for
-             * sse2.  See Ticket #61.
-             *
-             * Not needed by sparc or ppc because we never call alloc from
-             * Lisp directly to do allocation.
-             */
-            FPU_STATE(fpu_state);
-
-            if (fpu_mode == SSE2) {
-                save_fpu_state(fpu_state);
-            }
-#endif
 	    /* Call gc_alloc.  */
 	    boxed_region.free_pointer = (void *) get_current_region_free();
 	    boxed_region.end_addr =
@@ -8466,11 +8453,6 @@ alloc(int nbytes)
 	    set_current_region_free((lispobj) boxed_region.free_pointer);
 	    set_current_region_end((lispobj) boxed_region.end_addr);
 
-#if defined(i386) || defined(__x86_64)
-            if (fpu_mode == SSE2) {
-                restore_fpu_state(fpu_state);
-            }
-#endif
             break;
 	} else {
 	    /* Run GC and try again.  */


=====================================
src/lisp/x86-arch.h
=====================================
@@ -17,16 +17,14 @@ extern boolean os_support_sse2(void);
 #define FPU_STATE_SIZE 27
 
 /* 
- * Need 512 byte area, aligned on a 16-byte boundary.  So allocate
- * 512+16 bytes of space and let the routine adjust the appropriate
- * alignment.
+ * Need 512 byte area, aligned on a 16-byte boundary.
  */
-#define SSE_STATE_SIZE ((512+16)/4)
+#define SSE_STATE_SIZE 512
 
 /*
  * Just use the SSE size for both x87 and sse2 since the SSE size is
- * enough for either.
+ * enough for either.  Make sure it's on a 16-byte boundary.
  */
-#define FPU_STATE(name)    int name[SSE_STATE_SIZE];
+#define FPU_STATE(name)    u_int8_t name[SSE_STATE_SIZE] __attribute__((aligned(16)))
 
 #endif


=====================================
src/lisp/x86-assem.S
=====================================
@@ -382,7 +382,39 @@ ENDFUNC(fastcopy16)
  * %eax = address
  */
 FUNCDEF(alloc_overflow_sse2)
-	STACK_PROLOGUE(20)
+	# Need 8*16 bytes for the xmm registers, and space to save ecx
+	# and edx, space for mxcsr, a temp, and one arg to pass to alloc.
+	# That's 8*16 + 5*4 = 148 bytes.  Might as well have a few
+	# more so the xmm0 area is 16-byte aligned. That makes it 160
+	# bytes.
+	#
+	# Stack looks like:
+	#
+	#      +160
+	#      +144 -> xmm7
+	#      +128 -> xmm6
+	#      +112 -> xmm5
+	#      +96  -> xmm4
+	#      +80  -> xmm3
+	#      +64  -> xmm2
+	#      +48  -> xmm1
+	#      +32  -> xmm0
+	#      +20  -> unused
+	#      +16  -> temp
+	#      +12  -> mxcsr
+	#      + 8  -> save ecx
+	#      + 4  -> save edx
+	#  esp + 0  -> arg for alloc
+	STACK_PROLOGUE(160)
+	movapd  %xmm0, (32 + 0*16)(%esp)
+	movapd  %xmm1, (32 + 1*16)(%esp)
+	movapd  %xmm2, (32 + 2*16)(%esp)
+	movapd  %xmm3, (32 + 3*16)(%esp)
+	movapd  %xmm4, (32 + 4*16)(%esp)
+	movapd  %xmm5, (32 + 5*16)(%esp)
+	movapd  %xmm6, (32 + 6*16)(%esp)
+	movapd  %xmm7, (32 + 7*16)(%esp)
+
 	movl	%ecx, 8(%esp)	# Save ecx and edx registers
 	movl	%edx, 4(%esp)
 	stmxcsr 12(%esp)	# Save MXCSR
@@ -398,10 +430,20 @@ FUNCDEF(alloc_overflow_sse2)
 	movl	4(%esp), %edx	# Restore edx and ecx registers.  eax has the return value.
 	movl	8(%esp), %ecx
 	ldmxcsr	12(%esp)
+
+	movapd  (32 + 0*16)(%esp), %xmm0
+	movapd  (32 + 1*16)(%esp), %xmm1
+	movapd  (32 + 2*16)(%esp), %xmm2
+	movapd  (32 + 3*16)(%esp), %xmm3
+	movapd  (32 + 4*16)(%esp), %xmm4
+	movapd  (32 + 5*16)(%esp), %xmm5
+	movapd  (32 + 6*16)(%esp), %xmm6
+	movapd  (32 + 7*16)(%esp), %xmm7
+
 	STACK_EPILOGUE
 	ret
 ENDFUNC(alloc_overflow_sse2)	
-		
+
 #ifdef LINKAGE_TABLE
 
 /* Call into C code to resolve a linkage entry.  The initial code in the 



View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d0b192cd3cf63abb94ecc75dabfabd0dd82b4d4c...9b7c0185a90edf8220c9392e84f87350ca32314e

-- 
View it on GitLab: https://gitlab.common-lisp.net/cmucl/cmucl/-/compare/d0b192cd3cf63abb94ecc75dabfabd0dd82b4d4c...9b7c0185a90edf8220c9392e84f87350ca32314e
You're receiving this email because of your account on gitlab.common-lisp.net.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cmucl-cvs/attachments/20200829/79ad0ef5/attachment-0001.htm>


More information about the cmucl-cvs mailing list