[cxml-devel] patch - xml-name-rune-p - proposing an approach w/o #.

Sean Champ gimmal at gmail.com
Wed Aug 16 17:02:05 UTC 2006


Hello,

I was compiling CXML, in SBCL 0.9.15. I noticed that the compiler kept
"getting stuck", when compiling the file xml-name-rune-p. Upon looking at the
file, it appeared that the 'getting stuck' behavior may have had something to
do with the #. form in the file.

With some simple reformatting on the forms in that file, I was able to define
a set of forms effectively comparable to those under the #. form -- using the
same code, essentially, though by a slightly different approach. I've used a
number of DEFVAR calls to hold the predicate values, as well as a DEFMACRO call
defining a 'defpredicate' macro -- the lot of those calls being wrapped in a
form like EVAL-WHEN :COMPILE-TOPLEVEL. The code in the file's toplevel calls
the 'defpredicate' macro, to define each of the two predicate functions. The
patched code still uses the thing-code-onto-bit-vector comparison.

The patch for it is attached to this message. The patched code compiles,
successfully, in SBCL 0.9.15.

The patch affects the definitions of the functions `name-rune-p' and
`name-start-p'. I've tested the redefined name-rune-p, in calling it on one
ASCII alphabetic character; it works.



Absent of adverbs,


--
Sean Champ
-------------- next part --------------
Index: xml-name-rune-p.lisp
===================================================================
RCS file: /project/cxml/cvsroot/cxml/xml/xml-name-rune-p.lisp,v
retrieving revision 1.6
diff -p -u -r1.6 xml-name-rune-p.lisp
--- xml-name-rune-p.lisp	28 Nov 2005 22:33:47 -0000	1.6
+++ xml-name-rune-p.lisp	16 Aug 2006 15:56:48 -0000
@@ -7,221 +7,212 @@
 
 (in-package :cxml)
 
-#.(funcall 
-   (compile 
-    nil
-    '(lambda ()
-      (let ((+max+ #xD800))
-        (labels
-            ((name-start-rune-p (rune)
-               (or (letter-rune-p rune)
-                   (= #.(char-code #\_) rune)
-                   (= #.(char-code #\:) rune)))
-
-             (name-rune-p (rune)
-               (or (letter-rune-p rune)
-                   (digit-rune-p* rune)
-                   (= rune #.(char-code #\.))
-                   (= rune #.(char-code #\-))
-                   (= rune #.(char-code #\_))
-                   (= rune #.(char-code #\:))
-                   (combining-rune-p rune)
-                   (extender-rune-p rune)))
-
-             (letter-rune-p (rune)
-               (or (base-rune-p rune)
-                   (ideographic-rune-p rune)))
-
-             (digit-rune-p* (rune)
-               (or (<= 48 rune 57)
-                   (<= 1632 rune 1641)
-                   (<= 1776 rune 1785)
-                   (<= 2406 rune 2415)
-                   (<= 2534 rune 2543)
-                   (<= 2662 rune 2671)
-                   (<= 2790 rune 2799)
-                   (<= 2918 rune 2927)
-                   (<= 3047 rune 3055)
-                   (<= 3174 rune 3183)
-                   (<= 3302 rune 3311)
-                   (<= 3430 rune 3439)
-                   (<= 3664 rune 3673)
-                   (<= 3792 rune 3801)
-                   (<= 3872 rune 3881)))
-
-
-             (combining-rune-p (rune)
-               (or (<= 768 rune 837)
-                   (<= 864 rune 865)
-                   (<= 1155 rune 1158)
-                   (<= 1425 rune 1441)
-                   (<= 1443 rune 1465)
-                   (<= 1467 rune 1469)
-                   (= 1471 rune)
-                   (<= 1473 rune 1474)
-                   (= 1476 rune)
-                   (<= 1611 rune 1618)
-                   (= 1648 rune)
-                   (<= 1750 rune 1756)
-                   (<= 1757 rune 1759)
-                   (<= 1760 rune 1764)
-                   (<= 1767 rune 1768)
-                   (<= 1770 rune 1773)
-                   (<= 2305 rune 2307)
-                   (= 2364 rune)
-                   (<= 2366 rune 2380)
-                   (= 2381 rune)
-                   (<= 2385 rune 2388)
-                   (<= 2402 rune 2403)
-                   (<= 2433 rune 2435)
-                   (= 2492 rune)
-                   (= 2494 rune)
-                   (= 2495 rune)
-                   (<= 2496 rune 2500)
-                   (<= 2503 rune 2504)
-                   (<= 2507 rune 2509)
-                   (= 2519 rune)
-                   (<= 2530 rune 2531)
-                   (= 2562 rune)
-                   (= 2620 rune)
-                   (= 2622 rune)
-                   (= 2623 rune)
-                   (<= 2624 rune 2626)
-                   (<= 2631 rune 2632)
-                   (<= 2635 rune 2637)
-                   (<= 2672 rune 2673)
-                   (<= 2689 rune 2691)
-                   (= 2748 rune)
-                   (<= 2750 rune 2757)
-                   (<= 2759 rune 2761)
-                   (<= 2763 rune 2765)
-                   (<= 2817 rune 2819)
-                   (= 2876 rune)
-                   (<= 2878 rune 2883)
-                   (<= 2887 rune 2888)
-                   (<= 2891 rune 2893)
-                   (<= 2902 rune 2903)
-                   (<= 2946 rune 2947)
-                   (<= 3006 rune 3010)
-                   (<= 3014 rune 3016)
-                   (<= 3018 rune 3021)
-                   (= 3031 rune)
-                   (<= 3073 rune 3075)
-                   (<= 3134 rune 3140)
-                   (<= 3142 rune 3144)
-                   (<= 3146 rune 3149)
-                   (<= 3157 rune 3158)
-                   (<= 3202 rune 3203)
-                   (<= 3262 rune 3268)
-                   (<= 3270 rune 3272)
-                   (<= 3274 rune 3277)
-                   (<= 3285 rune 3286)
-                   (<= 3330 rune 3331)
-                   (<= 3390 rune 3395)
-                   (<= 3398 rune 3400)
-                   (<= 3402 rune 3405)
-                   (= 3415 rune)
-                   (= 3633 rune)
-                   (<= 3636 rune 3642)
-                   (<= 3655 rune 3662)
-                   (= 3761 rune)
-                   (<= 3764 rune 3769)
-                   (<= 3771 rune 3772)
-                   (<= 3784 rune 3789)
-                   (<= 3864 rune 3865)
-                   (= 3893 rune)
-                   (= 3895 rune)
-                   (= 3897 rune)
-                   (= 3902 rune)
-                   (= 3903 rune)
-                   (<= 3953 rune 3972)
-                   (<= 3974 rune 3979)
-                   (<= 3984 rune 3989)
-                   (= 3991 rune)
-                   (<= 3993 rune 4013)
-                   (<= 4017 rune 4023)
-                   (= 4025 rune)
-                   (<= 8400 rune 8412)
-                   (= 8417 rune)
-                   (<= 12330 rune 12335)
-                   (= 12441 rune)
-                   (= 12442 rune)))
-
-             (extender-rune-p (rune)
-               (or
-                (= 183 rune)
-                (= 720 rune)
-                (= 721 rune)
-                (= 903 rune)
-                (= 1600 rune)
-                (= 3654 rune)
-                (= 3782 rune)
-                (= 12293 rune)
-                (<= 12337 rune 12341)
-                (<= 12445 rune 12446)
-                (<= 12540 rune 12542)))
-
-             (base-rune-p (rune)
-               ;; split into two ORs for LispWorks...
-               (or
-                 (or (<= 65 rune 90) (<= 97 rune 122) (<= 192 rune 214) (<= 216 rune 246) (<= 248 rune 255) (<= 256 rune 305)
-                     (<= 308 rune 318) (<= 321 rune 328) (<= 330 rune 382) (<= 384 rune 451) (<= 461 rune 496) (<= 500 rune 501)
-                     (<= 506 rune 535) (<= 592 rune 680) (<= 699 rune 705) (= 902 rune) (<= 904 rune 906) (= 908 rune)
-                     (<= 910 rune 929) (<= 931 rune 974) (<= 976 rune 982) (= 986 rune) (= 988 rune) (= 990 rune) (= 992 rune)
-                     (<= 994 rune 1011) (<= 1025 rune 1036) (<= 1038 rune 1103) (<= 1105 rune 1116) (<= 1118 rune 1153)
-                     (<= 1168 rune 1220) (<= 1223 rune 1224) (<= 1227 rune 1228) (<= 1232 rune 1259) (<= 1262 rune 1269)
-                     (<= 1272 rune 1273) (<= 1329 rune 1366) (= 1369 rune) (<= 1377 rune 1414) (<= 1488 rune 1514)
-                     (<= 1520 rune 1522) (<= 1569 rune 1594) (<= 1601 rune 1610) (<= 1649 rune 1719) (<= 1722 rune 1726)
-                     (<= 1728 rune 1742) (<= 1744 rune 1747) (= 1749 rune) (<= 1765 rune 1766) (<= 2309 rune 2361) (= 2365 rune)
-                     (<= 2392 rune 2401) (<= 2437 rune 2444) (<= 2447 rune 2448) (<= 2451 rune 2472) (<= 2474 rune 2480)
-                     (= 2482 rune) (<= 2486 rune 2489) (<= 2524 rune 2525) (<= 2527 rune 2529) (<= 2544 rune 2545)
-                     (<= 2565 rune 2570) (<= 2575 rune 2576) (<= 2579 rune 2600) (<= 2602 rune 2608) (<= 2610 rune 2611)
-                     (<= 2613 rune 2614) (<= 2616 rune 2617) (<= 2649 rune 2652) (= 2654 rune) (<= 2674 rune 2676)
-                     (<= 2693 rune 2699) (= 2701 rune) (<= 2703 rune 2705) (<= 2707 rune 2728) (<= 2730 rune 2736)
-                     (<= 2738 rune 2739) (<= 2741 rune 2745) (= 2749 rune) (= 2784 rune) (<= 2821 rune 2828) (<= 2831 rune 2832)
-                     (<= 2835 rune 2856) (<= 2858 rune 2864) (<= 2866 rune 2867) (<= 2870 rune 2873) (= 2877 rune)
-                     (<= 2908 rune 2909) (<= 2911 rune 2913) (<= 2949 rune 2954) (<= 2958 rune 2960) (<= 2962 rune 2965)
-                     (<= 2969 rune 2970) (= 2972 rune))
-                 (or (<= 2974 rune 2975) (<= 2979 rune 2980) (<= 2984 rune 2986)
-                     (<= 2990 rune 2997) (<= 2999 rune 3001) (<= 3077 rune 3084) (<= 3086 rune 3088) (<= 3090 rune 3112)
-                     (<= 3114 rune 3123) (<= 3125 rune 3129) (<= 3168 rune 3169) (<= 3205 rune 3212) (<= 3214 rune 3216)
-                     (<= 3218 rune 3240) (<= 3242 rune 3251) (<= 3253 rune 3257) (= 3294 rune) (<= 3296 rune 3297)
-                     (<= 3333 rune 3340) (<= 3342 rune 3344) (<= 3346 rune 3368) (<= 3370 rune 3385) (<= 3424 rune 3425)
-                     (<= 3585 rune 3630) (= 3632 rune) (<= 3634 rune 3635) (<= 3648 rune 3653) (<= 3713 rune 3714) (= 3716 rune)
-                     (<= 3719 rune 3720) (= 3722 rune) (= 3725 rune) (<= 3732 rune 3735) (<= 3737 rune 3743) (<= 3745 rune 3747)
-                     (= 3749 rune) (= 3751 rune) (<= 3754 rune 3755) (<= 3757 rune 3758) (= 3760 rune) (<= 3762 rune 3763) (= 3773 rune)
-                     (<= 3776 rune 3780) (<= 3904 rune 3911) (<= 3913 rune 3945) (<= 4256 rune 4293) (<= 4304 rune 4342)
-                     (= 4352 rune) (<= 4354 rune 4355) (<= 4357 rune 4359) (= 4361 rune) (<= 4363 rune 4364) (<= 4366 rune 4370)
-                     (= 4412 rune) (= 4414 rune) (= 4416 rune) (= 4428 rune) (= 4430 rune) (= 4432 rune) (<= 4436 rune 4437) (= 4441 rune)
-                     (<= 4447 rune 4449) (= 4451 rune) (= 4453 rune) (= 4455 rune) (= 4457 rune) (<= 4461 rune 4462) (<= 4466 rune 4467)
-                     (= 4469 rune) (= 4510 rune) (= 4520 rune) (= 4523 rune) (<= 4526 rune 4527) (<= 4535 rune 4536) (= 4538 rune)
-                     (<= 4540 rune 4546) (= 4587 rune) (= 4592 rune) (= 4601 rune) (<= 7680 rune 7835) (<= 7840 rune 7929)
-                     (<= 7936 rune 7957) (<= 7960 rune 7965) (<= 7968 rune 8005) (<= 8008 rune 8013) (<= 8016 rune 8023)
-                     (= 8025 rune) (= 8027 rune) (= 8029 rune) (<= 8031 rune 8061) (<= 8064 rune 8116) (<= 8118 rune 8124) (= 8126 rune)
-                     (<= 8130 rune 8132) (<= 8134 rune 8140) (<= 8144 rune 8147) (<= 8150 rune 8155) (<= 8160 rune 8172)
-                     (<= 8178 rune 8180) (<= 8182 rune 8188) (= 8486 rune) (<= 8490 rune 8491) (= 8494 rune) (<= 8576 rune 8578)
-                     (<= 12353 rune 12436) (<= 12449 rune 12538) (<= 12549 rune 12588) (<= 44032 rune 55203))))
-
-             (ideographic-rune-p (rune)
-               (or (<= 19968 rune 40869) (= 12295 rune) (<= 12321 rune 12329)))
-
-
-             (predicate-to-bv (p)
-               (let ((r (make-array +max+ :element-type 'bit :initial-element 0)))
-                 (dotimes (i #x10000 r)
-                   (when (funcall p i)
-                     (setf (aref r i) 1))))) )
-
-          `(progn
-             (DEFINLINE NAME-RUNE-P (RUNE)
-               (SETF RUNE (RUNE-CODE RUNE))
-               (AND (<= 0 RUNE ,+max+)
-                    (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3)))
-                             (= 1 (SBIT ',(predicate-to-bv #'name-rune-p)
-                                        (THE FIXNUM RUNE))))))
-             (DEFINLINE NAME-START-RUNE-P (RUNE)
-               (SETF RUNE (RUNE-CODE RUNE))
-               (AND (<= 0 RUNE ,+MAX+)
-                    (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3)))
-                             (= 1 (SBIT ',(predicate-to-bv #'name-start-rune-p)
-                                        (THE FIXNUM RUNE)))))))) ))))
+
+
+(eval-when (:compile-toplevel :execute)
+
+(defconstant +max+
+  #xD800)
+
+(defmacro defpredicate (name set)
+  "Define predicate NAME (via DEFINLINE) testing a rune's code against the code list held in variable SET."
+  (labels ((c-in-set-p (c s) ; true when code C matches some entry of list S
+	     (declare (type (integer 0 #x10000) c) (type list s)
+		      (values t &optional))
+	     (find c s
+		   :test #'(lambda (c eltv)
+			     (declare (type (integer 0 #x10000) c)
+				      (values boolean &optional))
+			     (etypecase eltv
+			       (fixnum (= c (the fixnum eltv))) ; entry is a single code
+			       (cons (<= (the fixnum (car eltv)) c ; entry is (LOW HIGH): both bounds must hold
+					 (the fixnum (cadr eltv))))))))
+	   ;; Built at macroexpansion time: bit I of the result is 1 when code I is in S.
+	   (set-to-bv (s)
+	     (let ((r (make-array +max+ :element-type 'bit :initial-element 0)))
+	       (dotimes (i +max+ r) ; R has +max+ elements, so never index past +max+ - 1
+		 (when (c-in-set-p i s)
+		   (setf (aref r i) 1))))))
+    ` (definline ,name (rune)
+	(declare (values boolean &optional))
+	(setf rune (rune-code rune))
+	(and (< -1 rune ,+max+) ; valid bit indices are 0 .. +max+ - 1; SBIT below runs at safety 0
+	     (locally (declare (optimize (safety 0) (speed 3)))
+	       (= 1 (sbit ,(set-to-bv (symbol-value set))
+			  (the fixnum rune))))))))
+
+
+
+
+(defvar name-start-codes
+  '(#.(char-code #\_) #.(char-code #\:)))
+
+(defvar base-codes
+  '((65 90) (97 122) (192 214) (216 246) (248 255) (256 305)
+    (308 318) (321 328) (330 382) (384 451) (461 496) (500 501)
+    (506 535) (592 680) (699 705) 902 (904 906) 908
+    (910 929) (931 974) (976 982) 986 988 990 992
+    (994 1011) (1025 1036) (1038 1103) (1105 1116) (1118 1153)
+    (1168 1220) (1223 1224) (1227 1228) (1232 1259) (1262 1269)
+    (1272 1273) (1329 1366) 1369 (1377 1414) (1488 1514)
+    (1520 1522) (1569 1594) (1601 1610) (1649 1719) (1722 1726)
+    (1728 1742) (1744 1747) 1749 (1765 1766) (2309 2361) 2365
+    (2392 2401) (2437 2444) (2447 2448) (2451 2472) (2474 2480)
+    2482 (2486 2489) (2524 2525) (2527 2529) (2544 2545)
+    (2565 2570) (2575 2576) (2579 2600) (2602 2608) (2610 2611)
+    (2613 2614) (2616 2617) (2649 2652) 2654 (2674 2676)
+    (2693 2699) 2701 (2703 2705) (2707 2728) (2730 2736)
+    (2738 2739) (2741 2745) 2749 2784 (2821 2828) (2831 2832)
+    (2835 2856) (2858 2864) (2866 2867) (2870 2873) 2877
+    (2908 2909) (2911 2913) (2949 2954) (2958 2960) (2962 2965)
+    (2969 2970) 2972
+
+    (2974 2975) (2979 2980) (2984 2986)
+    (2990 2997) (2999 3001) (3077 3084) (3086 3088) (3090 3112)
+    (3114 3123) (3125 3129) (3168 3169) (3205 3212) (3214 3216)
+    (3218 3240) (3242 3251) (3253 3257) 3294 (3296 3297)
+    (3333 3340) (3342 3344) (3346 3368) (3370 3385) (3424 3425)
+    (3585 3630) 3632 (3634 3635) (3648 3653) (3713 3714) 3716
+    (3719 3720) 3722 3725 (3732 3735) (3737 3743) (3745 3747)
+    3749 3751 (3754 3755) (3757 3758) 3760 (3762 3763) 3773
+    (3776 3780) (3904 3911) (3913 3945) (4256 4293) (4304 4342)
+    4352 (4354 4355) (4357 4359) 4361 (4363 4364) (4366 4370)
+    4412 4414 4416 4428 4430 4432 (4436 4437) 4441
+    (4447 4449) 4451 4453 4455 4457 (4461 4462) (4466 4467)
+    4469 4510 4520 4523 (4526 4527) (4535 4536) 4538
+    (4540 4546) 4587 4592 4601 (7680 7835) (7840 7929)
+    (7936 7957) (7960 7965) (7968 8005) (8008 8013) (8016 8023)
+    8025 8027 8029 (8031 8061) (8064 8116) (8118 8124) 8126
+    (8130 8132) (8134 8140) (8144 8147) (8150 8155) (8160 8172)
+    (8178 8180) (8182 8188) 8486 (8490 8491) 8494 (8576 8578)
+    (12353 12436) (12449 12538) (12549 12588) (44032 55203)))
+
+(defvar ideographic-codes
+  '((19968 40869)
+    12295
+    (12321 12329)))
+
+(defvar letter-codes
+  `(,@base-codes ,@ideographic-codes))
+
+(defvar digit-codes
+  '((48 57)
+    (1632 1641)
+    (1776 1785)
+    (2406 2415)
+    (2534 2543)
+    (2662 2671)
+    (2790 2799)
+    (2918 2927)
+    (3047 3055)
+    (3174 3183)
+    (3302 3311)
+    (3430 3439)
+    (3664 3673)
+    (3792 3801)
+    (3872 3881)))
+
+(defvar combining-codes
+  '((768 837)
+    (864 865)
+    (1155 1158)
+    (1425 1441)
+    (1443 1465)
+    (1467 1469)
+    1471
+    (1473 1474)
+    1476
+    (1611 1618)
+    1648
+    (1750 1756)
+    (1757 1759)
+    (1760 1764)
+    (1767 1768)
+    (1770 1773)
+    (2305 2307)
+    2364
+    (2366 2380)
+    2381
+    (2385 2388)
+    (2402 2403)
+    (2433 2435)
+    2492 2494 2495
+    (2496 2500)
+    (2503 2504)
+    (2507 2509)
+    2519
+    (2530 2531)
+    2562 2620 2622 2623
+    (2624 2626)
+    (2631 2632)
+    (2635 2637)
+    (2672 2673)
+    (2689 2691)
+    2748
+    (2750 2757)
+    (2759 2761)
+    (2763 2765)
+    (2817 2819)
+    2876
+    (2878 2883)
+    (2887 2888)
+    (2891 2893)
+    (2902 2903)
+    (2946 2947)
+    (3006 3010)
+    (3014 3016)
+    (3018 3021)
+    3031
+    (3073 3075)
+    (3134 3140)
+    (3142 3144)
+    (3146 3149)
+    (3157 3158)
+    (3202 3203)
+    (3262 3268)
+    (3270 3272)
+    (3274 3277)
+    (3285 3286)
+    (3330 3331)
+    (3390 3395)
+    (3398 3400)
+    (3402 3405)
+    3415 3633
+    (3636 3642)
+    (3655 3662)
+    3761
+    (3764 3769)
+    (3771 3772)
+    (3784 3789)
+    (3864 3865)
+    3893 3895 3897 3902 3903
+    (3953 3972)
+    (3974 3979)
+    (3984 3989)
+    3991
+    (3993 4013)
+    (4017 4023)
+    4025
+    (8400 8412)
+    8417
+    (12330 12335)
+    12441 12442))
+
+(defvar extender-codes
+  '(183 720 721 903 1600 3654 3782 12293
+    (12337 12341)
+    (12445 12446)
+    (12540 12542)))
+
+(defvar name-codes
+  `(,@letter-codes ,@digit-codes
+    #.(char-code #\.) #.(char-code #\-)
+    #.(char-code #\_) #.(char-code #\:)
+    ,@combining-codes ,@extender-codes))
+
+
+) ;; Eval-when
+
+(defpredicate name-rune-p name-codes) ; name characters: letters, digits, `.', `-', `_', `:', combining chars, extenders
+(eval-when (:compile-toplevel :execute) (defvar name-start-rune-codes (append base-codes ideographic-codes name-start-codes))) ; a name may start with a letter, `_' or `:'; must exist when DEFPREDICATE expands
+(defpredicate name-start-rune-p name-start-rune-codes) ; same function name as the definition this patch replaces, so existing callers keep working


More information about the cxml-devel mailing list