[cxml-devel] patch - xml-name-rune-p - proposing an approach w/o #.
Sean Champ
gimmal at gmail.com
Wed Aug 16 17:02:05 UTC 2006
Hello,
I was compiling CXML, in SBCL 0.9.15. I noticed that the compiler kept
"getting stuck", when compiling the file xml-name-rune-p. Upon looking at the
file, it appeared that the 'getting stuck' behavior may have had something to
do with the #. form in the file.
With some simple reformatting on the forms in that file, I was able to define
a set of forms effectively comparable to those under the #. form -- using the
same code, essentially, though by a slightly different approach. I've used a
number of DEFVAR calls to hold the predicate values, as well as a DEFMACRO call
defining a 'defpredicate' macro -- the lot of those calls being wrapped in a
form like EVAL-WHEN :COMPILE-TOPLEVEL. The code in the file's toplevel calls
the 'defpredicate' macro, to define each of the two predicate functions. The
patched code still uses the thing-code-onto-bit-vector comparison.
The patch for it is attached to this message. The patched code compiles,
successfully, in SBCL 0.9.15.
The patch affects the definitions of the functions `name-rune-p' and
`name-start-p'. I've tested the redefined name-rune-p, in calling it on one
ASCII alphabetic character; it works.
Absent of adverbs,
--
Sean Champ
-------------- next part --------------
Index: xml-name-rune-p.lisp
===================================================================
RCS file: /project/cxml/cvsroot/cxml/xml/xml-name-rune-p.lisp,v
retrieving revision 1.6
diff -p -u -r1.6 xml-name-rune-p.lisp
--- xml-name-rune-p.lisp 28 Nov 2005 22:33:47 -0000 1.6
+++ xml-name-rune-p.lisp 16 Aug 2006 15:56:48 -0000
@@ -7,221 +7,212 @@
(in-package :cxml)
-#.(funcall
- (compile
- nil
- '(lambda ()
- (let ((+max+ #xD800))
- (labels
- ((name-start-rune-p (rune)
- (or (letter-rune-p rune)
- (= #.(char-code #\_) rune)
- (= #.(char-code #\:) rune)))
-
- (name-rune-p (rune)
- (or (letter-rune-p rune)
- (digit-rune-p* rune)
- (= rune #.(char-code #\.))
- (= rune #.(char-code #\-))
- (= rune #.(char-code #\_))
- (= rune #.(char-code #\:))
- (combining-rune-p rune)
- (extender-rune-p rune)))
-
- (letter-rune-p (rune)
- (or (base-rune-p rune)
- (ideographic-rune-p rune)))
-
- (digit-rune-p* (rune)
- (or (<= 48 rune 57)
- (<= 1632 rune 1641)
- (<= 1776 rune 1785)
- (<= 2406 rune 2415)
- (<= 2534 rune 2543)
- (<= 2662 rune 2671)
- (<= 2790 rune 2799)
- (<= 2918 rune 2927)
- (<= 3047 rune 3055)
- (<= 3174 rune 3183)
- (<= 3302 rune 3311)
- (<= 3430 rune 3439)
- (<= 3664 rune 3673)
- (<= 3792 rune 3801)
- (<= 3872 rune 3881)))
-
-
- (combining-rune-p (rune)
- (or (<= 768 rune 837)
- (<= 864 rune 865)
- (<= 1155 rune 1158)
- (<= 1425 rune 1441)
- (<= 1443 rune 1465)
- (<= 1467 rune 1469)
- (= 1471 rune)
- (<= 1473 rune 1474)
- (= 1476 rune)
- (<= 1611 rune 1618)
- (= 1648 rune)
- (<= 1750 rune 1756)
- (<= 1757 rune 1759)
- (<= 1760 rune 1764)
- (<= 1767 rune 1768)
- (<= 1770 rune 1773)
- (<= 2305 rune 2307)
- (= 2364 rune)
- (<= 2366 rune 2380)
- (= 2381 rune)
- (<= 2385 rune 2388)
- (<= 2402 rune 2403)
- (<= 2433 rune 2435)
- (= 2492 rune)
- (= 2494 rune)
- (= 2495 rune)
- (<= 2496 rune 2500)
- (<= 2503 rune 2504)
- (<= 2507 rune 2509)
- (= 2519 rune)
- (<= 2530 rune 2531)
- (= 2562 rune)
- (= 2620 rune)
- (= 2622 rune)
- (= 2623 rune)
- (<= 2624 rune 2626)
- (<= 2631 rune 2632)
- (<= 2635 rune 2637)
- (<= 2672 rune 2673)
- (<= 2689 rune 2691)
- (= 2748 rune)
- (<= 2750 rune 2757)
- (<= 2759 rune 2761)
- (<= 2763 rune 2765)
- (<= 2817 rune 2819)
- (= 2876 rune)
- (<= 2878 rune 2883)
- (<= 2887 rune 2888)
- (<= 2891 rune 2893)
- (<= 2902 rune 2903)
- (<= 2946 rune 2947)
- (<= 3006 rune 3010)
- (<= 3014 rune 3016)
- (<= 3018 rune 3021)
- (= 3031 rune)
- (<= 3073 rune 3075)
- (<= 3134 rune 3140)
- (<= 3142 rune 3144)
- (<= 3146 rune 3149)
- (<= 3157 rune 3158)
- (<= 3202 rune 3203)
- (<= 3262 rune 3268)
- (<= 3270 rune 3272)
- (<= 3274 rune 3277)
- (<= 3285 rune 3286)
- (<= 3330 rune 3331)
- (<= 3390 rune 3395)
- (<= 3398 rune 3400)
- (<= 3402 rune 3405)
- (= 3415 rune)
- (= 3633 rune)
- (<= 3636 rune 3642)
- (<= 3655 rune 3662)
- (= 3761 rune)
- (<= 3764 rune 3769)
- (<= 3771 rune 3772)
- (<= 3784 rune 3789)
- (<= 3864 rune 3865)
- (= 3893 rune)
- (= 3895 rune)
- (= 3897 rune)
- (= 3902 rune)
- (= 3903 rune)
- (<= 3953 rune 3972)
- (<= 3974 rune 3979)
- (<= 3984 rune 3989)
- (= 3991 rune)
- (<= 3993 rune 4013)
- (<= 4017 rune 4023)
- (= 4025 rune)
- (<= 8400 rune 8412)
- (= 8417 rune)
- (<= 12330 rune 12335)
- (= 12441 rune)
- (= 12442 rune)))
-
- (extender-rune-p (rune)
- (or
- (= 183 rune)
- (= 720 rune)
- (= 721 rune)
- (= 903 rune)
- (= 1600 rune)
- (= 3654 rune)
- (= 3782 rune)
- (= 12293 rune)
- (<= 12337 rune 12341)
- (<= 12445 rune 12446)
- (<= 12540 rune 12542)))
-
- (base-rune-p (rune)
- ;; split into two ORs for LispWorks...
- (or
- (or (<= 65 rune 90) (<= 97 rune 122) (<= 192 rune 214) (<= 216 rune 246) (<= 248 rune 255) (<= 256 rune 305)
- (<= 308 rune 318) (<= 321 rune 328) (<= 330 rune 382) (<= 384 rune 451) (<= 461 rune 496) (<= 500 rune 501)
- (<= 506 rune 535) (<= 592 rune 680) (<= 699 rune 705) (= 902 rune) (<= 904 rune 906) (= 908 rune)
- (<= 910 rune 929) (<= 931 rune 974) (<= 976 rune 982) (= 986 rune) (= 988 rune) (= 990 rune) (= 992 rune)
- (<= 994 rune 1011) (<= 1025 rune 1036) (<= 1038 rune 1103) (<= 1105 rune 1116) (<= 1118 rune 1153)
- (<= 1168 rune 1220) (<= 1223 rune 1224) (<= 1227 rune 1228) (<= 1232 rune 1259) (<= 1262 rune 1269)
- (<= 1272 rune 1273) (<= 1329 rune 1366) (= 1369 rune) (<= 1377 rune 1414) (<= 1488 rune 1514)
- (<= 1520 rune 1522) (<= 1569 rune 1594) (<= 1601 rune 1610) (<= 1649 rune 1719) (<= 1722 rune 1726)
- (<= 1728 rune 1742) (<= 1744 rune 1747) (= 1749 rune) (<= 1765 rune 1766) (<= 2309 rune 2361) (= 2365 rune)
- (<= 2392 rune 2401) (<= 2437 rune 2444) (<= 2447 rune 2448) (<= 2451 rune 2472) (<= 2474 rune 2480)
- (= 2482 rune) (<= 2486 rune 2489) (<= 2524 rune 2525) (<= 2527 rune 2529) (<= 2544 rune 2545)
- (<= 2565 rune 2570) (<= 2575 rune 2576) (<= 2579 rune 2600) (<= 2602 rune 2608) (<= 2610 rune 2611)
- (<= 2613 rune 2614) (<= 2616 rune 2617) (<= 2649 rune 2652) (= 2654 rune) (<= 2674 rune 2676)
- (<= 2693 rune 2699) (= 2701 rune) (<= 2703 rune 2705) (<= 2707 rune 2728) (<= 2730 rune 2736)
- (<= 2738 rune 2739) (<= 2741 rune 2745) (= 2749 rune) (= 2784 rune) (<= 2821 rune 2828) (<= 2831 rune 2832)
- (<= 2835 rune 2856) (<= 2858 rune 2864) (<= 2866 rune 2867) (<= 2870 rune 2873) (= 2877 rune)
- (<= 2908 rune 2909) (<= 2911 rune 2913) (<= 2949 rune 2954) (<= 2958 rune 2960) (<= 2962 rune 2965)
- (<= 2969 rune 2970) (= 2972 rune))
- (or (<= 2974 rune 2975) (<= 2979 rune 2980) (<= 2984 rune 2986)
- (<= 2990 rune 2997) (<= 2999 rune 3001) (<= 3077 rune 3084) (<= 3086 rune 3088) (<= 3090 rune 3112)
- (<= 3114 rune 3123) (<= 3125 rune 3129) (<= 3168 rune 3169) (<= 3205 rune 3212) (<= 3214 rune 3216)
- (<= 3218 rune 3240) (<= 3242 rune 3251) (<= 3253 rune 3257) (= 3294 rune) (<= 3296 rune 3297)
- (<= 3333 rune 3340) (<= 3342 rune 3344) (<= 3346 rune 3368) (<= 3370 rune 3385) (<= 3424 rune 3425)
- (<= 3585 rune 3630) (= 3632 rune) (<= 3634 rune 3635) (<= 3648 rune 3653) (<= 3713 rune 3714) (= 3716 rune)
- (<= 3719 rune 3720) (= 3722 rune) (= 3725 rune) (<= 3732 rune 3735) (<= 3737 rune 3743) (<= 3745 rune 3747)
- (= 3749 rune) (= 3751 rune) (<= 3754 rune 3755) (<= 3757 rune 3758) (= 3760 rune) (<= 3762 rune 3763) (= 3773 rune)
- (<= 3776 rune 3780) (<= 3904 rune 3911) (<= 3913 rune 3945) (<= 4256 rune 4293) (<= 4304 rune 4342)
- (= 4352 rune) (<= 4354 rune 4355) (<= 4357 rune 4359) (= 4361 rune) (<= 4363 rune 4364) (<= 4366 rune 4370)
- (= 4412 rune) (= 4414 rune) (= 4416 rune) (= 4428 rune) (= 4430 rune) (= 4432 rune) (<= 4436 rune 4437) (= 4441 rune)
- (<= 4447 rune 4449) (= 4451 rune) (= 4453 rune) (= 4455 rune) (= 4457 rune) (<= 4461 rune 4462) (<= 4466 rune 4467)
- (= 4469 rune) (= 4510 rune) (= 4520 rune) (= 4523 rune) (<= 4526 rune 4527) (<= 4535 rune 4536) (= 4538 rune)
- (<= 4540 rune 4546) (= 4587 rune) (= 4592 rune) (= 4601 rune) (<= 7680 rune 7835) (<= 7840 rune 7929)
- (<= 7936 rune 7957) (<= 7960 rune 7965) (<= 7968 rune 8005) (<= 8008 rune 8013) (<= 8016 rune 8023)
- (= 8025 rune) (= 8027 rune) (= 8029 rune) (<= 8031 rune 8061) (<= 8064 rune 8116) (<= 8118 rune 8124) (= 8126 rune)
- (<= 8130 rune 8132) (<= 8134 rune 8140) (<= 8144 rune 8147) (<= 8150 rune 8155) (<= 8160 rune 8172)
- (<= 8178 rune 8180) (<= 8182 rune 8188) (= 8486 rune) (<= 8490 rune 8491) (= 8494 rune) (<= 8576 rune 8578)
- (<= 12353 rune 12436) (<= 12449 rune 12538) (<= 12549 rune 12588) (<= 44032 rune 55203))))
-
- (ideographic-rune-p (rune)
- (or (<= 19968 rune 40869) (= 12295 rune) (<= 12321 rune 12329)))
-
-
- (predicate-to-bv (p)
- (let ((r (make-array +max+ :element-type 'bit :initial-element 0)))
- (dotimes (i #x10000 r)
- (when (funcall p i)
- (setf (aref r i) 1))))) )
-
- `(progn
- (DEFINLINE NAME-RUNE-P (RUNE)
- (SETF RUNE (RUNE-CODE RUNE))
- (AND (<= 0 RUNE ,+max+)
- (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3)))
- (= 1 (SBIT ',(predicate-to-bv #'name-rune-p)
- (THE FIXNUM RUNE))))))
- (DEFINLINE NAME-START-RUNE-P (RUNE)
- (SETF RUNE (RUNE-CODE RUNE))
- (AND (<= 0 RUNE ,+MAX+)
- (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3)))
- (= 1 (SBIT ',(predicate-to-bv #'name-start-rune-p)
- (THE FIXNUM RUNE)))))))) ))))
+
+
+(eval-when (:compile-toplevel :execute)
+
+(defconstant +max+ ; length of the bit vectors DEFPREDICATE builds, and the bound its generated range test uses
+ #xD800)
+
+(defmacro defpredicate (name set) ; NAME: function to define; SET: symbol whose compile-time value is a code list
+  (labels ((c-in-set-p (c s) ; true when code C equals an integer entry of S or lies in a (LOW HIGH) entry
+             (declare (type (integer 0 #x10000) c)
+                      (type list s)
+                      #+sbcl (values t &optional)) ; VALUES declarations are an SBCL extension
+             (find c s
+                   :test #'(lambda (c eltv)
+                             (declare (type (integer 0 #x10000) c)
+                                      #+sbcl (values boolean &optional))
+                             (etypecase eltv
+                               (fixnum (= c (the fixnum eltv)))
+                               (cons (and (>= c (the fixnum (car eltv))) ; inclusive range: both bounds
+                                          (<= c (the fixnum (cadr eltv))))))))) ; must hold, hence AND
+           ;; Build a +MAX+-element bit vector whose bit I is 1 when code I is a member of S.
+           (set-to-bv (s)
+             (let ((r (make-array +max+ :element-type 'bit :initial-element 0)))
+               (dotimes (i +max+ r) ; iterate only over valid indices of R
+                 (when (c-in-set-p i s)
+                   (setf (aref r i) 1))))))
+    `(definline ,name (rune)
+       #+sbcl (declare (values boolean &optional))
+       (setf rune (rune-code rune))
+       (and (< -1 rune ,+max+) ; valid indices are 0 .. +MAX+ - 1; the SBIT below is unchecked
+            (locally (declare (optimize (safety 0) (speed 3)))
+              (= 1 (sbit ,(set-to-bv (symbol-value set)) ; table computed once, at macroexpansion time
+                         (the fixnum rune))))))))
+
+
+
+
+(defvar name-start-codes ; codes of #\_ and #\: only; NOTE(review): the replaced NAME-START-RUNE-P also accepted letter runes
+ '(#.(char-code #\_) #.(char-code #\:)))
+
+(defvar base-codes ; each entry is a single code or a (LOW HIGH) pair, the two shapes DEFPREDICATE's membership test handles
+ '((65 90) (97 122) (192 214) (216 246) (248 255) (256 305)
+ (308 318) (321 328) (330 382) (384 451) (461 496) (500 501)
+ (506 535) (592 680) (699 705) 902 (904 906) 908
+ (910 929) (931 974) (976 982) 986 988 990 992
+ (994 1011) (1025 1036) (1038 1103) (1105 1116) (1118 1153)
+ (1168 1220) (1223 1224) (1227 1228) (1232 1259) (1262 1269)
+ (1272 1273) (1329 1366) 1369 (1377 1414) (1488 1514)
+ (1520 1522) (1569 1594) (1601 1610) (1649 1719) (1722 1726)
+ (1728 1742) (1744 1747) 1749 (1765 1766) (2309 2361) 2365
+ (2392 2401) (2437 2444) (2447 2448) (2451 2472) (2474 2480)
+ 2482 (2486 2489) (2524 2525) (2527 2529) (2544 2545)
+ (2565 2570) (2575 2576) (2579 2600) (2602 2608) (2610 2611)
+ (2613 2614) (2616 2617) (2649 2652) 2654 (2674 2676)
+ (2693 2699) 2701 (2703 2705) (2707 2728) (2730 2736)
+ (2738 2739) (2741 2745) 2749 2784 (2821 2828) (2831 2832)
+ (2835 2856) (2858 2864) (2866 2867) (2870 2873) 2877
+ (2908 2909) (2911 2913) (2949 2954) (2958 2960) (2962 2965)
+ (2969 2970) 2972
+
+ (2974 2975) (2979 2980) (2984 2986)
+ (2990 2997) (2999 3001) (3077 3084) (3086 3088) (3090 3112)
+ (3114 3123) (3125 3129) (3168 3169) (3205 3212) (3214 3216)
+ (3218 3240) (3242 3251) (3253 3257) 3294 (3296 3297)
+ (3333 3340) (3342 3344) (3346 3368) (3370 3385) (3424 3425)
+ (3585 3630) 3632 (3634 3635) (3648 3653) (3713 3714) 3716
+ (3719 3720) 3722 3725 (3732 3735) (3737 3743) (3745 3747)
+ 3749 3751 (3754 3755) (3757 3758) 3760 (3762 3763) 3773
+ (3776 3780) (3904 3911) (3913 3945) (4256 4293) (4304 4342)
+ 4352 (4354 4355) (4357 4359) 4361 (4363 4364) (4366 4370)
+ 4412 4414 4416 4428 4430 4432 (4436 4437) 4441
+ (4447 4449) 4451 4453 4455 4457 (4461 4462) (4466 4467)
+ 4469 4510 4520 4523 (4526 4527) (4535 4536) 4538
+ (4540 4546) 4587 4592 4601 (7680 7835) (7840 7929)
+ (7936 7957) (7960 7965) (7968 8005) (8008 8013) (8016 8023)
+ 8025 8027 8029 (8031 8061) (8064 8116) (8118 8124) 8126
+ (8130 8132) (8134 8140) (8144 8147) (8150 8155) (8160 8172)
+ (8178 8180) (8182 8188) 8486 (8490 8491) 8494 (8576 8578)
+ (12353 12436) (12449 12538) (12549 12588) (44032 55203)))
+
+(defvar ideographic-codes ; same three entries the replaced IDEOGRAPHIC-RUNE-P tested: two (LOW HIGH) pairs and one single code
+ '((19968 40869)
+ 12295
+ (12321 12329)))
+
+(defvar letter-codes ; concatenation of BASE-CODES and IDEOGRAPHIC-CODES, spliced with ,@
+ `(,@base-codes ,@ideographic-codes))
+
+(defvar digit-codes ; (LOW HIGH) pairs, one per range the replaced DIGIT-RUNE-P* tested
+ '((48 57)
+ (1632 1641)
+ (1776 1785)
+ (2406 2415)
+ (2534 2543)
+ (2662 2671)
+ (2790 2799)
+ (2918 2927)
+ (3047 3055)
+ (3174 3183)
+ (3302 3311)
+ (3430 3439)
+ (3664 3673)
+ (3792 3801)
+ (3872 3881)))
+
+(defvar combining-codes ; each entry is a single code or a (LOW HIGH) pair, the two shapes DEFPREDICATE's membership test handles
+ '((768 837)
+ (864 865)
+ (1155 1158)
+ (1425 1441)
+ (1443 1465)
+ (1467 1469)
+ 1471
+ (1473 1474)
+ 1476
+ (1611 1618)
+ 1648
+ (1750 1756)
+ (1757 1759)
+ (1760 1764)
+ (1767 1768)
+ (1770 1773)
+ (2305 2307)
+ 2364
+ (2366 2380)
+ 2381
+ (2385 2388)
+ (2402 2403)
+ (2433 2435)
+ 2492 2494 2495
+ (2496 2500)
+ (2503 2504)
+ (2507 2509)
+ 2519
+ (2530 2531)
+ 2562 2620 2622 2623
+ (2624 2626)
+ (2631 2632)
+ (2635 2637)
+ (2672 2673)
+ (2689 2691)
+ 2748
+ (2750 2757)
+ (2759 2761)
+ (2763 2765)
+ (2817 2819)
+ 2876
+ (2878 2883)
+ (2887 2888)
+ (2891 2893)
+ (2902 2903)
+ (2946 2947)
+ (3006 3010)
+ (3014 3016)
+ (3018 3021)
+ 3031
+ (3073 3075)
+ (3134 3140)
+ (3142 3144)
+ (3146 3149)
+ (3157 3158)
+ (3202 3203)
+ (3262 3268)
+ (3270 3272)
+ (3274 3277)
+ (3285 3286)
+ (3330 3331)
+ (3390 3395)
+ (3398 3400)
+ (3402 3405)
+ 3415 3633
+ (3636 3642)
+ (3655 3662)
+ 3761
+ (3764 3769)
+ (3771 3772)
+ (3784 3789)
+ (3864 3865)
+ 3893 3895 3897 3902 3903
+ (3953 3972)
+ (3974 3979)
+ (3984 3989)
+ 3991
+ (3993 4013)
+ (4017 4023)
+ 4025
+ (8400 8412)
+ 8417
+ (12330 12335)
+ 12441 12442))
+
+(defvar extender-codes ; single codes followed by (LOW HIGH) pairs, matching the tests in the replaced EXTENDER-RUNE-P
+ '(183 720 721 903 1600 3654 3782 12293
+ (12337 12341)
+ (12445 12446)
+ (12540 12542)))
+
+(defvar name-codes ; every code a name rune may have: letters, digits, the four punctuation codes, combining marks, extenders
+ `(,@letter-codes ,@digit-codes
+ #.(char-code #\.) #.(char-code #\-)
+ #.(char-code #\_) #.(char-code #\:)
+ ,@combining-codes ,@extender-codes))
+
+
+) ;; Eval-when
+
+(defpredicate name-rune-p name-codes) ; NAME-RUNE-P: membership test against NAME-CODES
+(eval-when (:compile-toplevel :execute) (defvar name-start-rune-codes (append letter-codes name-start-codes))) ; letters may also start a name, as in the replaced definition
+(defpredicate name-start-rune-p name-start-rune-codes) ; keeps the replaced function's name, NAME-START-RUNE-P, so existing callers still resolve
More information about the cxml-devel
mailing list