revised patch (Re: [cxml-devel] patch - xml-name-rune-p - proposing an approach w/o #.)

Sean Champ gimmal at gmail.com
Wed Aug 16 17:59:57 UTC 2006


Begging your pardon, but I've been dazing at the end of the shift, here.

The previous patch I'd submitted on this line, it had a typo and a misnamed
form, in it.

The patch attatched to this message would serve to obsolete the prior patch.


In this patch, those quirks of the prior patch are resolved:
 DECALRE => DECLARE
 NAME-START-P => NAME-START-RUNE-P




--
Sean Champ
-------------- next part --------------
;;;; xml-name-rune-p -- character class definitions
;;;;
;;;; This file is part of the CXML parser, released under Lisp-LGPL.
;;;; See file COPYING for details.
;;;;
;;;; Author: Gilbert Baumann <unk6 at rz.uni-karlsruhe.de>

(in-package :cxml)


(eval-when (:compile-toplevel :execute)

(defconstant +max+
  #xD800)

(defmacro defpredicate (name set)
  (labels ((c-in-set-p (c s)
	     (declare (type (integer 0 #x10000) c)
		      (type list s)
		      (values t &optional))
	     (find c s
		   :test #'(lambda (c eltv)
			     (declare (type (integer 0 #x10000) c)
				      (values boolean &optional))
			     (etypecase eltv
			       (fixnum (= c (the fixnum eltv)))
			       (cons (or (>= c (the fixnum (car eltv)))
					 (<= c (the fixnum (cadr eltv)))))))))

	   (set-to-bv (s)
	     (let ((r (make-array +max+ :element-type 'bit :initial-element 0)))
	       (dotimes (i #x10000 r)
		 (when (c-in-set-p i s)
		   (setf (aref r i) 1))))))
    ` (definline ,name (rune)
	(declare (values boolean &optional))
	(setf rune (rune-code rune))
	(and (<= 0 rune ,+max+)
	     (locally (declare (optimize (safety 0) (speed 3)))
	       (= 1 (sbit ,(set-to-bv (symbol-value set))
			  (the fixnum rune))))))))




(defvar name-start-codes
  '(#.(char-code #\_) #.(char-code #\:)))

(defvar base-codes
  '((65 90) (97 122) (192 214) (216 246) (248 255) (256 305)
    (308 318) (321 328) (330 382) (384 451) (461 496) (500 501)
    (506 535) (592 680) (699 705) 902 (904 906) 908
    (910 929) (931 974) (976 982) 986 988 990 992
    (994 1011) (1025 1036) (1038 1103) (1105 1116) (1118 1153)
    (1168 1220) (1223 1224) (1227 1228) (1232 1259) (1262 1269)
    (1272 1273) (1329 1366) 1369 (1377 1414) (1488 1514)
    (1520 1522) (1569 1594) (1601 1610) (1649 1719) (1722 1726)
    (1728 1742) (1744 1747) 1749 (1765 1766) (2309 2361) 2365
    (2392 2401) (2437 2444) (2447 2448) (2451 2472) (2474 2480)
    2482 (2486 2489) (2524 2525) (2527 2529) (2544 2545)
    (2565 2570) (2575 2576) (2579 2600) (2602 2608) (2610 2611)
    (2613 2614) (2616 2617) (2649 2652) 2654 (2674 2676)
    (2693 2699) 2701 (2703 2705) (2707 2728) (2730 2736)
    (2738 2739) (2741 2745) 2749 2784 (2821 2828) (2831 2832)
    (2835 2856) (2858 2864) (2866 2867) (2870 2873) 2877
    (2908 2909) (2911 2913) (2949 2954) (2958 2960) (2962 2965)
    (2969 2970) 2972

    (2974 2975) (2979 2980) (2984 2986)
    (2990 2997) (2999 3001) (3077 3084) (3086 3088) (3090 3112)
    (3114 3123) (3125 3129) (3168 3169) (3205 3212) (3214 3216)
    (3218 3240) (3242 3251) (3253 3257) 3294 (3296 3297)
    (3333 3340) (3342 3344) (3346 3368) (3370 3385) (3424 3425)
    (3585 3630) 3632 (3634 3635) (3648 3653) (3713 3714) 3716
    (3719 3720) 3722 3725 (3732 3735) (3737 3743) (3745 3747)
    3749 3751 (3754 3755) (3757 3758) 3760 (3762 3763) 3773
    (3776 3780) (3904 3911) (3913 3945) (4256 4293) (4304 4342)
    4352 (4354 4355) (4357 4359) 4361 (4363 4364) (4366 4370)
    4412 4414 4416 4428 4430 4432 (4436 4437) 4441
    (4447 4449) 4451 4453 4455 4457 (4461 4462) (4466 4467)
    4469 4510 4520 4523 (4526 4527) (4535 4536) 4538
    (4540 4546) 4587 4592 4601 (7680 7835) (7840 7929)
    (7936 7957) (7960 7965) (7968 8005) (8008 8013) (8016 8023)
    8025 8027 8029 (8031 8061) (8064 8116) (8118 8124) 8126
    (8130 8132) (8134 8140) (8144 8147) (8150 8155) (8160 8172)
    (8178 8180) (8182 8188) 8486 (8490 8491) 8494 (8576 8578)
    (12353 12436) (12449 12538) (12549 12588) (44032 55203)))

(defvar ideographic-codes
  '((19968 40869)
    12295
    (12321 12329)))

(defvar letter-codes
  `(, at base-codes , at ideographic-codes))

(defvar digit-codes
  '((48 57)
    (1632 1641)
    (1776 1785)
    (2406 2415)
    (2534 2543)
    (2662 2671)
    (2790 2799)
    (2918 2927)
    (3047 3055)
    (3174 3183)
    (3302 3311)
    (3430 3439)
    (3664 3673)
    (3792 3801)
    (3872 3881)))

(defvar combining-codes
  '((768 837)
    (864 865)
    (1155 1158)
    (1425 1441)
    (1443 1465)
    (1467 1469)
    1471
    (1473 1474)
    1476
    (1611 1618)
    1648
    (1750 1756)
    (1757 1759)
    (1760 1764)
    (1767 1768)
    (1770 1773)
    (2305 2307)
    2364
    (2366 2380)
    2381
    (2385 2388)
    (2402 2403)
    (2433 2435)
    2492 2494 2495
    (2496 2500)
    (2503 2504)
    (2507 2509)
    2519
    (2530 2531)
    2562 2620 2622 2623
    (2624 2626)
    (2631 2632)
    (2635 2637)
    (2672 2673)
    (2689 2691)
    2748
    (2750 2757)
    (2759 2761)
    (2763 2765)
    (2817 2819)
    2876
    (2878 2883)
    (2887 2888)
    (2891 2893)
    (2902 2903)
    (2946 2947)
    (3006 3010)
    (3014 3016)
    (3018 3021)
    3031
    (3073 3075)
    (3134 3140)
    (3142 3144)
    (3146 3149)
    (3157 3158)
    (3202 3203)
    (3262 3268)
    (3270 3272)
    (3274 3277)
    (3285 3286)
    (3330 3331)
    (3390 3395)
    (3398 3400)
    (3402 3405)
    3415 3633
    (3636 3642)
    (3655 3662)
    3761
    (3764 3769)
    (3771 3772)
    (3784 3789)
    (3864 3865)
    3893 3895 3897 3902 3903
    (3953 3972)
    (3974 3979)
    (3984 3989)
    3991
    (3993 4013)
    (4017 4023)
    4025
    (8400 8412)
    8417
    (12330 12335)
    12441 12442))

(defvar extender-codes
  '(183 720 721 903 1600 3654 3782 12293
    (12337 12341)
    (12445 12446)
    (12540 12542)))

(defvar name-codes
  `(, at letter-codes , at digit-codes
    #.(char-code #\.) #.(char-code #\-)
    #.(char-code #\_) #.(char-code #\:)
    , at combining-codes , at extender-codes))


) ;; Eval-when

(defpredicate name-rune-p name-codes)

(defpredicate name-start-rune-p name-start-codes)


More information about the cxml-devel mailing list