[Bese-devel] arnesi http patch

Chris Dean ctdean at sokitomi.com
Sat Apr 5 01:44:25 UTC 2008


Below is a patch to slightly extend html entity handling.  

- We add more html entity mappings
- We handle the conversion of numeric html entities.

Cheers,
Chris Dean

diff -rN -u old-arnesi_dev/src/http.lisp new-arnesi_dev/src/http.lisp
--- old-arnesi_dev/src/http.lisp	2008-04-04 17:16:16.000000000 -0700
+++ new-arnesi_dev/src/http.lisp	2008-04-04 17:16:16.000000000 -0700
@@ -126,9 +126,12 @@
 
 (defun make-html-entities ()
   (let ((ht (make-hash-table :test 'equalp)))
-    (flet ((add-mapping (char escaped)
-             (setf (gethash char ht) escaped
-                   (gethash escaped ht) char)))
+    (flet ((add-mapping (char-or-code escaped)
+             (let ((char (if (numberp char-or-code)
+                             (code-char char-or-code)
+                             char-or-code)))
+               (setf (gethash char ht) escaped
+                     (gethash escaped ht) char))))
       (add-mapping #\< "<")
       (add-mapping #\> ">")
       (add-mapping #\& "&")
@@ -143,18 +146,283 @@
       (add-mapping "o`" "ò")
       (add-mapping "o'" "ó")
       (add-mapping "u`" "ù")
-      (add-mapping "u'" "ú"))
+      (add-mapping "u'" "ú")
+      (add-mapping  160 " ")
+      (add-mapping  161 "¡")
+      (add-mapping  162 "¢")
+      (add-mapping  163 "£")
+      (add-mapping  164 "¤")
+      (add-mapping  165 "¥")
+      (add-mapping  166 "¦")
+      (add-mapping  167 "§")
+      (add-mapping  168 "¨")
+      (add-mapping  169 "©")
+      (add-mapping  170 "ª")
+      (add-mapping  171 "«")
+      (add-mapping  172 "¬")
+      (add-mapping  173 "­")
+      (add-mapping  174 "®")
+      (add-mapping  175 "¯")
+      (add-mapping  176 "°")
+      (add-mapping  177 "±")
+      (add-mapping  178 "&sup2;")
+      (add-mapping  179 "&sup3;")
+      (add-mapping  180 "´")
+      (add-mapping  181 "µ")
+      (add-mapping  182 "¶")
+      (add-mapping  183 "·")
+      (add-mapping  184 "¸")
+      (add-mapping  185 "&sup1;")
+      (add-mapping  186 "º")
+      (add-mapping  187 "»")
+      (add-mapping  188 "&frac14;")
+      (add-mapping  189 "&frac12;")
+      (add-mapping  190 "&frac34;")
+      (add-mapping  191 "¿")
+      (add-mapping  192 "À")
+      (add-mapping  193 "Á")
+      (add-mapping  194 "Â")
+      (add-mapping  195 "Ã")
+      (add-mapping  196 "Ä")
+      (add-mapping  197 "Å")
+      (add-mapping  198 "Æ")
+      (add-mapping  199 "Ç")
+      (add-mapping  200 "È")
+      (add-mapping  201 "É")
+      (add-mapping  202 "Ê")
+      (add-mapping  203 "Ë")
+      (add-mapping  204 "Ì")
+      (add-mapping  205 "Í")
+      (add-mapping  206 "Î")
+      (add-mapping  207 "Ï")
+      (add-mapping  208 "Ð")
+      (add-mapping  209 "Ñ")
+      (add-mapping  210 "Ò")
+      (add-mapping  211 "Ó")
+      (add-mapping  212 "Ô")
+      (add-mapping  213 "Õ")
+      (add-mapping  214 "Ö")
+      (add-mapping  215 "×")
+      (add-mapping  216 "Ø")
+      (add-mapping  217 "Ù")
+      (add-mapping  218 "Ú")
+      (add-mapping  219 "Û")
+      (add-mapping  220 "Ü")
+      (add-mapping  221 "Ý")
+      (add-mapping  222 "Þ")
+      (add-mapping  223 "ß")
+      (add-mapping  224 "à")
+      (add-mapping  225 "á")
+      (add-mapping  226 "â")
+      (add-mapping  227 "ã")
+      (add-mapping  228 "ä")
+      (add-mapping  229 "å")
+      (add-mapping  230 "æ")
+      (add-mapping  231 "ç")
+      (add-mapping  232 "è")
+      (add-mapping  233 "é")
+      (add-mapping  234 "ê")
+      (add-mapping  235 "ë")
+      (add-mapping  236 "ì")
+      (add-mapping  237 "í")
+      (add-mapping  238 "î")
+      (add-mapping  239 "ï")
+      (add-mapping  240 "ð")
+      (add-mapping  241 "ñ")
+      (add-mapping  242 "ò")
+      (add-mapping  243 "ó")
+      (add-mapping  244 "ô")
+      (add-mapping  245 "õ")
+      (add-mapping  246 "ö")
+      (add-mapping  247 "÷")
+      (add-mapping  248 "ø")
+      (add-mapping  249 "ù")
+      (add-mapping  250 "ú")
+      (add-mapping  251 "û")
+      (add-mapping  252 "ü")
+      (add-mapping  253 "ý")
+      (add-mapping  254 "þ")
+      (add-mapping  255 "ÿ")
+      (add-mapping  338 "Œ")
+      (add-mapping  339 "œ")
+      (add-mapping  352 "Š")
+      (add-mapping  353 "š")
+      (add-mapping  376 "Ÿ")
+      (add-mapping  402 "ƒ")
+      (add-mapping  710 "ˆ")
+      (add-mapping  732 "˜")
+      (add-mapping  913 "Α")
+      (add-mapping  914 "Β")
+      (add-mapping  915 "Γ")
+      (add-mapping  916 "Δ")
+      (add-mapping  917 "Ε")
+      (add-mapping  918 "Ζ")
+      (add-mapping  919 "Η")
+      (add-mapping  920 "Θ")
+      (add-mapping  921 "Ι")
+      (add-mapping  922 "Κ")
+      (add-mapping  923 "Λ")
+      (add-mapping  924 "Μ")
+      (add-mapping  925 "Ν")
+      (add-mapping  926 "Ξ")
+      (add-mapping  927 "Ο")
+      (add-mapping  928 "Π")
+      (add-mapping  929 "Ρ")
+      (add-mapping  931 "Σ")
+      (add-mapping  932 "Τ")
+      (add-mapping  933 "Υ")
+      (add-mapping  934 "Φ")
+      (add-mapping  935 "Χ")
+      (add-mapping  936 "Ψ")
+      (add-mapping  937 "Ω")
+      (add-mapping  945 "α")
+      (add-mapping  946 "β")
+      (add-mapping  947 "γ")
+      (add-mapping  948 "δ")
+      (add-mapping  949 "ε")
+      (add-mapping  950 "ζ")
+      (add-mapping  951 "η")
+      (add-mapping  952 "θ")
+      (add-mapping  953 "ι")
+      (add-mapping  954 "κ")
+      (add-mapping  955 "λ")
+      (add-mapping  956 "μ")
+      (add-mapping  957 "ν")
+      (add-mapping  958 "ξ")
+      (add-mapping  959 "ο")
+      (add-mapping  960 "π")
+      (add-mapping  961 "ρ")
+      (add-mapping  962 "ς")
+      (add-mapping  963 "σ")
+      (add-mapping  964 "τ")
+      (add-mapping  965 "υ")
+      (add-mapping  966 "φ")
+      (add-mapping  967 "χ")
+      (add-mapping  968 "ψ")
+      (add-mapping  969 "ω")
+      (add-mapping  977 "ϑ")
+      (add-mapping  978 "ϒ")
+      (add-mapping  982 "ϖ")
+      (add-mapping 8194 " ")
+      (add-mapping 8195 " ")
+      (add-mapping 8201 " ")
+      (add-mapping 8204 "‌")
+      (add-mapping 8205 "‍")
+      (add-mapping 8206 "‎")
+      (add-mapping 8207 "‏")
+      (add-mapping 8211 "–")
+      (add-mapping 8212 "—")
+      (add-mapping 8216 "‘")
+      (add-mapping 8217 "’")
+      (add-mapping 8218 "‚")
+      (add-mapping 8220 "“")
+      (add-mapping 8221 "”")
+      (add-mapping 8222 "„")
+      (add-mapping 8224 "†")
+      (add-mapping 8225 "‡")
+      (add-mapping 8226 "•")
+      (add-mapping 8230 "…")
+      (add-mapping 8240 "‰")
+      (add-mapping 8242 "′")
+      (add-mapping 8243 "″")
+      (add-mapping 8249 "‹")
+      (add-mapping 8250 "›")
+      (add-mapping 8254 "‾")
+      (add-mapping 8260 "⁄")
+      (add-mapping 8364 "€")
+      (add-mapping 8465 "ℑ")
+      (add-mapping 8472 "℘")
+      (add-mapping 8476 "ℜ")
+      (add-mapping 8482 "™")
+      (add-mapping 8501 "ℵ")
+      (add-mapping 8592 "←")
+      (add-mapping 8593 "↑")
+      (add-mapping 8594 "→")
+      (add-mapping 8595 "↓")
+      (add-mapping 8596 "↔")
+      (add-mapping 8629 "↵")
+      (add-mapping 8656 "⇐")
+      (add-mapping 8657 "⇑")
+      (add-mapping 8658 "⇒")
+      (add-mapping 8659 "⇓")
+      (add-mapping 8660 "⇔")
+      (add-mapping 8704 "∀")
+      (add-mapping 8706 "∂")
+      (add-mapping 8707 "∃")
+      (add-mapping 8709 "∅")
+      (add-mapping 8711 "∇")
+      (add-mapping 8712 "∈")
+      (add-mapping 8713 "∉")
+      (add-mapping 8715 "∋")
+      (add-mapping 8719 "∏")
+      (add-mapping 8721 "∑")
+      (add-mapping 8722 "−")
+      (add-mapping 8727 "∗")
+      (add-mapping 8730 "√")
+      (add-mapping 8733 "∝")
+      (add-mapping 8734 "∞")
+      (add-mapping 8736 "∠")
+      (add-mapping 8743 "∧")
+      (add-mapping 8744 "∨")
+      (add-mapping 8745 "∩")
+      (add-mapping 8746 "∪")
+      (add-mapping 8747 "∫")
+      (add-mapping 8756 "&there4;")
+      (add-mapping 8764 "∼")
+      (add-mapping 8773 "≅")
+      (add-mapping 8776 "≈")
+      (add-mapping 8800 "≠")
+      (add-mapping 8801 "≡")
+      (add-mapping 8804 "≤")
+      (add-mapping 8805 "≥")
+      (add-mapping 8834 "⊂")
+      (add-mapping 8835 "⊃")
+      (add-mapping 8836 "⊄")
+      (add-mapping 8838 "⊆")
+      (add-mapping 8839 "⊇")
+      (add-mapping 8853 "⊕")
+      (add-mapping 8855 "⊗")
+      (add-mapping 8869 "⊥")
+      (add-mapping 8901 "⋅")
+      (add-mapping 8968 "⌈")
+      (add-mapping 8969 "⌉")
+      (add-mapping 8970 "⌊")
+      (add-mapping 8971 "⌋")
+      (add-mapping 9001 "⟨")
+      (add-mapping 9002 "⟩")
+      (add-mapping 9674 "◊")
+      (add-mapping 9824 "♠")
+      (add-mapping 9827 "♣")
+      (add-mapping 9829 "♥")
+      (add-mapping 9830 "♦"))
+
     ht))
 
 (defparameter *html-entites* (make-html-entities))
 
+(defun numeric-html-entity-value (s)
+  (let ((len (length s)))
+    (and (> len 3)
+         (char-equal (char s 0) #\&)
+         (char-equal (char s 1) #\#)
+         (char-equal (char s (1- len)) #\;)
+         (if (char-equal (char s 2) #\x)
+             (and (every (lambda (ch) (digit-char-p ch 16)) 
+                         (subseq s 3 (1- len)))
+                  (parse-integer s :start 3 :end (1- len) :radix 16))
+             (and (every #'digit-char-p 
+                         (subseq s 2 (1- len)))
+                  (parse-integer s :start 2 :end (1- len)))))))
+
 (defun html-entity->char (entity &optional (default #\?))
   (let ((res (gethash entity *html-entites*)))
     (if res
         (if (stringp res)
             (char res 0)
             res)
-        default)))
+        (aif (numeric-html-entity-value entity)
+             (code-char it)
+             default))))
 
 (defun write-as-html (string &key (stream t) (escape-whitespace nil))
   (loop




More information about the bese-devel mailing list