[Bese-devel] arnesi http patch
Chris Dean
ctdean at sokitomi.com
Sat Apr 5 01:44:25 UTC 2008
Below is a patch to slightly extend html entity handling.
- We add more html entity mappings
- We handle the conversion of numeric html entities.
Cheers,
Chris Dean
diff -rN -u old-arnesi_dev/src/http.lisp new-arnesi_dev/src/http.lisp
--- old-arnesi_dev/src/http.lisp 2008-04-04 17:16:16.000000000 -0700
+++ new-arnesi_dev/src/http.lisp 2008-04-04 17:16:16.000000000 -0700
@@ -126,9 +126,12 @@
(defun make-html-entities ()
(let ((ht (make-hash-table :test 'equalp)))
- (flet ((add-mapping (char escaped)
- (setf (gethash char ht) escaped
- (gethash escaped ht) char)))
+ (flet ((add-mapping (char-or-code escaped)
+ (let ((char (if (numberp char-or-code)
+ (code-char char-or-code)
+ char-or-code)))
+ (setf (gethash char ht) escaped
+ (gethash escaped ht) char))))
(add-mapping #\< "<")
(add-mapping #\> ">")
(add-mapping #\& "&")
@@ -143,18 +146,283 @@
(add-mapping "o`" "ò")
(add-mapping "o'" "ó")
(add-mapping "u`" "ù")
- (add-mapping "u'" "ú"))
+ (add-mapping "u'" "ú")
+ (add-mapping 160 " ")
+ (add-mapping 161 "¡")
+ (add-mapping 162 "¢")
+ (add-mapping 163 "£")
+ (add-mapping 164 "¤")
+ (add-mapping 165 "¥")
+ (add-mapping 166 "¦")
+ (add-mapping 167 "§")
+ (add-mapping 168 "¨")
+ (add-mapping 169 "©")
+ (add-mapping 170 "ª")
+ (add-mapping 171 "«")
+ (add-mapping 172 "¬")
+ (add-mapping 173 "")
+ (add-mapping 174 "®")
+ (add-mapping 175 "¯")
+ (add-mapping 176 "°")
+ (add-mapping 177 "±")
+ (add-mapping 178 "²")
+ (add-mapping 179 "³")
+ (add-mapping 180 "´")
+ (add-mapping 181 "µ")
+ (add-mapping 182 "¶")
+ (add-mapping 183 "·")
+ (add-mapping 184 "¸")
+ (add-mapping 185 "¹")
+ (add-mapping 186 "º")
+ (add-mapping 187 "»")
+ (add-mapping 188 "¼")
+ (add-mapping 189 "½")
+ (add-mapping 190 "¾")
+ (add-mapping 191 "¿")
+ (add-mapping 192 "À")
+ (add-mapping 193 "Á")
+ (add-mapping 194 "Â")
+ (add-mapping 195 "Ã")
+ (add-mapping 196 "Ä")
+ (add-mapping 197 "Å")
+ (add-mapping 198 "Æ")
+ (add-mapping 199 "Ç")
+ (add-mapping 200 "È")
+ (add-mapping 201 "É")
+ (add-mapping 202 "Ê")
+ (add-mapping 203 "Ë")
+ (add-mapping 204 "Ì")
+ (add-mapping 205 "Í")
+ (add-mapping 206 "Î")
+ (add-mapping 207 "Ï")
+ (add-mapping 208 "Ð")
+ (add-mapping 209 "Ñ")
+ (add-mapping 210 "Ò")
+ (add-mapping 211 "Ó")
+ (add-mapping 212 "Ô")
+ (add-mapping 213 "Õ")
+ (add-mapping 214 "Ö")
+ (add-mapping 215 "×")
+ (add-mapping 216 "Ø")
+ (add-mapping 217 "Ù")
+ (add-mapping 218 "Ú")
+ (add-mapping 219 "Û")
+ (add-mapping 220 "Ü")
+ (add-mapping 221 "Ý")
+ (add-mapping 222 "Þ")
+ (add-mapping 223 "ß")
+ (add-mapping 224 "à")
+ (add-mapping 225 "á")
+ (add-mapping 226 "â")
+ (add-mapping 227 "ã")
+ (add-mapping 228 "ä")
+ (add-mapping 229 "å")
+ (add-mapping 230 "æ")
+ (add-mapping 231 "ç")
+ (add-mapping 232 "è")
+ (add-mapping 233 "é")
+ (add-mapping 234 "ê")
+ (add-mapping 235 "ë")
+ (add-mapping 236 "ì")
+ (add-mapping 237 "í")
+ (add-mapping 238 "î")
+ (add-mapping 239 "ï")
+ (add-mapping 240 "ð")
+ (add-mapping 241 "ñ")
+ (add-mapping 242 "ò")
+ (add-mapping 243 "ó")
+ (add-mapping 244 "ô")
+ (add-mapping 245 "õ")
+ (add-mapping 246 "ö")
+ (add-mapping 247 "÷")
+ (add-mapping 248 "ø")
+ (add-mapping 249 "ù")
+ (add-mapping 250 "ú")
+ (add-mapping 251 "û")
+ (add-mapping 252 "ü")
+ (add-mapping 253 "ý")
+ (add-mapping 254 "þ")
+ (add-mapping 255 "ÿ")
+ (add-mapping 338 "Œ")
+ (add-mapping 339 "œ")
+ (add-mapping 352 "Š")
+ (add-mapping 353 "š")
+ (add-mapping 376 "Ÿ")
+ (add-mapping 402 "ƒ")
+ (add-mapping 710 "ˆ")
+ (add-mapping 732 "˜")
+ (add-mapping 913 "Α")
+ (add-mapping 914 "Β")
+ (add-mapping 915 "Γ")
+ (add-mapping 916 "Δ")
+ (add-mapping 917 "Ε")
+ (add-mapping 918 "Ζ")
+ (add-mapping 919 "Η")
+ (add-mapping 920 "Θ")
+ (add-mapping 921 "Ι")
+ (add-mapping 922 "Κ")
+ (add-mapping 923 "Λ")
+ (add-mapping 924 "Μ")
+ (add-mapping 925 "Ν")
+ (add-mapping 926 "Ξ")
+ (add-mapping 927 "Ο")
+ (add-mapping 928 "Π")
+ (add-mapping 929 "Ρ")
+ (add-mapping 931 "Σ")
+ (add-mapping 932 "Τ")
+ (add-mapping 933 "Υ")
+ (add-mapping 934 "Φ")
+ (add-mapping 935 "Χ")
+ (add-mapping 936 "Ψ")
+ (add-mapping 937 "Ω")
+ (add-mapping 945 "α")
+ (add-mapping 946 "β")
+ (add-mapping 947 "γ")
+ (add-mapping 948 "δ")
+ (add-mapping 949 "ε")
+ (add-mapping 950 "ζ")
+ (add-mapping 951 "η")
+ (add-mapping 952 "θ")
+ (add-mapping 953 "ι")
+ (add-mapping 954 "κ")
+ (add-mapping 955 "λ")
+ (add-mapping 956 "μ")
+ (add-mapping 957 "ν")
+ (add-mapping 958 "ξ")
+ (add-mapping 959 "ο")
+ (add-mapping 960 "π")
+ (add-mapping 961 "ρ")
+ (add-mapping 962 "ς")
+ (add-mapping 963 "σ")
+ (add-mapping 964 "τ")
+ (add-mapping 965 "υ")
+ (add-mapping 966 "φ")
+ (add-mapping 967 "χ")
+ (add-mapping 968 "ψ")
+ (add-mapping 969 "ω")
+ (add-mapping 977 "ϑ")
+ (add-mapping 978 "ϒ")
+ (add-mapping 982 "ϖ")
+ (add-mapping 8194 " ")
+ (add-mapping 8195 " ")
+ (add-mapping 8201 " ")
+ (add-mapping 8204 "")
+ (add-mapping 8205 "")
+ (add-mapping 8206 "")
+ (add-mapping 8207 "")
+ (add-mapping 8211 "–")
+ (add-mapping 8212 "—")
+ (add-mapping 8216 "‘")
+ (add-mapping 8217 "’")
+ (add-mapping 8218 "‚")
+ (add-mapping 8220 "“")
+ (add-mapping 8221 "”")
+ (add-mapping 8222 "„")
+ (add-mapping 8224 "†")
+ (add-mapping 8225 "‡")
+ (add-mapping 8226 "•")
+ (add-mapping 8230 "…")
+ (add-mapping 8240 "‰")
+ (add-mapping 8242 "′")
+ (add-mapping 8243 "″")
+ (add-mapping 8249 "‹")
+ (add-mapping 8250 "›")
+ (add-mapping 8254 "‾")
+ (add-mapping 8260 "⁄")
+ (add-mapping 8364 "€")
+ (add-mapping 8465 "ℑ")
+ (add-mapping 8472 "℘")
+ (add-mapping 8476 "ℜ")
+ (add-mapping 8482 "™")
+ (add-mapping 8501 "ℵ")
+ (add-mapping 8592 "←")
+ (add-mapping 8593 "↑")
+ (add-mapping 8594 "→")
+ (add-mapping 8595 "↓")
+ (add-mapping 8596 "↔")
+ (add-mapping 8629 "↵")
+ (add-mapping 8656 "⇐")
+ (add-mapping 8657 "⇑")
+ (add-mapping 8658 "⇒")
+ (add-mapping 8659 "⇓")
+ (add-mapping 8660 "⇔")
+ (add-mapping 8704 "∀")
+ (add-mapping 8706 "∂")
+ (add-mapping 8707 "∃")
+ (add-mapping 8709 "∅")
+ (add-mapping 8711 "∇")
+ (add-mapping 8712 "∈")
+ (add-mapping 8713 "∉")
+ (add-mapping 8715 "∋")
+ (add-mapping 8719 "∏")
+ (add-mapping 8721 "∑")
+ (add-mapping 8722 "−")
+ (add-mapping 8727 "∗")
+ (add-mapping 8730 "√")
+ (add-mapping 8733 "∝")
+ (add-mapping 8734 "∞")
+ (add-mapping 8736 "∠")
+ (add-mapping 8743 "∧")
+ (add-mapping 8744 "∨")
+ (add-mapping 8745 "∩")
+ (add-mapping 8746 "∪")
+ (add-mapping 8747 "∫")
+ (add-mapping 8756 "∴")
+ (add-mapping 8764 "∼")
+ (add-mapping 8773 "≅")
+ (add-mapping 8776 "≈")
+ (add-mapping 8800 "≠")
+ (add-mapping 8801 "≡")
+ (add-mapping 8804 "≤")
+ (add-mapping 8805 "≥")
+ (add-mapping 8834 "⊂")
+ (add-mapping 8835 "⊃")
+ (add-mapping 8836 "⊄")
+ (add-mapping 8838 "⊆")
+ (add-mapping 8839 "⊇")
+ (add-mapping 8853 "⊕")
+ (add-mapping 8855 "⊗")
+ (add-mapping 8869 "⊥")
+ (add-mapping 8901 "⋅")
+ (add-mapping 8968 "⌈")
+ (add-mapping 8969 "⌉")
+ (add-mapping 8970 "⌊")
+ (add-mapping 8971 "⌋")
+ (add-mapping 9001 "〈")
+ (add-mapping 9002 "〉")
+ (add-mapping 9674 "◊")
+ (add-mapping 9824 "♠")
+ (add-mapping 9827 "♣")
+ (add-mapping 9829 "♥")
+ (add-mapping 9830 "♦"))
+
ht))
(defparameter *html-entites* (make-html-entities))
+(defun numeric-html-entity-value (s)
+ (let ((len (length s)))
+ (and (> len 3)
+ (char-equal (char s 0) #\&)
+ (char-equal (char s 1) #\#)
+ (char-equal (char s (1- len)) #\;)
+ (if (char-equal (char s 2) #\x)
+ (and (every (lambda (ch) (digit-char-p ch 16))
+ (subseq s 3 (1- len)))
+ (parse-integer s :start 3 :end (1- len) :radix 16))
+ (and (every #'digit-char-p
+ (subseq s 2 (1- len)))
+ (parse-integer s :start 2 :end (1- len)))))))
+
(defun html-entity->char (entity &optional (default #\?))
(let ((res (gethash entity *html-entites*)))
(if res
(if (stringp res)
(char res 0)
res)
- default)))
+ (aif (numeric-html-entity-value entity)
+ (code-char it)
+ default))))
(defun write-as-html (string &key (stream t) (escape-whitespace nil))
(loop
More information about the bese-devel
mailing list