From hhubner at common-lisp.net Thu May 1 06:25:28 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 02:25:28 -0400 (EDT) Subject: [flexi-streams-cvs] r1 - in trunk: . doc test Message-ID: <20080501062528.A36B2751A6@common-lisp.net> Author: hhubner Date: Thu May 1 02:25:14 2008 New Revision: 1 Added: trunk/ trunk/CHANGELOG trunk/ascii.lisp trunk/code-pages.lisp trunk/doc/ trunk/doc/foo.txt (contents, props changed) trunk/doc/index.html trunk/external-format.lisp trunk/flexi-streams.asd trunk/in-memory.lisp trunk/input.lisp trunk/iso-8859.lisp trunk/koi8-r.lisp trunk/lw-binary-stream.lisp trunk/output.lisp trunk/packages.lisp trunk/specials.lisp trunk/stream.lisp trunk/strings.lisp trunk/test/ trunk/test/README (contents, props changed) trunk/test/hebrew_latin8_cr.txt trunk/test/hebrew_latin8_crlf.txt trunk/test/hebrew_latin8_lf.txt trunk/test/hebrew_utf8_cr.txt trunk/test/hebrew_utf8_crlf.txt trunk/test/hebrew_utf8_lf.txt trunk/test/kafka_cp1252_cr.txt trunk/test/kafka_cp1252_crlf.txt trunk/test/kafka_cp1252_lf.txt trunk/test/kafka_latin1_cr.txt trunk/test/kafka_latin1_crlf.txt trunk/test/kafka_latin1_lf.txt trunk/test/kafka_utf8_cr.txt trunk/test/kafka_utf8_crlf.txt trunk/test/kafka_utf8_lf.txt trunk/test/packages.lisp trunk/test/russian_koi8r_cr.txt trunk/test/russian_koi8r_crlf.txt trunk/test/russian_koi8r_lf.txt trunk/test/russian_utf8_cr.txt (contents, props changed) trunk/test/russian_utf8_crlf.txt (contents, props changed) trunk/test/russian_utf8_lf.txt (contents, props changed) trunk/test/test.lisp trunk/test/tilton_ascii_cr.txt trunk/test/tilton_ascii_crlf.txt trunk/test/tilton_ascii_lf.txt trunk/test/tilton_utf8_cr.txt trunk/test/tilton_utf8_crlf.txt trunk/test/tilton_utf8_lf.txt trunk/test/unicode_demo_ucs2_cr_be.txt (contents, props changed) trunk/test/unicode_demo_ucs2_cr_le.txt (contents, props changed) trunk/test/unicode_demo_ucs2_crlf_be.txt (contents, props changed) trunk/test/unicode_demo_ucs2_crlf_le.txt (contents, props changed) trunk/test/unicode_demo_ucs2_lf_be.txt (contents, props changed) trunk/test/unicode_demo_ucs2_lf_le.txt (contents, props changed) trunk/test/unicode_demo_ucs4_cr_be.txt (contents, props changed) trunk/test/unicode_demo_ucs4_cr_le.txt (contents, props changed) trunk/test/unicode_demo_ucs4_crlf_be.txt (contents, props changed) trunk/test/unicode_demo_ucs4_crlf_le.txt (contents, props changed) trunk/test/unicode_demo_ucs4_lf_be.txt (contents, props changed) trunk/test/unicode_demo_ucs4_lf_le.txt (contents, props changed) trunk/test/unicode_demo_utf8_cr.txt trunk/test/unicode_demo_utf8_crlf.txt trunk/test/unicode_demo_utf8_lf.txt trunk/util.lisp Log: Import flexi-stream-0.14.0 Added: trunk/CHANGELOG ============================================================================== --- (empty file) +++ trunk/CHANGELOG Thu May 1 02:25:14 2008 @@ -0,0 +1,196 @@ +Version 0.14.0 +2007-12-30 +Some fixes for LispWorks (when the underlying stream is a character stream) +Optimized methods for UNREAD-CHAR% in case of 8-bit encodings +More tests + +Version 0.13.1 +2007-10-11 +Small fix for AllegroCL's "modern" mode + +Version 0.13.0 +2007-09-13 +Better optimizations for STREAM-WRITE-SEQUENCE (thanks to Anton Vodonosov) +Bugfix for STREAM-WRITE-BYTE + +Version 0.12.0 +2007-09-07 +Added "bound" for flexi input streams + +Version 0.11.2 +2007-04-06 +Fixed bug in STREAM-WRITE-STRING implementation (reported by quasi) + +Version 0.11.1 +2007-03-22 +More ugliness for a bit of output performance in special cases + +Version 0.11.0 +2007-03-09 +Re-factoring of how encoding errors are handled (patch by Anton Vodonosov) + +Version 0.10.3 +2007-02-19 +Fixed bug in UTF-16 output (patch by Stelian Ionescu) +Fixed *SUBSTITUTION-CHAR* example in docs + +Version 0.10.2 +2007-01-12 +Another fix - sigh... + +Version 0.10.1 +2007-01-11 +Fixed the last change (thanks to Red Daly) + +Version 0.10.0 +2007-01-10 +Added transformers to in-memory streams (thanks to Chris Dean) +Documentation fixes + +Version 0.9.1 +2006-12-27 +More performance improvements (thanks to Robert J. Macomber for SBCL hints) + +Version 0.9.0 +2006-12-27 +Complete re-factoring to improve performance and reduce consing (at least for LispWorks) +Added some tests +Added *PROVIDE-USE-VALUE-RESTART* +Added FLEXI-STREAM-POSITION-SPEC-ERROR condition + +Version 0.8.0 +2006-11-14 +Added USE-VALUE restart for STREAM-READ-CHAR (thanks to Anton Vodonosov) +Added *SUBSTITUTION-CHAR* + +Version 0.7.2 +2006-11-06 +Removed unnecessary CHECK-EOF-NO-HANG also for in-memory streams (see 0.5.8) + +Version 0.7.1 +2006-10-31 +Argh, missed the most important part... + +Version 0.7.0 +2006-10-31 +Added KOI8-R (thanks to Igor Plekhov) + +Version 0.6.6 +2006-10-06 +Made sure not to apply Gray stream generic function to underlying stream + +Version 0.6.5 +2006-10-06 +Optimized STREAM-WRITE-SEQUENCE and STREAM-READ-SEQUENCE for arrays of octets + +Version 0.6.4 +2006-10-05 +Made READ-BYTE/WRITE-BYTE the default behaviour, i.e. we only use the sequence functions for LW if necessary + +Version 0.6.3 +2006-10-02 +Fixed problems with CMUCL Gray streams implementation (reported by Ivan Toshkov) + +Version 0.6.2 +2006-09-23 +Added method for MAKE-LOAD-FORM which is needed for OpenMCL (reported by Robert Synnott, see Drakma mailing list) + +Version 0.6.1 +2006-09-15 +Switched FILE-POSITION implementation to TRIVIAL-GRAY-STREAMS (thanks to David Lichteblau) + +Version 0.6.0 +2006-09-13 +Implemented file positions for LispWorks + +Version 0.5.10 +2006-09-04 +Flexi streams can have binary element types now + +Version 0.5.9 +2006-09-01 +Added string functions + +Version 0.5.8 +2006-09-01 +CHECK-EOF-NO-HANG is not necessary +Updated LW links in documentation +Changed package handling in system definition (thanks to Christophe Rhodes) + +Version 0.5.7 +2006-06-29 +Removed incompatibility with AllegroCL, see mailing list archive for details + +Version 0.5.6 +2006-06-13 +Fixed Emacs mode lines (reported by Robert Goldman) + +Version 0.5.5 +2006-05-24 +Some small fixes for LW + +Version 0.5.4 +2006-05-18 +Workaround for CMUCL (thanks to Satyaki Das) + +Version 0.5.3 +2006-03-06 +Fixed more typos in stream.lisp +Added missing exports in packages.lisp + +Version 0.5.2 +2006-01-26 +Fixed typos in stream.lisp (thanks to James Bielman) + +Version 0.5.1 +2005-12-14 +Some bugfixes in output.lisp (thanks to Jan Idzikowski) + +Version 0.5.0 +2005-12-11 +Added in-memory streams +Exported types +Added specific conditions + +Version 0.4.1 +2005-12-05 +Updated docs + +Version 0.4.0 +2005-12-05 +Added US-ASCII encoding +Added *USE-REPLACEMENT-CHAR* + +Version 0.3.0 +2005-11-26 +Added UNREAD-BYTE and PEEK-BYTE + +Version 0.2.4 +2005-11-26 +WIN32:CODE-PAGE only for LispWorks + +Version 0.2.3 +2005-11-26 +Added STREAM-TERPRI to appease AllegroCL +Fixed typo in docs + +Version 0.2.2 +2005-11-26 +Patch to make class precendence list work in AllegroCL (David Lichteblau) + +Version 0.2.1 +2005-11-25 +Adapted to new TRIVIAL-GRAY-STREAMS API (David Lichteblau) +More changes for portability, specifically for SBCL (David Lichteblau) + +Version 0.2.0 +2005-11-25 +Portable version thanks to TRIVIAL-GRAY-STREAMS (David Lichteblau) + +Version 0.1.1 +2005-11-25 +Documentation enhancements + +Version 0.1.0 +2005-11-25 +Initial public release Added: trunk/ascii.lisp ============================================================================== --- (empty file) +++ trunk/ascii.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,35 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.7 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defvar +ascii-table+ + #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533) + "An array enumerating the character codes for the US-ASCII +encoding.") Added: trunk/code-pages.lisp ============================================================================== --- (empty file) +++ trunk/code-pages.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,62 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +;;; the following code was auto-generated with LWW + +(defvar +code-page-tables+ + '((437 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (720 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 233 226 132 224 134 231 234 235 232 239 238 141 142 143 144 1617 1618 244 164 1600 251 249 1569 1570 1571 1572 163 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1590 1591 1592 1593 1594 1601 181 1602 1603 1604 1605 1606 1607 1608 1609 1610 8801 1611 1612 1613 1614 1615 1616 8776 176 8729 183 8730 8319 178 9632 160)) + (737 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 931 932 933 934 935 936 937 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 965 966 967 968 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 969 940 941 942 970 943 972 973 971 974 902 904 905 906 908 910 911 177 8805 8804 938 939 247 8776 176 8729 183 8730 8319 178 9632 160)) + (775 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 262 252 233 257 228 291 229 263 322 275 342 343 299 377 196 197 201 230 198 333 246 290 162 346 347 214 220 248 163 216 215 164 256 298 243 379 380 378 8221 166 169 174 172 189 188 321 171 187 9617 9618 9619 9474 9508 260 268 280 278 9571 9553 9559 9565 302 352 9488 9492 9524 9516 9500 9472 9532 370 362 9562 9556 9577 9574 9568 9552 9580 381 261 269 281 279 303 353 371 363 382 9496 9484 9608 9604 9612 9616 9600 211 223 332 323 245 213 181 324 310 311 315 316 326 274 325 8217 173 177 8220 190 182 167 247 8222 176 8729 183 185 179 178 9632 160)) + (850 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 215 402 225 237 243 250 241 209 170 186 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 240 208 202 203 200 305 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 254 222 218 219 217 253 221 175 180 173 177 8215 190 182 167 247 184 176 168 183 185 179 178 9632 160)) + (852 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 367 263 231 322 235 336 337 238 377 196 262 201 313 314 244 246 317 318 346 347 214 220 356 357 321 215 269 225 237 243 250 260 261 381 382 280 281 172 378 268 351 171 187 9617 9618 9619 9474 9508 193 194 282 350 9571 9553 9559 9565 379 380 9488 9492 9524 9516 9500 9472 9532 258 259 9562 9556 9577 9574 9568 9552 9580 164 273 272 270 203 271 327 205 206 283 9496 9484 9608 9604 354 366 9600 211 223 212 323 324 328 352 353 340 218 341 368 253 221 355 180 173 733 731 711 728 167 247 184 176 168 729 369 344 345 9632 160)) + (855 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1106 1026 1107 1027 1105 1025 1108 1028 1109 1029 1110 1030 1111 1031 1112 1032 1113 1033 1114 1034 1115 1035 1116 1036 1118 1038 1119 1039 1102 1070 1098 1066 1072 1040 1073 1041 1094 1062 1076 1044 1077 1045 1092 1060 1075 1043 171 187 9617 9618 9619 9474 9508 1093 1061 1080 1048 9571 9553 9559 9565 1081 1049 9488 9492 9524 9516 9500 9472 9532 1082 1050 9562 9556 9577 9574 9568 9552 9580 164 1083 1051 1084 1052 1085 1053 1086 1054 1087 9496 9484 9608 9604 1055 1103 9600 1071 1088 1056 1089 1057 1090 1058 1091 1059 1078 1046 1074 1042 1100 1068 8470 173 1099 1067 1079 1047 1096 1064 1101 1069 1097 1065 1095 1063 167 9632 160)) + (857 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 305 196 197 201 230 198 244 246 242 251 249 304 214 220 248 163 216 350 351 225 237 243 250 241 209 286 287 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 186 170 202 203 200 65533 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 65533 215 218 219 217 236 255 175 180 173 177 65533 190 182 167 247 184 176 168 183 185 179 178 9632 160)) + (860 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 227 224 193 231 234 202 232 205 212 236 195 194 201 192 200 244 245 242 218 249 204 213 220 162 163 217 8359 211 225 237 243 250 241 209 170 186 191 210 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (861 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 208 240 222 196 197 201 230 198 244 246 254 251 221 253 214 220 248 163 216 8359 402 225 237 243 250 193 205 211 218 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (862 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (863 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 194 224 182 231 234 235 232 239 238 8215 192 167 201 200 202 244 203 207 251 249 164 212 220 162 163 217 219 402 166 180 243 250 168 184 179 175 206 8976 172 189 188 190 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (864 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 176 183 8729 8730 9618 9472 9474 9532 9508 9516 9500 9524 9488 9484 9492 9496 946 8734 966 177 189 188 8776 171 187 65271 65272 155 156 65275 65276 159 160 173 65154 163 164 65156 65533 65533 65166 65167 65173 65177 1548 65181 65185 65189 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 65233 1563 65201 65205 65209 1567 162 65152 65153 65155 65157 65226 65163 65165 65169 65171 65175 65179 65183 65187 65191 65193 65195 65197 65199 65203 65207 65211 65215 65217 65221 65227 65231 166 172 247 215 65225 1600 65235 65239 65243 65247 65251 65255 65259 65261 65263 65267 65213 65228 65230 65229 65249 65149 1617 65253 65257 65260 65264 65266 65232 65237 65269 65270 65245 65241 65265 9632 65533)) + (865 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 164 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (866 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1025 1105 1028 1108 1031 1111 1038 1118 176 8729 183 8730 8470 164 9632 160)) + (869 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 902 135 183 172 166 8216 8217 904 8213 905 906 938 908 147 148 910 939 169 911 178 179 940 163 941 942 943 970 912 972 973 913 914 915 916 917 918 919 189 920 921 171 187 9617 9618 9619 9474 9508 922 923 924 925 9571 9553 9559 9565 926 927 9488 9492 9524 9516 9500 9472 9532 928 929 9562 9556 9577 9574 9568 9552 9580 931 932 933 934 935 936 937 945 946 947 9496 9484 9608 9604 948 949 9600 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 900 173 177 965 966 967 167 968 901 176 168 969 971 944 974 9632 160)) + (1250 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 352 8249 346 356 381 377 144 8216 8217 8220 8221 8226 8211 8212 152 8482 353 8250 347 357 382 378 160 711 728 321 164 260 166 167 168 169 350 171 172 173 174 379 176 177 731 322 180 181 182 183 184 261 351 187 317 733 318 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) + (1251 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1026 1027 8218 1107 8222 8230 8224 8225 8364 8240 1033 8249 1034 1036 1035 1039 1106 8216 8217 8220 8221 8226 8211 8212 152 8482 1113 8250 1114 1116 1115 1119 160 1038 1118 1032 164 1168 166 167 1025 169 1028 171 172 173 174 1031 176 177 1030 1110 1169 181 182 183 1105 8470 1108 187 1112 1029 1109 1111 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103)) + (1252 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 381 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 382 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) + (1253 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 136 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 157 158 159 160 901 902 163 164 165 166 167 168 169 65533 171 172 173 174 8213 176 177 178 179 900 181 182 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) + (1254 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) + (1255 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 156 157 158 159 160 161 162 163 8362 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 191 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1520 1521 1522 1523 1524 65533 65533 65533 65533 65533 65533 65533 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) + (1256 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 1662 8218 402 8222 8230 8224 8225 710 8240 1657 8249 338 1670 1688 1672 1711 8216 8217 8220 8221 8226 8211 8212 1705 8482 1681 8250 339 8204 8205 1722 160 1548 162 163 164 165 166 167 168 169 1726 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 1563 187 188 189 190 1567 1729 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 215 1591 1592 1593 1594 1600 1601 1602 1603 224 1604 226 1605 1606 1607 1608 231 232 233 234 235 1609 1610 238 239 1611 1612 1613 1614 244 1615 1616 247 1617 249 1618 251 252 8206 8207 1746)) + (1257 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 138 8249 140 168 711 184 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 175 731 159 160 65533 162 163 164 65533 166 167 216 169 342 171 172 173 174 198 176 177 178 179 180 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 729)) + (1258 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 258 196 197 198 199 200 201 202 203 768 205 206 207 272 209 777 211 212 416 214 215 216 217 218 219 220 431 771 223 224 225 226 259 228 229 230 231 232 233 234 235 769 237 238 239 273 241 803 243 244 417 246 247 248 249 250 251 252 432 8363 255))) + "A list of 8-bit Windows code pages where each element is a +cons with the car being the ID of the code page and the cdr being +a vector enumerating the corresponding character codes.") Added: trunk/doc/foo.txt ============================================================================== Binary file. No diff available. Added: trunk/doc/index.html ============================================================================== --- (empty file) +++ trunk/doc/index.html Thu May 1 02:25:14 2008 @@ -0,0 +1,1044 @@ + + + + + + FLEXI-STREAMS - Flexible bivalent streams for Common Lisp + + + + + +

FLEXI-STREAMS - Flexible bivalent streams for Common Lisp

+ +
+
 

Abstract

+ +FLEXI-STREAMS implements "virtual" bivalent streams that can be +layered atop real binary or bivalent streams and that can be used to +read and write character data in various single- or multi-octet +encodings which can be changed on the fly. It also supplies +in-memory binary streams which are similar to string streams. +

+The library needs a Common Lisp implementation that +supports Gray +streams and relies on David +Lichteblau's trivial-gray-streams +to offer portability between different Lisps. +

+The code comes with +a BSD-style +license so you can basically do with it whatever you want. + +

+Download shortcut: http://weitz.de/files/flexi-streams.tar.gz. +

+ +
 

Contents

+
    +
  1. Example usage +
  2. Download and installation +
  3. Backward compatibility with version 0.10.3 and before +
  4. Support and mailing lists +
  5. The FLEXI-STREAMS dictionary +
      +
    1. External formats +
        +
      1. make-external-format +
      2. external-format-name +
      3. external-format-eol-style +
      4. external-format-little-endian +
      5. external-format-id +
      6. external-format-equal +
      7. *default-eol-style* +
      8. *default-little-endian* +
      +
    2. Flexi streams +
        +
      1. flexi-stream +
      2. flexi-input-stream +
      3. flexi-output-stream +
      4. flexi-io-stream +
      5. make-flexi-stream +
      6. flexi-stream-external-format +
      7. flexi-stream-element-type +
      8. flexi-stream-column +
      9. flexi-stream-position +
      10. flexi-stream-bound +
      11. flexi-stream-stream +
      12. unread-byte +
      13. peek-byte +
      14. *substitution-char* +
      15. octet +
      16. flexi-stream-error +
      17. flexi-stream-encoding-error +
      18. flexi-stream-element-type-error +
      19. flexi-stream-element-type-error-element-type +
      20. flexi-stream-position-spec-error +
      21. flexi-stream-position-spec-error-position-spec +
      +
    3. In-memory streams +
        +
      1. in-memory-stream +
      2. in-memory-input-stream +
      3. in-memory-output-stream +
      4. list-stream +
      5. vector-stream +
      6. make-in-memory-input-stream +
      7. make-in-memory-output-stream +
      8. get-output-stream-sequence +
      9. output-stream-sequence-length +
      10. with-input-from-sequence +
      11. with-output-to-sequence +
      12. in-memory-stream-error +
      13. in-memory-stream-closed-error +
      +
    4. Strings +
        +
      1. string-to-octets +
      2. octets-to-string +
      +
    +
  6. File positions +
  7. Acknowledgements +
+ +
 

Example usage

+ +The examples were created with LispWorks 4.4.6 pro on Windows. The following two functions create the same file: + +
+(defun foo (pathspec)
+  "With standard LispWorks streams."
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :supersede
+                       :external-format '(:utf-8 :eol-style :crlf))
+    (write-line "ÄÖÜ1" out))
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :append
+                       :external-format '(:latin-1 :eol-style :lf))
+    (write-line "ÄÖÜ2" out))
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :append
+                       :element-type 'octet)
+    (write-byte #xeb out)
+    (write-sequence #(#xa3 #xa4 #xa5) out))
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :append
+                       :external-format '(:unicode :little-endian nil :eol-style :crlf))
+    (write-line "ÄÖÜ3" out)))
+
+(defun bar (pathspec)
+  "With a flexi stream."
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :supersede
+                       :external-format '(:latin-1 :eol-style :lf))
+    (setq out (make-flexi-stream out :external-format :utf-8))
+    (write-line "ÄÖÜ1" out)
+    (setf (flexi-stream-external-format out) '(:latin-1 :eol-style :lf))
+    (write-line "ÄÖÜ2" out) 
+    (write-byte #xeb out)
+    (write-sequence #(#xa3 #xa4 #xa5) out)
+    (setf (flexi-stream-external-format out) :ucs-2be)
+    (write-line "ÄÖÜ3" out)))
+
+ +

+And applying this function +

+(defun baz (pathspec)
+  (let (result)
+    (with-open-file (in pathspec :element-type 'octet)
+      (setq in (make-flexi-stream in :external-format :utf-8))
+      (push (read-line in) result)
+      (push (read-byte in) result)
+      (setf (flexi-stream-external-format in) '(:latin-1 :eol-style :lf))
+      (push (read-line in) result) 
+      (setf (flexi-stream-external-format in) :greek)
+      (push (read-char in) result)
+      (setf (flexi-stream-external-format in) :latin0)
+      (let ((string (make-string 3 :element-type 'character)))
+        (read-sequence string in)
+        (push string result))
+      (let ((octets (make-array 2 :element-type 'octet)))
+        (read-sequence octets in)
+        (push octets result))
+      (setf (flexi-stream-external-format in) :ucs-2be)
+      (push (read-line in) result))
+    (nreverse result)))
+
+to the file created above will yield the list +
+("ÄÖÜ1" 196 "ÖÜ2" #\λ "£€¥" #(0 196) "ÖÜ3")
+
+ +

+For more examples see the source code +of Drakma, Chunga, +or CL-WBXML. + +
 

Download and installation

+ +Before you try to install FLEXI-STREAMS, first check that in your Lisp +each character's +character +code is equal to +its Unicode code point and +that (CHAR-CODE #\Newline) +and (CHAR-CODE #\Linefeed) have the same +value (10). (This is the case for all relevant CL +implementations which were in use when this library was written. It +is not mandated by the ANSI standard, though.) +

+FLEXI-STREAMS together with this documentation can be downloaded from http://weitz.de/files/flexi-streams.tar.gz. The +current version is 0.14.0. +

+Before you install FLEXI-STREAMS you first need to +install the trivial-gray-streams library +unless you already have it. +

+FLEXI-STREAMS comes with a system definition for ASDF so you can install the library with +

+(asdf:oos 'asdf:load-op :flexi-streams)
+
+if you've unpacked it in a place where ASDF can find it. Installation +via asdf-install +should also be possible, and there's a port +to Gentoo Lisp thanks to +Matthew Kennedy. +

+You can run a test suite which tests some (but +not all) aspects of the library with +

+(asdf:oos 'asdf:test-op :flexi-streams)
+
+This might take a while... +

+Luís Oliveira maintains a darcs +repository of FLEXI-STREAMS +at http://common-lisp.net/~loliveira/ediware/. +

+A Mercurial +repository of older versions is available +at http://arcanes.fr.eu.org/~pierre/2007/02/weitz/ +thanks to Pierre Thierry. + + +
 
+

+Backward compatibility with version 0.10.3 and before

+ +Two special variables used in flexi-streams 0.10.3 and before were removed - +*PROVIDE-USE-VALUE-RESTART* and *USE-REPLACEMENT-CHAR*. + +

+The code now behaves as if +*PROVIDE-USE-VALUE-RESTART* is always T. +Instead of *USE-REPLACEMENT-CHAR*, you can use +*SUBSTITUTION-CHAR* or +invoke +a USE-VALUE +restart +when a FLEXI-STREAM-ENCODING-ERROR +is signalled. + +
 

Support and mailing lists

+ +For questions, bug reports, feature requests, improvements, or patches +please use the flexi-streams-devel +mailing list. If you want to be notified about future releases, +subscribe to the flexi-streams-announce +mailing list. These mailing lists were made available thanks to +the services of common-lisp.net. +

+If you want to send patches, please read this first. + + +
 

The FLEXI-STREAMS dictionary

+ +

External formats

+ +EXTERNAL-FORMAT objects are used to denote the external +formats of flexi streams. These objects are created using +the MAKE-EXTERNAL-FORMAT +function, and there are various +readers to query their attributes. Once such an object is +created it can't be changed. +

+An external format consists of a basic encoding +(like ISO 8859-1 +or UTF-8), a +definition how line endings are denoted - by a carriage return +character (ASCII 13), by a line feed character (ASCII 10), +or by both of these characters in a row -, and optionally (for +encodings that use units larger than 8 bits) information +about the endianess +of the encoding. +

+The following encodings are currently supported by FLEXI-STREAMS: +

+

+A couple of alternative names are allowed that are listed below: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
:UTF-8:UTF8
:UTF-16:UTF16
:UCS-2
:UCS2
:UNICODE
:UTF-32:UTF32
:UCS-4
:UCS4
:ISO-8859-1:LATIN-1
:LATIN1
:ISO-8859-2:LATIN-2
:LATIN2
:ISO-8859-3:LATIN-3
:LATIN3
:ISO-8859-4:LATIN-4
:LATIN4
:ISO-8859-5:CYRILLIC
:ISO-8859-6:ARABIC
:ISO-8859-7:GREEK
:ISO-8859-8:HEBREW
:ISO-8859-9:LATIN-5
:LATIN5
:ISO-8859-10:LATIN-6
:LATIN6
:ISO-8859-11:THAI
:ISO-8859-13:LATIN-7
:LATIN7
:ISO-8859-14:LATIN-8
:LATIN8
:ISO-8859-15:LATIN-9
:LATIN9
:LATIN-0
:LATIN0
:ISO-8859-16:LATIN-10
:LATIN10
:CODE-PAGE:CODEPAGE
WIN32:CODE-PAGE
(only on LWW)
:KOI8-R:KOI8R
:US-ASCII:ASCII
+

+(Note that we treat UCS-2 exactly like UTF-16 although there +are subtle +differences. Also note that even though we support encodings like +UTF-32 some Lisps only supports characters contained within +the Basic +Multilingual Plane (like LispWorks) or even less (like CMUCL), so +if other characters are read from a +flexi +stream, READ-CHAR +will try to be helpful and return the corresponding Unicode code point - +an integer - instead. This might lead to an error if you're using +functions +like READ-LINE, though.) + +

+Whenever a FLEXI-STREAMS function accepts an external format as one of +its arguments, you can provide either an EXTERNAL-FORMAT +object or a shortcut which can be a list or a symbol. The list +shortcuts have a syntax similar +to the +one used by LispWorks - the cars are the names of and encoding +and the cdrs of these lists correspond to the keyword arguments +to MAKE-EXTERNAL-FORMAT, so +for example +

(:latin-1 :eol-style :crlf)
+is equivalent to +
(make-external-format :latin-1 :eol-style :crlf)
The +symbol shortcuts are equivalent to +calling MAKE-EXTERNAL-FORMAT +without keyword arguments, i.e. +
:thai
+behaves like +
(make-external-format :thai)
+Finally, the following expansions are +available: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
:UCS-2LE(:UCS-2 :LITTLE-ENDIAN T)
:UCS-2BE(:UCS-2 :LITTLE-ENDIAN NIL)
:UCS-4LE(:UCS-4 :LITTLE-ENDIAN T)
:UCS-4BE(:UCS-4 :LITTLE-ENDIAN NIL)
:UTF-16LE(:UTF-16 :LITTLE-ENDIAN T)
:UTF-16BE(:UTF-16 :LITTLE-ENDIAN NIL)
:UTF-32LE(:UTF-32 :LITTLE-ENDIAN T)
:UTF-32BE(:UTF-32 :LITTLE-ENDIAN NIL)
:IBM437(:CODE-PAGE :ID 437)
:IBM850(:CODE-PAGE :ID 850)
:IBM852(:CODE-PAGE :ID 852)
:IBM855(:CODE-PAGE :ID 855)
:IBM857(:CODE-PAGE :ID 857)
:IBM860(:CODE-PAGE :ID 860)
:IBM861(:CODE-PAGE :ID 861)
:IBM862(:CODE-PAGE :ID 862)
:IBM863(:CODE-PAGE :ID 863)
:IBM864(:CODE-PAGE :ID 864)
:IBM865(:CODE-PAGE :ID 865)
:IBM866(:CODE-PAGE :ID 866)
:IBM869(:CODE-PAGE :ID 869)
:WINDOWS-1250(:CODE-PAGE :ID 1250)
:WINDOWS-1251(:CODE-PAGE :ID 1251)
:WINDOWS-1252(:CODE-PAGE :ID 1252)
:WINDOWS-1253(:CODE-PAGE :ID 1253)
:WINDOWS-1254(:CODE-PAGE :ID 1254)
:WINDOWS-1255(:CODE-PAGE :ID 1255)
:WINDOWS-1256(:CODE-PAGE :ID 1256)
:WINDOWS-1257(:CODE-PAGE :ID 1257)
:WINDOWS-1258(:CODE-PAGE :ID 1258)
+

+Note that if you provide a shortcut, it +will be converted to an EXTERNAL-FORMAT object first. +So, if you're concerned about efficiency, create these objects once and +re-use them. + +


[Function] +
make-external-format name &key eol-style little-endian id => external-format + +


Creates and returns +an EXTERNAL-FORMAT +object. name is a +symbol, eol-style is one of the +keywords :CR, :LF, or :CRLF, +and little-endian is +a generalized +boolean. The default value for eol-style is the value of *DEFAULT-EOL-STYLE* except for Windows code pages where it is :CRLF. The default value +for little-endian is the value of *DEFAULT-LITTLE-ENDIAN* - this value is ignored unless name denotes one of UTF-16 or UTF-32. +id must be an integer denoting a Windows code page +known by FLEXI-STREAMS if name +is :CODE-PAGE or WIN32:CODE-PAGE, otherwise +the value is ignored. See the section +about external formats for more info. +

+Examples (run on Windows): + +

+CL-USER 1 > (make-external-format :latin-1)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:ISO-8859-1 :EOL-STYLE :CRLF) 2067DA84>
+
+CL-USER 2 > (make-external-format :latin-1 :eol-style :lf)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:ISO-8859-1 :EOL-STYLE :LF) 2068B4D4>
+
+CL-USER 3 > (make-external-format :ibm437)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:CODE-PAGE :ID 437 :EOL-STYLE :CRLF) 2069B33C>
+
+CL-USER 4 > (make-external-format :ucs-2)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CRLF :LITTLE-ENDIAN T) 206B4F4C>
+
+CL-USER 5 > (make-external-format :ucs-2be)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CRLF :LITTLE-ENDIAN NIL) 2067DBE4>
+
+CL-USER 6 > (make-external-format :ucs-2be :eol-style :br)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :BR :LITTLE-ENDIAN NIL) 206B54AC>
+
+
+ +


[Readers] +
external-format-name external-format => name +
external-format-eol-style external-format => eol-style +
external-format-little-endian external-format => little-endian +
external-format-id external-format => id + +


+These methods can be used to query an EXTERNAL-FORMAT object for its attributes. +
+ +


[Functions] +
external-format-equal external-format-1 external-format-2 => generalized-boolean + +


+Checks whether the two external formats external-format-1 and external-format-2 are equivalent with respect to their effects on flexi streams. +

+Examples (run on Windows): + +

+CL-USER 1 > (make-external-format :ucs-4le)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-32 :EOL-STYLE :CRLF :LITTLE-ENDIAN T) 2067FB74>
+
+CL-USER 2 > (external-format-equal * (make-external-format :utf32 :little-endian t))
+T
+
+CL-USER 3 > (make-external-format :code-page :id 437)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:CODE-PAGE :ID 437 :EOL-STYLE :CRLF) 2069428C>
+
+CL-USER 4 > (external-format-equal * (make-external-format :ibm437))
+T
+
+ +
+ +


[Special variable] +
*default-eol-style* + +


+The default value for the eol-style keyword argument of MAKE-EXTERNAL-FORMAT. Its initial value is :CRLF on Windows and :LF on other operating systems. +
+ +


[Special variable] +
*default-little-endian* + +


+The default value for the little-endian keyword argument of MAKE-EXTERNAL-FORMAT. Its initial value corresponds to the endianess of the platform FLEXI-STREAMS is used on as revealed by the :LITTLE-ENDIAN feature. +
+ +

Flexi streams

+ +Flexi streams are the core of the FLEXI-STREAMS library. You +create them using the +function MAKE-FLEXI-STREAM which +takes an open binary stream (called the underlying stream) as its only required argument. +A binary stream in this context means that if it's an input +stream, you can read from it with +READ-BYTE +(or, as a workaround for LispWorks, you can at least apply +READ-SEQUENCE +to it where the sequence is an array of element +type OCTET), and similarly for +WRITE-BYTE +(WRITE-SEQUENCE +for LispWorks) +and output +streams. (Note that this specifically holds +for bivalent +streams like socket streams.) +

+A flexi stream behaves like an ordinary Lisp stream. It is an input +stream if the underlying binary stream is an input stream, and it is +an output stream when the underlying binary stream is an output +stream. You can write characters as well +as octets to an output flexi stream and similarly +you can read characters and octets from an input flexi stream. +

+A flexi stream always has an external +format associated with it which is deployed whenever you read +characters from the stream or write characters to it. You +can change the external +format while you use the stream. +

+Once you're using a flexi stream you should not read from or +write to the underlying stream directly anymore. +

+If +you close +a flexi stream, the underlying stream will also be closed. However, it +also suffices to close the underlying stream directly should you not +want to use the flexi stream anymore. So, the following usage +(where IN is implicitly closed at the end) is OK: +

+(with-open-file (in "/foo/bar/baz.txt")
+  (let ((flexi (make-flexi-stream in :external-format :hebrew)))
+    (read-line flexi)))
+
+

+Output flexi streams will try to keep track of +the column +they're in but you can also set the +column directly. This value will be incremented by one for each +character written to the stream and it will be set to 0 +if you send a #\Newline character. The column will be +set to NIL if an OCTET +is sent to the stream. Once the column is NIL it'll stay +like that unless it is explicitly set to another value. +

+Input flexi streams keep track of +their position within the stream. +This value is incremented by one for +each OCTET read from the stream, and +it is incremented by the number of octets actually read for each +character read from the stream. So, if the encoding is UTF-8, reading +the character #\ä (a-umlaut) will advance the position by two. +If the encoding is UTF-32 and the end-of-line style +is :CRLF, reading a #\Newline will advance +the position by eight. +

+You can also set the bound of an +input flexi stream. Initially it is NIL, but when it's +an integer and the +stream's position has gone beyond +this bound, the stream will behave as if no more input is available. +

+Caveat: You can +only unread +a character from a flexi stream if you haven't changed the external format after you read it. +

+Caveat: The underlying stream should either be a binary stream (i.e. have an element type that is a subtype of integer) or it should explicitly use an external format with :LF as its end-of-line style. Otherwise it might perform unwanted conversion of line endings on its own. (LispWorks does this even if you write binary data to the stream using WRITE-SEQUENCE.) + +


[Standard class] +
flexi-stream + +


+Every flexi stream returned by MAKE-FLEXI-STREAM is of this type which is a subtype of STREAM. +
+ +


[Standard class] +
flexi-input-stream + +


+A flexi stream is of this type if its underlying stream is an input stream. This is a subtype of FLEXI-STREAM. +
+ +


[Standard class] +
flexi-output-stream + +


+A flexi stream is of this type if its underlying stream is an output stream. This is a subtype of FLEXI-STREAM. +
+ +


[Standard class] +
flexi-io-stream + +


+A flexi stream is of this type if it is both a FLEXI-INPUT-STREAM as well as a FLEXI-OUTPUT-STREAM. +
+ +


[Function] +
make-flexi-stream stream &key external-format element-type column position bound => flexi-stream + +


+Creates and returns a flexi stream, i.e. an object of type FLEXI-STREAM. stream is the underlying Lisp stream. external-format is the initial external format to be used by the stream, the default is the value of evaluating (MAKE-EXTERNAL-FORMAT :LATIN1). element-type is the initial element type of the flexi stream the default of which is LW:SIMPLE-CHAR for LispWorks and CHARACTER otherwise. column is the initial column of the stream and should only be provided for output streams, the default is 0. position is the initial octet position of the stream and must only be provided for input streams, the default is 0. bound should be NIL (the default) or an integer and must only be provided for input streams. If the octet position of the stream has gone beyond this bound, the stream will behave as if no more input is available. See the section about flexi streams for more information. +
+ +


[Accessors] +
flexi-stream-external-format flexi-stream => external-format +
(setf (flexi-stream-external-format flexi-stream) external-format) +
flexi-stream-element-type flexi-stream => element-type +
(setf (flexi-stream-element-type flexi-stream) element-type) +
flexi-stream-column flexi-output-stream => column +
(setf (flexi-stream-column flexi-output-stream) column) +
flexi-stream-position flexi-input-stream => position +
(setf (flexi-stream-position flexi-input-stream) position) +
flexi-stream-bound flexi-input-stream => bound +
(setf (flexi-stream-bound flexi-input-stream) bound) + +


+These methods can be used to get and set the corresponding attributes of a flexi stream. +

+(SETF +FLEXI-STREAM-EXTERNAL-FORMAT) accepts keyword symbols +(names of external formats), lists +(which should be valid lists of parameters +to MAKE-EXTERNAL-FORMAT), or EXTERNAL-FORMAT objects: +

+CL-USER 1 > (setf (flexi-stream-external-format *my-stream*) :ucs-4le)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-32 :EOL-STYLE :CRLF :LITTLE-ENDIAN T) 206920DC>
+
+CL-USER 2 > (setf (flexi-stream-external-format *my-stream*) '(:ucs-2be :eol-style :br))
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :BR :LITTLE-ENDIAN NIL) 20696934>
+
+CL-USER 3 > (setf (flexi-stream-external-format *my-stream*) (make-external-format :ibm437))
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:CODE-PAGE :ID 437 :EOL-STYLE :CRLF) 2068716C>
+
+
+ +


[Reader] +
flexi-stream-stream flexi-stream => stream + +


+This method returns the underlying stream of a flexi stream. +
+ +


[Generic function] +
unread-byte byte stream => nil + +


+Similar to UNREAD-CHAR in that it "unreads" the last octet from +stream which must be a flexi stream. Note that you can only call UNREAD-BYTE after a corresponding +READ-BYTE, not after READ-CHAR. +
+ +


[Generic function] +
peek-byte stream &optional peek-type eof-error-p eof-value => byte + +


+PEEK-BYTE is like PEEK-CHAR, i.e. it returns an octet from stream (which must be a flexi stream) +without actually removing it. If peek-type is NIL, the next octet is +returned, if peek-type is T, the next octet which is not 0 is +returned, if peek-type is an octet, the next octet which equals +peek-type is returned. eof-error-p and eof-value are interpreted as usual. +

+Note that the parameters aren't in the same order as with PEEK-CHAR because it doesn't make much sense to make stream an optional argument. +

+ +


[Special variable] +
*substitution-char* + +


+If this value is not NIL, it should be a character which is used +(as if by a USE-VALUE restart) whenever during reading an error of +type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise. + +
+CL-USER 1 > (defun foo ()
+              ;; not a valid UTF-8 sequence
+              (with-input-from-sequence (in '(#xe4 #xf6 #xfc))
+                (setq in (make-flexi-stream in :external-format :utf8))
+                (read-line in)))
+FOO
+
+CL-USER 2 > (foo)
+
+Error: Unexpected value #xF6 in UTF-8 sequence.
+  1 (continue) Specify a character to be used instead.
+  2 (abort) Return to level 0.
+  3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed,  or :? for other options
+
+CL-USER 3 : 1 > :c
+Type a character: x
+
+Error: End of file while in UTF-8 sequence.
+  1 (continue) Specify a character to be used instead.
+  2 (abort) Return to level 0.
+  3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed,  or :? for other options
+
+CL-USER 4 : 1 > :c
+Type a character: y
+"xy"
+T
+
+CL-USER 5 > (handler-bind ((flexi-stream-encoding-error (lambda (condition)
+                                                          (use-value #\-))))
+              (foo))
+"--"
+T
+
+CL-USER 6 > (let ((*substitution-char* #\?))
+              (foo))
+"??"
+T
+
+
+ +


[Type] +
octet + +


+Just a shortcut for (UNSIGNED-BYTE 8). +
+ +


[Condition] +
flexi-stream-error + +


+All errors related to flexi streams are of this type. This is a subtype of STREAM-ERROR. +
+ +


[Condition] +
flexi-stream-encoding-error + +


+All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE +restart is provided. See also *SUBSTITUTION-CHAR* and example for it. FLEXI-STREAM-ENCODING-ERROR is a subtype of FLEXI-STREAM-ERROR. +
+ +


[Condition] +
flexi-stream-element-type-error + +


+All errors related to problems with the element type of flexi streams are of this type. This is a subtype of FLEXI-STREAM-ERROR and has an additional slot for the element type which can be accessed with FLEXI-STREAM-ELEMENT-TYPE-ERROR-ELEMENT-TYPE. +
+ +


[Reader] +
flexi-stream-element-type-error-element-type condition => element-type + +


+If condition is of type FLEXI-STREAM-ELEMENT-TYPE-ERROR, this function will return the offending element type. +
+ +


[Condition] +
flexi-stream-position-spec-error + +


Errors of this type are signalled if an erroneous +position spec is used in conjunction +with FILE-POSITION. This is a +subtype +of FLEXI-STREAM-ERROR +and has an additional slot for the position spec which can be accessed +with FLEXI-STREAM-POSITION-SPEC-ERROR-POSITION-SPEC. +
+ +


[Reader] +
flexi-stream-position-spec-error-position-spec condition => position-spec + +


+If condition is of type FLEXI-STREAM-POSITION-SPEC-ERROR, this function will return the offending position spec. +
+ +

In-memory streams

+ +The library also provides in-memory binary streams which are modeled after string streams and behave very similar only that they deal with octets instead of characters and the underlying data structure is not a string but either a list or a vector. These streams can obviously be used as the underlying streams for flexi streams. + +


[Standard class] +
in-memory-stream + +


+Every in-memory stream returned by MAKE-IN-MEMORY-INPUT-STREAM or MAKE-IN-MEMORY-OUTPUT-STREAM is of this type which is a subtype of STREAM. +
+ +


[Standard class] +
in-memory-input-stream + +


+Every in-memory stream returned by MAKE-IN-MEMORY-INPUT-STREAM is of this type which is a subtype of IN-MEMORY-STREAM. +
+ +


[Standard class] +
in-memory-output-stream + +


+Every in-memory stream returned by MAKE-IN-MEMORY-OUTPUT-STREAM is of this type which is a subtype of IN-MEMORY-STREAM. +
+ +


[Standard class] +
list-stream + +


+Every in-memory input stream is of this type if it reads from a list. +
+ +


[Standard class] +
vector-stream + +


+Every in-memory stream is of this type if it reads from or writes to a vector. +
+ +


[Generic function] +
make-in-memory-input-stream sequence &key start end transformer => in-memory-input-stream + +


+Returns a binary input stream (of type IN-MEMORY-INPUT-STREAM) which will supply, in order, the +octets in the subsequence of sequence bounded by start (the default is 0) and end (the default is the length of sequence). sequence must either be a list or a vector of octets. +Each octet returned will be transformed in turn by the optional +transformer function. +
+ +


[Function] +
make-in-memory-output-stream &key element-type transformer => in-memory-output-stream + +


+Returns a binary output stream (of type IN-MEMORY-OUTPUT-STREAM) which accepts objects of type element-type (a subtype of OCTET) and makes +available a sequence (see GET-OUTPUT-STREAM-SEQUENCE) that contains the octets that were actually +output. The octets stored will each be transformed by the optional transformer function. +
+ +


[Generic function] +
get-output-stream-sequence stream &key as-list => sequence + +


+Returns a vector containing, in order, all the octets that have +been output to the in-memory output stream stream. This operation clears any +octets on stream, so the vector contains only those octets which have +been output since the last call to GET-OUTPUT-STREAM-SEQUENCE or since +the creation of the stream, whichever occurred most recently. If +as-list is true the return value is coerced to a list. +
+ +


[Generic function] +
output-stream-sequence-length stream => length + +


Returns the current length of the underlying vector +of the in-memory output +stream stream, i.e. this is the length of the +sequence that GET-OUTPUT-STREAM-SEQUENCE would return if called at +this very moment. +
+ +


[Macro] +
with-input-from-sequence (var sequence &key start end transformer) statement* => result* + +


Creates an in-memory input +stream from the sequence sequence using the +parameters start and end +(see MAKE-IN-MEMORY-INPUT-STREAM), +binds var to this stream and then executes +the statement* forms. A +function transformer may optionally be specified +to transform the returned octets. The stream is automatically closed +on exit from +WITH-OUTPUT-TO-SEQUENCE, no matter whether the exit is normal or +abnormal. The return value of this macro is the return value of +the last statement of statement*. +
+ +


[Macro] +
with-output-to-sequence (var &key as-list element-type transformer) statement* => sequence + +


+Creates an in-memory output stream, binds var to this stream and +then executes the statement* forms. The stream stores +data of type element-type (a subtype of OCTET) which is (optionally) transformed by the +function transformer prior to storage. The stream is automatically closed on +exit from WITH-OUTPUT-TO-SEQUENCE, no matter whether the exit is +normal or abnormal. The return value of this macro is a vector (or a +list if as-list is true) containing the octets that were sent to the +stream within the body of the macro. +
+ +


[Condition] +
in-memory-stream-error + +


+All errors related to in-memory streams are of this type. This is a subtype of STREAM-ERROR. +
+ +


[Condition] +
in-memory-stream-closed-error + +


+An error of this type is signalled if one tries to read from or write to an in-memory stream which had already been closed. This is a subtype of IN-MEMORY-STREAM-ERROR. +
+ +

Strings

+ +This section collects a few convenience functions for strings conversions: + +


[Function] +
string-to-octets string &key external-format start end => vector + +


+ +Converts the Lisp string string from start to end to an array of +octets corresponding to the external format external-format. The defaults for +start and end +are 0 and NIL (meaning the length of the +vector). The default for external-format is the +value of +evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) + +
+ +


[Function] +
octets-to-string vector &key external-format start end => string + +


Converts the Lisp vector vector +of octets from start +to end to string using +the external +format external-format. The defaults for +start and end +are 0 and the length of the vector. The default +for external-format is the value of +evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) +
+ +
 

File positions

+ +For flexi streams as well +as for in-memory +streams, FILE-POSITION +will usually return NIL and do nothing when a second +argument is supplied. This is correct +w.r.t. the ANSI +standard, but not very helpful. However, even +with Gray +streams there is no portable way to implement a better +behaviour. +

+For LispWorks +and CLISP, +FILE-POSITION +for flexi streams will work as if the +function had been applied to the underlying stream, and +for in-memory streams it will try to do +something sensible if the underlying data structure is a vector +(i.e. not a list). Patches for other Common Lisp +implementations should be sent to +the trivial-gray-streams +maintainers. + +
 

Acknowledgements

+ +Thanks to David Lichteblau for numerous portability patches. Thanks +to Igor Plekhov for the KOI8-R code. Thanks to Anton Vodonosov for +numerous patches and additions. + +

+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.98 2007/12/29 23:15:27 edi Exp $ +

BACK TO MY HOMEPAGE + + + Added: trunk/external-format.lisp ============================================================================== --- (empty file) +++ trunk/external-format.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,147 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.11 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass external-format () + ((name :initarg :name + :reader external-format-name + :documentation "The name of the external format - a +keyword.") + (id :initarg :id + :initform nil + :reader external-format-id + :documentation "If the external format denotes a Windows +code page this ID specifies which one to use. Otherwise the +value is ignored \(and usually NIL).") + (little-endian :initarg :little-endian + :initform *default-little-endian* + :reader external-format-little-endian + :documentation "Whether multi-octet values are +read and written with the least significant octet first. For +8-bit encodings like :ISO-8859-1 this value is ignored.") + (eol-style :initarg :eol-style + :reader external-format-eol-style + :documentation "The character\(s) to or from which +a #\Newline will be translated - one of the keywords :CR, :LF, +or :CRLF.")) + (:documentation "EXTERNAL-FORMAT objects are used to denote +encodings for flexi streams.")) + +(defmethod make-load-form ((thing external-format) &optional environment) + "Defines a way to reconstruct external formats. Needed for OpenMCL." + (make-load-form-saving-slots thing :environment environment)) + +(defun make-external-format% (name &key (little-endian *default-little-endian*) + id eol-style) + "Used internally by MAKE-EXTERNAL-FORMAT." + (let* ((real-name (normalize-external-format-name name)) + (initargs + (cond ((or (iso-8859-name-p real-name) + (koi8-r-name-p real-name) + (ascii-name-p real-name)) + (list :eol-style (or eol-style *default-eol-style*))) + ((code-page-name-p real-name) + (list :id (or (known-code-page-id-p id) + (error "Unknown code page ID ~S" id)) + ;; default EOL style for Windows code pages is :CRLF + :eol-style (or eol-style :crlf))) + (t (list :eol-style (or eol-style *default-eol-style*) + :little-endian little-endian))))) + (apply #'make-instance 'external-format + :name real-name + initargs))) + +(defun make-external-format (name &rest args + &key (little-endian *default-little-endian*) + id eol-style) + "Creates and returns an external format object as specified. +NAME is a keyword like :LATIN1 or :UTF-8, LITTLE-ENDIAN specifies +the `endianess' of the external format and is ignored for 8-bit +encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF +which denote the end-of-line character \(sequence), ID is the ID +of a Windows code page \(and ignored for other encodings)." + (declare (ignore id little-endian)) + (let ((shortcut-args (cdr (assoc name +shortcut-map+)))) + (cond (shortcut-args + (apply #'make-external-format% + (append shortcut-args + `(:eol-style ,eol-style)))) + (t (apply #'make-external-format% name args))))) + +(defun external-format-equal (ef1 ef2) + "Checks whether two EXTERNAL-FORMAT objects denote the same +encoding." + (let* ((name1 (external-format-name ef1)) + (code-page-name-p (code-page-name-p name1))) + ;; they must habe the same canonical name + (and (eq name1 + (external-format-name ef2)) + ;; if both are code pages the IDs must be the same + (or (not code-page-name-p) + (eql (external-format-id ef1) + (external-format-id ef2))) + ;; for non-8-bit encodings the endianess must be the same + (or code-page-name-p + (ascii-name-p name1) + (koi8-r-name-p name1) + (iso-8859-name-p name1) + (eq name1 :utf-8) + (eq (not (external-format-little-endian ef1)) + (not (external-format-little-endian ef2)))) + ;; the EOL style must also be the same + (eq (external-format-eol-style ef1) + (external-format-eol-style ef2))))) + +(defun normalize-external-format (external-format) + "Returns a list which is a `normalized' representation of the +external format EXTERNAL-FORMAT. Used internally by +PRINT-OBJECT, for example. Basically, the result is argument +list that can be fed back to MAKE-EXTERNAL-FORMAT to create an +equivalent object." + (let ((name (external-format-name external-format)) + (eol-style (external-format-eol-style external-format))) + (cond ((or (ascii-name-p name) + (koi8-r-name-p name) + (iso-8859-name-p name) + (eq name :utf-8)) + (list name :eol-style eol-style)) + ((code-page-name-p name) + (list name + :id (external-format-id external-format) + :eol-style eol-style)) + (t (list name + :eol-style eol-style + :little-endian (external-format-little-endian external-format)))))) + +(defmethod print-object ((object external-format) stream) + "How an EXTERNAL-FORMAT object is rendered. Uses +NORMALIZE-EXTERNAL-FORMAT." + (print-unreadable-object (object stream :type t :identity t) + (prin1 (normalize-external-format object) stream))) \ No newline at end of file Added: trunk/flexi-streams.asd ============================================================================== --- (empty file) +++ trunk/flexi-streams.asd Thu May 1 02:25:14 2008 @@ -0,0 +1,66 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.58 2007/12/29 23:15:26 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :cl-user) + +(defpackage :flexi-streams-system + (:use :asdf :cl)) + +(in-package :flexi-streams-system) + +(defsystem :flexi-streams + :version "0.14.0" + :serial t + :components ((:file "packages") + (:file "ascii") + (:file "koi8-r") + (:file "iso-8859") + (:file "code-pages") + (:file "specials") + (:file "util") + (:file "external-format") + (:file "in-memory") + (:file "stream") + #+:lispworks (:file "lw-binary-stream") + (:file "output") + (:file "input") + (:file "strings")) + :depends-on (:trivial-gray-streams)) + +(defsystem :flexi-streams-test + :components ((:module "test" + :serial t + :components ((:file "packages") + (:file "test")))) + :depends-on (:flexi-streams)) + +(defmethod perform ((o test-op) (c (eql (find-system 'flexi-streams)))) + (operate 'load-op 'flexi-streams-test) + (funcall (intern (symbol-name :run-tests) + (find-package :flexi-streams-test)))) Added: trunk/in-memory.lisp ============================================================================== --- (empty file) +++ trunk/in-memory.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,395 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.26 2007/12/29 21:17:05 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass in-memory-stream (trivial-gray-stream-mixin) + ((transformer :initarg :transformer + :accessor in-memory-stream-transformer + :documentation "A function used to transform the +written/read octet to the value stored/retrieved in/from the +underlying vector.") + #+:cmu + (open-p :initform t + :accessor in-memory-stream-open-p + :documentation "For CMUCL we have to keep track of this +manually.")) + (:documentation "An IN-MEMORY-STREAM is a binary stream that reads +octets from or writes octets to a sequence in RAM.")) + +(defclass in-memory-input-stream (in-memory-stream fundamental-binary-input-stream) + () + (:documentation "An IN-MEMORY-INPUT-STREAM is a binary stream that +reads octets from a sequence in RAM.")) + +#+:cmu +(defmethod output-stream-p ((stream in-memory-input-stream)) + "Explicitly states whether this is an output stream." + (declare (optimize speed)) + nil) + +(defclass in-memory-output-stream (in-memory-stream fundamental-binary-output-stream) + () + (:documentation "An IN-MEMORY-OUTPUT-STREAM is a binary stream that +writes octets to a sequence in RAM.")) + +#+:cmu +(defmethod input-stream-p ((stream in-memory-output-stream)) + "Explicitly states whether this is an input stream." + (declare (optimize speed)) + nil) + +(defclass list-stream () + ((list :initarg :list + :accessor list-stream-list + :documentation "The underlying list of the stream.")) + (:documentation "A LIST-STREAM is a mixin for IN-MEMORY streams +where the underlying sequence is a list.")) + +(defclass vector-stream () + ((vector :initarg :vector + :accessor vector-stream-vector + :documentation "The underlying vector of the stream which +\(for output) must always be adjustable and have a fill pointer.")) + (:documentation "A VECTOR-STREAM is a mixin for IN-MEMORY streams +where the underlying sequence is a vector.")) + +(defclass list-input-stream (list-stream in-memory-input-stream) + () + (:documentation "A binary input stream that gets its data from an +associated list of octets.")) + +(defclass vector-input-stream (vector-stream in-memory-input-stream) + ((index :initarg :index + :accessor vector-stream-index + :type (integer 0 #.array-dimension-limit) + :documentation "An index into the underlying vector denoting +the current position.") + (end :initarg :end + :accessor vector-stream-end + :type (integer 0 #.array-dimension-limit) + :documentation "An index into the underlying vector denoting +the end of the available data.")) + (:documentation "A binary input stream that gets its data from an +associated vector of octets.")) + +(defclass vector-output-stream (vector-stream in-memory-output-stream) + () + (:documentation "A binary output stream that writes its data to an +associated vector.")) + +(define-condition in-memory-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +IN-MEMORY streams.")) + +(define-condition in-memory-stream-closed-error (in-memory-stream-error) + () + (:report (lambda (condition stream) + (format stream "~S is closed." + (stream-error-stream condition)))) + (:documentation "An error that is signalled when someone is trying +to read from or write to a closed IN-MEMORY stream.")) + +#+:cmu +(defmethod open-stream-p ((stream in-memory-stream)) + "Returns a true value if STREAM is open. See ANSI standard." + (declare (optimize speed)) + (in-memory-stream-open-p stream)) + +#+:cmu +(defmethod close ((stream in-memory-stream) &key abort) + "Closes the stream STREAM. See ANSI standard." + (declare (ignore abort) + (optimize speed)) + (prog1 + (in-memory-stream-open-p stream) + (setf (in-memory-stream-open-p stream) nil))) + +(defmethod check-if-open ((stream in-memory-stream)) + "Checks if STREAM is open and signals an error otherwise." + (declare (optimize speed)) + (unless (open-stream-p stream) + (error 'in-memory-stream-closed-error + :stream stream))) + +(defmethod stream-element-type ((stream in-memory-stream)) + "The element type is always OCTET by definition." + (declare (optimize speed)) + 'octet) + +(defmethod transform-octet ((stream in-memory-stream) octet) + "Applies the transformer of STREAM to octet and returns the result." + (funcall (or (in-memory-stream-transformer stream) + #'identity) octet)) + +(defmethod stream-read-byte ((stream list-input-stream)) + "Reads one byte by simply popping it off of the top of the list." + (declare (optimize speed)) + (check-if-open stream) + (transform-octet stream (or (pop (list-stream-list stream)) + (return-from stream-read-byte :eof)))) + +(defmethod stream-listen ((stream list-input-stream)) + "Checks whether list is not empty." + (declare (optimize speed)) + (check-if-open stream) + (list-stream-list stream)) + +(defmethod stream-read-sequence ((stream list-input-stream) sequence start end &key) + "Repeatedly pops elements from the list until it's empty." + (declare (optimize speed) (type (integer 0 *) start end)) + (loop for index from start below end + while (list-stream-list stream) + do (setf (elt sequence index) + (pop (list-stream-list stream))) + finally (return index))) + +(defmethod stream-read-byte ((stream vector-input-stream)) + "Reads one byte and increments INDEX pointer unless we're beyond +END pointer." + (declare (optimize speed)) + (check-if-open stream) + (let ((index (vector-stream-index stream))) + (cond ((< index (vector-stream-end stream)) + (incf (vector-stream-index stream)) + (transform-octet stream (aref (vector-stream-vector stream) index))) + (t :eof)))) + +(defmethod stream-listen ((stream vector-input-stream)) + "Checking whether INDEX is beyond END." + (declare (optimize speed)) + (check-if-open stream) + (< (vector-stream-index stream) (vector-stream-end stream))) + +(defmethod stream-read-sequence ((stream vector-input-stream) sequence start end &key) + "Traverses both sequences in parallel until the end of one of them +is reached." + (declare (optimize speed) (type (integer 0 *) start end)) + (loop with vector-end of-type (integer 0 #.array-dimension-limit) = (vector-stream-end stream) + with vector = (vector-stream-vector stream) + for index from start below end + for vector-index of-type (integer 0 #.array-dimension-limit) = (vector-stream-index stream) + while (< vector-index vector-end) + do (setf (elt sequence index) + (aref vector vector-index)) + (incf (vector-stream-index stream)) + finally (return index))) + +(defmethod stream-write-byte ((stream vector-output-stream) byte) + "Writes a byte \(octet) by extending the underlying vector." + (declare (optimize speed)) + (check-if-open stream) + (vector-push-extend (transform-octet stream byte) + (vector-stream-vector stream))) + +(defmethod stream-write-sequence ((stream vector-output-stream) sequence start end &key) + "Just calls VECTOR-PUSH-EXTEND repeatedly." + (declare (optimize speed) (type (integer 0 *) start end)) + (loop with vector = (vector-stream-vector stream) + for index from start below end + do (vector-push-extend (elt sequence index) vector)) + sequence) + +(defmethod stream-file-position ((stream vector-input-stream)) + "Simply returns the index into the underlying vector." + (declare (optimize speed)) + (vector-stream-index stream)) + +(defmethod (setf stream-file-position) (position-spec (stream vector-input-stream)) + "Sets the index into the underlying vector if POSITION-SPEC is acceptable." + (declare (optimize speed)) + (setf (vector-stream-index stream) + (case position-spec + (:start 0) + (:end (vector-stream-end stream)) + (otherwise + (unless (integerp position-spec) + (error 'flexi-stream-position-spec-error + :format-control "Unknown file position designator: ~S." + :format-arguments (list position-spec) + :position-spec position-spec)) + (unless (<= 0 position-spec (vector-stream-end stream)) + (error 'flexi-stream-position-spec-error + :format-control "File position designator ~S is out of bounds." + :format-arguments (list position-spec) + :position-spec position-spec)) + position-spec))) + position-spec) + +(defmethod stream-file-position ((stream vector-output-stream)) + "Simply returns the fill pointer of the underlying vector." + (declare (optimize speed)) + (fill-pointer (vector-stream-vector stream))) + +(defmethod (setf stream-file-position) (position-spec (stream vector-output-stream)) + "Sets the fill pointer underlying vector if POSITION-SPEC is +acceptable. Adjusts the vector if necessary." + (declare (optimize speed)) + (let* ((vector (vector-stream-vector stream)) + (total-size (array-total-size vector)) + (new-fill-pointer + (case position-spec + (:start 0) + (:end + (warn "File position designator :END doesn't really make sense for an output stream.") + total-size) + (otherwise + (unless (integerp position-spec) + (error 'flexi-stream-position-spec-error + :format-control "Unknown file position designator: ~S." + :format-arguments (list position-spec) + :position-spec position-spec)) + (unless (<= 0 position-spec array-total-size-limit) + (error 'flexi-stream-position-spec-error + :format-control "File position designator ~S is out of bounds." + :format-arguments (list position-spec) + :position-spec position-spec)) + position-spec)))) + (when (> new-fill-pointer total-size) + (adjust-array vector new-fill-pointer)) + (setf (fill-pointer vector) new-fill-pointer) + position-spec)) + +(defmethod make-in-memory-input-stream ((vector vector) &key (start 0) + (end (length vector)) + transformer) + "Returns a binary input stream which will supply, in order, the +octets in the subsequence of VECTOR bounded by START and END. +Each octet returned will be transformed in turn by the optional +TRANSFORMER function." + (declare (optimize speed)) + (make-instance 'vector-input-stream + :vector vector + :index start + :end end + :transformer transformer)) + +(defmethod make-in-memory-input-stream ((list list) &key (start 0) + (end (length list)) + transformer) + "Returns a binary input stream which will supply, in order, the +octets in the subsequence of LIST bounded by START and END. Each +octet returned will be transformed in turn by the optional +TRANSFORMER function." + (declare (optimize speed)) + (make-instance 'list-input-stream + :list (subseq list start end) + :transformer transformer)) + +(defun make-output-vector (&key (element-type 'octet)) + "Creates and returns an array which can be used as the underlying +vector for a VECTOR-OUTPUT-STREAM." + (declare (optimize speed)) + (make-array 0 :adjustable t + :fill-pointer 0 + :element-type element-type)) + +(defun make-in-memory-output-stream (&key (element-type 'octet) transformer) + "Returns a binary output stream which accepts objects of type +ELEMENT-TYPE \(a subtype of OCTET) and makes available a sequence +that contains the octes that were actually output. The octets +stored will each be transformed by the optional TRANSFORMER +function." + (declare (optimize speed)) + (make-instance 'vector-output-stream + :vector (make-output-vector :element-type element-type) + :transformer transformer)) + +(defmethod get-output-stream-sequence ((stream in-memory-output-stream) &key as-list) + "Returns a vector containing, in order, all the octets that have +been output to the IN-MEMORY stream STREAM. This operation clears any +octets on STREAM, so the vector contains only those octets which have +been output since the last call to GET-OUTPUT-STREAM-SEQUENCE or since +the creation of the stream, whichever occurred most recently. If +AS-LIST is true the return value is coerced to a list." + (declare (optimize speed)) + (prog1 + (if as-list + (coerce (vector-stream-vector stream) 'list) + (vector-stream-vector stream)) + (setf (vector-stream-vector stream) + (make-output-vector)))) + +(defmethod output-stream-sequence-length ((stream in-memory-output-stream)) + "Returns the current length of the underlying vector of the +IN-MEMORY output stream STREAM." + (declare (optimize speed)) + (length (the (simple-array * (*)) (vector-stream-vector stream)))) + +(defmacro with-input-from-sequence ((var sequence &key start end transformer) + &body body) + "Creates an IN-MEMORY input stream from SEQUENCE using the +parameters START and END, binds VAR to this stream and then +executes the code in BODY. A function TRANSFORMER may optionally +be specified to transform the returned octets. The stream is +automatically closed on exit from WITH-INPUT-FROM-SEQUENCE, no +matter whether the exit is normal or abnormal. The return value +of this macro is the return value of BODY." + (with-rebinding (sequence) + `(let (,var) + (unwind-protect + (progn + (setq ,var (make-in-memory-input-stream ,sequence + :start (or ,start 0) + :end (or ,end (length ,sequence)) + :transformer ,transformer)) + , at body) + (when ,var (close ,var)))))) + +(defmacro with-output-to-sequence ((var &key as-list (element-type ''octet) transformer) + &body body) + "Creates an IN-MEMORY output stream, binds VAR to this stream +and then executes the code in BODY. The stream stores data of +type ELEMENT-TYPE \(a subtype of OCTET) which is \(optionally) +transformed by the function TRANSFORMER prior to storage. The +stream is automatically closed on exit from +WITH-OUTPUT-TO-SEQUENCE, no matter whether the exit is normal or +abnormal. The return value of this macro is a vector \(or a list +if AS-LIST is true) containing the octets that were sent to the +stream within BODY." + `(let (,var) + (unwind-protect + (progn + (setq ,var (make-in-memory-output-stream :element-type ,element-type + :transformer ,transformer)) + , at body + (get-output-stream-sequence ,var :as-list ,as-list)) + (when ,var (close ,var))))) + +(declaim (inline translate-char)) +(defun translate-char (char-code external-format) + "Returns a list of octets which correspond to the +representation of the character with character code CHAR-CODE +when sent to a flexi stream with external format EXTERNAL-FORMAT. +Used internally by UNREAD-CHAR%. See also STRING-TO-OCTETS." + (declare (optimize speed)) + (with-output-to-sequence (list :as-list t) + (let ((stream (make-flexi-stream list :external-format external-format))) + (write-char (code-char char-code) stream)))) \ No newline at end of file Added: trunk/input.lisp ============================================================================== --- (empty file) +++ trunk/input.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,585 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.51 2007/12/29 22:58:43 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +#-:lispworks +(defmethod read-byte* ((flexi-input-stream flexi-input-stream)) + "Reads one byte \(octet) from the underlying stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not +empty)." + (declare (optimize speed)) + ;; we're using S instead of STREAM here because of an + ;; issue with SBCL: + ;; + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (s flexi-stream-stream)) + flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) + (when (and bound + (>= position bound)) + (return-from read-byte* nil)) + (incf position) + (or (pop octet-stack) + (read-byte s nil nil) + (progn (decf position) nil)))) + +#+:lispworks +(defmethod read-byte* ((flexi-input-stream flexi-input-stream)) + "Reads one byte \(octet) from the underlying stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not +empty)." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) + (when (and bound + (>= position bound)) + (return-from read-byte* nil)) + (incf position) + (or (pop octet-stack) + ;; we use READ-SEQUENCE because READ-BYTE doesn't work with all + ;; bivalent streams in LispWorks + (let* ((buffer (make-array 1 :element-type 'octet)) + (new-position (read-sequence buffer stream))) + (cond ((zerop new-position) + (decf position) nil) + (t (aref buffer 0))))))) + +#+:lispworks +(defmethod read-byte* ((flexi-input-stream flexi-binary-input-stream)) + "Reads one byte \(octet) from the underlying stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty). +Optimized version \(only needed for LispWorks) in case the underlying +stream is binary." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) + (when (and bound + (>= position bound)) + (return-from read-byte* nil)) + (incf position) + (or (pop octet-stack) + (read-byte stream nil nil) + (progn (decf position) nil)))) + +(defmethod stream-clear-input ((flexi-input-stream flexi-input-stream)) + "Calls the corresponding method for the underlying input stream +and also clears the value of the OCTET-STACK slot." + (declare (optimize speed)) + ;; note that we don't reset the POSITION slot + (with-accessors ((octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (setq octet-stack nil) + (clear-input stream))) + +(defmethod stream-listen ((flexi-input-stream flexi-input-stream)) + "Calls the corresponding method for the underlying input stream +but first checks if \(old) input is available in the OCTET-STACK +slot." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (when (and bound + (>= position bound)) + (return-from stream-listen nil)) + (or octet-stack (listen stream)))) + +(defmethod stream-read-byte ((stream flexi-input-stream)) + "Reads one byte \(octet) from the underlying stream." + (declare (optimize speed)) + ;; set LAST-CHAR-CODE slot to NIL because we can't UNREAD-CHAR after + ;; this operation + (with-accessors ((last-char-code flexi-stream-last-char-code) + (last-octet flexi-stream-last-octet)) + stream + (setq last-char-code nil) + (let ((octet (read-byte* stream))) + (setq last-octet octet) + (or octet :eof)))) + +(defgeneric unread-char% (char-code flexi-input-stream) + (:documentation "Used internally to put a character denoted by the +character code CHAR-CODE which was already read back on the stream. +Uses the OCTET-STACK slot and decrements the POSITION slot +accordingly.")) + +(defmethod unread-char% (char-code (flexi-input-stream flexi-input-stream)) + "The default method which is un-optimized and uses TRANSLATE-CHAR to +figure out which octets to put on the octet stack." + (declare (optimize speed) (inline translate-char)) + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack) + (external-format flexi-stream-external-format)) + flexi-input-stream + (declare (integer position)) + (let ((octets-read (translate-char char-code external-format))) + (decf position (length octets-read)) + (setq octet-stack (append octets-read octet-stack))))) + +(defmethod unread-char% (char-code (flexi-input-stream flexi-latin-1-input-stream)) + "For ISO-8859-1 we can simply put the character code itself on the +octet stack." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack)) + flexi-input-stream + (declare (integer position)) + (decf position) + (push char-code octet-stack))) + +(defmethod unread-char% (char-code (flexi-input-stream flexi-ascii-input-stream)) + "For ASCII we can simply put the character code itself on the octet +stack." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack)) + flexi-input-stream + (declare (integer position)) + (decf position) + (push char-code octet-stack))) + +(defmethod unread-char% (char-code (flexi-input-stream flexi-8-bit-input-stream)) + "For 8-bit encodings we just have to put one octet on the octet +stack which we can look up in the encoding hash." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack) + (encoding-hash flexi-stream-encoding-hash)) + flexi-input-stream + (declare (integer position)) + (decf position) + (push (gethash char-code encoding-hash) octet-stack))) + +(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) + (flexi-input-stream flexi-cr-8-bit-input-stream)) + "A kind of `safety net' for the optimized 8-bit versions of +UNREAD-CHAR% which checks for the single case where more than one +octet has to be put on the octet stack." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack) + (external-format flexi-stream-external-format)) + flexi-input-stream + (declare (integer position)) + ;; note that below we use the knowledge that in all 8-bit encodings + ;; #\Return and #\Linefeed are mapped to the same octets + (case (external-format-eol-style external-format) + (:crlf + (decf position 2) + (push #.(char-code #\Linefeed) octet-stack) + (push #.(char-code #\Return) octet-stack)) + (otherwise + (decf position) + (push #.(char-code #\Return) octet-stack))))) + +#+:lispworks +(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) + (flexi-input-stream flexi-binary-cr-8-bit-input-stream)) + "A kind of `safety net' for the optimized 8-bit versions of +UNREAD-CHAR% which checks for the single case where more than one +octet has to be put on the octet stack. + +This method \(identical to the one defined directly above) exists only +for LispWorks' \"binary\" streams and must be there due to the +slightly clunky class hierarchy." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack) + (external-format flexi-stream-external-format)) + flexi-input-stream + (declare (integer position)) + ;; note that below we use the knowledge that in all 8-bit encodings + ;; #\Return and #\Linefeed are mapped to the same octets + (case (external-format-eol-style external-format) + (:crlf + (decf position 2) + (push #.(char-code #\Linefeed) octet-stack) + (push #.(char-code #\Return) octet-stack)) + (otherwise + (decf position) + (push #.(char-code #\Return) octet-stack))))) + +(defmacro define-char-reader ((stream-var stream-class) &body body) + "Helper macro to define methods for STREAM-READ-CHAR. Defines a +method for the class STREAM-CLASS using the variable STREAM-VAR and +the code body BODY wrapped with some standard code common to all +methods defined here. The return value of BODY is a character code. +In case of encoding problems, BODY must return the value returned by +\(RECOVER-FROM-ENCODING-ERROR ...)." + (with-unique-names (char-code body-fn) + `(defmethod stream-read-char ((,stream-var ,stream-class)) + "This method was generated with the DEFINE-CHAR-READER macro." + (declare (optimize speed)) + ;; note that we do nothing for the :LF EOL style because we + ;; assume that #\Newline is the same as #\Linefeed in all + ;; Lisps which will use this library + (with-accessors ((last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) + ,stream-var + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (let ((,char-code + (flet ((,body-fn () , at body)) + (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) + (,body-fn)))) + ;; remember this character and the current external format + ;; for UNREAD-CHAR + (setq last-char-code ,char-code) + (or (code-char ,char-code) ,char-code)))))) + +(defun recover-from-encoding-error (flexi-stream format-control &rest format-args) + "Helper function used by the STREAM-READ-CHAR methods below to deal +with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and +returns its character code in this case. Otherwise signals a +FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this +function and provides a corresponding USE-VALUE restart." + (when *substitution-char* + (return-from recover-from-encoding-error (char-code *substitution-char*))) + (restart-case + (apply #'signal-encoding-error flexi-stream format-control format-args) + (use-value (char) + :report "Specify a character to be used instead." + :interactive (lambda () + (loop + (format *query-io* "Type a character: ") + (let ((line (read-line *query-io*))) + (when (= 1 (length line)) + (return (list (char line 0))))))) + (char-code char)))) + +(define-char-reader (stream flexi-latin-1-input-stream) + (or (read-byte* stream) + (return-from stream-read-char :eof))) + +(define-char-reader (stream flexi-ascii-input-stream) + (let ((octet (or (read-byte* stream) + (return-from stream-read-char :eof)))) + (declare (type octet octet)) + (if (> octet 127) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + octet))) + +(define-char-reader (stream flexi-8-bit-input-stream) + (with-accessors ((encoding-table flexi-stream-encoding-table)) + stream + (let* ((octet (or (read-byte* stream) + (return-from stream-read-char :eof))) + (char-code (aref (the (simple-array * *) encoding-table) octet))) + (declare (type octet octet)) + (if (or (null char-code) + (= char-code 65533)) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + char-code)))) + +(define-char-reader (stream flexi-utf-8-input-stream) + (block body + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (read-byte* stream) + (cond (first-octet-seen + (return-from body + (recover-from-encoding-error stream + "End of file while in UTF-8 sequence."))) + (t (return-from stream-read-char :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (let ((octet (read-next-byte))) + (declare (type octet octet)) + (multiple-value-bind (start count) + (cond ((zerop (logand octet #b10000000)) + (values octet 0)) + ((= #b11000000 (logand octet #b11100000)) + (values (logand octet #b00011111) 1)) + ((= #b11100000 (logand octet #b11110000)) + (values (logand octet #b00001111) 2)) + ((= #b11110000 (logand octet #b11111000)) + (values (logand octet #b00000111) 3)) + ((= #b11111000 (logand octet #b11111100)) + (values (logand octet #b00000011) 4)) + ((= #b11111100 (logand octet #b11111110)) + (values (logand octet #b00000001) 5)) + (t (return-from body + (recover-from-encoding-error stream + "Unexpected value #x~X at start of UTF-8 sequence." + octet)))) + ;; note that we currently don't check for "overlong" + ;; sequences or other illegal values + (loop for result of-type (unsigned-byte 32) + = start then (+ (ash result 6) + (logand octet #b111111)) + repeat count + for octet of-type octet = (read-next-byte) + unless (= #b10000000 (logand octet #b11000000)) + do (return-from body + (recover-from-encoding-error stream + "Unexpected value #x~X in UTF-8 sequence." octet)) + finally (return result)))))))) + +(define-char-reader (stream flexi-utf-16-le-input-stream) + (block body + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (read-byte* stream) + (cond (first-octet-seen + (return-from body + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from stream-read-char :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (the octet (read-next-byte)) + (ash (the octet (read-next-byte)) 8)))) + (declare (inline read-next-byte read-next-word) + (dynamic-extent (function read-next-byte) (function read-next-word))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from body + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word))))))) + +(define-char-reader (stream flexi-utf-16-be-input-stream) + (block body + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (read-byte* stream) + (cond (first-octet-seen + (return-from body + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from stream-read-char :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (ash (the octet (read-next-byte)) 8) + (the octet (read-next-byte))))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from body + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word))))))) + +(define-char-reader (stream flexi-utf-32-le-input-stream) + (block body + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (read-byte* stream) + (cond (first-octet-seen + (return-from body + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from stream-read-char :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 0 to 24 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count)))))) + +(define-char-reader (stream flexi-utf-32-be-input-stream) + (block body + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (read-byte* stream) + (cond (first-octet-seen + (return-from body + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from stream-read-char :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 24 downto 0 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count)))))) + +(defmethod stream-read-char ((stream flexi-cr-mixin)) + "The `base' method for all streams which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of +reading one or more characters from the stream." + (declare (optimize speed)) + (let ((char (call-next-method))) + (when (eq char :eof) + (return-from stream-read-char :eof)) + (with-accessors ((external-format flexi-stream-external-format) + (last-char-code flexi-stream-last-char-code)) + stream + (when (eql char #\Return) + (case (external-format-eol-style external-format) + (:cr (setq char #\Newline + last-char-code #.(char-code #\Newline))) + ;; in the case :CRLF we have to look ahead one character + (:crlf (let ((next-char (call-next-method))) + (case next-char + (#\Linefeed + (setq char #\Newline + last-char-code #.(char-code #\Newline))) + (:eof) + ;; if the character we peeked at wasn't a + ;; linefeed character we push its + ;; constituents back onto our internal + ;; octet stack + (otherwise (unread-char% (char-code next-char) stream))))))) + char))) + +(defmethod stream-read-char-no-hang ((stream flexi-input-stream)) + "Reads one character if the underlying stream has at least one +octet available." + (declare (optimize speed)) + ;; note that this may block for non-8-bit encodings - I think + ;; there's no easy way to handle this correctly + (and (stream-listen stream) + (stream-read-char stream))) + +(defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key) + "Reads enough input from STREAM to fill SEQUENCE from START to END. +If SEQUENCE is an array which can store octets we use READ-SEQUENCE to +fill it in one fell swoop, otherwise we iterate using +STREAM-READ-CHAR." + (declare (optimize speed) + (type (integer 0 *) start end)) + (with-accessors ((last-char-code flexi-stream-last-char-code) + (last-octet flexi-stream-last-octet) + (stream flexi-stream-stream) + (position flexi-stream-position) + (octet-stack flexi-stream-octet-stack)) + flexi-input-stream + (declare (integer position)) + (cond ((and (arrayp sequence) + (subtypep 'octet (array-element-type sequence))) + (setf last-char-code nil) + (let ((cursor start)) + (loop with stack = octet-stack + for continuep = (< cursor end) + for octet = (and continuep (pop stack)) + while octet + do (setf (aref sequence cursor) (the octet octet)) + (incf cursor)) + (let ((index + (read-sequence sequence stream :start cursor :end end))) + (incf position (- index start)) + (when (> index start) + (setq last-octet (aref sequence (1- index)))) + index))) + (t + (loop for index from start below end + for element = (stream-read-char flexi-input-stream) + until (eq element :eof) + do (setf (elt sequence index) element) + finally (return index)))))) + +(defmethod stream-unread-char ((stream flexi-input-stream) char) + "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM. +Makes sure CHAR will only be unread if it was the last character +read and if it was read with the same encoding that's currently +being used by the stream." + (declare (optimize speed)) + (with-accessors ((last-char-code flexi-stream-last-char-code)) + stream + (unless last-char-code + (error 'flexi-stream-simple-error + :format-control "No character to unread from this stream \(or external format has changed or last reading operation was binary).")) + (unless (= (char-code char) last-char-code) + (error 'flexi-stream-simple-error + :format-control "Last character read (~S) was different from ~S." + :format-arguments (list (code-char last-char-code) char))) + (unread-char% last-char-code stream) + (setq last-char-code nil) + nil)) + +(defmethod unread-byte (byte (flexi-input-stream flexi-input-stream)) + "Similar to UNREAD-CHAR in that it `unreads' the last octet from +STREAM. Note that you can only call UNREAD-BYTE after a corresponding +READ-BYTE." + (declare (optimize speed)) + (with-accessors ((last-octet flexi-stream-last-octet) + (octet-stack flexi-stream-octet-stack) + (position flexi-stream-position)) + flexi-input-stream + (unless last-octet + (error 'flexi-stream-simple-error + :format-control "No byte to unread from this stream \(or last reading operation read a character).")) + (unless (= byte last-octet) + (error 'flexi-stream-simple-error + :format-control "Last byte read was different from #x~X." + :format-arguments (list byte))) + (setq last-octet nil) + (decf (the integer position)) + (push byte octet-stack) + nil)) + +(defmethod peek-byte ((flexi-input-stream flexi-input-stream) + &optional peek-type (eof-error-p t) eof-value) + "PEEK-BYTE is like PEEK-CHAR, i.e. it returns an octet from +FLEXI-INPUT-STREAM without actually removing it. If PEEK-TYPE is NIL +the next octet is returned, if PEEK-TYPE is T, the next octet which is +not 0 is returned, if PEEK-TYPE is an octet, the next octet which +equals PEEK-TYPE is returned. EOF-ERROR-P and EOF-VALUE are +interpreted as usual." + (declare (optimize speed)) + (loop for octet = (read-byte flexi-input-stream eof-error-p eof-value) + until (cond ((null peek-type)) + ((eql octet eof-value)) + ((eq peek-type t) + (plusp octet)) + (t (= octet peek-type))) + finally (unless (eql octet eof-value) + (unread-byte octet flexi-input-stream)) + (return octet))) \ No newline at end of file Added: trunk/iso-8859.lisp ============================================================================== --- (empty file) +++ trunk/iso-8859.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,53 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +;;; the following code was auto-generated from files which can be +;;; found at + +(defvar +iso-8859-tables+ + '((:iso-8859-1 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) + (:iso-8859-2 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 352 350 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 357 378 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) + (:iso-8859-3 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168 304 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305 351 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 202 203 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219 220 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236 237 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365 349 729)) + (:iso-8859-4 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 352 274 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 291 359 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 326 333 311 244 245 246 247 248 371 250 251 252 361 363 729)) + (:iso-8859-5 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 167 1118 1119)) + (:iso-8859-6 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533 65533 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 1567 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 65533 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) + (:iso-8859-7 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 168 169 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) + (:iso-8859-8 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) + (:iso-8859-9 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) + (:iso-8859-10 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 272 352 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 359 382 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 326 333 243 244 245 246 361 248 371 250 251 252 253 254 312)) + (:iso-8859-11 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 65533 65533 65533 65533)) + (:iso-8859-13 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216 169 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 8217)) + (:iso-8859-14 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 7808 169 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7809 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 217 218 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 253 375 255)) + (:iso-8859-15 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 169 170 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 187 338 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) + (:iso-8859-16 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353 169 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 537 187 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 204 205 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 538 223 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 273 324 242 243 244 337 246 347 369 249 250 251 252 281 539 255))) + "A list of the ISO-8859 encodings where each element is a cons +with the car being a keyword denoting the encoding and the cdr +being a vector enumerating the corresponding character codes.") Added: trunk/koi8-r.lisp ============================================================================== --- (empty file) +++ trunk/koi8-r.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,6 @@ +(in-package :flexi-streams) + +;; http://unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT +(defvar +koi8-r-table+ + #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 9472 9474 9484 9488 9492 9496 9500 9508 9516 9524 9532 9600 9604 9608 9612 9616 9617 9618 9619 8992 9632 8729 8730 8776 8804 8805 160 8993 176 178 183 247 9552 9553 9554 1105 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 1025 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 169 1102 1072 1073 1094 1076 1077 1092 1075 1093 1080 1081 1082 1083 1084 1085 1086 1087 1103 1088 1089 1090 1091 1078 1074 1100 1099 1079 1096 1101 1097 1095 1098 1070 1040 1041 1062 1044 1045 1060 1043 1061 1048 1049 1050 1051 1052 1053 1054 1055 1071 1056 1057 1058 1059 1046 1042 1068 1067 1047 1064 1069 1065 1063 1066) + "An array enumerating the character codes for the KOI8-R encoding.") Added: trunk/lw-binary-stream.lisp ============================================================================== --- (empty file) +++ trunk/lw-binary-stream.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,441 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.10 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass flexi-binary-output-stream (flexi-output-stream) + () + (:documentation "This class is for output streams where the +underlying stream is binary. It exists solely for the purpose of +optimizing output on LispWorks. See WRITE-BYTE*.")) + +(defclass flexi-binary-input-stream (flexi-input-stream) + () + (:documentation "This class is for input streams where the +underlying stream is binary. It exists solely for the purpose of +optimizing input on LispWorks. See READ-BYTE*.")) + +(defclass flexi-binary-io-stream (flexi-binary-input-stream flexi-binary-output-stream flexi-io-stream) + () + (:documentation "This class is for bidirectional streams where the +underlying stream is binary. It exists solely for the purpose of +optimizing input and output on LispWorks. See READ-BYTE* and +WRITE-BYTE*.")) + +(defclass flexi-binary-8-bit-input-stream (flexi-8-bit-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-8-BIT-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-8-bit-input-stream (flexi-cr-mixin flexi-binary-8-bit-input-stream) + () + (:documentation "Like FLEXI-CR-8-BIT-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-ascii-input-stream (flexi-ascii-input-stream flexi-binary-8-bit-input-stream) + () + (:documentation "Like FLEXI-ASCII-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-ascii-input-stream (flexi-cr-mixin flexi-binary-ascii-input-stream) + () + (:documentation "Like FLEXI-CR-ASCII-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-latin-1-input-stream (flexi-latin-1-input-stream flexi-binary-8-bit-input-stream) + () + (:documentation "Like FLEXI-LATIN-1-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-latin-1-input-stream (flexi-cr-mixin flexi-binary-latin-1-input-stream) + () + (:documentation "Like FLEXI-CR-LATIN-1-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-le-input-stream (flexi-utf-32-le-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-32-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-binary-utf-32-le-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-be-input-stream (flexi-utf-32-be-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-32-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-binary-utf-32-be-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-le-input-stream (flexi-utf-16-le-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-16-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-binary-utf-16-le-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-be-input-stream (flexi-utf-16-be-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-16-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-binary-utf-16-be-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-8-input-stream (flexi-utf-8-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-8-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-8-input-stream (flexi-cr-mixin flexi-binary-utf-8-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-8-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-8-bit-output-stream (flexi-8-bit-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-8-BIT-OUTPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-8-bit-output-stream (flexi-cr-mixin flexi-binary-8-bit-output-stream) + () + (:documentation "Like FLEXI-CR-8-BIT-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-ascii-output-stream (flexi-ascii-output-stream flexi-binary-8-bit-output-stream) + () + (:documentation "Like FLEXI-ASCII-OUTPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-ascii-output-stream (flexi-cr-mixin flexi-binary-ascii-output-stream) + () + (:documentation "Like FLEXI-CR-ASCII-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-latin-1-output-stream (flexi-latin-1-output-stream flexi-binary-8-bit-output-stream) + () + (:documentation "Like FLEXI-LATIN-1-OUTPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-latin-1-output-stream (flexi-cr-mixin flexi-binary-latin-1-output-stream) + () + (:documentation "Like FLEXI-CR-LATIN-1-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-le-output-stream (flexi-utf-32-le-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-32-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-binary-utf-32-le-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-be-output-stream (flexi-utf-32-be-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-32-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-binary-utf-32-be-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-le-output-stream (flexi-utf-16-le-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-16-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-binary-utf-16-le-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-be-output-stream (flexi-utf-16-be-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-16-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-binary-utf-16-be-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-8-output-stream (flexi-utf-8-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-8-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-8-output-stream (flexi-cr-mixin flexi-binary-utf-8-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-8-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-8-bit-io-stream (flexi-binary-io-stream flexi-8-bit-io-stream) + () + (:documentation "Like FLEXI-8-BIT-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-8-bit-io-stream (flexi-cr-mixin flexi-binary-8-bit-io-stream) + () + (:documentation "Like FLEXI-CR-8-BIT-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-ascii-io-stream (flexi-ascii-io-stream flexi-binary-8-bit-io-stream) + () + (:documentation "Like FLEXI-ASCII-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-ascii-io-stream (flexi-cr-mixin flexi-binary-ascii-io-stream) + () + (:documentation "Like FLEXI-CR-ASCII-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-latin-1-io-stream (flexi-latin-1-io-stream flexi-binary-8-bit-io-stream) + () + (:documentation "Like FLEXI-LATIN-1-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-latin-1-io-stream (flexi-cr-mixin flexi-binary-latin-1-io-stream) + () + (:documentation "Like FLEXI-CR-LATIN-1-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-le-io-stream (flexi-utf-32-le-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-32-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-binary-utf-32-le-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-be-io-stream (flexi-utf-32-be-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-32-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-binary-utf-32-be-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-le-io-stream (flexi-utf-16-le-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-16-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-binary-utf-16-le-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-be-io-stream (flexi-utf-16-be-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-16-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-binary-utf-16-be-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-8-io-stream (flexi-utf-8-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-8-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-8-io-stream (flexi-cr-mixin flexi-binary-utf-8-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-8-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defmethod set-class ((stream flexi-binary-input-stream)) + "Changes the actual class of STREAM depending on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (change-class stream + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-binary-cr-ascii-input-stream + 'flexi-binary-ascii-input-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-binary-cr-latin-1-input-stream + 'flexi-binary-latin-1-input-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-binary-cr-8-bit-input-stream + 'flexi-binary-8-bit-input-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-binary-cr-utf-8-input-stream + 'flexi-binary-utf-8-input-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-binary-cr-utf-16-le-input-stream + 'flexi-binary-cr-utf-16-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-binary-utf-16-le-input-stream + 'flexi-binary-utf-16-be-input-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-binary-cr-utf-32-le-input-stream + 'flexi-binary-cr-utf-32-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-binary-utf-32-le-input-stream + 'flexi-binary-utf-32-be-input-stream)))))))))) + +(defmethod set-class ((stream flexi-binary-output-stream)) + "Changes the actual class of STREAM depending on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (change-class stream + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-binary-cr-ascii-output-stream + 'flexi-binary-ascii-output-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-binary-cr-latin-1-output-stream + 'flexi-binary-latin-1-output-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-binary-cr-8-bit-output-stream + 'flexi-binary-8-bit-output-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-binary-cr-utf-8-output-stream + 'flexi-binary-utf-8-output-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-binary-cr-utf-16-le-output-stream + 'flexi-binary-cr-utf-16-be-output-stream) + (if (external-format-little-endian external-format) + 'flexi-binary-utf-16-le-output-stream + 'flexi-binary-utf-16-be-output-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-binary-cr-utf-32-le-output-stream + 'flexi-binary-cr-utf-32-be-output-stream) + (if (external-format-little-endian external-format) + 'flexi-binary-utf-32-le-output-stream + 'flexi-binary-utf-32-be-output-stream)))))))))) + +(defmethod set-class ((stream flexi-binary-io-stream)) + "Changes the actual class of STREAM depending on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (change-class stream + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-binary-cr-ascii-io-stream + 'flexi-binary-ascii-io-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-binary-cr-latin-1-io-stream + 'flexi-binary-latin-1-io-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-binary-cr-8-bit-io-stream + 'flexi-binary-8-bit-io-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-binary-cr-utf-8-io-stream + 'flexi-binary-utf-8-io-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-binary-cr-utf-16-le-io-stream + 'flexi-binary-cr-utf-16-be-io-stream) + (if (external-format-little-endian external-format) + 'flexi-binary-utf-16-le-io-stream + 'flexi-binary-utf-16-be-io-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-binary-cr-utf-32-le-io-stream + 'flexi-binary-cr-utf-32-be-io-stream) + (if (external-format-little-endian external-format) + 'flexi-binary-utf-32-le-io-stream + 'flexi-binary-utf-32-be-io-stream)))))))))) + + +(defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) + "Might change the class of FLEXI-STREAM for optimization purposes. +Only needed for LispWorks." + (declare (ignore initargs) + (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-stream + (when (subtypep (stream-element-type stream) 'octet) + (change-class flexi-stream + (typecase flexi-stream + (flexi-io-stream 'flexi-binary-io-stream) + (otherwise 'flexi-binary-output-stream))) + (set-class flexi-stream)))) + +(defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) + "Might change the class of FLEXI-STREAM for optimization purposes. +Only needed for LispWorks." + (declare (ignore initargs) + (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-stream + (when (subtypep (stream-element-type stream) 'octet) + (change-class flexi-stream + (typecase flexi-stream + (flexi-io-stream 'flexi-binary-io-stream) + (otherwise 'flexi-binary-input-stream))) + (set-class flexi-stream)))) Added: trunk/output.lisp ============================================================================== --- (empty file) +++ trunk/output.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,316 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.44 2007/12/29 22:23:23 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric write-byte* (byte sink) + (:documentation "Writes one byte \(octet) to the underlying stream +of SINK \(if SINK is a flexi stream) or adds the byte to the end of +SINK \(if SINK is an array with a fill pointer).")) + +#-:lispworks +(defmethod write-byte* (byte (sink flexi-output-stream)) + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + sink + (write-byte byte stream))) + +#+:lispworks +(defmethod write-byte* (byte (sink flexi-output-stream)) + (declare (optimize speed)) + ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all + ;; bivalent streams in LispWorks (4.4.6) + (with-accessors ((stream flexi-stream-stream)) + sink + (write-sequence (make-array 1 :element-type 'octet + :initial-element byte) + stream) + byte)) + +#+:lispworks +(defmethod write-byte* (byte (sink flexi-binary-output-stream)) + "Optimized version \(only needed for LispWorks) in case the +underlying stream is binary." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + sink + (write-byte byte stream))) + +(defmethod write-byte* (byte (sink array)) + (declare (optimize speed)) + (vector-push byte sink)) + +(defgeneric char-to-octets (stream char sink) + (:documentation "Converts the character CHAR to sequence of octets +and sends this sequence to SINK. STREAM will always be a flexi stream +which is used to determine how the character should be converted. +This function does all the work for STREAM-WRITE-CHAR in which case +SINK is the same as STREAM. It is also used in the implementation of +STREAM-WRITE-SEQUENCE below.")) + +(defmethod stream-write-char ((stream flexi-output-stream) char) + (declare (optimize speed)) + (char-to-octets stream char stream)) + +(defmethod char-to-octets ((stream flexi-latin-1-output-stream) char sink) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 255) + (signal-encoding-error stream "~S is not a LATIN-1 character." char)) + (write-byte* octet sink)) + char) + +(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 127) + (signal-encoding-error stream "~S is not an ASCII character." char)) + (write-byte* octet sink)) + char) + +(defmethod char-to-octets ((stream flexi-8-bit-output-stream) char sink) + (declare (optimize speed)) + (with-accessors ((encoding-hash flexi-stream-encoding-hash)) + stream + (let ((octet (gethash (char-code char) encoding-hash))) + (unless octet + (signal-encoding-error stream "~S is not in this encoding." char)) + (write-byte* octet sink)) + char)) + +(defmethod char-to-octets ((stream flexi-utf-8-output-stream) char sink) + (declare (optimize speed)) + (let ((char-code (char-code char))) + (tagbody + (cond ((< char-code #x80) + (write-byte* char-code sink) + (go zero)) + ((< char-code #x800) + (write-byte* (logior #b11000000 (ldb (byte 5 6) char-code)) sink) + (go one)) + ((< char-code #x10000) + (write-byte* (logior #b11100000 (ldb (byte 4 12) char-code)) sink) + (go two)) + ((< char-code #x200000) + (write-byte* (logior #b11110000 (ldb (byte 3 18) char-code)) sink) + (go three)) + ((< char-code #x4000000) + (write-byte* (logior #b11111000 (ldb (byte 2 24) char-code)) sink) + (go four)) + (t (write-byte* (logior #b11111100 (ldb (byte 1 30) char-code)) sink))) + (write-byte* (logior #b10000000 (ldb (byte 6 24) char-code)) sink) + four + (write-byte* (logior #b10000000 (ldb (byte 6 18) char-code)) sink) + three + (write-byte* (logior #b10000000 (ldb (byte 6 12) char-code)) sink) + two + (write-byte* (logior #b10000000 (ldb (byte 6 6) char-code)) sink) + one + (write-byte* (logior #b10000000 (ldb (byte 6 0) char-code)) sink) + zero)) + char) + +(defmethod char-to-octets ((stream flexi-utf-16-le-output-stream) char sink) + (declare (optimize speed)) + (flet ((write-word (word) + (write-byte* (ldb (byte 8 0) word) sink) + (write-byte* (ldb (byte 8 8) word) sink))) + (declare (inline write-word) (dynamic-extent (function write-word))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((stream flexi-utf-16-be-output-stream) char sink) + (declare (optimize speed)) + (flet ((write-word (word) + (write-byte* (ldb (byte 8 8) word) sink) + (write-byte* (ldb (byte 8 0) word) sink))) + (declare (inline write-word) (dynamic-extent (function write-word))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((stream flexi-utf-32-le-output-stream) char sink) + (declare (optimize speed)) + (loop with char-code = (char-code char) + for position in '(0 8 16 24) do + (write-byte* (ldb (byte 8 position) char-code) sink)) + char) + +(defmethod char-to-octets ((stream flexi-utf-32-be-output-stream) char sink) + (declare (optimize speed)) + (loop with char-code = (char-code char) + for position in '(24 16 8 0) do + (write-byte* (ldb (byte 8 position) char-code) sink)) + char) + +(defmethod char-to-octets ((stream flexi-cr-mixin) char sink) + "The `base' method for all streams which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of sending +one or more characters to SINK." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (case char + (#\Newline + (case (external-format-eol-style external-format) + (:cr (call-next-method stream #\Return sink)) + (:crlf (call-next-method stream #\Return sink) + (call-next-method stream #\Linefeed sink)))) + (otherwise (call-next-method))) + char)) + +(defmethod stream-write-char :after ((stream flexi-output-stream) char) + (declare (optimize speed)) + ;; update the column unless we're in the middle of the line and + ;; the current value is NIL + (with-accessors ((column flexi-stream-column)) + stream + (cond ((char= char #\Newline) (setq column 0)) + (column (incf (the integer column)))))) + +(defmethod stream-clear-output ((flexi-output-stream flexi-output-stream)) + "Simply calls the corresponding method for the underlying +output stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (clear-output stream))) + +(defmethod stream-finish-output ((flexi-output-stream flexi-output-stream)) + "Simply calls the corresponding method for the underlying +output stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (finish-output stream))) + +(defmethod stream-force-output ((flexi-output-stream flexi-output-stream)) + "Simply calls the corresponding method for the underlying +output stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (force-output stream))) + +(defmethod stream-line-column ((flexi-output-stream flexi-output-stream)) + "Returns the column stored in the COLUMN slot of the +FLEXI-OUTPUT-STREAM object STREAM." + (declare (optimize speed)) + (with-accessors ((column flexi-stream-column)) + flexi-output-stream + column)) + +(defmethod stream-write-byte ((flexi-output-stream flexi-output-stream) byte) + "Writes a byte \(octet) to the underlying stream." + (declare (optimize speed)) + (with-accessors ((column flexi-stream-column)) + flexi-output-stream + ;; set column to NIL because we don't know how to handle binary + ;; output mixed with character output + (setq column nil) + (write-byte* byte flexi-output-stream))) + +#+:allegro +(defmethod stream-terpri ((stream flexi-output-stream)) + "Writes a #\Newline character to the underlying stream." + (declare (optimize speed)) + ;; needed for AllegroCL - grrr... + (stream-write-char stream #\Newline)) + +(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) + "Writes all elements of the sequence SEQUENCE from START to END +to the underlying stream. The elements can be either octets or +characters. Characters are output according to the current +encoding \(external format) of the FLEXI-OUTPUT-STREAM object +STREAM." + (declare (optimize speed) + (type (integer 0 *) start end)) + (with-accessors ((stream flexi-stream-stream) + (column flexi-stream-column)) + flexi-output-stream + (cond ((and (arrayp sequence) + (subtypep (array-element-type sequence) 'octet)) + ;; set column to NIL because we don't know how to handle binary + ;; output mixed with character output + (setq column nil) + (write-sequence sequence stream :start start :end end)) + (t (loop for index from start below end + for element = (elt sequence index) + when (characterp element) do + (stream-write-char flexi-output-stream element) + else do + (stream-write-byte flexi-output-stream element)) + sequence)))) + +(defmethod stream-write-sequence ((stream flexi-output-stream) (sequence string) start end &key) + "Optimized method for the cases where SEQUENCE is a string. Fills +an internal buffer and uses repeated calls to WRITE-SEQUENCE to write +to the underlying stream." + (declare (optimize speed) + (type (integer 0 *) start end)) + ;; don't use this optimized method for bivalent character streams on + ;; LispWorks, as it currently gets confused by the fill pointer + #+:lispworks + (unless (typep stream 'flexi-binary-output-stream) + (return-from stream-write-sequence + (call-next-method))) + (let* ((buffer (make-array (+ +buffer-size+ 20) + :element-type '(unsigned-byte 8) + :fill-pointer 0)) + (last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (loop for index from start below end + do (char-to-octets stream (aref sequence index) buffer) + when (>= (fill-pointer buffer) +buffer-size+) do + (write-sequence buffer (flexi-stream-stream stream)) + (setf (fill-pointer buffer) 0) + finally (when (>= (fill-pointer buffer) 0) + (write-sequence buffer (flexi-stream-stream stream)))) + (setf (flexi-stream-column stream) + (cond (last-newline-pos (- end last-newline-pos 1)) + ((flexi-stream-column stream) + (+ (flexi-stream-column stream) (- end start)))))) + sequence) + +(defmethod stream-write-string ((stream flexi-output-stream) string + &optional (start 0) (end (length string))) + "Simply hands over to the optimized method for STREAM-WRITE-SEQUENCE." + (stream-write-sequence stream string start (or end (length string)))) Added: trunk/packages.lisp ============================================================================== --- (empty file) +++ trunk/packages.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,83 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.30 2007/10/11 20:23:53 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :cl-user) + +(unless (find-symbol (symbol-name :stream-file-position) :trivial-gray-streams) + (error "You need a newer version of TRIVIAL-GRAY-STREAMS.")) + +(defpackage :flexi-streams + (:use :cl :trivial-gray-streams) + (:nicknames :flex) + #+:lispworks + (:shadow :with-accessors) + (:export :*default-eol-style* + :*default-little-endian* + :*substitution-char* + :external-format-eol-style + :external-format-equal + :external-format-id + :external-format-little-endian + :external-format-name + :flexi-input-stream + :flexi-output-stream + :flexi-io-stream + :flexi-stream + :flexi-stream-bound + :flexi-stream-external-format + :flexi-stream-encoding-error + :flexi-stream-element-type + :flexi-stream-element-type-error + :flexi-stream-element-type-error-element-type + :flexi-stream-error + :flexi-stream-column + :flexi-stream-position + :flexi-stream-position-spec-error + :flexi-stream-position-spec-error-position-spec + :flexi-stream-stream + :get-output-stream-sequence + :in-memory-stream + :in-memory-stream-closed-error + :in-memory-stream-error + :in-memory-input-stream + :in-memory-output-stream + :list-stream + :make-external-format + :make-in-memory-input-stream + :make-in-memory-output-stream + :make-flexi-stream + :octet + :octets-to-string + :output-stream-sequence-length + :peek-byte + :string-to-octets + :unread-byte + :vector-stream + :with-input-from-sequence + :with-output-to-sequence)) Added: trunk/specials.lisp ============================================================================== --- (empty file) +++ trunk/specials.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,184 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.25 2007/12/29 21:17:06 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(deftype octet () + "A shortcut for \(UNSIGNED-BYTE 8)." + '(unsigned-byte 8)) + +(defvar +name-map+ + '((:utf8 . :utf-8) + (:utf16 . :utf-16) + (:ucs2 . :utf-16) + (:ucs-2 . :utf-16) + (:unicode . :utf-16) + (:utf32 . :utf-32) + (:ucs4 . :utf-32) + (:ucs-4 . :utf-32) + (:ascii . :us-ascii) + (:koi8r . :koi8-r) + (:latin-1 . :iso-8859-1) + (:latin1 . :iso-8859-1) + (:latin-2 . :iso-8859-2) + (:latin2 . :iso-8859-2) + (:latin-3 . :iso-8859-3) + (:latin3 . :iso-8859-3) + (:latin-4 . :iso-8859-4) + (:latin4 . :iso-8859-4) + (:cyrillic . :iso-8859-5) + (:arabic . :iso-8859-6) + (:greek . :iso-8859-7) + (:hebrew . :iso-8859-8) + (:latin-5 . :iso-8859-9) + (:latin5 . :iso-8859-9) + (:latin-6 . :iso-8859-10) + (:latin6 . :iso-8859-10) + (:thai . :iso-8859-11) + (:latin-7 . :iso-8859-13) + (:latin7 . :iso-8859-13) + (:latin-8 . :iso-8859-14) + (:latin8 . :iso-8859-14) + (:latin-9 . :iso-8859-15) + (:latin9 . :iso-8859-15) + (:latin-0 . :iso-8859-15) + (:latin0 . :iso-8859-15) + (:latin-10 . :iso-8859-16) + (:latin10 . :iso-8859-16) + (:codepage . :code-page) + #+(and :lispworks :win32) + (win32:code-page . :code-page)) + "An alist which mapes alternative names for external formats to +their canonical counterparts.") + +(defvar +shortcut-map+ + '((:ucs-2le . (:ucs-2 :little-endian t)) + (:ucs-2be . (:ucs-2 :little-endian nil)) + (:ucs-4le . (:ucs-4 :little-endian t)) + (:ucs-4be . (:ucs-4 :little-endian nil)) + (:utf-16le . (:utf-16 :little-endian t)) + (:utf-16be . (:utf-16 :little-endian nil)) + (:utf-32le . (:utf-32 :little-endian t)) + (:utf-32be . (:utf-32 :little-endian nil)) + (:ibm437 . (:code-page :id 437)) + (:ibm850 . (:code-page :id 850)) + (:ibm852 . (:code-page :id 852)) + (:ibm855 . (:code-page :id 855)) + (:ibm857 . (:code-page :id 857)) + (:ibm860 . (:code-page :id 860)) + (:ibm861 . (:code-page :id 861)) + (:ibm862 . (:code-page :id 862)) + (:ibm863 . (:code-page :id 863)) + (:ibm864 . (:code-page :id 864)) + (:ibm865 . (:code-page :id 865)) + (:ibm866 . (:code-page :id 866)) + (:ibm869 . (:code-page :id 869)) + (:windows-1250 . (:code-page :id 1250)) + (:windows-1251 . (:code-page :id 1251)) + (:windows-1252 . (:code-page :id 1252)) + (:windows-1253 . (:code-page :id 1253)) + (:windows-1254 . (:code-page :id 1254)) + (:windows-1255 . (:code-page :id 1255)) + (:windows-1256 . (:code-page :id 1256)) + (:windows-1257 . (:code-page :id 1257)) + (:windows-1258 . (:code-page :id 1258))) + "An alist which maps shortcuts for external formats to their +long forms.") + +(defvar *default-eol-style* + #+:win32 :crlf + #-:win32 :lf + "The end-of-line style used by external formats if none is +explicitly given. Depends on the OS the code is compiled on.") + +(defvar *default-little-endian* + #+:little-endian t + #-:little-endian nil + "Whether external formats are little-endian by default +\(i.e. unless explicitly specified). Depends on the platform +the code is compiled on.") + +(defvar *substitution-char* nil + "If this value is not NIL, it should be a character which is used +\(as if by a USE-VALUE restart) whenever during reading an error of +type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise.") + +(defun invert-table (table) + "`Inverts' an array which maps octets to character codes to a +hash tables which maps character codes to octets." + (let ((hash (make-hash-table))) + (loop for octet from 0 + for char-code across table + unless (= char-code 65533) + do (setf (gethash char-code hash) octet)) + hash)) + +(defvar +iso-8859-hashes+ + (loop for (name . table) in +iso-8859-tables+ + collect (cons name (invert-table table))) + "An alist which maps names for ISO-8859 encodings to hash +tables which map character codes to the corresponding octets.") + +(defvar +code-page-hashes+ + (loop for (id . table) in +code-page-tables+ + collect (cons id (invert-table table))) + "An alist which maps IDs of Windows code pages to hash tables +which map character codes to the corresponding octets.") + +(defvar +ascii-hash+ (invert-table +ascii-table+) + "A hash table which maps US-ASCII character codes to the +corresponding octets.") + +(defvar +koi8-r-hash+ (invert-table +koi8-r-table+) + "A hash table which maps KOI8-R character codes to the +corresponding octets.") + +(defconstant +buffer-size+ 8192 + "Size of buffers used for internal purposes.") + +(pushnew :flexi-streams *features*) + +;; stuff for Nikodemus Siivola's HYPERDOC +;; see +;; and +;; also used by LW-ADD-ONS + +(defvar *hyperdoc-base-uri* "http://weitz.de/flexi-streams/") + +(let ((exported-symbols-alist + (loop for symbol being the external-symbols of :flexi-streams + collect (cons symbol + (concatenate 'string + "#" + (string-downcase symbol)))))) + (defun hyperdoc-lookup (symbol type) + (declare (ignore type)) + (cdr (assoc symbol + exported-symbols-alist + :test #'eq)))) Added: trunk/stream.lisp ============================================================================== --- (empty file) +++ trunk/stream.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,732 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.53 2007/12/29 22:26:04 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass flexi-stream (trivial-gray-stream-mixin) + ((stream :initarg :stream + :reader flexi-stream-stream + :documentation "The actual stream that's used for +input and/or output. It must be capable of reading/writing +octets with READ-SEQUENCE and/or WRITE-SEQUENCE.") + (external-format :initform (make-external-format :iso-8859-1) + :initarg :flexi-stream-external-format + :accessor flexi-stream-external-format + :documentation "The encoding currently used +by this stream. Can be changed on the fly.") + (element-type :initform #+:lispworks 'lw:simple-char #-:lispworks 'character + :initarg :element-type + :accessor flexi-stream-element-type + :documentation "The element type of this stream.")) + (:documentation "A FLEXI-STREAM object is a stream that's +`layered' atop an existing binary/bivalent stream in order to +allow for multi-octet external formats. FLEXI-STREAM itself is a +mixin and should not be instantiated.")) + +(define-condition flexi-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +flexi streams.")) + +(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) + () + (:documentation "Like FLEXI-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition flexi-stream-element-type-error (flexi-stream-error) + ((element-type :initarg :element-type + :reader flexi-stream-element-type-error-element-type)) + (:report (lambda (condition stream) + (format stream "Element type ~S not allowed." + (flexi-stream-element-type-error-element-type condition)))) + (:documentation "Errors of this type are signalled if the flexi +stream has a wrong element type.")) + +(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) + ((position-spec :initarg :position-spec + :reader flexi-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an +erroneous position spec is used in conjunction with +FILE-POSITION.")) + +(defun signal-encoding-error (flexi-stream format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +FLEXI-STREAM-ENCODING-ERROR." + (error 'flexi-stream-encoding-error + :format-control format-control + :format-arguments format-args + :stream flexi-stream)) + +(defun maybe-convert-external-format (external-format) + "Given an external format designator \(a keyword, a list, or an +EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT +object." + (typecase external-format + (symbol (make-external-format external-format)) + (list (apply #'make-external-format external-format)) + (otherwise external-format))) + +(defmethod initialize-instance :after ((flexi-stream flexi-stream) &rest initargs) + "Makes sure the EXTERNAL-FORMAT and ELEMENT-TYPE slots contain +reasonable values." + (declare (ignore initargs) + (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format) + (element-type flexi-stream-element-type)) + flexi-stream + (unless (or (subtypep element-type 'character) + (subtypep element-type 'octet)) + (error 'flexi-stream-element-type-error + :element-type element-type + :stream flexi-stream)) + (setq external-format (maybe-convert-external-format external-format))) + ;; set actual class and maybe contents of 8-bit encoding slots + (set-class flexi-stream)) + +(defmethod (setf flexi-stream-external-format) :around (new-value (flexi-stream flexi-stream)) + "Converts the new value to an EXTERNAL-FORMAT object if +necessary." + (call-next-method (maybe-convert-external-format new-value) flexi-stream)) + +(defmethod (setf flexi-stream-element-type) :before (new-value (flexi-stream flexi-stream)) + "Checks whether the new value makes sense before it is set." + (unless (or (subtypep new-value 'character) + (subtypep new-value 'octet)) + (error 'flexi-stream-element-type-error + :element-type new-value + :stream flexi-stream))) + +(defmethod stream-element-type ((stream flexi-stream)) + "Returns the element type that was provided by the creator of +the stream." + (declare (optimize speed)) + (flexi-stream-element-type stream)) + +(defmethod close ((stream flexi-stream) &key abort) + "Closes the flexi stream by closing the underlying `real' +stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + stream + (cond ((open-stream-p stream) + (close stream :abort abort)) + (t nil)))) + +(defmethod open-stream-p ((stream flexi-stream)) + "A flexi stream is open if its underlying stream is open." + (declare (optimize speed)) + (open-stream-p (flexi-stream-stream stream))) + +(defmethod stream-file-position ((stream flexi-stream)) + "Dispatch to method for underlying stream." + (declare (optimize speed)) + (stream-file-position (flexi-stream-stream stream))) + +(defmethod (setf stream-file-position) (position-spec (stream flexi-stream)) + "Dispatch to method for underlying stream." + (declare (optimize speed)) + (setf (stream-file-position (flexi-stream-stream stream)) + position-spec)) + +(defclass flexi-output-stream (flexi-stream fundamental-binary-output-stream + fundamental-character-output-stream) + ((column :initform 0 + :accessor flexi-stream-column + :documentation "The current output column. A +non-negative integer or NIL.")) + (:documentation "A FLEXI-OUTPUT-STREAM is a FLEXI-STREAM that +can actually be instatiated and used for output. Don't use +MAKE-INSTANCE to create a new FLEXI-OUTPUT-STREAM but use +MAKE-FLEXI-STREAM instead.")) + +#+:cmu +(defmethod input-stream-p ((stream flexi-output-stream)) + "Explicitly states whether this is an input stream." + (declare (optimize speed)) + nil) + +(defclass flexi-input-stream (flexi-stream fundamental-binary-input-stream + fundamental-character-input-stream) + ((last-char-code :initform nil + :accessor flexi-stream-last-char-code + :documentation "This slot either holds NIL or the +last character \(code) read successfully. This is mainly used for +UNREAD-CHAR sanity checks.") + (last-octet :initform nil + :accessor flexi-stream-last-octet + :documentation "This slot either holds NIL or the last +octet read successfully from the stream using a `binary' operation +such as READ-BYTE. This is mainly used for UNREAD-BYTE sanity +checks.") + (octet-stack :initform nil + :accessor flexi-stream-octet-stack + :documentation "A small buffer which holds octets +that were already read from the underlying stream but not yet +used to produce characters. This is mainly used if we have to +look ahead for a CR/LF line ending.") + (position :initform 0 + :initarg :position + :type integer + :accessor flexi-stream-position + :documentation "The position within the stream where each +octet read counts as one.") + (bound :initform nil + :initarg :bound + :type (or null integer) + :accessor flexi-stream-bound + :documentation "When this is not NIL, it must be an integer +and the stream will behave as if no more data is available as soon as +POSITION is greater or equal than this value.")) + (:documentation "A FLEXI-INPUT-STREAM is a FLEXI-STREAM that +can actually be instatiated and used for input. Don't use +MAKE-INSTANCE to create a new FLEXI-INPUT-STREAM but use +MAKE-FLEXI-STREAM instead.")) + +#+:cmu +(defmethod output-stream-p ((stream flexi-input-stream)) + "Explicitly states whether this is an output stream." + (declare (optimize speed)) + nil) + +(defclass flexi-io-stream (flexi-input-stream flexi-output-stream) + () + (:documentation "A FLEXI-IO-STREAM is a FLEXI-STREAM that can +actually be instatiated and used for input and output. Don't use +MAKE-INSTANCE to create a new FLEXI-IO-STREAM but use +MAKE-FLEXI-STREAM instead.")) + +(defclass flexi-cr-mixin () + () + (:documentation "A mixin for flexi streams which need +end-of-line conversion, i.e. for those where the end-of-line +designator is /not/ the single character #\Linefeed.")) + +(defclass flexi-8-bit-stream (flexi-stream) + ((encoding-hash :accessor flexi-stream-encoding-hash)) + (:documentation "The class for all flexi streams which use an 8-bit +encoding and thus need an additional slot for the encoding hash.")) + +(defclass flexi-8-bit-input-stream (flexi-input-stream flexi-8-bit-stream) + ((encoding-table :accessor flexi-stream-encoding-table)) + (:documentation "The class for all flexi input streams which use an +8-bit encoding and thus need an additional slot for the encoding +table.")) + +(defclass flexi-cr-8-bit-input-stream (flexi-cr-mixin flexi-8-bit-input-stream) + () + (:documentation "The class for all flexi input streams which +use an 8-bit encoding /and/ need end-of-line conversion.")) + +(defclass flexi-ascii-input-stream (flexi-8-bit-input-stream) + () + (:documentation "Special class for flexi input streams which +use the US-ASCCI encoding.")) + +(defclass flexi-cr-ascii-input-stream (flexi-cr-mixin flexi-ascii-input-stream) + () + (:documentation "Special class for flexi input streams which +use the US-ASCCI encoding /and/ need end-of-line conversion.")) + +(defclass flexi-latin-1-input-stream (flexi-8-bit-input-stream) + () + (:documentation "Special class for flexi input streams which +use the ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-input-stream (flexi-cr-mixin flexi-latin-1-input-stream) + () + (:documentation "Special class for flexi input streams which +use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-utf-32-le-input-stream (flexi-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-utf-32-le-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-32 encoding with little-endian byte ordering /and/ +need end-of-line conversion.")) + +(defclass flexi-utf-32-be-input-stream (flexi-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-utf-32-be-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-32 encoding with big-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-le-input-stream (flexi-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-utf-16-le-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-16 encoding with little-endian byte ordering /and/ +need end-of-line conversion.")) + +(defclass flexi-utf-16-be-input-stream (flexi-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-utf-16-be-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-16 encoding with big-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-8-input-stream (flexi-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-input-stream (flexi-cr-mixin flexi-utf-8-input-stream) + () + (:documentation "Special class for flexi input streams which +use the UTF-8 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-8-bit-output-stream (flexi-output-stream flexi-8-bit-stream) + () + (:documentation "The class for all flexi output streams which use an +8-bit encoding.")) + +(defclass flexi-cr-8-bit-output-stream (flexi-cr-mixin flexi-8-bit-output-stream) + () + (:documentation "The class for all flexi output streams which +use an 8-bit encoding /and/ need end-of-line conversion.")) + +(defclass flexi-ascii-output-stream (flexi-8-bit-output-stream) + () + (:documentation "Special class for flexi output streams which +use the US-ASCCI encoding.")) + +(defclass flexi-cr-ascii-output-stream (flexi-cr-mixin flexi-ascii-output-stream) + () + (:documentation "Special class for flexi output streams which +use the US-ASCCI encoding /and/ need end-of-line conversion.")) + +(defclass flexi-latin-1-output-stream (flexi-8-bit-output-stream) + () + (:documentation "Special class for flexi output streams which +use the ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-output-stream (flexi-cr-mixin flexi-latin-1-output-stream) + () + (:documentation "Special class for flexi output streams which +use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-utf-32-le-output-stream (flexi-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-utf-32-le-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-32 encoding with little-endian byte ordering /and/ +need end-of-line conversion.")) + +(defclass flexi-utf-32-be-output-stream (flexi-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-utf-32-be-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-32 encoding with big-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-le-output-stream (flexi-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-utf-16-le-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-16 encoding with little-endian byte ordering /and/ +need end-of-line conversion.")) + +(defclass flexi-utf-16-be-output-stream (flexi-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-utf-16-be-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-16 encoding with big-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-8-output-stream (flexi-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-output-stream (flexi-cr-mixin flexi-utf-8-output-stream) + () + (:documentation "Special class for flexi output streams which +use the UTF-8 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-8-bit-io-stream (flexi-8-bit-input-stream flexi-8-bit-output-stream flexi-io-stream) + () + (:documentation "The class for all flexi I/O streams which use an +8-bit encoding.")) + +(defclass flexi-cr-8-bit-io-stream (flexi-cr-mixin flexi-8-bit-io-stream) + () + (:documentation "The class for all flexi I/O streams which use +an 8-bit encoding /and/ need end-of-line conversion.")) + +(defclass flexi-ascii-io-stream (flexi-ascii-input-stream flexi-ascii-output-stream flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the US-ASCCI encoding.")) + +(defclass flexi-cr-ascii-io-stream (flexi-cr-mixin flexi-ascii-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the US-ASCCI encoding /and/ need end-of-line conversion.")) + +(defclass flexi-latin-1-io-stream (flexi-latin-1-input-stream flexi-latin-1-output-stream flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-io-stream (flexi-cr-mixin flexi-latin-1-io-stream) + () + (:documentation "Special class for flexi input streams which +use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-utf-32-le-io-stream (flexi-utf-32-le-input-stream + flexi-utf-32-le-output-stream + flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-utf-32-le-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-32 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-32-be-io-stream (flexi-utf-32-be-input-stream + flexi-utf-32-be-output-stream + flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-utf-32-be-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-32 encoding with big-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-le-io-stream (flexi-utf-16-le-input-stream + flexi-utf-16-le-output-stream + flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-utf-16-le-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-16 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-be-io-stream (flexi-utf-16-be-input-stream + flexi-utf-16-be-output-stream + flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-utf-16-be-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-16 encoding with big-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-8-io-stream (flexi-utf-8-input-stream flexi-utf-8-output-stream flexi-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-io-stream (flexi-cr-mixin flexi-utf-8-io-stream) + () + (:documentation "Special class for flexi I/O streams which use +the UTF-8 encoding /and/ need end-of-line conversion.")) + +(defmethod (setf flexi-stream-external-format) :after (new-value (stream flexi-stream)) + "After we've changed the external format of a flexi stream, we +might have to change its actual class and maybe also the contents +of its 8-bit encoding slots." + (declare (ignore new-value) + (optimize speed)) + ;; note that it's potentially dangerous to call SET-CLASS from + ;; within a method, see for example this thread: + ;; + (set-class stream)) + +(defmethod set-class ((stream flexi-input-stream)) + "Changes the actual class of STREAM depending on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (change-class stream + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-cr-ascii-input-stream + 'flexi-ascii-input-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-cr-latin-1-input-stream + 'flexi-latin-1-input-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-cr-8-bit-input-stream + 'flexi-8-bit-input-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-cr-utf-8-input-stream + 'flexi-utf-8-input-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-16-le-input-stream + 'flexi-cr-utf-16-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-16-le-input-stream + 'flexi-utf-16-be-input-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-32-le-input-stream + 'flexi-cr-utf-32-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-32-le-input-stream + 'flexi-utf-32-be-input-stream)))))))))) + +(defmethod set-class ((stream flexi-output-stream)) + "Changes the actual class of STREAM depending on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (change-class stream + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-cr-ascii-output-stream + 'flexi-ascii-output-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-cr-latin-1-output-stream + 'flexi-latin-1-output-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-cr-8-bit-output-stream + 'flexi-8-bit-output-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-cr-utf-8-output-stream + 'flexi-utf-8-output-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-16-le-output-stream + 'flexi-cr-utf-16-be-output-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-16-le-output-stream + 'flexi-utf-16-be-output-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-32-le-output-stream + 'flexi-cr-utf-32-be-output-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-32-le-output-stream + 'flexi-utf-32-be-output-stream)))))))))) + +(defmethod set-class ((stream flexi-io-stream)) + "Changes the actual class of STREAM depending on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (change-class stream + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-cr-ascii-io-stream + 'flexi-ascii-io-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-cr-latin-1-io-stream + 'flexi-latin-1-io-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-cr-8-bit-io-stream + 'flexi-8-bit-io-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-cr-utf-8-io-stream + 'flexi-utf-8-io-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-16-le-io-stream + 'flexi-cr-utf-16-be-io-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-16-le-io-stream + 'flexi-utf-16-be-io-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-32-le-io-stream + 'flexi-cr-utf-32-be-io-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-32-le-io-stream + 'flexi-utf-32-be-io-stream)))))))))) + +(defmethod set-class :after ((stream flexi-stream)) + "After we've changed the actual class of a flexi stream we may +have to set the contents of the 8-bit enconding slots as well." + (declare (optimize speed)) + (set-encoding-hash stream) + (set-encoding-table stream)) + +(defgeneric set-encoding-hash (stream) + (:method (stream)) + (:documentation "Sets the value of the ENCODING-HASH slot of +STREAM if necessary. The default method does nothing.")) + +(defgeneric set-encoding-table (stream) + (:method (stream)) + (:documentation "Sets the value of the ENCODING-TABLE slot of +STREAM if necessary. The default method does nothing.")) + +(defmethod set-encoding-hash ((stream flexi-8-bit-stream)) + "Sets the value of the ENCODING-HASH slot of STREAM depending +on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format) + (encoding-hash flexi-stream-encoding-hash)) + stream + (let ((external-format-name (external-format-name external-format))) + (setq encoding-hash + (cond ((ascii-name-p external-format-name) +ascii-hash+) + ((koi8-r-name-p external-format-name) +koi8-r-hash+) + ((iso-8859-name-p external-format-name) + (cdr (assoc external-format-name +iso-8859-hashes+ :test #'eq))) + ((code-page-name-p external-format-name) + (cdr (assoc (external-format-id external-format) +code-page-hashes+)))))))) + +(defmethod set-encoding-table ((stream flexi-8-bit-input-stream)) + "Sets the value of the ENCODING-TABLE slot of STREAM depending +on its external format." + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format) + (encoding-table flexi-stream-encoding-table)) + stream + (let ((external-format-name (external-format-name external-format))) + (setq encoding-table + (cond ((ascii-name-p external-format-name) +ascii-table+) + ((koi8-r-name-p external-format-name) +koi8-r-table+) + ((iso-8859-name-p external-format-name) + (cdr (assoc external-format-name +iso-8859-tables+ :test #'eq))) + ((code-page-name-p external-format-name) + (cdr (assoc (external-format-id external-format) +code-page-tables+)))))))) + +#+:cmu +(defmethod input-stream-p ((stream flexi-io-stream)) + "Explicitly states whether this is an input stream." + (declare (optimize speed)) + t) + +#+:cmu +(defmethod output-stream-p ((stream flexi-io-stream)) + "Explicitly states whether this is an output stream." + (declare (optimize speed)) + t) + +(defun make-flexi-stream (stream &rest args + &key (external-format (make-external-format :iso-8859-1)) + element-type column position bound) + "Creates and returns a new flexi stream. STREAM must be an open +binary or `bivalent' stream, i.e. it must be capable of +reading/writing octets with READ-SEQUENCE and/or WRITE-SEQUENCE. The +resulting flexi stream is an input stream if and only if STREAM is an +input stream. Likewise, it's an output stream if and only if STREAM +is an output stream. The default for ELEMENT-TYPE is LW:SIMPLE-CHAR +on LispWorks and CHARACTER on other Lisps. EXTERNAL-FORMAT must be an +EXTERNAL-FORMAT object or a symbol or a list denoting such an object. +COLUMN is the initial column of the stream which is either a +non-negative integer or NIL. The COLUMN argument must only be used +for output streams. POSITION \(only used for input streams) should be +an integer and it denotes the position the stream is in - it will be +increased by one for each octet read. BOUND \(only used for input +streams) should be NIL or an integer. If BOUND is not NIL and +POSITION has gone beyond BOUND, then the stream will behave as if no +more input is available." + ;; these arguments are ignored - they are only there to provide a + ;; meaningful parameter list for IDEs + (declare (ignore element-type column position bound)) + (unless (and (streamp stream) + (open-stream-p stream)) + (error "~S should have been an open stream." stream)) + (apply #'make-instance + ;; actual type depends on STREAM + (cond ((and (input-stream-p stream) + (output-stream-p stream)) + 'flexi-io-stream) + ((input-stream-p stream) + 'flexi-input-stream) + ((output-stream-p stream) + 'flexi-output-stream)) + :stream stream + :flexi-stream-external-format external-format + (sans args :external-format))) Added: trunk/strings.lisp ============================================================================== --- (empty file) +++ trunk/strings.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,56 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.4 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defun string-to-octets (string &key (external-format (make-external-format :latin1)) + (start 0) end) + "Converts the Lisp string STRING from START to END to an array of +octets corresponding to the external format EXTERNAL-FORMAT." + (declare (optimize speed)) + (with-output-to-sequence (out) + (let ((flexi (make-flexi-stream out :external-format external-format))) + (write-string string flexi :start start :end end)))) + +(defun octets-to-string (vector &key (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) + "Converts the Lisp vector VECTOR of octets from START to END to +string using the external format EXTERNAL-FORMAT." + (declare (optimize speed)) + (with-input-from-sequence (in vector :start start :end end) + (let ((flexi (make-flexi-stream in :external-format external-format)) + (result (make-array (- end start) + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer result) + (read-sequence result flexi)) + result))) + + Added: trunk/test/README ============================================================================== --- (empty file) +++ trunk/test/README Thu May 1 02:25:14 2008 @@ -0,0 +1,4 @@ +The reference files in this directory were created/converted using a +mixture of GNU recode and the native internationalization facilities +of LispWorks and AllegroCL, i.e. we're not testing FLEXI-STREAMS +against files created by itself (which would be kind of useless). \ No newline at end of file Added: trunk/test/hebrew_latin8_cr.txt ============================================================================== --- (empty file) +++ trunk/test/hebrew_latin8_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +:???? ??? ????? ?? ????? ??? ?????? ? 1 ???? ???-?? ???? ???? ??? ???? ????? ? 2 :???? ???-?? ????? ????? ???? :???-???? ??? ??? ????? ????? ? 3 ????? ????? ???-?? ????-?? ????? ???? ? 4 :???? ???? ???? ??? ???? ??? ????? ??? ???? ????? ????? ? 5 :??? ??? ???-???? ???-???? ???? ???? ???? ???? ??? ????? ????? ? 6 :???? ??? ??? ????? ??? ???? ??? ????? ?????-?? ????? ???? ? 7 ????? ??? ??? ???? ???? ????? ???? :??-???? ???-???? ???-???? ???? ????? ????? ????? ? 8 :??? ??? ????-?? ????? ???? ???? ???? ????? ????? ? 9 :??-???? ????? ????? ??? ??? ???? ?????? ??? ????? ????? ????? ? 10 :???-?? ????? ???? ???? ??? ????? ??? ??? ???? ???? ????? ????? ?? 11 ????-?? ??-???? ??? ????? ??? ??? ??? ?? :??-???? ??? ?????? ??? ????? ??? ??? ???? ????? ?? 12 ????? ???? ?????? ??-???? ??? ???-??? :???-?? :????? ??? ???-???? ???-???? ?? 13 ?????? ????? ????? ???? ??? ????? ????? ?? 14 ???????? ???? ???? ????? ???? ???? ??? :????? ?????? ????-?? ????? ????? ????? ?????? ???? ?? 15 :??-???? ?????-?? ?????? ????? ???-?? ????? ???? ?? 16 ?????? ???? ?????-??? ???? ?????? ???? :??????? ??? ????? ????? ????? ????? ????? ??? ???? ?? 17 :????-?? ???? ???? ??? ??????? ?????? ???? ????? ?? 18 :???-?? ????? ???? ???? :????? ??? ???-???? ???-???? ?? 19 ???? ??? ??? ??? ???? ????? ????? ????? ? 20 :????? ???? ???-?? ????-?? ????? ???-?? ??? ?????? ??????-?? ????? ????? ?? 21 ??? ?????? ???? ???? ??? ????? ???? :???-?? ????? ???? ?????? ??? ???-?? ????? ???? ??? ???? ????? ??? ????? ?? 22 :???? ??? ????? ????? ????-?? :????? ??? ???-???? ???-???? ?? 23 ???? ????? ??? ??? ???? ???? ????? ????? ?? 24 :??-???? ????? ???-????? ???? ?????-??? ????? ???? ???-?? ????? ???? ?? 25 ????? ???? ?????? ????? ???-?? ??? ????? :???-?? ??????? ?????? ??? ???? ????? ????? ?? 26 ?????? ????? ????? ??? ???? ????? :????-?? ???? ????-???? ????-???? ??? ????? ???? ????? ????-?? ????? ????? ?? 27 :??? ??? ????? ??? ??? ???? ??? ????? ??? ????? ????? ??? ????? ?? 28 ????? ??? ???? ???? ????? ????-?? ????? :????-?? ????? ???-???? ????? ??? ???-??-?? ??? ???? ??? ????? ????? ?? 29 ??-??? ???-??-??? ????-?? ???-?? ??? ??? :????? ???? ??? ??? ??? ??-??? ???? ???? ????? ???-???? ???? ???-???? ? 30 ??? ???-??-?? ??? ??? ??-??? ????-?? :??-???? ????? ??? ???-???? ??? ???-??-?? ????? ???? ?? 31 :???? ??? ???-???? ?? \ No newline at end of file Added: trunk/test/hebrew_latin8_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/hebrew_latin8_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,68 @@ +:???? ??? ????? ?? ????? ??? ?????? ? 1 +???? ???-?? ???? ???? ??? ???? ????? ? 2 +:???? ???-?? ????? ????? ???? +:???-???? ??? ??? ????? ????? ? 3 +????? ????? ???-?? ????-?? ????? ???? ? 4 +:???? ???? ???? ??? +???? ??? ????? ??? ???? ????? ????? ? 5 +:??? ??? ???-???? ???-???? +???? ???? ???? ???? ??? ????? ????? ? 6 +:???? ??? ??? ????? +??? ???? ??? ????? ?????-?? ????? ???? ? 7 +????? ??? ??? ???? ???? ????? ???? +:??-???? +???-???? ???-???? ???? ????? ????? ????? ? 8 +:??? ??? +????-?? ????? ???? ???? ???? ????? ????? ? 9 +:??-???? ????? ????? ??? +??? ???? ?????? ??? ????? ????? ????? ? 10 +:???-?? ????? ???? ???? +??? ????? ??? ??? ???? ???? ????? ????? ?? 11 +????-?? ??-???? ??? ????? ??? ??? ??? ?? +:??-???? +??? ?????? ??? ????? ??? ??? ???? ????? ?? 12 +????? ???? ?????? ??-???? ??? ???-??? +:???-?? +:????? ??? ???-???? ???-???? ?? 13 +?????? ????? ????? ???? ??? ????? ????? ?? 14 +???????? ???? ???? ????? ???? ???? ??? +:????? ?????? +????-?? ????? ????? ????? ?????? ???? ?? 15 +:??-???? +?????-?? ?????? ????? ???-?? ????? ???? ?? 16 +?????? ???? ?????-??? ???? ?????? ???? +:??????? ??? ????? +????? ????? ????? ????? ??? ???? ?? 17 +:????-?? +???? ???? ??? ??????? ?????? ???? ????? ?? 18 +:???-?? ????? ???? ???? +:????? ??? ???-???? ???-???? ?? 19 +???? ??? ??? ??? ???? ????? ????? ????? ? 20 +:????? ???? ???-?? ????-?? ????? +???-?? ??? ?????? ??????-?? ????? ????? ?? 21 +??? ?????? ???? ???? ??? ????? ???? +:???-?? ????? ???? ?????? ??? ???-?? +????? ???? ??? ???? ????? ??? ????? ?? 22 +:???? ??? ????? ????? ????-?? +:????? ??? ???-???? ???-???? ?? 23 +???? ????? ??? ??? ???? ???? ????? ????? ?? 24 +:??-???? ????? ???-????? ???? +?????-??? ????? ???? ???-?? ????? ???? ?? 25 +????? ???? ?????? ????? ???-?? ??? ????? +:???-?? +??????? ?????? ??? ???? ????? ????? ?? 26 +?????? ????? ????? ??? ???? ????? +:????-?? ???? ????-???? ????-???? +??? ????? ???? ????? ????-?? ????? ????? ?? 27 +:??? ??? ????? ??? ??? +???? ??? ????? ??? ????? ????? ??? ????? ?? 28 +????? ??? ???? ???? ????? ????-?? ????? +:????-?? ????? ???-???? ????? +??? ???-??-?? ??? ???? ??? ????? ????? ?? 29 +??-??? ???-??-??? ????-?? ???-?? ??? ??? +:????? ???? ??? ??? ??? ??-??? +???? ???? ????? ???-???? ???? ???-???? ? 30 +??? ???-??-?? ??? ??? ??-??? ????-?? +:??-???? ????? +??? ???-???? ??? ???-??-?? ????? ???? ?? 31 +:???? ??? ???-???? ?? Added: trunk/test/hebrew_latin8_lf.txt ============================================================================== --- (empty file) +++ trunk/test/hebrew_latin8_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,68 @@ +:???? ??? ????? ?? ????? ??? ?????? ? 1 +???? ???-?? ???? ???? ??? ???? ????? ? 2 +:???? ???-?? ????? ????? ???? +:???-???? ??? ??? ????? ????? ? 3 +????? ????? ???-?? ????-?? ????? ???? ? 4 +:???? ???? ???? ??? +???? ??? ????? ??? ???? ????? ????? ? 5 +:??? ??? ???-???? ???-???? +???? ???? ???? ???? ??? ????? ????? ? 6 +:???? ??? ??? ????? +??? ???? ??? ????? ?????-?? ????? ???? ? 7 +????? ??? ??? ???? ???? ????? ???? +:??-???? +???-???? ???-???? ???? ????? ????? ????? ? 8 +:??? ??? +????-?? ????? ???? ???? ???? ????? ????? ? 9 +:??-???? ????? ????? ??? +??? ???? ?????? ??? ????? ????? ????? ? 10 +:???-?? ????? ???? ???? +??? ????? ??? ??? ???? ???? ????? ????? ?? 11 +????-?? ??-???? ??? ????? ??? ??? ??? ?? +:??-???? +??? ?????? ??? ????? ??? ??? ???? ????? ?? 12 +????? ???? ?????? ??-???? ??? ???-??? +:???-?? +:????? ??? ???-???? ???-???? ?? 13 +?????? ????? ????? ???? ??? ????? ????? ?? 14 +???????? ???? ???? ????? ???? ???? ??? +:????? ?????? +????-?? ????? ????? ????? ?????? ???? ?? 15 +:??-???? +?????-?? ?????? ????? ???-?? ????? ???? ?? 16 +?????? ???? ?????-??? ???? ?????? ???? +:??????? ??? ????? +????? ????? ????? ????? ??? ???? ?? 17 +:????-?? +???? ???? ??? ??????? ?????? ???? ????? ?? 18 +:???-?? ????? ???? ???? +:????? ??? ???-???? ???-???? ?? 19 +???? ??? ??? ??? ???? ????? ????? ????? ? 20 +:????? ???? ???-?? ????-?? ????? +???-?? ??? ?????? ??????-?? ????? ????? ?? 21 +??? ?????? ???? ???? ??? ????? ???? +:???-?? ????? ???? ?????? ??? ???-?? +????? ???? ??? ???? ????? ??? ????? ?? 22 +:???? ??? ????? ????? ????-?? +:????? ??? ???-???? ???-???? ?? 23 +???? ????? ??? ??? ???? ???? ????? ????? ?? 24 +:??-???? ????? ???-????? ???? +?????-??? ????? ???? ???-?? ????? ???? ?? 25 +????? ???? ?????? ????? ???-?? ??? ????? +:???-?? +??????? ?????? ??? ???? ????? ????? ?? 26 +?????? ????? ????? ??? ???? ????? +:????-?? ???? ????-???? ????-???? +??? ????? ???? ????? ????-?? ????? ????? ?? 27 +:??? ??? ????? ??? ??? +???? ??? ????? ??? ????? ????? ??? ????? ?? 28 +????? ??? ???? ???? ????? ????-?? ????? +:????-?? ????? ???-???? ????? +??? ???-??-?? ??? ???? ??? ????? ????? ?? 29 +??-??? ???-??-??? ????-?? ???-?? ??? ??? +:????? ???? ??? ??? ??? ??-??? +???? ???? ????? ???-???? ???? ???-???? ? 30 +??? ???-??-?? ??? ??? ??-??? ????-?? +:??-???? ????? +??? ???-???? ??? ???-??-?? ????? ???? ?? 31 +:???? ??? ???-???? ?? Added: trunk/test/hebrew_utf8_cr.txt ============================================================================== --- (empty file) +++ trunk/test/hebrew_utf8_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +:???????? ?????? ?????????? ???? ?????????? ?????? ???????????? ?? 1 ???????? ??????-???? ???????? ???????? ?????? ???????? ?????????? ?? 2 :???????? ??????-???? ?????????? ?????????? ???????? :??????-???????? ?????? ?????? ?????????? ?????????? ?? 3 ?????????? ?????????? ??????-???? ????????-???? ?????????? ???????? ?? 4 :???????? ???????? ???????? ?????? ???????? ?????? ?????????? ?????? ???????? ?????????? ?????????? ?? 5 :?????? ?????? ??????-???????? ??????-???????? ???????? ???????? ???????? ???????? ?????? ?????????? ?????????? ?? 6 :???????? ?????? ?????? ?????????? ?????? ???????? ?????? ?????????? ??????????-???? ?????????? ???????? ?? 7 ?????????? ?????? ?????? ???????? ???????? ?????????? ???????? :????-???????? ??????-???????? ??????-???????? ???????? ?????????? ?????????? ?????????? ?? 8 :?????? ?????? ????????-???? ?????????? ???????? ???????? ???????? ?????????? ?????????? ?? 9 :????-???????? ?????????? ?????????? ?????? ?????? ???????? ???????????? ?????? ?????????? ?????????? ?????????? ?? 10 :??????-???? ?????????? ???????? ???????? ?????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 11 ????????-???? ????-???????? ?????? ?????????? ?????? ?????? ?????? ???? :????-???????? ?????? ???????????? ?????? ?????????? ?????? ?????? ???????? ?????????? ???? 12 ?????????? ???????? ???????????? ????-???????? ?????? ??????-?????? :??????-???? :?????????? ?????? ??????-???????? ??????-???????? ???? 13 ???????????? ?????????? ?????????? ???????? ?????? ?????????? ?????????? ???? 14 ???????????????? ???????? ???????? ?????????? ???????? ???????? ?????? :?????????? ???????????? ????????-???? ?????????? ?????????? ?????????? ???????????? ???????? ???? 15 :????-???????? ??????????-???? ???????????? ?????????? ??????-???? ?????????? ???????? ???? 16 ???????????? ???????? ??????????-?????? ???????? ???????????? ???????? :?????????????? ?????? ?????????? ?????????? ?????????? ?????????? ?????????? ?????? ???????? ???? 17 :????????-???? ???????? ???????? ?????? ?????????????? ???????????? ???????? ?????????? ???? 18 :??????-???? ?????????? ???????? ???????? :?????????? ?????? ??????-???????? ??????-???????? ???? 19 ???????? ?????? ?????? ?????? ???????? ?????????? ?????????? ?????????? ?? 20 :?????????? ???????? ??????-???? ????????-???? ?????????? ??????-???? ?????? ???????????? ????????????-???? ?????????? ?????????? ???? 21 ?????? ???????????? ???????? ???????? ?????? ?????????? ???????? :??????-???? ?????????? ???????? ???????????? ?????? ??????-???? ?????????? ???????? ?????? ???????? ?????????? ?????? ?????????? ???? 22 :???????? ?????? ?????????? ?????????? ????????-???? :?????????? ?????? ??????-???????? ??????-???????? ???? 23 ???????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 24 :????-???????? ?????????? ??????-?????????? ???????? ??????????-?????? ?????????? ???????? ??????-???? ?????????? ???????? ???? 25 ?????????? ???????? ???????????? ?????????? ??????-???? ?????? ?????????? :??????-???? ?????????????? ???????????? ?????? ???????? ?????????? ?????????? ???? 26 ???????????? ?????????? ?????????? ?????? ???????? ?????????? :????????-???? ???????? ????????-???????? ????????-???????? ?????? ?????????? ???????? ?????????? ????????-???? ?????????? ?????????? ???? 27 :?????? ?????? ?????????? ?????? ?????? ???????? ?????? ?????????? ?????? ?????????? ?????????? ?????? ?????????? ???? 28 ?????????? ?????? ???????? ???????? ?????????? ????????-???? ?????????? :????????-???? ?????????? ??????-???????? ?????????? ?????? ??????-????-???? ?????? ???????? ?????? ?????????? ?????????? ???? 29 ????-?????? ??????-????-?????? ????????-???? ??????-???? ?????? ?????? :?????????? ???????? ?????? ?????? ?????? ????-?????? ???????? ???????? ?????????? ??????-???????? ???????? ??????-???????? ?? 30 ?????? ??????-????-???? ?????? ?????? ????-?????? ????????-???? :????-???????? ?????????? ?????? ??????-???????? ?????? ??????-????-???? ?????????? ???????? ???? 31 :???????? ?????? ??????-???????? ???? \ No newline at end of file Added: trunk/test/hebrew_utf8_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/hebrew_utf8_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,68 @@ +:???????? ?????? ?????????? ???? ?????????? ?????? ???????????? ?? 1 +???????? ??????-???? ???????? ???????? ?????? ???????? ?????????? ?? 2 +:???????? ??????-???? ?????????? ?????????? ???????? +:??????-???????? ?????? ?????? ?????????? ?????????? ?? 3 +?????????? ?????????? ??????-???? ????????-???? ?????????? ???????? ?? 4 +:???????? ???????? ???????? ?????? +???????? ?????? ?????????? ?????? ???????? ?????????? ?????????? ?? 5 +:?????? ?????? ??????-???????? ??????-???????? +???????? ???????? ???????? ???????? ?????? ?????????? ?????????? ?? 6 +:???????? ?????? ?????? ?????????? +?????? ???????? ?????? ?????????? ??????????-???? ?????????? ???????? ?? 7 +?????????? ?????? ?????? ???????? ???????? ?????????? ???????? +:????-???????? +??????-???????? ??????-???????? ???????? ?????????? ?????????? ?????????? ?? 8 +:?????? ?????? +????????-???? ?????????? ???????? ???????? ???????? ?????????? ?????????? ?? 9 +:????-???????? ?????????? ?????????? ?????? +?????? ???????? ???????????? ?????? ?????????? ?????????? ?????????? ?? 10 +:??????-???? ?????????? ???????? ???????? +?????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 11 +????????-???? ????-???????? ?????? ?????????? ?????? ?????? ?????? ???? +:????-???????? +?????? ???????????? ?????? ?????????? ?????? ?????? ???????? ?????????? ???? 12 +?????????? ???????? ???????????? ????-???????? ?????? ??????-?????? +:??????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 13 +???????????? ?????????? ?????????? ???????? ?????? ?????????? ?????????? ???? 14 +???????????????? ???????? ???????? ?????????? ???????? ???????? ?????? +:?????????? ???????????? +????????-???? ?????????? ?????????? ?????????? ???????????? ???????? ???? 15 +:????-???????? +??????????-???? ???????????? ?????????? ??????-???? ?????????? ???????? ???? 16 +???????????? ???????? ??????????-?????? ???????? ???????????? ???????? +:?????????????? ?????? ?????????? +?????????? ?????????? ?????????? ?????????? ?????? ???????? ???? 17 +:????????-???? +???????? ???????? ?????? ?????????????? ???????????? ???????? ?????????? ???? 18 +:??????-???? ?????????? ???????? ???????? +:?????????? ?????? ??????-???????? ??????-???????? ???? 19 +???????? ?????? ?????? ?????? ???????? ?????????? ?????????? ?????????? ?? 20 +:?????????? ???????? ??????-???? ????????-???? ?????????? +??????-???? ?????? ???????????? ????????????-???? ?????????? ?????????? ???? 21 +?????? ???????????? ???????? ???????? ?????? ?????????? ???????? +:??????-???? ?????????? ???????? ???????????? ?????? ??????-???? +?????????? ???????? ?????? ???????? ?????????? ?????? ?????????? ???? 22 +:???????? ?????? ?????????? ?????????? ????????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 23 +???????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 24 +:????-???????? ?????????? ??????-?????????? ???????? +??????????-?????? ?????????? ???????? ??????-???? ?????????? ???????? ???? 25 +?????????? ???????? ???????????? ?????????? ??????-???? ?????? ?????????? +:??????-???? +?????????????? ???????????? ?????? ???????? ?????????? ?????????? ???? 26 +???????????? ?????????? ?????????? ?????? ???????? ?????????? +:????????-???? ???????? ????????-???????? ????????-???????? +?????? ?????????? ???????? ?????????? ????????-???? ?????????? ?????????? ???? 27 +:?????? ?????? ?????????? ?????? ?????? +???????? ?????? ?????????? ?????? ?????????? ?????????? ?????? ?????????? ???? 28 +?????????? ?????? ???????? ???????? ?????????? ????????-???? ?????????? +:????????-???? ?????????? ??????-???????? ?????????? +?????? ??????-????-???? ?????? ???????? ?????? ?????????? ?????????? ???? 29 +????-?????? ??????-????-?????? ????????-???? ??????-???? ?????? ?????? +:?????????? ???????? ?????? ?????? ?????? ????-?????? +???????? ???????? ?????????? ??????-???????? ???????? ??????-???????? ?? 30 +?????? ??????-????-???? ?????? ?????? ????-?????? ????????-???? +:????-???????? ?????????? +?????? ??????-???????? ?????? ??????-????-???? ?????????? ???????? ???? 31 +:???????? ?????? ??????-???????? ???? Added: trunk/test/hebrew_utf8_lf.txt ============================================================================== --- (empty file) +++ trunk/test/hebrew_utf8_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,68 @@ +:???????? ?????? ?????????? ???? ?????????? ?????? ???????????? ?? 1 +???????? ??????-???? ???????? ???????? ?????? ???????? ?????????? ?? 2 +:???????? ??????-???? ?????????? ?????????? ???????? +:??????-???????? ?????? ?????? ?????????? ?????????? ?? 3 +?????????? ?????????? ??????-???? ????????-???? ?????????? ???????? ?? 4 +:???????? ???????? ???????? ?????? +???????? ?????? ?????????? ?????? ???????? ?????????? ?????????? ?? 5 +:?????? ?????? ??????-???????? ??????-???????? +???????? ???????? ???????? ???????? ?????? ?????????? ?????????? ?? 6 +:???????? ?????? ?????? ?????????? +?????? ???????? ?????? ?????????? ??????????-???? ?????????? ???????? ?? 7 +?????????? ?????? ?????? ???????? ???????? ?????????? ???????? +:????-???????? +??????-???????? ??????-???????? ???????? ?????????? ?????????? ?????????? ?? 8 +:?????? ?????? +????????-???? ?????????? ???????? ???????? ???????? ?????????? ?????????? ?? 9 +:????-???????? ?????????? ?????????? ?????? +?????? ???????? ???????????? ?????? ?????????? ?????????? ?????????? ?? 10 +:??????-???? ?????????? ???????? ???????? +?????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 11 +????????-???? ????-???????? ?????? ?????????? ?????? ?????? ?????? ???? +:????-???????? +?????? ???????????? ?????? ?????????? ?????? ?????? ???????? ?????????? ???? 12 +?????????? ???????? ???????????? ????-???????? ?????? ??????-?????? +:??????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 13 +???????????? ?????????? ?????????? ???????? ?????? ?????????? ?????????? ???? 14 +???????????????? ???????? ???????? ?????????? ???????? ???????? ?????? +:?????????? ???????????? +????????-???? ?????????? ?????????? ?????????? ???????????? ???????? ???? 15 +:????-???????? +??????????-???? ???????????? ?????????? ??????-???? ?????????? ???????? ???? 16 +???????????? ???????? ??????????-?????? ???????? ???????????? ???????? +:?????????????? ?????? ?????????? +?????????? ?????????? ?????????? ?????????? ?????? ???????? ???? 17 +:????????-???? +???????? ???????? ?????? ?????????????? ???????????? ???????? ?????????? ???? 18 +:??????-???? ?????????? ???????? ???????? +:?????????? ?????? ??????-???????? ??????-???????? ???? 19 +???????? ?????? ?????? ?????? ???????? ?????????? ?????????? ?????????? ?? 20 +:?????????? ???????? ??????-???? ????????-???? ?????????? +??????-???? ?????? ???????????? ????????????-???? ?????????? ?????????? ???? 21 +?????? ???????????? ???????? ???????? ?????? ?????????? ???????? +:??????-???? ?????????? ???????? ???????????? ?????? ??????-???? +?????????? ???????? ?????? ???????? ?????????? ?????? ?????????? ???? 22 +:???????? ?????? ?????????? ?????????? ????????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 23 +???????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 24 +:????-???????? ?????????? ??????-?????????? ???????? +??????????-?????? ?????????? ???????? ??????-???? ?????????? ???????? ???? 25 +?????????? ???????? ???????????? ?????????? ??????-???? ?????? ?????????? +:??????-???? +?????????????? ???????????? ?????? ???????? ?????????? ?????????? ???? 26 +???????????? ?????????? ?????????? ?????? ???????? ?????????? +:????????-???? ???????? ????????-???????? ????????-???????? +?????? ?????????? ???????? ?????????? ????????-???? ?????????? ?????????? ???? 27 +:?????? ?????? ?????????? ?????? ?????? +???????? ?????? ?????????? ?????? ?????????? ?????????? ?????? ?????????? ???? 28 +?????????? ?????? ???????? ???????? ?????????? ????????-???? ?????????? +:????????-???? ?????????? ??????-???????? ?????????? +?????? ??????-????-???? ?????? ???????? ?????? ?????????? ?????????? ???? 29 +????-?????? ??????-????-?????? ????????-???? ??????-???? ?????? ?????? +:?????????? ???????? ?????? ?????? ?????? ????-?????? +???????? ???????? ?????????? ??????-???????? ???????? ??????-???????? ?? 30 +?????? ??????-????-???? ?????? ?????? ????-?????? ????????-???? +:????-???????? ?????????? +?????? ??????-???????? ?????? ??????-????-???? ?????????? ???????? ???? 31 +:???????? ?????? ??????-???????? ???? Added: trunk/test/kafka_cp1252_cr.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_cp1252_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. ?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. ?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. \ No newline at end of file Added: trunk/test/kafka_cp1252_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_cp1252_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: trunk/test/kafka_cp1252_lf.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_cp1252_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: trunk/test/kafka_latin1_cr.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_latin1_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. ?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. ?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. \ No newline at end of file Added: trunk/test/kafka_latin1_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_latin1_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: trunk/test/kafka_latin1_lf.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_latin1_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: trunk/test/kafka_utf8_cr.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_utf8_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr??umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R??cken und sah, wenn er den Kopf ein wenig hob, seinen gew??lbten, braunen, von bogenf??rmigen Versteifungen geteilten Bauch, auf dessen H??he sich die Bettdecke, zum g??nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl??glich d??nnen Beine flimmerten ihm hilflos vor den Augen. ??Was ist mit mir geschehen???, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W??nden. ??ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h??bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa?? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. Gregors Blick richtete sich dann zum Fenster, und das tr??be Wetter - man h??rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ??Wie w??re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg????e??, dachte er, aber das war g??nzlich undurchf??hrbar, denn er war gew??hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw??rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R??ckenlage zur??ck. Er versuchte es wohl hundertmal, schlo?? die Augen, um die zappelnden Beine nicht sehen zu m??ssen, und lie?? erst ab, als er in der Seite einen noch nie gef??hlten, leichten, dumpfen Schmerz zu f??hlen begann. ??Ach Gott??, dachte er, ??was f??r einen anstrengenden Beruf habe ich gew??hlt! Tag aus, Tag ein auf der Reise. Die gesch??ftlichen Aufregungen sind viel gr????er, als im eigentlichen Gesch??ft zu Hause, und au??erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl??sse, das unregelm????ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!?? Er f??hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R??cken langsam n??her zum Bettpfosten, um den Kopf besser heben zu k??nnen; fand die juckende Stelle, die mit lauter kleinen wei??en P??nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur??ck, denn bei der Ber??hrung umwehten ihn K??lteschauer. Er glitt wieder in seine fr??here Lage zur??ck. ??Dies fr??hzeitige Aufstehen??, dachte er, ??macht einen ganz bl??dsinnig. Der Mensch mu?? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur??ckgehe, um die erlangten Auftr??ge zu ??berschreiben, sitzen diese Herren erst beim Fr??hst??ck. Das sollte ich bei meinem Chef versuchen; ich w??rde auf der Stelle hinausfliegen. Wer wei?? ??brigens, ob das nicht sehr gut f??r mich w??re. Wenn ich mich nicht wegen meiner Eltern zur??ckhielte, ich h??tte l??ngst gek??ndigt, ich w??re vor den Chef hin getreten und h??tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h??tte er fallen m??ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H??he herab mit dem Angestellten zu reden, der ??berdies wegen der Schwerh??rigkeit des Chefs ganz nahe herantreten mu??. Nun, die Hoffnung ist noch nicht g??nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d??rfte noch f??nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro??e Schnitt gemacht. Vorl??ufig allerdings mu?? ich aufstehen, denn mein Zug f??hrt um f??nf.?? Und er sah zur Weckuhr hin??ber, die auf dem Kasten tickte. ??Himmlischer Vater!??, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw??rts, es war sogar halb vor??ber, es n??herte sich schon dreiviertel. Sollte der Wecker nicht gel??utet haben? Man sah vom Bett aus, da?? er auf vier Uhr richtig eingestellt war; gewi?? hatte er auch gel??utet. Ja, aber war es m??glich, dieses m??belersch??tternde L??uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n??chste Zug ging um sieben Uhr; um den einzuholen, h??tte er sich unsinnig beeilen m??ssen, und die Kollektion war noch nicht eingepackt, und er selbst f??hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch??ftsdiener hatte beim F??nfuhrzug gewartet und die Meldung von seiner Vers??umnis l??ngst erstattet. Es war eine Kreatur des Chefs, ohne R??ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w??re aber ??u??erst peinlich und verd??chtig, denn Gregor war w??hrend seines f??nfj??hrigen Dienstes noch nicht einmal krank gewesen. Gewi?? w??rde der Chef mit dem Krankenkassenarzt kommen, w??rde den Eltern wegen des faulen Sohnes Vorw??rfe machen und alle Einw??nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f??r den es ja ??berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h??tte er ??brigens in diesem Falle so ganz unrecht? Gregor f??hlte sich tats??chlich, abgesehen von einer nach dem langen Schlaf wirklich ??berfl??ssigen Schl??frigkeit, ganz wohl und hatte sogar einen besonders kr??ftigen Hunger. \ No newline at end of file Added: trunk/test/kafka_utf8_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_utf8_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr??umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R??cken und sah, wenn er den Kopf ein wenig hob, seinen gew??lbten, braunen, von bogenf??rmigen Versteifungen geteilten Bauch, auf dessen H??he sich die Bettdecke, zum g??nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl??glich d??nnen Beine flimmerten ihm hilflos vor den Augen. + +??Was ist mit mir geschehen???, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W??nden. ??ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h??bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa?? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr??be Wetter - man h??rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ??Wie w??re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg????e??, dachte er, aber das war g??nzlich undurchf??hrbar, denn er war gew??hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw??rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R??ckenlage zur??ck. Er versuchte es wohl hundertmal, schlo?? die Augen, um die zappelnden Beine nicht sehen zu m??ssen, und lie?? erst ab, als er in der Seite einen noch nie gef??hlten, leichten, dumpfen Schmerz zu f??hlen begann. + +??Ach Gott??, dachte er, ??was f??r einen anstrengenden Beruf habe ich gew??hlt! Tag aus, Tag ein auf der Reise. Die gesch??ftlichen Aufregungen sind viel gr????er, als im eigentlichen Gesch??ft zu Hause, und au??erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl??sse, das unregelm????ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!?? Er f??hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R??cken langsam n??her zum Bettpfosten, um den Kopf besser heben zu k??nnen; fand die juckende Stelle, die mit lauter kleinen wei??en P??nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur??ck, denn bei der Ber??hrung umwehten ihn K??lteschauer. + +Er glitt wieder in seine fr??here Lage zur??ck. ??Dies fr??hzeitige Aufstehen??, dachte er, ??macht einen ganz bl??dsinnig. Der Mensch mu?? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur??ckgehe, um die erlangten Auftr??ge zu ??berschreiben, sitzen diese Herren erst beim Fr??hst??ck. Das sollte ich bei meinem Chef versuchen; ich w??rde auf der Stelle hinausfliegen. Wer wei?? ??brigens, ob das nicht sehr gut f??r mich w??re. Wenn ich mich nicht wegen meiner Eltern zur??ckhielte, ich h??tte l??ngst gek??ndigt, ich w??re vor den Chef hin getreten und h??tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h??tte er fallen m??ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H??he herab mit dem Angestellten zu reden, der ??berdies wegen der Schwerh??rigkeit des Chefs ganz nahe herantreten mu??. Nun, die Hoffnung ist noch nicht g??nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d??rfte noch f??nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro??e Schnitt gemacht. Vorl??ufig allerdings mu?? ich aufstehen, denn mein Zug f??hrt um f??nf.?? + +Und er sah zur Weckuhr hin??ber, die auf dem Kasten tickte. ??Himmlischer Vater!??, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw??rts, es war sogar halb vor??ber, es n??herte sich schon dreiviertel. Sollte der Wecker nicht gel??utet haben? Man sah vom Bett aus, da?? er auf vier Uhr richtig eingestellt war; gewi?? hatte er auch gel??utet. Ja, aber war es m??glich, dieses m??belersch??tternde L??uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n??chste Zug ging um sieben Uhr; um den einzuholen, h??tte er sich unsinnig beeilen m??ssen, und die Kollektion war noch nicht eingepackt, und er selbst f??hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch??ftsdiener hatte beim F??nfuhrzug gewartet und die Meldung von seiner Vers??umnis l??ngst erstattet. Es war eine Kreatur des Chefs, ohne R??ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w??re aber ??u??erst peinlich und verd??chtig, denn Gregor war w??hrend seines f??nfj??hrigen Dienstes noch nicht einmal krank gewesen. Gewi?? w??rde der Chef mit dem Krankenkassenarzt kommen, w??rde den Eltern wegen des faulen Sohnes Vorw??rfe machen und alle Einw??nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f??r den es ja ??berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h??tte er ??brigens in diesem Falle so ganz unrecht? Gregor f??hlte sich tats??chlich, abgesehen von einer nach dem langen Schlaf wirklich ??berfl??ssigen Schl??frigkeit, ganz wohl und hatte sogar einen besonders kr??ftigen Hunger. Added: trunk/test/kafka_utf8_lf.txt ============================================================================== --- (empty file) +++ trunk/test/kafka_utf8_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr??umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R??cken und sah, wenn er den Kopf ein wenig hob, seinen gew??lbten, braunen, von bogenf??rmigen Versteifungen geteilten Bauch, auf dessen H??he sich die Bettdecke, zum g??nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl??glich d??nnen Beine flimmerten ihm hilflos vor den Augen. + +??Was ist mit mir geschehen???, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W??nden. ??ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h??bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa?? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr??be Wetter - man h??rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ??Wie w??re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg????e??, dachte er, aber das war g??nzlich undurchf??hrbar, denn er war gew??hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw??rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R??ckenlage zur??ck. Er versuchte es wohl hundertmal, schlo?? die Augen, um die zappelnden Beine nicht sehen zu m??ssen, und lie?? erst ab, als er in der Seite einen noch nie gef??hlten, leichten, dumpfen Schmerz zu f??hlen begann. + +??Ach Gott??, dachte er, ??was f??r einen anstrengenden Beruf habe ich gew??hlt! Tag aus, Tag ein auf der Reise. Die gesch??ftlichen Aufregungen sind viel gr????er, als im eigentlichen Gesch??ft zu Hause, und au??erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl??sse, das unregelm????ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!?? Er f??hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R??cken langsam n??her zum Bettpfosten, um den Kopf besser heben zu k??nnen; fand die juckende Stelle, die mit lauter kleinen wei??en P??nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur??ck, denn bei der Ber??hrung umwehten ihn K??lteschauer. + +Er glitt wieder in seine fr??here Lage zur??ck. ??Dies fr??hzeitige Aufstehen??, dachte er, ??macht einen ganz bl??dsinnig. Der Mensch mu?? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur??ckgehe, um die erlangten Auftr??ge zu ??berschreiben, sitzen diese Herren erst beim Fr??hst??ck. Das sollte ich bei meinem Chef versuchen; ich w??rde auf der Stelle hinausfliegen. Wer wei?? ??brigens, ob das nicht sehr gut f??r mich w??re. Wenn ich mich nicht wegen meiner Eltern zur??ckhielte, ich h??tte l??ngst gek??ndigt, ich w??re vor den Chef hin getreten und h??tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h??tte er fallen m??ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H??he herab mit dem Angestellten zu reden, der ??berdies wegen der Schwerh??rigkeit des Chefs ganz nahe herantreten mu??. Nun, die Hoffnung ist noch nicht g??nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d??rfte noch f??nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro??e Schnitt gemacht. Vorl??ufig allerdings mu?? ich aufstehen, denn mein Zug f??hrt um f??nf.?? + +Und er sah zur Weckuhr hin??ber, die auf dem Kasten tickte. ??Himmlischer Vater!??, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw??rts, es war sogar halb vor??ber, es n??herte sich schon dreiviertel. Sollte der Wecker nicht gel??utet haben? Man sah vom Bett aus, da?? er auf vier Uhr richtig eingestellt war; gewi?? hatte er auch gel??utet. Ja, aber war es m??glich, dieses m??belersch??tternde L??uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n??chste Zug ging um sieben Uhr; um den einzuholen, h??tte er sich unsinnig beeilen m??ssen, und die Kollektion war noch nicht eingepackt, und er selbst f??hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch??ftsdiener hatte beim F??nfuhrzug gewartet und die Meldung von seiner Vers??umnis l??ngst erstattet. Es war eine Kreatur des Chefs, ohne R??ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w??re aber ??u??erst peinlich und verd??chtig, denn Gregor war w??hrend seines f??nfj??hrigen Dienstes noch nicht einmal krank gewesen. Gewi?? w??rde der Chef mit dem Krankenkassenarzt kommen, w??rde den Eltern wegen des faulen Sohnes Vorw??rfe machen und alle Einw??nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f??r den es ja ??berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h??tte er ??brigens in diesem Falle so ganz unrecht? Gregor f??hlte sich tats??chlich, abgesehen von einer nach dem langen Schlaf wirklich ??berfl??ssigen Schl??frigkeit, ganz wohl und hatte sogar einen besonders kr??ftigen Hunger. Added: trunk/test/packages.lisp ============================================================================== --- (empty file) +++ trunk/test/packages.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,33 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.4 2007/01/01 23:47:16 edi Exp $ + +;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :cl-user) + +(defpackage :flexi-streams-test + (:use :cl :flexi-streams)) Added: trunk/test/russian_koi8r_cr.txt ============================================================================== --- (empty file) +++ trunk/test/russian_koi8r_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +????????????????? ?????? ?? ??????? ????????????? ??????????? ?? Unicode, ??????? ????????? 10-12 ????? 1997 ???? ? ?????? ? ????????. ??????????? ??????? ??????? ???? ????????? ?? ???????? ??????????? ????????? ? Unicode, ??????????? ? ???????????????????, ?????????? ? ?????????? Unicode ? ????????? ???????????? ???????? ? ??????????? ???????????, ???????, ??????? ? ???????????? ???????????? ????????. \ No newline at end of file Added: trunk/test/russian_koi8r_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/russian_koi8r_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,6 @@ +????????????????? ?????? ?? ??????? ????????????? ??????????? ?? +Unicode, ??????? ????????? 10-12 ????? 1997 ???? ? ?????? ? ????????. +??????????? ??????? ??????? ???? ????????? ?? ???????? ??????????? +????????? ? Unicode, ??????????? ? ???????????????????, ?????????? ? +?????????? Unicode ? ????????? ???????????? ???????? ? ??????????? +???????????, ???????, ??????? ? ???????????? ???????????? ????????. Added: trunk/test/russian_koi8r_lf.txt ============================================================================== --- (empty file) +++ trunk/test/russian_koi8r_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,6 @@ +????????????????? ?????? ?? ??????? ????????????? ??????????? ?? +Unicode, ??????? ????????? 10-12 ????? 1997 ???? ? ?????? ? ????????. +??????????? ??????? ??????? ???? ????????? ?? ???????? ??????????? +????????? ? Unicode, ??????????? ? ???????????????????, ?????????? ? +?????????? Unicode ? ????????? ???????????? ???????? ? ??????????? +???????????, ???????, ??????? ? ???????????? ???????????? ????????. Added: trunk/test/russian_utf8_cr.txt ============================================================================== Binary file. No diff available. Added: trunk/test/russian_utf8_crlf.txt ============================================================================== Binary file. No diff available. Added: trunk/test/russian_utf8_lf.txt ============================================================================== Binary file. No diff available. Added: trunk/test/test.lisp ============================================================================== --- (empty file) +++ trunk/test/test.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,342 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.17 2007/12/29 22:58:44 edi Exp $ + +;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams-test) + +(defvar *this-file* (load-time-value + (or #.*compile-file-pathname* *load-pathname*)) + "The pathname of the file \(`test.lisp') where this variable was +defined.") + +#+:lispworks +(defun get-env-variable-as-directory (name) + (lw:when-let (string (lw:environment-variable name)) + (when (plusp (length string)) + (cond ((find (char string (1- (length string))) "\\/" :test #'char=) string) + (t (lw:string-append string "/")))))) + +(defvar *tmp-dir* + (load-time-value + (merge-pathnames "odd-streams-test/" + #+:allegro (system:temporary-directory) + #+:lispworks (pathname (or (get-env-variable-as-directory "TEMP") + (get-env-variable-as-directory "TMP") + #+:win32 "C:/" + #-:win32 "/tmp/")) + #-(or :allegro :lispworks) #p"/tmp/")) + "The pathname of a temporary directory used for testing.") + +(defvar *test-files* + '(("kafka" (:utf8 :latin1 :cp1252)) + ("tilton" (:utf8 :ascii)) + ("hebrew" (:utf8 :latin8)) + ("russian" (:utf8 :koi8r)) + ("unicode_demo" (:utf8 :ucs2 :ucs4))) + "A list of test files where each entry consists of the name +prefix and a list of encodings.") + +(defvar *test-success-counter* 0 + "Counts the number of successful tests.") + +(defun create-file-variants (file-name symbol) + "For a name suffix FILE-NAME and a symbol SYMBOL denoting an +encoding returns a list of pairs where the car is a full file +name and the cdr is the corresponding external format. This list +contains all possible variants w.r.t. to line-end conversion and +endianness." + (let ((args (ecase symbol + (:ascii '(:ascii)) + (:latin1 '(:latin-1)) + (:latin8 '(:hebrew)) + (:cp1252 '(:code-page :id 1252)) + (:koi8r '(:koi8-r)) + (:utf8 '(:utf-8)) + (:ucs2 '(:utf-16)) + (:ucs4 '(:utf-32)))) + (endianp (member symbol '(:ucs2 :ucs4)))) + (loop for little-endian in (if endianp '(t nil) '(t)) + for endian-suffix in (if endianp '("_le" "_be") '("")) + nconc (loop for eol-style in '(:lf :cr :crlf) + collect (cons (format nil "~A_~(~A~)_~(~A~)~A.txt" + file-name symbol eol-style endian-suffix) + (apply #'make-external-format + (append args `(:eol-style ,eol-style + :little-endian ,little-endian)))))))) + +(defun create-test-combinations (file-name symbols) + "For a name suffix FILE-NAME and a list of symbols SYMBOLS +denoting different encodings of the corresponding file returns a +list of lists which can be used as arglists for COMPARE-FILES." + (let ((file-variants (loop for symbol in symbols + nconc (create-file-variants file-name symbol)))) + (loop for (name-in . external-format-in) in file-variants + nconc (loop for (name-out . external-format-out) in file-variants + collect (list name-in external-format-in name-out external-format-out))))) + +(defun file-equal (file1 file2) + "Returns a true value iff FILE1 and FILE2 have the same +contents \(viewed as binary files)." + (with-open-file (stream1 file1 :element-type 'octet) + (with-open-file (stream2 file2 :element-type 'octet) + (and (= (file-length stream1) (file-length stream2)) + (loop for byte1 = (read-byte stream1 nil nil) + for byte2 = (read-byte stream2 nil nil) + while (and byte1 byte2) + always (= byte1 byte2)))))) + +(defun copy-stream (stream-in external-format-in stream-out external-format-out) + "Copies the contents of the binary stream STREAM-IN to the +binary stream STREAM-OUT using flexi streams - STREAM-IN is read +with the external format EXTERNAL-FORMAT-IN and STREAM-OUT is +written with EXTERNAL-FORMAT-OUT." + (let ((in (make-flexi-stream stream-in :external-format external-format-in)) + (out (make-flexi-stream stream-out :external-format external-format-out))) + (loop for line = (read-line in nil nil) + while line + do (write-line line out)))) + +(defun copy-file (path-in external-format-in path-out external-format-out direction-out direction-in) + "Copies the contents of the file denoted by the pathname +PATH-IN to the file denoted by the pathname PATH-OUT using flexi +streams - STREAM-IN is read with the external format +EXTERNAL-FORMAT-IN and STREAM-OUT is written with +EXTERNAL-FORMAT-OUT. The input file is opened with +the :DIRECTION keyword argument DIRECTION-IN, the output file is +opened with the :DIRECTION keyword argument DIRECTION-OUT." + (with-open-file (in path-in + :element-type 'octet + :direction direction-in + :if-does-not-exist :error + :if-exists :overwrite) + (with-open-file (out path-out + :element-type 'octet + :direction direction-out + :if-does-not-exist :create + :if-exists :supersede) + (copy-stream in external-format-in out external-format-out)))) + +#+:lispworks +(defun copy-file-lw (path-in external-format-in path-out external-format-out direction-out direction-in) + "Same as COPY-FILE, but uses character streams instead of +binary streams. Only used to test LispWorks-specific behaviour." + (with-open-file (in path-in + :external-format '(:latin-1 :eol-style :lf) + :element-type 'base-char + :direction direction-in + :if-does-not-exist :error + :if-exists :overwrite) + (with-open-file (out path-out + :external-format '(:latin-1 :eol-style :lf) + :element-type 'base-char + :direction direction-out + :direction :output + :if-does-not-exist :create + :if-exists :supersede) + (copy-stream in external-format-in out external-format-out)))) + +(defun compare-files (path-in external-format-in path-out external-format-out) + "Copies the contents of the file (in the `test') denoted by the +relative pathname PATH-IN to the file (in a temporary directory) +denoted by the relative pathname PATH-OUT using flexi streams - +STREAM-IN is read with the external format EXTERNAL-FORMAT-IN and +STREAM-OUT is written with EXTERNAL-FORMAT-OUT. The resulting +file is compared with an existing file in the `test' directory to +check if the outcome is as expected. Uses various variants of +the :DIRECTION keyword when opening the files." + (let ((full-path-in (merge-pathnames path-in *this-file*)) + (full-path-out (ensure-directories-exist + (merge-pathnames path-out *tmp-dir*))) + (full-path-orig (merge-pathnames path-out *this-file*))) + (dolist (direction-out '(:output :io)) + (dolist (direction-in '(:input :io)) + (format *error-output* "Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (flex::normalize-external-format external-format-in) direction-in + (flex::normalize-external-format external-format-out) direction-out) + (copy-file full-path-in external-format-in + full-path-out external-format-out + direction-out direction-in) + (cond ((file-equal full-path-out full-path-orig) + (incf *test-success-counter*)) + (t (format *error-output* " Test failed!!!~%"))) + (terpri *error-output*) + #+:lispworks + (format *error-output* "LW-Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (flex::normalize-external-format external-format-in) direction-in + (flex::normalize-external-format external-format-out) direction-out) + #+:lispworks + (copy-file-lw full-path-in external-format-in + full-path-out external-format-out + direction-out direction-in) + #+:lispworks + (cond ((file-equal full-path-out full-path-orig) + (incf *test-success-counter*)) + (t (format *error-output* " Test failed!!!~%"))) + #+:lispworks + (terpri *error-output*))))) + +(defmacro with-test ((test-description) &body body) + "Defines a test. Two utilities are available inside of the body of +the maco: The function FAIL, and the macro CHECK. FAIL, the lowest +level utility, marks the test defined by WITH-TEST as failed. CHECK +checks whether its argument is true, otherwise it calls FAIL. If +during evaluation of the specified expression any condition is +signalled, this is also considered a failure. + +WITH-TEST prints reports while the tests run. It also increments +*TEST-SUCCESS-COUNT* if a test completes successfully." + (flex::with-unique-names (successp) + `(let ((,successp t)) + (flet ((fail (format-str &rest format-args) + (setf ,successp nil) + (apply #'format *error-output* format-str format-args))) + (macrolet ((check (expression) + `(handler-case + (unless ,expression + (fail "Expression ~S failed.~%" ',expression)) + (condition (c) + (fail "Expression ~S failed signaling condition of type ~A: ~A.~%" + ',expression (type-of c) c))))) + (format *error-output* "Test ~S~%" ,test-description) + , at body + (if ,successp + (incf *test-success-counter*) + (format *error-output* " Test failed!!!~%")) + (terpri *error-output*) + (terpri *error-output*)) + ,successp)))) + +(defmacro using-values ((&rest values) &body body) + "Executes BODY and feeds an element from VALUES to the USE-VALUE +restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals +an error when there are more or less FLEXI-STREAM-ENCODING-ERRORs than +there are elements in VALUES." + (flex::with-unique-names (value-stack condition-counter) + `(let ((,value-stack ',values) + (,condition-counter 0)) + (handler-bind ((flexi-stream-encoding-error + #'(lambda (c) + (declare (ignore c)) + (unless ,value-stack + (error "Too many FLEXI-STREAM-ENCODING-ERRORs signalled, expected only ~A." + ,(length values))) + (incf ,condition-counter) + (use-value (pop ,value-stack))))) + (prog1 (progn , at body) + (when ,value-stack + (error "~A FLEXI-STREAM-ENCODING-ERRORs signalled, but ~A were expected." + ,condition-counter ,(length values)))))))) + +(defun read-flexi-line (sequence external-format) + "Creates and returns a string from the octet sequence SEQUENCE using +the external format EXTERNAL-FORMAT." + (with-input-from-sequence (in sequence) + (setq in (make-flexi-stream in :external-format external-format)) + (read-line in))) + +(defun encoding-error-handling-test () + "Tests several possible encoding errors and how they are handled." + (with-test ("Handling of encoding errors.") + (let ((*substitution-char* #\?)) + ;; :ASCII doesn't have characters with char codes > 127 + (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) + ;; :WINDOWS-1253 doesn't have a characters with codes 170 and 210 + (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253))) + ;; not a valid UTF-8 sequence + (check (string= "??" (read-flexi-line `(#xe4 #xf6 #xfc) :utf8))) + ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 + (check (string= "??" (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + (let ((*substitution-char* nil)) + ;; :ASCII doesn't have characters with char codes > 127 + (check (string= "abc" (using-values (#\b #\c) + (read-flexi-line `(,(char-code #\a) 128 200) :ascii)))) + ;; :WINDOWS-1253 encoding doesn't have a characters with codes 170 and 210 + (check (string= "axy" (using-values (#\x #\y) + (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))) + ;; not a valid UTF-8 sequence + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#xe4 #xf6 #xfc) :utf8)))) + ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + ;; only one byte + (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16le)))) + ;; two bytes, but value of resulting word suggests that another word follows + (check (string= "R" (using-values (#\R) (read-flexi-line `(#x01 #xd8) :utf-16le)))) + ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff + (check (string= "T" (using-values (#\T) (read-flexi-line `(#x01 #xd8 #xff #xdb) :utf-16le)))) + ;; the same as for little endian above, but using inverse order of bytes in words + (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16be)))) + (check (string= "R" (using-values (#\R) (read-flexi-line `(#xd8 #x01) :utf-16be)))) + (check (string= "T" (using-values (#\T) (read-flexi-line `(#xd8 #x01 #xdb #xff) :utf-16be)))) + ;; the only case when error is signalled for UTF-32 is at end of file + ;; in the middle of 4-byte sequence, both for big and little endian + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32le)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line `(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32be)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) + +(defun unread-char-test () + "Tests whether UNREAD-CHAR behaves as expected." + (with-test ("UNREAD-CHAR behaviour.") + (flet ((test-one-file (file-name external-format) + (with-open-file (in (merge-pathnames file-name *this-file*) + :element-type 'flex:octet) + (setq in (make-flexi-stream in :external-format external-format)) + (loop repeat 300 + for char = (read-char in) + do (unread-char char in) + (check (char= (read-char in) char)))))) + (loop for (file-name symbols) in *test-files* + do (loop for symbol in symbols + do (loop for (file-name . external-format) in (create-file-variants file-name symbol) + do (test-one-file file-name external-format))))))) + +(defun run-tests () + "Applies COMPARE-FILES to all test scenarios created with +CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, +and shows simple statistics at the end." + (let* ((*test-success-counter* 0) + (args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols))) + (no-tests (* 4 (length args-list)))) + #+:lispworks + (setq no-tests (* 2 no-tests)) + (dolist (args args-list) + (apply #'compare-files args)) + (incf no-tests) + (encoding-error-handling-test) + (incf no-tests) + (unread-char-test) + (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" + (= no-tests *test-success-counter*) (- no-tests *test-success-counter*) no-tests))) + Added: trunk/test/tilton_ascii_cr.txt ============================================================================== --- (empty file) +++ trunk/test/tilton_ascii_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +Programmers who lock onto a design decision and cling to it in the face of contradictory new information -- well, that's almost everyone in my experience, so I better not say what I think of them or people will start saying bad things about me on c.l.l. -- Ken Tilton % This reminds me of the NYC cabby who accepted a fare to Chicago. When they got there and could not find the friend who was supposed to pay the fare he just laughed and said he should have known. -- Ken Tilton % >> Actually, I believe that Aikido, Jazz and Lisp are different appearances >> of the same thing. Yes, the Tao. /Everything/ is a different appearance of the tao. -- Ken Tilton "Ken, I went to the library and read up on Buddhism, and believe me, you are no Buddhist." -- Kenny's mom % That absolutely terrifies the herd-following, lockstep-marching, mainstream-saluting cowards that obediently dash out or online to scoop up books on The Latest Thing. They learn and use atrocities like Java, C++, XML, and even Python for the security it gives them and then sit there slaving away miserably, tediously, joylously paying off mortgages and supporting ungrateful teenagers who despise them, only to look out the double-sealed thermo-pane windows of their central-heated, sound-proofed, dead-bolted, suffocating little nests into the howling gale thinking "what do they know that I do not know?" when they see us under a lean-to hunched over our laptops to shield them from the rain laughing our asses off as we write great code between bong hits.... what was the question? -- Ken Tilton % Shut up! (That last phrase has four or more syllables if pronounced as intended.) -- Ken Tilton % Nonsense. You'll be using it for the GUI, not protein-folding. -- Ken Tilton (responding to a comment that LTK was slow because it was based on TK) % Continuations certainly are clever, but if we learned anything from the rejection of the cover art for "Smell the Glove", it is that "there is a fine line between stupid... and clever". -- Ken Tilton % Ah, there's no place like academia for dispassionate, intellectually honest discussion of new ideas on their merits. Thank god for tenure giving your bold antagonist the protection they needed to shout down your iconoclastic..... hang on... -- Ken Tilton % Whoever objected must be in my killfile, ... -- Ken Tilton % >From memory (but I think I have it right): "But Jesus said, Suffer captured variables, and forbid them not, to come unto thine macro bodies: for of such is are DSLs made." -- Ken Tilton Can I get an Amen? % Awareness of defect is the first step to recovery. -- Ken Tilton % You made a bad analogy (there are no good ones, but you found a new low) ... -- Ken Tilton % Yes, it is true that Kent Pitman was raised by a closet full of Lisp Machines, but the exception only proves the rule. -- Ken Tilton (in a postscript after positing that computer languages are not learned in infancy) % I suggest you try bartender's school to support yourself, start programming for fun again. -- Ken Tilton (responding to a comment that 98% of anything to do with computers was not interesting code) % You could add four lanes to my carpal tunnel and I still could not write all the code I am dying to write. -- Ken Tilton % Neutrality? I want to bury other languages, not have a gateway to them. -- Ken Tilton % Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" Simon: "Hunh? My puppy /always/ gives me companionship." -- Ken Tilton (on how he was understood by a native english speaker) % \ No newline at end of file Added: trunk/test/tilton_ascii_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/tilton_ascii_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: trunk/test/tilton_ascii_lf.txt ============================================================================== --- (empty file) +++ trunk/test/tilton_ascii_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: trunk/test/tilton_utf8_cr.txt ============================================================================== --- (empty file) +++ trunk/test/tilton_utf8_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ +Programmers who lock onto a design decision and cling to it in the face of contradictory new information -- well, that's almost everyone in my experience, so I better not say what I think of them or people will start saying bad things about me on c.l.l. -- Ken Tilton % This reminds me of the NYC cabby who accepted a fare to Chicago. When they got there and could not find the friend who was supposed to pay the fare he just laughed and said he should have known. -- Ken Tilton % >> Actually, I believe that Aikido, Jazz and Lisp are different appearances >> of the same thing. Yes, the Tao. /Everything/ is a different appearance of the tao. -- Ken Tilton "Ken, I went to the library and read up on Buddhism, and believe me, you are no Buddhist." -- Kenny's mom % That absolutely terrifies the herd-following, lockstep-marching, mainstream-saluting cowards that obediently dash out or online to scoop up books on The Latest Thing. They learn and use atrocities like Java, C++, XML, and even Python for the security it gives them and then sit there slaving away miserably, tediously, joylously paying off mortgages and supporting ungrateful teenagers who despise them, only to look out the double-sealed thermo-pane windows of their central-heated, sound-proofed, dead-bolted, suffocating little nests into the howling gale thinking "what do they know that I do not know?" when they see us under a lean-to hunched over our laptops to shield them from the rain laughing our asses off as we write great code between bong hits.... what was the question? -- Ken Tilton % Shut up! (That last phrase has four or more syllables if pronounced as intended.) -- Ken Tilton % Nonsense. You'll be using it for the GUI, not protein-folding. -- Ken Tilton (responding to a comment that LTK was slow because it was based on TK) % Continuations certainly are clever, but if we learned anything from the rejection of the cover art for "Smell the Glove", it is that "there is a fine line between stupid... and clever". -- Ken Tilton % Ah, there's no place like academia for dispassionate, intellectually honest discussion of new ideas on their merits. Thank god for tenure giving your bold antagonist the protection they needed to shout down your iconoclastic..... hang on... -- Ken Tilton % Whoever objected must be in my killfile, ... -- Ken Tilton % >From memory (but I think I have it right): "But Jesus said, Suffer captured variables, and forbid them not, to come unto thine macro bodies: for of such is are DSLs made." -- Ken Tilton Can I get an Amen? % Awareness of defect is the first step to recovery. -- Ken Tilton % You made a bad analogy (there are no good ones, but you found a new low) ... -- Ken Tilton % Yes, it is true that Kent Pitman was raised by a closet full of Lisp Machines, but the exception only proves the rule. -- Ken Tilton (in a postscript after positing that computer languages are not learned in infancy) % I suggest you try bartender's school to support yourself, start programming for fun again. -- Ken Tilton (responding to a comment that 98% of anything to do with computers was not interesting code) % You could add four lanes to my carpal tunnel and I still could not write all the code I am dying to write. -- Ken Tilton % Neutrality? I want to bury other languages, not have a gateway to them. -- Ken Tilton % Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" Simon: "Hunh? My puppy /always/ gives me companionship." -- Ken Tilton (on how he was understood by a native english speaker) % \ No newline at end of file Added: trunk/test/tilton_utf8_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/tilton_utf8_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: trunk/test/tilton_utf8_lf.txt ============================================================================== --- (empty file) +++ trunk/test/tilton_utf8_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: trunk/test/unicode_demo_ucs2_cr_be.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs2_cr_le.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs2_crlf_be.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs2_crlf_le.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs2_lf_be.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs2_lf_le.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs4_cr_be.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs4_cr_le.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs4_crlf_be.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs4_crlf_le.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs4_lf_be.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_ucs4_lf_le.txt ============================================================================== Binary file. No diff available. Added: trunk/test/unicode_demo_utf8_cr.txt ============================================================================== --- (empty file) +++ trunk/test/unicode_demo_utf8_cr.txt Thu May 1 02:25:14 2008 @@ -0,0 +1 @@ + UTF-8 encoded sample plain-text file ???????????????????????????????????????????????????????????????????????????????????????????????????????????? Markus Kuhn [??ma??k??s ku??n] ??? 2002-07-25 The ASCII compatible UTF-8 encoding used in this plain-text file is defined in Unicode, ISO 10646-1, and RFC 2279. Using Unicode/UTF-8, you can write in emails and source code things such as Mathematics and sciences: ??? E???da = Q, n ??? ???, ??? f(i) = ??? g(i), ??????????????????????????????????????? ????????????a??+b?? ????????? ???x??????: ???x??? = ?????????x???, ?? ??? ???? = ??(???? ??? ??), ??????????????????????????? ????????? ???????????? c??? ????????? ??? ??? ?????? ??? ??? ??? ??? ??? ??? ??? ???, ????????? ????????? ????????? ??? ????????? ??? < a ??? b ??? c ??? d ??? ??? ??? (???A??? ??? ???B???), ????????? ??? ????????? ????????? ???a???-b???????????? 2H??? + O??? ??? 2H???O, R = 4.7 k??, ??? 200 mm ?????????i=1 ????????? Linguistics and dictionaries: ??i ??nt????n??????n??l f????n??t??k ??so??si??e????n Y [????psil??n], Yen [j??n], Yoga [??jo??g??] APL: ((V???V)=??????V)/V???,V ???????????????????????????????????? Nicer typography in plain text files: ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? ??? ??? ??? ??? ???single??? and ???double??? quotes ??? ??? ??? ??? ??? Curly apostrophes: ???We???ve been here??? ??? ??? ??? ??? ??? Latin-1 apostrophe and accents: '??` ??? ??? ??? ??? ??? ???deutsche??? ???Anf??hrungszeichen??? ??? ??? ??? ??? ??? ???, ???, ???, ???, 3???4, ???, ???5/+5, ???, ??? ??? ??? ??? ??? ??? ASCII safety test: 1lI|, 0OD, 8B ??? ??? ????????????????????????????????? ??? ??? ??? the euro symbol: ??? 14.95 ??? ??? ??? ??? ????????????????????????????????? ??? ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? Combining characters: STARG????TE SG-1, a = v?? = r??, a??? ??? b??? Greek (in Polytonic): The Greek anthem: ????? ??????????????? ???????? ??????? ????????? ??????? ??????????????? ??????? ???????????????, ????? ??????????????? ???????? ??????? ??????? ??????? ????? ??????? ??????????????? ????? ?????. ?????????? ????? ??????????????? ????????????????? ??????? ?????????????????? ????? ?????????? ??????? ??????? ??????????? ???????????????????????? ???????????, ??? ???????????, ??????????????????????! From a speech of Demosthenes in the 4th century BC: ?????????? ???????????? ?????????????????????? ?????? ?????????????????????, ??? ????????????? ????????????????????, ????????? ????? ??????? ????? ????????????????? ?????????????????? ??????? ????????? ????????? ????????? ????????????? ??????? ?????????????? ????????? ??????? ??????? ????????????? ????????? ??????? ????????????????????????? ????????????????? ???????? ???????????????????????, ????? ????? ?????????????????? ??????? ??????????? ???????????????????, ?????????? ????????? ????? ???????????????????? ???????????? ????????????????? ??????????? ??????????????????? ?????????. ???????????? ??????? ????????? ?????? ????????????????? ????? ????? ??????????????? ????????????????? ??? ??????? ??????????????????, ????????? ????? ???????????????????????, ?????????? ??????? ??????????? ??????????????????????? ?????????? ??????????????????????. ???????? ?????, ??????? ??????? ????????? ?????????? ????? ??????????? ??????? ????? ???????????? ??????????? ???????????????? ??????? ????????????????? ?????????????????????????, ??????? ?????????? ???????????????? ??????????? ???????? ?????????? ???????, ????? ??????????? ??????????????? ???????????? ???????????????????? ??????? ????????????? ??????????????????? ???????????? ?????????????? ??????????????????? ?????????? ??????????? ??????? ?????????????, ????????? ????????? ??????????????????? ???????????????. ???????? ??????? ??????????? ??????????????? ???????????????, ????????? ??????? ????????? ??????? ????????? ???????????????????????? ?????? ??????? ????? ????????????? ???????????????? ????????????????? ????????? ????? ??????? ???????????? ???????????? ????????????????????, ??????????????? ???????????????? ????????? ??????? ????????????????? ?????????????????? ??????????????????? ???????????. ???????????????????????, ????? ???????????????????????? Georgian: From a Unicode conference invitation: ?????????????????? ?????????????????? ????????????????????? ????????????????????????????????? Unicode-?????? ??????????????? ???????????????????????????????????? ??????????????????????????????????????? ?????????????????????????????????, ????????????????????? ?????????????????????????????? 10-12 ???????????????, ???. ?????????????????????, ??????????????????????????????. ????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? ?????????????????????????????? ???????????? ???????????????????????? ??????????????????????????? ??????????????????????????? ?????? Unicode-???, ????????????????????????????????????????????????????????? ?????? ?????????????????????????????????, Unicode-?????? ?????????????????????????????? ??????????????????????????? ??????????????????????????????, ?????? ????????????????????????????????? ?????????????????????????????????, ???????????????????????????, ??????????????????????????? ???????????????????????????????????? ?????? ???????????????????????????????????? ????????????????????????????????? ??????????????????????????????. Russian: From a Unicode conference invitation: ?????????????????????????????????? ???????????? ???? ?????????????? ?????????????????????????? ?????????????????????? ???? Unicode, ?????????????? ?????????????????? 10-12 ?????????? 1997 ???????? ?? ???????????? ?? ????????????????. ?????????????????????? ?????????????? ?????????????? ???????? ?????????????????? ???? ???????????????? ?????????????????????? ?????????????????? ?? Unicode, ?????????????????????? ?? ??????????????????????????????????????, ???????????????????? ?? ???????????????????? Unicode ?? ?????????????????? ???????????????????????? ???????????????? ?? ?????????????????????? ??????????????????????, ??????????????, ?????????????? ?? ???????????????????????? ???????????????????????? ????????????????. Thai (UCS Level 2): Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese classic 'San Gua'): [----------------------------|------------------------] ??? ?????????????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??? (The above is a two-column text. If combining characters are handled correctly, the lines of the second column should be aligned with the | character above.) Ethiopian: Proverbs in the Amharic language: ????????? ??????????????? ????????? ?????????????????? ?????? ????????? ?????????????????? ?????????????????? ?????? ???????????? ???????????? ????????? ?????? ???????????? ?????? ???????????? ????????? ?????????????????? ????????? ???????????? ????????? ?????????????????? ????????? ????????? ?????? ???????????? ??????????????? ?????????????????? ?????? ???????????? ??????????????? ???????????? ??????????????? ?????? ???????????? ???????????? ???????????? ?????? ??????????????? ????????? ????????? ???????????? ???????????????????????? ???????????? ?????????????????? ????????? ??????????????? ?????????????????? ??????????????? ????????? ???????????? ????????? ???????????? ??????????????? ?????? ??????????????? ????????? ??????????????? ????????? ???????????? ???????????? ????????? ?????? ??????????????? ??????????????? ????????? ?????? ???????????? ????????? ???????????? ???????????? ????????? ???????????? ????????? ???????????? ?????? ????????? ???????????? ?????????????????? ??????????????? ??????????????? ?????? ???????????? Runes: ?????? ???????????? ????????? ?????? ???????????? ?????? ????????? ??????????????? ????????????????????????????????? ????????? ?????? ???????????? (Old English, which transcribed into Latin reads 'He cwaeth that he bude thaem lande northweardum with tha Westsae.' and means 'He said that he lived in the northern land near the Western Sea.') Braille: ???????????? ????????? ????????????????????? ????????? ??????????????? ????????? ??????????????? ?????? ???????????? ???????????? ????????? ?????? ?????? ???????????? ?????????????????? ???????????? ???????????? ?????? ?????????????????? ?????? ????????? ?????????????????? ????????? ??????????????? ?????? ?????? ??????????????????????????? ?????? ??????????????? ?????? ??????????????????????????? ????????? ?????? ???????????? ?????????????????? ????????????????????? ??????????????? ????????? ????????? ??????????????????????????? ???????????? ????????? ???????????? ???????????? ????????????????????? ????????? ?????????????????? ?????? ???????????? ?????? ????????? ????????? ???????????? ????????? ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? ???????????? ??? ??????????????? ???????????? ?????? ????????? ????????? ??? ???????????? ?????? ?????? ?????? ???????????????????????? ????????? ????????? ?????? ?????????????????????????????? ???????????? ???????????? ??? ?????????????????????????????? ??? ???????????? ???????????? ????????? ?????????????????? ????????????????????? ?????? ??????????????? ??? ?????????????????????????????? ?????? ?????? ?????????????????? ??????????????? ?????? ?????????????????????????????? ??? ?????? ?????????????????? ????????? ?????? ?????????????????? ?????? ?????? ???????????????????????? ?????? ??? ?????? ????????????????????? ????????? ?????? ???????????????????????? ??????????????? ???????????? ????????? ?????????????????? ????????? ?????? ?????? ???????????????????????? ???????????? ???????????? ?????? ???????????? ????????????????????? ??????????????? ?????? ?????? ????????????????????? ??????????????????????????????????????? ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? (The first couple of paragraphs of "A Christmas Carol" by Dickens) Compact font selection example text: ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 abcdefghijklmnopqrstuvwxyz ?????????????????????? ???????????????????????????????????????????? ???????????????????? ???????????????????? ???????????????????????? ??????????????? ???????????????????????? ????????????????????????????????????? Greetings in various languages: Hello world, ????????????????? ???????????, ??????????????? Box drawing alignment tests: ??? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??????????????????????????? ????????? ??? ????????????????????? ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? ?????? ??? ?????? ?????? ?????? ????????????????????? ????????????????????? ????????????????????? ???????????? ???????????? ??? ???????????? ??? ??? ????????????????????? ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ?????????????????? ???????????? ??? ???????????? ??? ???????????????????????? ?????????????????? \ No newline at end of file Added: trunk/test/unicode_demo_utf8_crlf.txt ============================================================================== --- (empty file) +++ trunk/test/unicode_demo_utf8_crlf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,212 @@ + +UTF-8 encoded sample plain-text file +???????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Markus Kuhn [??ma??k??s ku??n] ??? 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ??? E???da = Q, n ??? ???, ??? f(i) = ??? g(i), ??????????????????????????????????????? + ????????????a??+b?? ????????? + ???x??????: ???x??? = ?????????x???, ?? ??? ???? = ??(???? ??? ??), ??????????????????????????? ????????? + ???????????? c??? ????????? + ??? ??? ?????? ??? ??? ??? ??? ??? ??? ??? ???, ????????? ????????? + ????????? ??? ????????? + ??? < a ??? b ??? c ??? d ??? ??? ??? (???A??? ??? ???B???), ????????? ??? ????????? + ????????? ???a???-b???????????? + 2H??? + O??? ??? 2H???O, R = 4.7 k??, ??? 200 mm ?????????i=1 ????????? + +Linguistics and dictionaries: + + ??i ??nt????n??????n??l f????n??t??k ??so??si??e????n + Y [????psil??n], Yen [j??n], Yoga [??jo??g??] + +APL: + + ((V???V)=??????V)/V???,V ???????????????????????????????????? + +Nicer typography in plain text files: + + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + ??? ??? + ??? ??? ???single??? and ???double??? quotes ??? + ??? ??? + ??? ??? Curly apostrophes: ???We???ve been here??? ??? + ??? ??? + ??? ??? Latin-1 apostrophe and accents: '??` ??? + ??? ??? + ??? ??? ???deutsche??? ???Anf??hrungszeichen??? ??? + ??? ??? + ??? ??? ???, ???, ???, ???, 3???4, ???, ???5/+5, ???, ??? ??? + ??? ??? + ??? ??? ASCII safety test: 1lI|, 0OD, 8B ??? + ??? ????????????????????????????????? ??? + ??? ??? the euro symbol: ??? 14.95 ??? ??? ??? + ??? ????????????????????????????????? ??? + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Combining characters: + + STARG????TE SG-1, a = v?? = r??, a??? ??? b??? + +Greek (in Polytonic): + + The Greek anthem: + + ????? ??????????????? ???????? ??????? ????????? + ??????? ??????????????? ??????? ???????????????, + ????? ??????????????? ???????? ??????? ??????? + ??????? ????? ??????? ??????????????? ????? ?????. + + ?????????? ????? ??????????????? ????????????????? + ??????? ?????????????????? ????? ?????????? + ??????? ??????? ??????????? ???????????????????????? + ???????????, ??? ???????????, ??????????????????????! + + From a speech of Demosthenes in the 4th century BC: + + ?????????? ???????????? ?????????????????????? ?????? ?????????????????????, ??? ????????????? ????????????????????, + ????????? ????? ??????? ????? ????????????????? ?????????????????? ??????? ????????? ????????? ????????? + ????????????? ??????? ?????????????? ????????? ??????? ??????? ????????????? ????????? ??????? + ????????????????????????? ????????????????? ???????? ???????????????????????, ????? ????? ?????????????????? + ??????? ??????????? ???????????????????, ?????????? ????????? ????? ???????????????????? ???????????? + ????????????????? ??????????? ??????????????????? ?????????. ???????????? ??????? ????????? ?????? ????????????????? + ????? ????? ??????????????? ????????????????? ??? ??????? ??????????????????, ????????? ????? ???????????????????????, + ?????????? ??????? ??????????? ??????????????????????? ?????????? ??????????????????????. ???????? ?????, ??????? ??????? + ????????? ?????????? ????? ??????????? ??????? ????? ???????????? ??????????? ???????????????? ??????? ????????????????? + ?????????????????????????, ??????? ?????????? ???????????????? ??????????? ???????? ?????????? ???????, ????? ??????????? + ??????????????? ???????????? ???????????????????? ??????? ????????????? ??????????????????? ???????????? ?????????????? + ??????????????????? ?????????? ??????????? ??????? ?????????????, ????????? ????????? ??????????????????? + ???????????????. ???????? ??????? ??????????? ??????????????? ???????????????, ????????? ??????? ????????? ??????? + ????????? ???????????????????????? ?????? ??????? ????? ????????????? ???????????????? ????????????????? ????????? ????? + ??????? ???????????? ???????????? ????????????????????, ??????????????? ???????????????? ????????? ??????? + ????????????????? ?????????????????? ??????????????????? ???????????. + + ???????????????????????, ????? ???????????????????????? + +Georgian: + + From a Unicode conference invitation: + + ?????????????????? ?????????????????? ????????????????????? ????????????????????????????????? Unicode-?????? ??????????????? ???????????????????????????????????? + ??????????????????????????????????????? ?????????????????????????????????, ????????????????????? ?????????????????????????????? 10-12 ???????????????, + ???. ?????????????????????, ??????????????????????????????. ????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? + ?????????????????????????????? ???????????? ???????????????????????? ??????????????????????????? ??????????????????????????? ?????? Unicode-???, + ????????????????????????????????????????????????????????? ?????? ?????????????????????????????????, Unicode-?????? ?????????????????????????????? + ??????????????????????????? ??????????????????????????????, ?????? ????????????????????????????????? ?????????????????????????????????, ???????????????????????????, + ??????????????????????????? ???????????????????????????????????? ?????? ???????????????????????????????????? ????????????????????????????????? ??????????????????????????????. + +Russian: + + From a Unicode conference invitation: + + ?????????????????????????????????? ???????????? ???? ?????????????? ?????????????????????????? ?????????????????????? ???? + Unicode, ?????????????? ?????????????????? 10-12 ?????????? 1997 ???????? ?? ???????????? ?? ????????????????. + ?????????????????????? ?????????????? ?????????????? ???????? ?????????????????? ???? ???????????????? ?????????????????????? + ?????????????????? ?? Unicode, ?????????????????????? ?? ??????????????????????????????????????, ???????????????????? ?? + ???????????????????? Unicode ?? ?????????????????? ???????????????????????? ???????????????? ?? ?????????????????????? + ??????????????????????, ??????????????, ?????????????? ?? ???????????????????????? ???????????????????????? ????????????????. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ??? ?????????????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? + ???????????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??? + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ????????? ??????????????? ????????? ?????????????????? + ?????? ????????? ?????????????????? ?????????????????? + ?????? ???????????? ???????????? ????????? + ?????? ???????????? ?????? ???????????? ????????? ?????????????????? + ????????? ???????????? ????????? ?????????????????? + ????????? ????????? ?????? ???????????? + ??????????????? ?????????????????? + ?????? ???????????? ??????????????? ???????????? ??????????????? + ?????? ???????????? ???????????? ???????????? + ?????? ??????????????? ????????? ????????? ???????????? ???????????????????????? + ???????????? ?????????????????? ????????? ??????????????? ?????????????????? + ??????????????? ????????? ???????????? ????????? ???????????? ??????????????? + ?????? ??????????????? ????????? ??????????????? + ????????? ???????????? ???????????? ????????? ?????? ??????????????? + ??????????????? ????????? ?????? ???????????? ????????? ???????????? + ???????????? ????????? ???????????? ????????? + ???????????? ?????? ????????? ???????????? ?????????????????? + ??????????????? ??????????????? ?????? ???????????? + +Runes: + + ?????? ???????????? ????????? ?????? ???????????? ?????? ????????? ??????????????? ????????????????????????????????? ????????? ?????? ???????????? + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ???????????? ????????? ????????????????????? ????????? + + ??????????????? ????????? ??????????????? ?????? ???????????? ???????????? ????????? ?????? ?????? ???????????? + ?????????????????? ???????????? ???????????? ?????? ?????????????????? ?????? ????????? ?????????????????? ????????? + ??????????????? ?????? ?????? ??????????????????????????? ?????? ??????????????? ?????? ??????????????????????????? + ????????? ?????? ???????????? ?????????????????? ????????????????????? ??????????????? ????????? ????????? + ??????????????????????????? ???????????? ????????? ???????????? ???????????? ????????????????????? ????????? ?????????????????? ?????? + ???????????? ?????? ????????? ????????? ???????????? ????????? + + ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + ???????????? ??? ??????????????? ???????????? ?????? ????????? ????????? ??? ???????????? ?????? ?????? + ?????? ???????????????????????? ????????? ????????? ?????? ?????????????????????????????? ???????????? ???????????? + ??? ?????????????????????????????? ??? ???????????? ???????????? ????????? ?????????????????? ????????????????????? ?????? + ??????????????? ??? ?????????????????????????????? ?????? ?????? ?????????????????? ??????????????? ?????? ?????????????????????????????? + ??? ?????? ?????????????????? ????????? ?????? ?????????????????? ?????? ?????? ???????????????????????? + ?????? ??? ?????? ????????????????????? ????????? ?????? ???????????????????????? ??????????????? + ???????????? ????????? ?????????????????? ????????? ?????? ?????? ???????????????????????? ???????????? ???????????? ?????? + ???????????? ????????????????????? ??????????????? ?????? ?????? ????????????????????? ??????????????????????????????????????? ????????? + ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz ?????????????????????? + ???????????????????????????????????????????? ???????????????????? ???????????????????? + ???????????????????????? ??????????????? ???????????????????????? ????????????????????????????????????? + +Greetings in various languages: + + Hello world, ????????????????? ???????????, ??????????????? + +Box drawing alignment tests: ??? + ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??????????????????????????? ????????? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ?????? ??? ?????? ?????? ?????? ????????????????????? ????????????????????? ????????????????????? ???????????? ???????????? ??? ???????????? ??? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ?????????????????? ???????????? ??? ???????????? ??? ???????????????????????? + ?????????????????? Added: trunk/test/unicode_demo_utf8_lf.txt ============================================================================== --- (empty file) +++ trunk/test/unicode_demo_utf8_lf.txt Thu May 1 02:25:14 2008 @@ -0,0 +1,212 @@ + +UTF-8 encoded sample plain-text file +???????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Markus Kuhn [??ma??k??s ku??n] ??? 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ??? E???da = Q, n ??? ???, ??? f(i) = ??? g(i), ??????????????????????????????????????? + ????????????a??+b?? ????????? + ???x??????: ???x??? = ?????????x???, ?? ??? ???? = ??(???? ??? ??), ??????????????????????????? ????????? + ???????????? c??? ????????? + ??? ??? ?????? ??? ??? ??? ??? ??? ??? ??? ???, ????????? ????????? + ????????? ??? ????????? + ??? < a ??? b ??? c ??? d ??? ??? ??? (???A??? ??? ???B???), ????????? ??? ????????? + ????????? ???a???-b???????????? + 2H??? + O??? ??? 2H???O, R = 4.7 k??, ??? 200 mm ?????????i=1 ????????? + +Linguistics and dictionaries: + + ??i ??nt????n??????n??l f????n??t??k ??so??si??e????n + Y [????psil??n], Yen [j??n], Yoga [??jo??g??] + +APL: + + ((V???V)=??????V)/V???,V ???????????????????????????????????? + +Nicer typography in plain text files: + + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + ??? ??? + ??? ??? ???single??? and ???double??? quotes ??? + ??? ??? + ??? ??? Curly apostrophes: ???We???ve been here??? ??? + ??? ??? + ??? ??? Latin-1 apostrophe and accents: '??` ??? + ??? ??? + ??? ??? ???deutsche??? ???Anf??hrungszeichen??? ??? + ??? ??? + ??? ??? ???, ???, ???, ???, 3???4, ???, ???5/+5, ???, ??? ??? + ??? ??? + ??? ??? ASCII safety test: 1lI|, 0OD, 8B ??? + ??? ????????????????????????????????? ??? + ??? ??? the euro symbol: ??? 14.95 ??? ??? ??? + ??? ????????????????????????????????? ??? + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Combining characters: + + STARG????TE SG-1, a = v?? = r??, a??? ??? b??? + +Greek (in Polytonic): + + The Greek anthem: + + ????? ??????????????? ???????? ??????? ????????? + ??????? ??????????????? ??????? ???????????????, + ????? ??????????????? ???????? ??????? ??????? + ??????? ????? ??????? ??????????????? ????? ?????. + + ?????????? ????? ??????????????? ????????????????? + ??????? ?????????????????? ????? ?????????? + ??????? ??????? ??????????? ???????????????????????? + ???????????, ??? ???????????, ??????????????????????! + + From a speech of Demosthenes in the 4th century BC: + + ?????????? ???????????? ?????????????????????? ?????? ?????????????????????, ??? ????????????? ????????????????????, + ????????? ????? ??????? ????? ????????????????? ?????????????????? ??????? ????????? ????????? ????????? + ????????????? ??????? ?????????????? ????????? ??????? ??????? ????????????? ????????? ??????? + ????????????????????????? ????????????????? ???????? ???????????????????????, ????? ????? ?????????????????? + ??????? ??????????? ???????????????????, ?????????? ????????? ????? ???????????????????? ???????????? + ????????????????? ??????????? ??????????????????? ?????????. ???????????? ??????? ????????? ?????? ????????????????? + ????? ????? ??????????????? ????????????????? ??? ??????? ??????????????????, ????????? ????? ???????????????????????, + ?????????? ??????? ??????????? ??????????????????????? ?????????? ??????????????????????. ???????? ?????, ??????? ??????? + ????????? ?????????? ????? ??????????? ??????? ????? ???????????? ??????????? ???????????????? ??????? ????????????????? + ?????????????????????????, ??????? ?????????? ???????????????? ??????????? ???????? ?????????? ???????, ????? ??????????? + ??????????????? ???????????? ???????????????????? ??????? ????????????? ??????????????????? ???????????? ?????????????? + ??????????????????? ?????????? ??????????? ??????? ?????????????, ????????? ????????? ??????????????????? + ???????????????. ???????? ??????? ??????????? ??????????????? ???????????????, ????????? ??????? ????????? ??????? + ????????? ???????????????????????? ?????? ??????? ????? ????????????? ???????????????? ????????????????? ????????? ????? + ??????? ???????????? ???????????? ????????????????????, ??????????????? ???????????????? ????????? ??????? + ????????????????? ?????????????????? ??????????????????? ???????????. + + ???????????????????????, ????? ???????????????????????? + +Georgian: + + From a Unicode conference invitation: + + ?????????????????? ?????????????????? ????????????????????? ????????????????????????????????? Unicode-?????? ??????????????? ???????????????????????????????????? + ??????????????????????????????????????? ?????????????????????????????????, ????????????????????? ?????????????????????????????? 10-12 ???????????????, + ???. ?????????????????????, ??????????????????????????????. ????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? + ?????????????????????????????? ???????????? ???????????????????????? ??????????????????????????? ??????????????????????????? ?????? Unicode-???, + ????????????????????????????????????????????????????????? ?????? ?????????????????????????????????, Unicode-?????? ?????????????????????????????? + ??????????????????????????? ??????????????????????????????, ?????? ????????????????????????????????? ?????????????????????????????????, ???????????????????????????, + ??????????????????????????? ???????????????????????????????????? ?????? ???????????????????????????????????? ????????????????????????????????? ??????????????????????????????. + +Russian: + + From a Unicode conference invitation: + + ?????????????????????????????????? ???????????? ???? ?????????????? ?????????????????????????? ?????????????????????? ???? + Unicode, ?????????????? ?????????????????? 10-12 ?????????? 1997 ???????? ?? ???????????? ?? ????????????????. + ?????????????????????? ?????????????? ?????????????? ???????? ?????????????????? ???? ???????????????? ?????????????????????? + ?????????????????? ?? Unicode, ?????????????????????? ?? ??????????????????????????????????????, ???????????????????? ?? + ???????????????????? Unicode ?? ?????????????????? ???????????????????????? ???????????????? ?? ?????????????????????? + ??????????????????????, ??????????????, ?????????????? ?? ???????????????????????? ???????????????????????? ????????????????. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ??? ?????????????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? + ???????????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??? + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ????????? ??????????????? ????????? ?????????????????? + ?????? ????????? ?????????????????? ?????????????????? + ?????? ???????????? ???????????? ????????? + ?????? ???????????? ?????? ???????????? ????????? ?????????????????? + ????????? ???????????? ????????? ?????????????????? + ????????? ????????? ?????? ???????????? + ??????????????? ?????????????????? + ?????? ???????????? ??????????????? ???????????? ??????????????? + ?????? ???????????? ???????????? ???????????? + ?????? ??????????????? ????????? ????????? ???????????? ???????????????????????? + ???????????? ?????????????????? ????????? ??????????????? ?????????????????? + ??????????????? ????????? ???????????? ????????? ???????????? ??????????????? + ?????? ??????????????? ????????? ??????????????? + ????????? ???????????? ???????????? ????????? ?????? ??????????????? + ??????????????? ????????? ?????? ???????????? ????????? ???????????? + ???????????? ????????? ???????????? ????????? + ???????????? ?????? ????????? ???????????? ?????????????????? + ??????????????? ??????????????? ?????? ???????????? + +Runes: + + ?????? ???????????? ????????? ?????? ???????????? ?????? ????????? ??????????????? ????????????????????????????????? ????????? ?????? ???????????? + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ???????????? ????????? ????????????????????? ????????? + + ??????????????? ????????? ??????????????? ?????? ???????????? ???????????? ????????? ?????? ?????? ???????????? + ?????????????????? ???????????? ???????????? ?????? ?????????????????? ?????? ????????? ?????????????????? ????????? + ??????????????? ?????? ?????? ??????????????????????????? ?????? ??????????????? ?????? ??????????????????????????? + ????????? ?????? ???????????? ?????????????????? ????????????????????? ??????????????? ????????? ????????? + ??????????????????????????? ???????????? ????????? ???????????? ???????????? ????????????????????? ????????? ?????????????????? ?????? + ???????????? ?????? ????????? ????????? ???????????? ????????? + + ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + ???????????? ??? ??????????????? ???????????? ?????? ????????? ????????? ??? ???????????? ?????? ?????? + ?????? ???????????????????????? ????????? ????????? ?????? ?????????????????????????????? ???????????? ???????????? + ??? ?????????????????????????????? ??? ???????????? ???????????? ????????? ?????????????????? ????????????????????? ?????? + ??????????????? ??? ?????????????????????????????? ?????? ?????? ?????????????????? ??????????????? ?????? ?????????????????????????????? + ??? ?????? ?????????????????? ????????? ?????? ?????????????????? ?????? ?????? ???????????????????????? + ?????? ??? ?????? ????????????????????? ????????? ?????? ???????????????????????? ??????????????? + ???????????? ????????? ?????????????????? ????????? ?????? ?????? ???????????????????????? ???????????? ???????????? ?????? + ???????????? ????????????????????? ??????????????? ?????? ?????? ????????????????????? ??????????????????????????????????????? ????????? + ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz ?????????????????????? + ???????????????????????????????????????????? ???????????????????? ???????????????????? + ???????????????????????? ??????????????? ???????????????????????? ????????????????????????????????????? + +Greetings in various languages: + + Hello world, ????????????????? ???????????, ??????????????? + +Box drawing alignment tests: ??? + ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??????????????????????????? ????????? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ?????? ??? ?????? ?????? ?????? ????????????????????? ????????????????????? ????????????????????? ???????????? ???????????? ??? ???????????? ??? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ?????????????????? ???????????? ??? ???????????? ??? ???????????????????????? + ?????????????????? Added: trunk/util.lisp ============================================================================== --- (empty file) +++ trunk/util.lisp Thu May 1 02:25:14 2008 @@ -0,0 +1,166 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.13 2007/01/01 23:46:49 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +#+:lispworks +(eval-when (:compile-toplevel :load-toplevel :execute) + (import 'lw:with-unique-names)) + +#-:lispworks +(defmacro with-unique-names ((&rest bindings) &body body) + "Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form* + +Executes a series of forms with each VAR bound to a fresh, +uninterned symbol. The uninterned symbol is as if returned by a call +to GENSYM with the string denoted by X - or, if X is not supplied, the +string denoted by VAR - as argument. + +The variable bindings created are lexical unless special declarations +are specified. The scopes of the name bindings and declarations do not +include the Xs. + +The forms are evaluated in order, and the values of all but the last +are discarded \(that is, the body is an implicit PROGN)." + ;; reference implementation posted to comp.lang.lisp as + ;; by Vebjorn Ljosa - see also + ;; + `(let ,(mapcar #'(lambda (binding) + (check-type binding (or cons symbol)) + (if (consp binding) + (destructuring-bind (var x) binding + (check-type var symbol) + `(,var (gensym ,(etypecase x + (symbol (symbol-name x)) + (character (string x)) + (string x))))) + `(,binding (gensym ,(symbol-name binding))))) + bindings) + , at body)) + +#+:lispworks +(eval-when (:compile-toplevel :load-toplevel :execute) + (setf (macro-function 'with-rebinding) + (macro-function 'lw:rebinding))) + +#-:lispworks +(defmacro with-rebinding (bindings &body body) + "WITH-REBINDING ( { var | (var prefix) }* ) form* + +Evaluates a series of forms in the lexical environment that is +formed by adding the binding of each VAR to a fresh, uninterned +symbol, and the binding of that fresh, uninterned symbol to VAR's +original value, i.e., its value in the current lexical environment. + +The uninterned symbol is created as if by a call to GENSYM with the +string denoted by PREFIX - or, if PREFIX is not supplied, the string +denoted by VAR - as argument. + +The forms are evaluated in order, and the values of all but the last +are discarded \(that is, the body is an implicit PROGN)." + ;; reference implementation posted to comp.lang.lisp as + ;; by Vebjorn Ljosa - see also + ;; + (loop for binding in bindings + for var = (if (consp binding) (car binding) binding) + for name = (gensym) + collect `(,name ,var) into renames + collect ``(,,var ,,name) into temps + finally (return `(let ,renames + (with-unique-names ,bindings + `(let (,, at temps) + ,, at body)))))) + +(defun normalize-external-format-name (name) + "Converts NAME \(a symbol) to a `canonical' name for an +external format, e.g. :LATIN1 will be converted to :ISO-8859-1. +Also checks if there is an external format with that name and +signals an error otherwise." + (let ((real-name (or (cdr (assoc name +name-map+ + :test #'eq)) + name))) + (unless (find real-name +name-map+ + :test #'eq + :key #'cdr) + (error "~S is not known to be a name for an external format." name)) + real-name)) + +(defun ascii-name-p (name) + "Checks whether NAME is the keyword :ASCII." + (eq name :us-ascii)) + +(defun koi8-r-name-p (name) + "Checks whether NAME is the keyword :KOI8-R." + (eq name :koi8-r)) + +(defun code-page-name-p (name) + "Checks whether NAME is the keyword :CODE-PAGE." + (eq name :code-page)) + +(defun iso-8859-name-p (name) + "Checks whether NAME \(a keyword) names one of the known +ISO-8859 encodings." + (find name +iso-8859-tables+ :key #'car)) + +(defun known-code-page-id-p (id) + "Checks whether ID \(a number) denotes one of the known Windows +code pages." + (and (find id +code-page-tables+ :key #'car) + id)) + +#+:lispworks +(defun sans (plist &rest keys) + "Returns PLIST with keyword arguments from KEYS removed." + (sys::remove-properties plist keys)) + +#-:lispworks +(defun sans (plist &rest keys) + "Returns PLIST with keyword arguments from KEYS removed." + ;; stolen from Usenet posting <3247672165664225 at naggum.no> by Erik + ;; Naggum + (let ((sans ())) + (loop + (let ((tail (nth-value 2 (get-properties plist keys)))) + ;; this is how it ends + (unless tail + (return (nreconc sans plist))) + ;; copy all the unmatched keys + (loop until (eq plist tail) do + (push (pop plist) sans) + (push (pop plist) sans)) + ;; skip the matched key + (setq plist (cddr plist)))))) + +#+:lispworks +(defmacro with-accessors (slot-entries instance &body body) + "For LispWorks, we prefer SLOT-VALUE over accessors for better +performance." + `(with-slots ,(mapcar #'car slot-entries) + ,instance + , at body)) \ No newline at end of file From hhubner at common-lisp.net Thu May 1 06:27:40 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 02:27:40 -0400 (EDT) Subject: [flexi-streams-cvs] r2 - branches Message-ID: <20080501062740.8A3007903C@common-lisp.net> Author: hhubner Date: Thu May 1 02:27:40 2008 New Revision: 2 Added: branches/ Log: Create branches directory From hhubner at common-lisp.net Thu May 1 06:27:59 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 02:27:59 -0400 (EDT) Subject: [flexi-streams-cvs] r3 - branches/hans Message-ID: <20080501062759.D8AAB7903C@common-lisp.net> Author: hhubner Date: Thu May 1 02:27:53 2008 New Revision: 3 Added: branches/hans/ - copied from r2, trunk/ Log: Create performance optimization branch From hhubner at common-lisp.net Thu May 1 06:31:48 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 02:31:48 -0400 (EDT) Subject: [flexi-streams-cvs] r4 - branches/hans Message-ID: <20080501063148.4621912069@common-lisp.net> Author: hhubner Date: Thu May 1 02:31:46 2008 New Revision: 4 Modified: branches/hans/input.lisp branches/hans/stream.lisp branches/hans/strings.lisp Log: commit first set of changes to speed up octets-to-string Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 02:31:46 2008 @@ -242,34 +242,78 @@ (decf position) (push #.(char-code #\Return) octet-stack))))) +(defun buffer-code-char (buffer char-code) + "Given a BUFFER, which is assumed to be a + to-string-conversion-buffer (see strings.lisp) and a character + code, convert to a character and perform newline processing for the + stream if the character is a #\Return. This code basically repeats + what the stream-read-char ((stream flexi-cr-mixin)) does, but it + does so in an optimized manner to make octet->string conversion + faster." + (declare (optimize speed (safety 0)) + (type to-string-conversion-buffer buffer) + (type fixnum char-code)) + (let ((char (code-char char-code))) + (if (eql char #\Return) + (case (tscb-eol-style buffer) + (:cr + #\Newline) + (:crlf + (cond + ((= (tscb-position buffer) (tscb-end buffer)) + :eof) + ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer))) + (incf (tscb-position buffer)) + #\Newline) + (t + #\Return))) + (t + #\Return)) + char))) +(declaim (inline buffer-code-char)) + (defmacro define-char-reader ((stream-var stream-class) &body body) - "Helper macro to define methods for STREAM-READ-CHAR. Defines a -method for the class STREAM-CLASS using the variable STREAM-VAR and -the code body BODY wrapped with some standard code common to all -methods defined here. The return value of BODY is a character code. -In case of encoding problems, BODY must return the value returned by -\(RECOVER-FROM-ENCODING-ERROR ...)." - (with-unique-names (char-code body-fn) - `(defmethod stream-read-char ((,stream-var ,stream-class)) - "This method was generated with the DEFINE-CHAR-READER macro." - (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library - (with-accessors ((last-octet flexi-stream-last-octet) - (last-char-code flexi-stream-last-char-code)) - ,stream-var - ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after - ;; this operation - (setq last-octet nil) - (let ((,char-code - (flet ((,body-fn () , at body)) - (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) - (,body-fn)))) - ;; remember this character and the current external format - ;; for UNREAD-CHAR - (setq last-char-code ,char-code) - (or (code-char ,char-code) ,char-code)))))) + "Helper macro to define methods for STREAM-READ-CHAR and +BUFFER-READ-CHAR. Defines a method for the class STREAM-CLASS using +the variable STREAM-VAR and the code body BODY wrapped with some +standard code common to all methods defined here. The return value of +BODY is a character code. In case of encoding problems, BODY must +return the value returned by \(RECOVER-FROM-ENCODING-ERROR ...). In +addition, a method on BUFFER-READ-CHAR is defined with the first +argument being the buffer, the second argument a STREAM-CLASS instance +used only for dispatching. The BUFFER-READ-CHAR generic function is +used to shortcut through the streams mechanic from the +OCTETS-TO-STRING function." + (with-unique-names (char-code body-fn dummy-stream) + (let ((body body)) + `(progn + (defmethod stream-read-char ((,stream-var ,stream-class)) + "This method was generated with the DEFINE-CHAR-READER macro." + (declare (optimize speed)) + ;; note that we do nothing for the :LF EOL style because we + ;; assume that #\Newline is the same as #\Linefeed in all + ;; Lisps which will use this library + (with-accessors ((last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) + ,stream-var + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (let ((,char-code + (flet ((,body-fn () , at body)) + (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) + (,body-fn)))) + ;; remember this character and the current external format + ;; for UNREAD-CHAR + (setq last-char-code ,char-code) + (or (code-char ,char-code) ,char-code)))) + (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class)) + (declare (optimize speed)) + (declare (ignore ,dummy-stream)) ; used only for dispatch + (block stream-read-char ;; for RETURN-FROM in BODY + (let ((,char-code (progn , at body))) + (declare (type fixnum ,char-code)) + (or (buffer-code-char ,stream-var ,char-code) ,char-code)))))))) (defun recover-from-encoding-error (flexi-stream format-control &rest format-args) "Helper function used by the STREAM-READ-CHAR methods below to deal @@ -582,4 +626,4 @@ (t (= octet peek-type))) finally (unless (eql octet eof-value) (unread-byte octet flexi-input-stream)) - (return octet))) \ No newline at end of file + (return octet))) Modified: branches/hans/stream.lisp ============================================================================== --- branches/hans/stream.lisp (original) +++ branches/hans/stream.lisp Thu May 1 02:31:46 2008 @@ -509,46 +509,50 @@ ;; (set-class stream)) +(defun input-stream-class-name (external-format) + "Given an EXTERNAL-FORMAT, return the flexi-stream class name that + needs to be used for reading such encoded data. Returns the class' + name (a symbol)." + (declare (optimize speed)) + (let ((external-format-name (external-format-name external-format)) + (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) + (cond ((ascii-name-p external-format-name) + (if external-format-cr + 'flexi-cr-ascii-input-stream + 'flexi-ascii-input-stream)) + ((eq external-format-name :iso-8859-1) + (if external-format-cr + 'flexi-cr-latin-1-input-stream + 'flexi-latin-1-input-stream)) + ((or (koi8-r-name-p external-format-name) + (iso-8859-name-p external-format-name) + (code-page-name-p external-format-name)) + (if external-format-cr + 'flexi-cr-8-bit-input-stream + 'flexi-8-bit-input-stream)) + (t (case external-format-name + (:utf-8 (if external-format-cr + 'flexi-cr-utf-8-input-stream + 'flexi-utf-8-input-stream)) + (:utf-16 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-16-le-input-stream + 'flexi-cr-utf-16-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-16-le-input-stream + 'flexi-utf-16-be-input-stream))) + (:utf-32 (if external-format-cr + (if (external-format-little-endian external-format) + 'flexi-cr-utf-32-le-input-stream + 'flexi-cr-utf-32-be-input-stream) + (if (external-format-little-endian external-format) + 'flexi-utf-32-le-input-stream + 'flexi-utf-32-be-input-stream)))))))) + (defmethod set-class ((stream flexi-input-stream)) "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-input-stream - 'flexi-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-input-stream - 'flexi-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-input-stream - 'flexi-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-input-stream - 'flexi-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-input-stream - 'flexi-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-input-stream - 'flexi-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-input-stream - 'flexi-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-input-stream - 'flexi-utf-32-be-input-stream)))))))))) + (change-class stream + (input-stream-class-name (flexi-stream-external-format stream)))) (defmethod set-class ((stream flexi-output-stream)) "Changes the actual class of STREAM depending on its external format." Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 02:31:46 2008 @@ -38,19 +38,58 @@ (let ((flexi (make-flexi-stream out :external-format external-format))) (write-string string flexi :start start :end end)))) +;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion +;; of octets to strings, circumventing streams. + +(defstruct (to-string-conversion-buffer + (:conc-name tscb-)) + (vector nil :type (simple-array (unsigned-byte 8) *)) + (position nil :type fixnum) + (end nil :type fixnum) + (eol-style nil :type (or null symbol))) + +(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer)) + (declare (optimize speed (safety 0))) + (let ((position (tscb-position to-string-conversion-buffer))) + (when (< position (tscb-end to-string-conversion-buffer)) + (prog1 + (aref (tscb-vector to-string-conversion-buffer) position) + (incf (tscb-position to-string-conversion-buffer)))))) + (defun octets-to-string (vector &key (external-format (make-external-format :latin1)) (start 0) (end (length vector))) "Converts the Lisp vector VECTOR of octets from START to END to string using the external format EXTERNAL-FORMAT." + (declare (optimize speed (safety 0))) + (declare (type (simple-array (unsigned-byte 8) *) vector) + (type fixnum start end)) + (let ((buffer (make-to-string-conversion-buffer :vector vector + :position start + :end end + :eol-style (external-format-eol-style external-format))) + (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format)) + (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0))) + (declare (type (array character (*)) string)) + (do ((char (buffer-read-char buffer dummy-input-stream) + (buffer-read-char buffer dummy-input-stream))) + ((eql char :eof) + string) + (vector-push char string)))) + +(defun octets-to-string* (vector &key (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) + "Converts the Lisp vector VECTOR of octets from START to END to +string using the external format EXTERNAL-FORMAT." + ;; This version of OCTETS-TO-STRING is here so that one can do speed + ;; comparisons. It should be significantly slower than the version + ;; above. (declare (optimize speed)) (with-input-from-sequence (in vector :start start :end end) (let ((flexi (make-flexi-stream in :external-format external-format)) (result (make-array (- end start) :element-type #+:lispworks 'lw:simple-char - #-:lispworks 'character + #-:lispworks 'character :fill-pointer t))) (setf (fill-pointer result) (read-sequence result flexi)) result))) - - From hhubner at common-lisp.net Thu May 1 13:41:06 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 09:41:06 -0400 (EDT) Subject: [flexi-streams-cvs] r5 - branches/hans Message-ID: <20080501134106.45C93590A0@common-lisp.net> Author: hhubner Date: Thu May 1 09:41:05 2008 New Revision: 5 Added: branches/hans/test-speed.lisp Modified: branches/hans/flexi-streams.asd branches/hans/input.lisp branches/hans/stream.lisp branches/hans/strings.lisp Log: Speed up string-to-octets by shortcutting through the streams mechanic. Modified: branches/hans/flexi-streams.asd ============================================================================== --- branches/hans/flexi-streams.asd (original) +++ branches/hans/flexi-streams.asd Thu May 1 09:41:05 2008 @@ -49,8 +49,8 @@ (:file "stream") #+:lispworks (:file "lw-binary-stream") (:file "output") - (:file "input") - (:file "strings")) + (:file "strings") + (:file "input")) :depends-on (:trivial-gray-streams)) (defsystem :flexi-streams-test Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 09:41:05 2008 @@ -43,8 +43,8 @@ (octet-stack flexi-stream-octet-stack) (s flexi-stream-stream)) flexi-input-stream - (declare (integer position) - (type (or null integer) bound)) + (declare (fixnum position) + (type (or null fixnum) bound)) (when (and bound (>= position bound)) (return-from read-byte* nil)) @@ -290,9 +290,6 @@ (defmethod stream-read-char ((,stream-var ,stream-class)) "This method was generated with the DEFINE-CHAR-READER macro." (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library (with-accessors ((last-octet flexi-stream-last-octet) (last-char-code flexi-stream-last-char-code)) ,stream-var @@ -507,6 +504,9 @@ stream (when (eql char #\Return) (case (external-format-eol-style external-format) + ;; note that we do nothing for the :LF EOL style because we + ;; assume that #\Newline is the same as #\Linefeed in all + ;; Lisps which will use this library (:cr (setq char #\Newline last-char-code #.(char-code #\Newline))) ;; in the case :CRLF we have to look ahead one character @@ -627,3 +627,14 @@ finally (unless (eql octet eof-value) (unread-byte octet flexi-input-stream)) (return octet))) + +(defun test-buffer-code-char () + (let* ((vector (make-array 2 :element-type '(unsigned-byte 8) :initial-element (char-code #\F))) + (buffer (make-to-string-conversion-buffer :vector vector + :position 0 + :end 2 + :eol-style :nl)) + (dummy-stream (make-flexi-stream (make-string-input-stream "") :external-format (make-external-format :ascii)))) + (dotimes (i 1000000) + (null (buffer-read-char buffer dummy-stream)) + (setf (tscb-position buffer) 0)))) \ No newline at end of file Modified: branches/hans/stream.lisp ============================================================================== --- branches/hans/stream.lisp (original) +++ branches/hans/stream.lisp Thu May 1 09:41:05 2008 @@ -170,6 +170,12 @@ MAKE-INSTANCE to create a new FLEXI-OUTPUT-STREAM but use MAKE-FLEXI-STREAM instead.")) +(defgeneric flexi-stream-output-size-factor (stream) + (:documentation "The factor to determine the size of the output +buffer when converting strings to octets for this format. The size of +the buffer allocated will be the number of characters in the string to +convert multiplied by this factor.")) + #+:cmu (defmethod input-stream-p ((stream flexi-output-stream)) "Explicitly states whether this is an input stream." @@ -197,7 +203,7 @@ look ahead for a CR/LF line ending.") (position :initform 0 :initarg :position - :type integer + :type fixnum :accessor flexi-stream-position :documentation "The position within the stream where each octet read counts as one.") @@ -327,6 +333,9 @@ (:documentation "The class for all flexi output streams which use an 8-bit encoding.")) +(defmethod flexi-stream-output-size-factor ((stream flexi-8-bit-output-stream)) + 1) + (defclass flexi-cr-8-bit-output-stream (flexi-cr-mixin flexi-8-bit-output-stream) () (:documentation "The class for all flexi output streams which @@ -357,6 +366,9 @@ (:documentation "Special class for flexi output streams which use the UTF-32 encoding with little-endian byte ordering.")) +(defmethod flexi-stream-output-size-factor ((stream flexi-utf-32-le-output-stream)) + 4) + (defclass flexi-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-utf-32-le-output-stream) () (:documentation "Special class for flexi output streams which @@ -368,6 +380,9 @@ (:documentation "Special class for flexi output streams which use the UTF-32 encoding with big-endian byte ordering.")) +(defmethod flexi-stream-output-size-factor ((stream flexi-utf-32-be-output-stream)) + 4) + (defclass flexi-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-utf-32-be-output-stream) () (:documentation "Special class for flexi output streams which @@ -379,6 +394,9 @@ (:documentation "Special class for flexi output streams which use the UTF-16 encoding with little-endian byte ordering.")) +(defmethod flexi-stream-output-size-factor ((stream flexi-utf-16-le-output-stream)) + 2) + (defclass flexi-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-utf-16-le-output-stream) () (:documentation "Special class for flexi output streams which @@ -390,6 +408,9 @@ (:documentation "Special class for flexi output streams which use the UTF-16 encoding with big-endian byte ordering.")) +(defmethod flexi-stream-output-size-factor ((stream flexi-utf-16-be-output-stream)) + 2) + (defclass flexi-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-utf-16-be-output-stream) () (:documentation "Special class for flexi output streams which @@ -401,6 +422,9 @@ (:documentation "Special class for flexi output streams which use the UTF-8 encoding.")) +(defmethod flexi-stream-output-size-factor ((stream flexi-utf-8-output-stream)) + 1.25) + (defclass flexi-cr-utf-8-output-stream (flexi-cr-mixin flexi-utf-8-output-stream) () (:documentation "Special class for flexi output streams which Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 09:41:05 2008 @@ -29,11 +29,31 @@ (in-package :flexi-streams) +(defmethod write-byte* (byte (array array)) + (vector-push-extend byte array)) + (defun string-to-octets (string &key (external-format (make-external-format :latin1)) - (start 0) end) + (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of octets corresponding to the external format EXTERNAL-FORMAT." (declare (optimize speed)) + (declare (type (array character (*)) string)) + (declare (fixnum start end)) + (let* ((dummy-stream (make-flexi-stream (make-broadcast-stream) :external-format external-format)) + (octets (make-array (truncate (* (float (- end start)) (flexi-stream-output-size-factor dummy-stream))) + :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) + (loop + for i of-type fixnum from start below end + do (char-to-octets dummy-stream (aref string i) octets)) + octets)) + +(defun string-to-octets* (string &key (external-format (make-external-format :latin1)) + (start 0) end) + "Converts the Lisp string STRING from START to END to an array of +octets corresponding to the external format EXTERNAL-FORMAT. This +version of STRING-TO-OCTETS is kept around for performance +comparisons." + (declare (optimize speed)) (with-output-to-sequence (out) (let ((flexi (make-flexi-stream out :external-format external-format))) (write-string string flexi :start start :end end)))) @@ -83,6 +103,8 @@ ;; This version of OCTETS-TO-STRING is here so that one can do speed ;; comparisons. It should be significantly slower than the version ;; above. + (declare (type (simple-array (unsigned-byte 8) *) vector)) + (declare (type (integer 0 *) start end)) (declare (optimize speed)) (with-input-from-sequence (in vector :start start :end end) (let ((flexi (make-flexi-stream in :external-format external-format)) Added: branches/hans/test-speed.lisp ============================================================================== --- (empty file) +++ branches/hans/test-speed.lisp Thu May 1 09:41:05 2008 @@ -0,0 +1,92 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.51 2007/12/29 22:58:43 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defmacro without-gcing (&body body) + `(#+openmcl ccl::without-gcing + #+sbcl sb-sys:without-gcing + #-(or openmcl sbcl) + progn + , at body)) + +(defun test-speed () + (without-gcing + (let* ((character-count 10000) + (octets (make-array character-count :element-type '(unsigned-byte 8)))) + (dotimes (i character-count) + (setf (aref octets i) (+ 32 (random 96)))) + (format t "testing with latin-1 encoding, streams based~%") + (time (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :latin-1))))) + (format t "testing with utf-8 encoding, streams based~%") + (time (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :utf-8))))) + (format t "testing with latin-1 encoding, optimized~%") + (time (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :latin-1))))) + (format t "testing with utf-8 encoding, optimized~%") + (time (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :utf-8)))))))) + +(defmacro profile (&body body) + #+sbcl + `(progn + (sb-profile:reset) + (progn + , at body) + (sb-profile:report))) + + +(defun profile-speed () + #+sbcl + (sb-profile:profile "FLEX") + (without-gcing + (let* ((character-count 1000) + (octets (make-array character-count :element-type '(unsigned-byte 8)))) + (dotimes (i character-count) + (setf (aref octets i) (+ 32 (random 96)))) + (format t "profiling with latin-1 encoding, streams based~%") + (profile (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :latin-1))))) + (format t "profiling with utf-8 encoding, streams based~%") + (profile (dotimes (i 10) + (null (octets-to-string* octets :external-format (make-external-format :utf-8))))) + (format t "profiling with latin-1 encoding, optimized~%") + (profile (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :latin-1))))) + (format t "profiling with utf-8 encoding, optimized~%") + (profile (dotimes (i 10) + (null (octets-to-string octets :external-format (make-external-format :utf-8)))))))) + +(defun fixnum-or-nil (i) + (and (oddp i) #.(char-code #\f))) + +(defun fixnum-and-nil (i) + (values #.(char-code #\f) (oddp i))) \ No newline at end of file From hhubner at common-lisp.net Thu May 1 14:00:16 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 10:00:16 -0400 (EDT) Subject: [flexi-streams-cvs] r6 - branches/hans Message-ID: <20080501140016.E99ED4C045@common-lisp.net> Author: hhubner Date: Thu May 1 10:00:16 2008 New Revision: 6 Modified: branches/hans/stream.lisp Log: Fix type of POSITION slot to be INTEGER instead of FIXNUM - Thanks, Edi! Modified: branches/hans/stream.lisp ============================================================================== --- branches/hans/stream.lisp (original) +++ branches/hans/stream.lisp Thu May 1 10:00:16 2008 @@ -203,7 +203,7 @@ look ahead for a CR/LF line ending.") (position :initform 0 :initarg :position - :type fixnum + :type integer :accessor flexi-stream-position :documentation "The position within the stream where each octet read counts as one.") From eweitz at common-lisp.net Thu May 1 14:30:10 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Thu, 1 May 2008 10:30:10 -0400 (EDT) Subject: [flexi-streams-cvs] r7 - branches/hans Message-ID: <20080501143010.64DA03701D@common-lisp.net> Author: eweitz Date: Thu May 1 10:30:03 2008 New Revision: 7 Modified: branches/hans/input.lisp Log: Another fixnum that wants to be an integer Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 10:30:03 2008 @@ -43,8 +43,8 @@ (octet-stack flexi-stream-octet-stack) (s flexi-stream-stream)) flexi-input-stream - (declare (fixnum position) - (type (or null fixnum) bound)) + (declare (integer position) + (type (or null integer) bound)) (when (and bound (>= position bound)) (return-from read-byte* nil)) From hhubner at common-lisp.net Thu May 1 15:49:14 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 11:49:14 -0400 (EDT) Subject: [flexi-streams-cvs] r8 - branches/hans Message-ID: <20080501154914.701BD590BF@common-lisp.net> Author: hhubner Date: Thu May 1 11:49:13 2008 New Revision: 8 Modified: branches/hans/input.lisp branches/hans/output.lisp branches/hans/strings.lisp branches/hans/test-speed.lisp Log: Fast and unhygienic version of OCTETS-TO-STRING. This gives a 10x speedup compared to the streams-based implementation. Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 11:49:13 2008 @@ -242,35 +242,35 @@ (decf position) (push #.(char-code #\Return) octet-stack))))) -(defun buffer-code-char (buffer char-code) - "Given a BUFFER, which is assumed to be a - to-string-conversion-buffer (see strings.lisp) and a character - code, convert to a character and perform newline processing for the - stream if the character is a #\Return. This code basically repeats - what the stream-read-char ((stream flexi-cr-mixin)) does, but it - does so in an optimized manner to make octet->string conversion - faster." +(defun code-char-with-newline-processing (char-code eol-style read-char-code-fn unread-char-code-fn) + "Perform newline conversion during octets-to-string processing. +CHAR-CODE is the code of the current character. If it denotes a +#\Return character, newline processing accoring to EOL-STYLE is +performed. READ-CHAR-CODE-FN and UNREAD-CHAR-CODE-FN are called to +read the next character code from the input, unread-char-code-fn is +called to skip back in the input by one octet. All this works under +the assumption that #\Return and #\Linefeed are single-octet codes." (declare (optimize speed (safety 0)) - (type to-string-conversion-buffer buffer) - (type fixnum char-code)) + (type fixnum char-code) + (type symbol eol-style)) (let ((char (code-char char-code))) (if (eql char #\Return) - (case (tscb-eol-style buffer) + (case eol-style (:cr #\Newline) (:crlf - (cond - ((= (tscb-position buffer) (tscb-end buffer)) + (case (funcall read-char-code-fn) + (:eof :eof) - ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer))) - (incf (tscb-position buffer)) - #\Newline) + (#.(char-code #\Newline) + #\Newline) (t + (funcall unread-char-code-fn) #\Return))) (t #\Return)) char))) -(declaim (inline buffer-code-char)) +(declaim (inline code-char-with-newline-processing)) (defmacro define-char-reader ((stream-var stream-class) &body body) "Helper macro to define methods for STREAM-READ-CHAR and @@ -284,7 +284,7 @@ used only for dispatching. The BUFFER-READ-CHAR generic function is used to shortcut through the streams mechanic from the OCTETS-TO-STRING function." - (with-unique-names (char-code body-fn dummy-stream) + (with-unique-names (char-code body-fn octets-var) (let ((body body)) `(progn (defmethod stream-read-char ((,stream-var ,stream-class)) @@ -304,13 +304,33 @@ ;; for UNREAD-CHAR (setq last-char-code ,char-code) (or (code-char ,char-code) ,char-code)))) - (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class)) - (declare (optimize speed)) - (declare (ignore ,dummy-stream)) ; used only for dispatch - (block stream-read-char ;; for RETURN-FROM in BODY - (let ((,char-code (progn , at body))) - (declare (type fixnum ,char-code)) - (or (buffer-code-char ,stream-var ,char-code) ,char-code)))))))) + (defmethod octets-to-string% ((,stream-var ,stream-class) ,octets-var &key start end) + (let ((position start) + save-position + (eol-style (external-format-eol-style (flexi-stream-external-format ,stream-var))) + (string (make-array (- end start) :element-type 'character :fill-pointer 0))) + (labels ((read-byte* (stream) + (declare (ignore stream)) + (if (< position end) + (prog1 + (aref ,octets-var position) + (incf position)) + :eof)) + (read-char-code () + (setf save-position position) + (block stream-read-char ;; for RETURN-FROM in BODY + , at body)) + (unread-char-code () + (setf position save-position))) + (do ((char-code (read-char-code) (read-char-code))) + ((eql char-code :eof) + string) + (vector-push (or (code-char-with-newline-processing char-code + eol-style + #'read-char-code + #'unread-char-code) + char-code) + string))))))))) (defun recover-from-encoding-error (flexi-stream format-control &rest format-args) "Helper function used by the STREAM-READ-CHAR methods below to deal Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Thu May 1 11:49:13 2008 @@ -88,7 +88,9 @@ (defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) (declare (optimize speed)) + (declare (type character char)) (let ((octet (char-code char))) + (declare (type fixnum char-code)) (when (> octet 127) (signal-encoding-error stream "~S is not an ASCII character." char)) (write-byte* octet sink)) Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 11:49:13 2008 @@ -58,24 +58,6 @@ (let ((flexi (make-flexi-stream out :external-format external-format))) (write-string string flexi :start start :end end)))) -;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion -;; of octets to strings, circumventing streams. - -(defstruct (to-string-conversion-buffer - (:conc-name tscb-)) - (vector nil :type (simple-array (unsigned-byte 8) *)) - (position nil :type fixnum) - (end nil :type fixnum) - (eol-style nil :type (or null symbol))) - -(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer)) - (declare (optimize speed (safety 0))) - (let ((position (tscb-position to-string-conversion-buffer))) - (when (< position (tscb-end to-string-conversion-buffer)) - (prog1 - (aref (tscb-vector to-string-conversion-buffer) position) - (incf (tscb-position to-string-conversion-buffer)))))) - (defun octets-to-string (vector &key (external-format (make-external-format :latin1)) (start 0) (end (length vector))) "Converts the Lisp vector VECTOR of octets from START to END to @@ -83,18 +65,9 @@ (declare (optimize speed (safety 0))) (declare (type (simple-array (unsigned-byte 8) *) vector) (type fixnum start end)) - (let ((buffer (make-to-string-conversion-buffer :vector vector - :position start - :end end - :eol-style (external-format-eol-style external-format))) - (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format)) - (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0))) - (declare (type (array character (*)) string)) - (do ((char (buffer-read-char buffer dummy-input-stream) - (buffer-read-char buffer dummy-input-stream))) - ((eql char :eof) - string) - (vector-push char string)))) + (octets-to-string% (make-flexi-stream (make-string-input-stream "") :external-format external-format) + vector + :start start :end end)) (defun octets-to-string* (vector &key (external-format (make-external-format :latin1)) (start 0) (end (length vector))) Modified: branches/hans/test-speed.lisp ============================================================================== --- branches/hans/test-speed.lisp (original) +++ branches/hans/test-speed.lisp Thu May 1 11:49:13 2008 @@ -43,16 +43,16 @@ (dotimes (i character-count) (setf (aref octets i) (+ 32 (random 96)))) (format t "testing with latin-1 encoding, streams based~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string* octets :external-format (make-external-format :latin-1))))) (format t "testing with utf-8 encoding, streams based~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string* octets :external-format (make-external-format :utf-8))))) (format t "testing with latin-1 encoding, optimized~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string octets :external-format (make-external-format :latin-1))))) (format t "testing with utf-8 encoding, optimized~%") - (time (dotimes (i 10) + (time (dotimes (i 100) (null (octets-to-string octets :external-format (make-external-format :utf-8)))))))) (defmacro profile (&body body) From hhubner at common-lisp.net Thu May 1 16:26:48 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 1 May 2008 12:26:48 -0400 (EDT) Subject: [flexi-streams-cvs] r9 - branches/hans Message-ID: <20080501162648.09BFD7903C@common-lisp.net> Author: hhubner Date: Thu May 1 12:26:47 2008 New Revision: 9 Modified: branches/hans/input.lisp branches/hans/output.lisp branches/hans/strings.lisp Log: Checkpoint fast STRING-TO-OCTETS implpementation Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 1 12:26:47 2008 @@ -309,6 +309,16 @@ save-position (eol-style (external-format-eol-style (flexi-stream-external-format ,stream-var))) (string (make-array (- end start) :element-type 'character :fill-pointer 0))) + ;; High-speed version of OCTETS-TO-STRING: We need to + ;; implement this as a macro as we want to stay with the + ;; old "inner" API for bodies of character readers. In + ;; particular, they shall be able to call (READ-BYTE* + ;; STREAM) as before. To achive this, we create a local + ;; function READ-BYTE* that gets the next byte from the + ;; input vector. Additionally, we create local functions + ;; for reading characters in a loop and for unreading a + ;; character that is used by the newline processing + ;; function CODE-CHAR-WITH-NEWLINE-PROCESSING. (labels ((read-byte* (stream) (declare (ignore stream)) (if (< position end) Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Thu May 1 12:26:47 2008 @@ -78,17 +78,35 @@ (declare (optimize speed)) (char-to-octets stream char stream)) -(defmethod char-to-octets ((stream flexi-latin-1-output-stream) char sink) - (declare (optimize speed)) +(defmacro define-char-writer (((stream-var stream-class) char-var sink-var) &body body) + (let ((body body)) + (with-unique-names (string-var start-var end-var dummy-sink-var byte-var i-var) + `(progn + (defmethod char-to-octets ((,stream-var ,stream-class) ,char-var ,sink-var) + (declare (optimize speed)) + , at body) + (defmethod string-to-octets% ((,stream-var ,stream-class) ,string-var ,start-var ,end-var) + (declare (optimize speed)) + (let ((,sink-var (make-array (truncate (* (float (- ,end-var ,start-var)) + (flexi-stream-output-size-factor ,stream-var))) + :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) + (loop + for ,i-var of-type fixnum from ,start-var below ,end-var + for ,char-var of-type character = (aref ,string-var ,i-var) + do (flet ((write-byte* (,byte-var ,dummy-sink-var) + (declare (ignore ,dummy-sink-var)) + (vector-push-extend ,byte-var ,sink-var))) + , at body)) + ,sink-var)))))) + +(define-char-writer ((stream flexi-latin-1-output-stream) char sink) (let ((octet (char-code char))) (when (> octet 255) (signal-encoding-error stream "~S is not a LATIN-1 character." char)) (write-byte* octet sink)) char) -(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) - (declare (optimize speed)) - (declare (type character char)) +(define-char-writer ((stream flexi-ascii-output-stream) char sink) (let ((octet (char-code char))) (declare (type fixnum char-code)) (when (> octet 127) @@ -96,8 +114,7 @@ (write-byte* octet sink)) char) -(defmethod char-to-octets ((stream flexi-8-bit-output-stream) char sink) - (declare (optimize speed)) +(define-char-writer ((stream flexi-8-bit-output-stream) char sink) (with-accessors ((encoding-hash flexi-stream-encoding-hash)) stream (let ((octet (gethash (char-code char) encoding-hash))) @@ -106,8 +123,7 @@ (write-byte* octet sink)) char)) -(defmethod char-to-octets ((stream flexi-utf-8-output-stream) char sink) - (declare (optimize speed)) +(define-char-writer ((stream flexi-utf-8-output-stream) char sink) (let ((char-code (char-code char))) (tagbody (cond ((< char-code #x80) @@ -138,8 +154,7 @@ zero)) char) -(defmethod char-to-octets ((stream flexi-utf-16-le-output-stream) char sink) - (declare (optimize speed)) +(define-char-writer ((stream flexi-utf-16-le-output-stream) char sink) (flet ((write-word (word) (write-byte* (ldb (byte 8 0) word) sink) (write-byte* (ldb (byte 8 8) word) sink))) @@ -152,8 +167,7 @@ (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) char) -(defmethod char-to-octets ((stream flexi-utf-16-be-output-stream) char sink) - (declare (optimize speed)) +(define-char-writer ((stream flexi-utf-16-be-output-stream) char sink) (flet ((write-word (word) (write-byte* (ldb (byte 8 8) word) sink) (write-byte* (ldb (byte 8 0) word) sink))) @@ -166,25 +180,22 @@ (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) char) -(defmethod char-to-octets ((stream flexi-utf-32-le-output-stream) char sink) - (declare (optimize speed)) +(define-char-writer ((stream flexi-utf-32-le-output-stream) char sink) (loop with char-code = (char-code char) for position in '(0 8 16 24) do (write-byte* (ldb (byte 8 position) char-code) sink)) char) -(defmethod char-to-octets ((stream flexi-utf-32-be-output-stream) char sink) - (declare (optimize speed)) +(define-char-writer ((stream flexi-utf-32-be-output-stream) char sink) (loop with char-code = (char-code char) for position in '(24 16 8 0) do (write-byte* (ldb (byte 8 position) char-code) sink)) char) -(defmethod char-to-octets ((stream flexi-cr-mixin) char sink) +(define-char-writer ((stream flexi-cr-mixin) char sink) "The `base' method for all streams which need end-of-line conversion. Uses CALL-NEXT-METHOD to do the actual work of sending one or more characters to SINK." - (declare (optimize speed)) (with-accessors ((external-format flexi-stream-external-format)) stream (case char Modified: branches/hans/strings.lisp ============================================================================== --- branches/hans/strings.lisp (original) +++ branches/hans/strings.lisp Thu May 1 12:26:47 2008 @@ -39,13 +39,8 @@ (declare (optimize speed)) (declare (type (array character (*)) string)) (declare (fixnum start end)) - (let* ((dummy-stream (make-flexi-stream (make-broadcast-stream) :external-format external-format)) - (octets (make-array (truncate (* (float (- end start)) (flexi-stream-output-size-factor dummy-stream))) - :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) - (loop - for i of-type fixnum from start below end - do (char-to-octets dummy-stream (aref string i) octets)) - octets)) + (string-to-octets% (make-flexi-stream (make-broadcast-stream) :external-format external-format) + string start end)) (defun string-to-octets* (string &key (external-format (make-external-format :latin1)) (start 0) end) From hhubner at common-lisp.net Fri May 2 12:59:37 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Fri, 2 May 2008 08:59:37 -0400 (EDT) Subject: [flexi-streams-cvs] r10 - branches/hans Message-ID: <20080502125937.AA9323D0B5@common-lisp.net> Author: hhubner Date: Fri May 2 08:59:37 2008 New Revision: 10 Modified: branches/hans/input.lisp Log: Fix READ-BYTE* local function for OCTETS-TO-STRING% - Need to return NIL, not :EOF on end of file. Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Fri May 2 08:59:37 2008 @@ -321,11 +321,10 @@ ;; function CODE-CHAR-WITH-NEWLINE-PROCESSING. (labels ((read-byte* (stream) (declare (ignore stream)) - (if (< position end) - (prog1 - (aref ,octets-var position) - (incf position)) - :eof)) + (when (< position end) + (prog1 + (aref ,octets-var position) + (incf position)))) (read-char-code () (setf save-position position) (block stream-read-char ;; for RETURN-FROM in BODY From hhubner at common-lisp.net Tue May 6 13:27:03 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Tue, 6 May 2008 09:27:03 -0400 (EDT) Subject: [flexi-streams-cvs] r11 - branches/hans Message-ID: <20080506132703.E2B3B2E2D5@common-lisp.net> Author: hhubner Date: Tue May 6 09:27:01 2008 New Revision: 11 Modified: branches/hans/input.lisp Log: Update docstring Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Tue May 6 09:27:01 2008 @@ -274,16 +274,16 @@ (defmacro define-char-reader ((stream-var stream-class) &body body) "Helper macro to define methods for STREAM-READ-CHAR and -BUFFER-READ-CHAR. Defines a method for the class STREAM-CLASS using +OCTETS-TO-STRING%. Defines a method for the class STREAM-CLASS using the variable STREAM-VAR and the code body BODY wrapped with some standard code common to all methods defined here. The return value of BODY is a character code. In case of encoding problems, BODY must return the value returned by \(RECOVER-FROM-ENCODING-ERROR ...). In -addition, a method on BUFFER-READ-CHAR is defined with the first -argument being the buffer, the second argument a STREAM-CLASS instance -used only for dispatching. The BUFFER-READ-CHAR generic function is -used to shortcut through the streams mechanic from the -OCTETS-TO-STRING function." +addition, a method on OCTETS-TO-STRING% is defined with the first +argument being the STREAM-CLASS (which is used only for dispatching), +the second argument being the vector of octets to convert and the +BEGIN and END keyword arguments which can be used to limit the +conversion to a subsequence of the octet vector." (with-unique-names (char-code body-fn octets-var) (let ((body body)) `(progn @@ -655,15 +655,4 @@ (t (= octet peek-type))) finally (unless (eql octet eof-value) (unread-byte octet flexi-input-stream)) - (return octet))) - -(defun test-buffer-code-char () - (let* ((vector (make-array 2 :element-type '(unsigned-byte 8) :initial-element (char-code #\F))) - (buffer (make-to-string-conversion-buffer :vector vector - :position 0 - :end 2 - :eol-style :nl)) - (dummy-stream (make-flexi-stream (make-string-input-stream "") :external-format (make-external-format :ascii)))) - (dotimes (i 1000000) - (null (buffer-read-char buffer dummy-stream)) - (setf (tscb-position buffer) 0)))) \ No newline at end of file + (return octet))) \ No newline at end of file From hhubner at common-lisp.net Tue May 6 16:15:04 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Tue, 6 May 2008 12:15:04 -0400 (EDT) Subject: [flexi-streams-cvs] r12 - branches/hans Message-ID: <20080506161504.2890346120@common-lisp.net> Author: hhubner Date: Tue May 6 12:15:02 2008 New Revision: 12 Modified: branches/hans/output.lisp Log: remove spurious declaration Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Tue May 6 12:15:02 2008 @@ -108,7 +108,6 @@ (define-char-writer ((stream flexi-ascii-output-stream) char sink) (let ((octet (char-code char))) - (declare (type fixnum char-code)) (when (> octet 127) (signal-encoding-error stream "~S is not an ASCII character." char)) (write-byte* octet sink)) From hhubner at common-lisp.net Thu May 8 16:18:16 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Thu, 8 May 2008 12:18:16 -0400 (EDT) Subject: [flexi-streams-cvs] r13 - branches/hans Message-ID: <20080508161816.6961316380@common-lisp.net> Author: hhubner Date: Thu May 8 12:18:11 2008 New Revision: 13 Modified: branches/hans/input.lisp branches/hans/output.lisp Log: Incorporate review comments from Edi Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Thu May 8 12:18:11 2008 @@ -242,40 +242,39 @@ (decf position) (push #.(char-code #\Return) octet-stack))))) +(declaim (inline code-char-with-newline-processing)) (defun code-char-with-newline-processing (char-code eol-style read-char-code-fn unread-char-code-fn) "Perform newline conversion during octets-to-string processing. CHAR-CODE is the code of the current character. If it denotes a -#\Return character, newline processing accoring to EOL-STYLE is +#\Return character, newline processing according to EOL-STYLE is performed. READ-CHAR-CODE-FN and UNREAD-CHAR-CODE-FN are called to read the next character code from the input, unread-char-code-fn is called to skip back in the input by one octet. All this works under the assumption that #\Return and #\Linefeed are single-octet codes." - (declare (optimize speed (safety 0)) - (type fixnum char-code) - (type symbol eol-style)) - (let ((char (code-char char-code))) - (if (eql char #\Return) - (case eol-style - (:cr - #\Newline) - (:crlf - (case (funcall read-char-code-fn) - (:eof - :eof) - (#.(char-code #\Newline) - #\Newline) - (t - (funcall unread-char-code-fn) - #\Return))) - (t - #\Return)) - char))) -(declaim (inline code-char-with-newline-processing)) + (if (eql char-code :eof) + (return-from code-char-with-newline-processing :eof) + (let ((char (code-char char-code))) + (if (eql char #\Return) + (case eol-style + (:cr + #\Newline) + (:crlf + (case (funcall read-char-code-fn) + (:eof + :eof) + (#.(char-code #\Linefeed) + #\Newline) + (t + (funcall unread-char-code-fn) + #\Return))) + (t + #\Return)) + char)))) -(defmacro define-char-reader ((stream-var stream-class) &body body) +(defmacro define-char-reader ((stream stream-class) &body body) "Helper macro to define methods for STREAM-READ-CHAR and OCTETS-TO-STRING%. Defines a method for the class STREAM-CLASS using -the variable STREAM-VAR and the code body BODY wrapped with some +the variable STREAM and the code body BODY wrapped with some standard code common to all methods defined here. The return value of BODY is a character code. In case of encoding problems, BODY must return the value returned by \(RECOVER-FROM-ENCODING-ERROR ...). In @@ -284,15 +283,15 @@ the second argument being the vector of octets to convert and the BEGIN and END keyword arguments which can be used to limit the conversion to a subsequence of the octet vector." - (with-unique-names (char-code body-fn octets-var) + (with-unique-names (char-code body-fn octets) (let ((body body)) `(progn - (defmethod stream-read-char ((,stream-var ,stream-class)) + (defmethod stream-read-char ((,stream ,stream-class)) "This method was generated with the DEFINE-CHAR-READER macro." (declare (optimize speed)) (with-accessors ((last-octet flexi-stream-last-octet) (last-char-code flexi-stream-last-char-code)) - ,stream-var + ,stream ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after ;; this operation (setq last-octet nil) @@ -304,16 +303,18 @@ ;; for UNREAD-CHAR (setq last-char-code ,char-code) (or (code-char ,char-code) ,char-code)))) - (defmethod octets-to-string% ((,stream-var ,stream-class) ,octets-var &key start end) + (defmethod octets-to-string% ((,stream ,stream-class) ,octets &key start end) (let ((position start) save-position - (eol-style (external-format-eol-style (flexi-stream-external-format ,stream-var))) - (string (make-array (- end start) :element-type 'character :fill-pointer 0))) + (eol-style (external-format-eol-style (flexi-stream-external-format ,stream))) + (string (make-array (- end start) + :element-type #-:lispworks 'character #+:lispworks 'lw:simple-char + :fill-pointer 0))) ;; High-speed version of OCTETS-TO-STRING: We need to ;; implement this as a macro as we want to stay with the ;; old "inner" API for bodies of character readers. In ;; particular, they shall be able to call (READ-BYTE* - ;; STREAM) as before. To achive this, we create a local + ;; STREAM) as before. To achieve this, we create a local ;; function READ-BYTE* that gets the next byte from the ;; input vector. Additionally, we create local functions ;; for reading characters in a loop and for unreading a @@ -323,7 +324,7 @@ (declare (ignore stream)) (when (< position end) (prog1 - (aref ,octets-var position) + (aref ,octets position) (incf position)))) (read-char-code () (setf save-position position) @@ -331,15 +332,15 @@ , at body)) (unread-char-code () (setf position save-position))) - (do ((char-code (read-char-code) (read-char-code))) - ((eql char-code :eof) - string) - (vector-push (or (code-char-with-newline-processing char-code - eol-style - #'read-char-code - #'unread-char-code) - char-code) - string))))))))) + (loop + for char = (code-char-with-newline-processing (read-char-code) + eol-style + #'read-char-code + #'unread-char-code) + until (eql char :eof) + do (format t "char ~S~%" char) + do (vector-push char string)) + string))))))) (defun recover-from-encoding-error (flexi-stream format-control &rest format-args) "Helper function used by the STREAM-READ-CHAR methods below to deal Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Thu May 8 12:18:11 2008 @@ -78,26 +78,26 @@ (declare (optimize speed)) (char-to-octets stream char stream)) -(defmacro define-char-writer (((stream-var stream-class) char-var sink-var) &body body) +(defmacro define-char-writer (((stream stream-class) char sink) &body body) (let ((body body)) - (with-unique-names (string-var start-var end-var dummy-sink-var byte-var i-var) + (with-unique-names (string start end dummy-sink byte i) `(progn - (defmethod char-to-octets ((,stream-var ,stream-class) ,char-var ,sink-var) + (defmethod char-to-octets ((,stream ,stream-class) ,char ,sink) (declare (optimize speed)) , at body) - (defmethod string-to-octets% ((,stream-var ,stream-class) ,string-var ,start-var ,end-var) + (defmethod string-to-octets% ((,stream ,stream-class) ,string ,start ,end) (declare (optimize speed)) - (let ((,sink-var (make-array (truncate (* (float (- ,end-var ,start-var)) - (flexi-stream-output-size-factor ,stream-var))) - :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) + (let ((,sink (make-array (truncate (* (- ,end ,start) + (flexi-stream-output-size-factor ,stream))) + :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) (loop - for ,i-var of-type fixnum from ,start-var below ,end-var - for ,char-var of-type character = (aref ,string-var ,i-var) - do (flet ((write-byte* (,byte-var ,dummy-sink-var) - (declare (ignore ,dummy-sink-var)) - (vector-push-extend ,byte-var ,sink-var))) + for ,i of-type fixnum from ,start below ,end + for ,char of-type character = (aref ,string ,i) + do (flet ((write-byte* (,byte ,dummy-sink) + (declare (ignore ,dummy-sink)) + (vector-push-extend ,byte ,sink))) , at body)) - ,sink-var)))))) + ,sink)))))) (define-char-writer ((stream flexi-latin-1-output-stream) char sink) (let ((octet (char-code char))) @@ -125,31 +125,31 @@ (define-char-writer ((stream flexi-utf-8-output-stream) char sink) (let ((char-code (char-code char))) (tagbody - (cond ((< char-code #x80) - (write-byte* char-code sink) - (go zero)) - ((< char-code #x800) - (write-byte* (logior #b11000000 (ldb (byte 5 6) char-code)) sink) - (go one)) - ((< char-code #x10000) - (write-byte* (logior #b11100000 (ldb (byte 4 12) char-code)) sink) - (go two)) - ((< char-code #x200000) - (write-byte* (logior #b11110000 (ldb (byte 3 18) char-code)) sink) - (go three)) - ((< char-code #x4000000) - (write-byte* (logior #b11111000 (ldb (byte 2 24) char-code)) sink) - (go four)) - (t (write-byte* (logior #b11111100 (ldb (byte 1 30) char-code)) sink))) - (write-byte* (logior #b10000000 (ldb (byte 6 24) char-code)) sink) + (cond ((< char-code #x80) + (write-byte* char-code sink) + (go zero)) + ((< char-code #x800) + (write-byte* (logior #b11000000 (ldb (byte 5 6) char-code)) sink) + (go one)) + ((< char-code #x10000) + (write-byte* (logior #b11100000 (ldb (byte 4 12) char-code)) sink) + (go two)) + ((< char-code #x200000) + (write-byte* (logior #b11110000 (ldb (byte 3 18) char-code)) sink) + (go three)) + ((< char-code #x4000000) + (write-byte* (logior #b11111000 (ldb (byte 2 24) char-code)) sink) + (go four)) + (t (write-byte* (logior #b11111100 (ldb (byte 1 30) char-code)) sink))) + (write-byte* (logior #b10000000 (ldb (byte 6 24) char-code)) sink) four - (write-byte* (logior #b10000000 (ldb (byte 6 18) char-code)) sink) + (write-byte* (logior #b10000000 (ldb (byte 6 18) char-code)) sink) three - (write-byte* (logior #b10000000 (ldb (byte 6 12) char-code)) sink) + (write-byte* (logior #b10000000 (ldb (byte 6 12) char-code)) sink) two - (write-byte* (logior #b10000000 (ldb (byte 6 6) char-code)) sink) + (write-byte* (logior #b10000000 (ldb (byte 6 6) char-code)) sink) one - (write-byte* (logior #b10000000 (ldb (byte 6 0) char-code)) sink) + (write-byte* (logior #b10000000 (ldb (byte 6 0) char-code)) sink) zero)) char) @@ -202,7 +202,7 @@ (case (external-format-eol-style external-format) (:cr (call-next-method stream #\Return sink)) (:crlf (call-next-method stream #\Return sink) - (call-next-method stream #\Linefeed sink)))) + (call-next-method stream #\Linefeed sink)))) (otherwise (call-next-method))) char)) From hhubner at common-lisp.net Fri May 9 08:37:58 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Fri, 9 May 2008 04:37:58 -0400 (EDT) Subject: [flexi-streams-cvs] r14 - branches/hans Message-ID: <20080509083758.C999A30BA@common-lisp.net> Author: hhubner Date: Fri May 9 04:37:58 2008 New Revision: 14 Modified: branches/hans/input.lisp Log: remove debug print Modified: branches/hans/input.lisp ============================================================================== --- branches/hans/input.lisp (original) +++ branches/hans/input.lisp Fri May 9 04:37:58 2008 @@ -338,7 +338,6 @@ #'read-char-code #'unread-char-code) until (eql char :eof) - do (format t "char ~S~%" char) do (vector-push char string)) string))))))) From eweitz at common-lisp.net Sat May 10 15:16:30 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 10 May 2008 11:16:30 -0400 (EDT) Subject: [flexi-streams-cvs] r15 - branches/edi Message-ID: <20080510151630.0642F4C04D@common-lisp.net> Author: eweitz Date: Sat May 10 11:16:29 2008 New Revision: 15 Added: branches/edi/ - copied from r14, branches/hans/ Log: Copy Hans' branch From eweitz at common-lisp.net Sat May 10 15:18:56 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 10 May 2008 11:18:56 -0400 (EDT) Subject: [flexi-streams-cvs] r16 - branches/edi/test Message-ID: <20080510151856.20F714C04E@common-lisp.net> Author: eweitz Date: Sat May 10 11:18:54 2008 New Revision: 16 Modified: branches/edi/test/test.lisp Log: More tests Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 10 11:18:54 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.17 2007/12/29 22:58:44 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.19 2008/05/10 14:32:13 edi Exp $ ;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. @@ -89,13 +89,17 @@ (append args `(:eol-style ,eol-style :little-endian ,little-endian)))))))) -(defun create-test-combinations (file-name symbols) - "For a name suffix FILE-NAME and a list of symbols SYMBOLS -denoting different encodings of the corresponding file returns a -list of lists which can be used as arglists for COMPARE-FILES." +(defun create-test-combinations (file-name symbols &optional simplep) + "For a name suffix FILE-NAME and a list of symbols SYMBOLS denoting +different encodings of the corresponding file returns a list of lists +which can be used as arglists for COMPARE-FILES. If SIMPLEP is true, +a list which can be used for the string tests below is returned." (let ((file-variants (loop for symbol in symbols nconc (create-file-variants file-name symbol)))) (loop for (name-in . external-format-in) in file-variants + when simplep + collect (list name-in external-format-in) + else nconc (loop for (name-out . external-format-out) in file-variants collect (list name-in external-format-in name-out external-format-out))))) @@ -200,6 +204,27 @@ #+:lispworks (terpri *error-output*))))) +(defun file-as-octet-vector (pathspec) + "Returns the contents of the file denoted by PATHSPEC as a vector of +octets." + (with-open-file (in pathspec :element-type 'octet) + (let ((vector (make-array (file-length in) :element-type 'octet))) + (read-sequence vector in) + vector))) + +(defun file-as-string (pathspec external-format) + "Reads the contents of the file denoted by PATHSPEC using the +external format EXTERNAL-FORMAT and returns the result as a string." + (with-open-file (in pathspec :element-type 'octet) + (let* ((number-of-octets (file-length in)) + (in (make-flexi-stream in :external-format external-format)) + (string (make-array number-of-octets + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer string) (read-sequence string in)) + string))) + (defmacro with-test ((test-description) &body body) "Defines a test. Two utilities are available inside of the body of the maco: The function FAIL, and the macro CHECK. FAIL, the lowest @@ -231,6 +256,21 @@ (terpri *error-output*)) ,successp)))) +(defun string-test (pathspec external-format) + "Tests whether conversion from strings to octets and vice versa +using the external format EXTERNAL-FORMAT works as expected, using the +contents of the file denoted by PATHSPEC as test data and assuming +that the stream conversion functions work." + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (octets-vector (file-as-octet-vector full-path)) + (octets-list (coerce octets-vector 'list)) + (string (file-as-string full-path external-format))) + (with-test ((format nil "String tests with format ~S." + (flex::normalize-external-format external-format))) + (check (string= (octets-to-string octets-vector :external-format external-format) string)) + (check (string= (octets-to-string octets-list :external-format external-format) string)) + (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) + (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals @@ -262,6 +302,9 @@ (defun encoding-error-handling-test () "Tests several possible encoding errors and how they are handled." (with-test ("Handling of encoding errors.") + ;; handling of EOF in the middle of CRLF + (check (string= #.(string #\Return) + (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) (let ((*substitution-char* #\?)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) @@ -326,13 +369,18 @@ CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, and shows simple statistics at the end." (let* ((*test-success-counter* 0) - (args-list (loop for (file-name symbols) in *test-files* - nconc (create-test-combinations file-name symbols))) - (no-tests (* 4 (length args-list)))) + (compare-files-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols))) + (no-tests (* 4 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - (dolist (args args-list) - (apply #'compare-files args)) + (dolist (args compare-files-args-list) + (apply 'compare-files args)) + (let ((string-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length string-test-args-list)) + (dolist (args string-test-args-list) + (apply 'string-test args))) (incf no-tests) (encoding-error-handling-test) (incf no-tests) From hhubner at common-lisp.net Wed May 14 10:59:58 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Wed, 14 May 2008 06:59:58 -0400 (EDT) Subject: [flexi-streams-cvs] r17 - branches/hans Message-ID: <20080514105958.A95634C00A@common-lisp.net> Author: hhubner Date: Wed May 14 06:59:57 2008 New Revision: 17 Modified: branches/hans/output.lisp Log: Fix bug in output cr-lf handling Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Wed May 14 06:59:57 2008 @@ -80,7 +80,7 @@ (defmacro define-char-writer (((stream stream-class) char sink) &body body) (let ((body body)) - (with-unique-names (string start end dummy-sink byte i) + (with-unique-names (string start end dummy-sink input-char byte i eol-style) `(progn (defmethod char-to-octets ((,stream ,stream-class) ,char ,sink) (declare (optimize speed)) @@ -90,14 +90,23 @@ (let ((,sink (make-array (truncate (* (- ,end ,start) (flexi-stream-output-size-factor ,stream))) :adjustable t :fill-pointer 0 :element-type '(unsigned-byte 8)))) - (loop + (labels ((write-byte* (,byte ,dummy-sink) + (declare (ignore ,dummy-sink)) + (vector-push-extend ,byte ,sink)) + (write-char (,char) + , at body)) + (loop + with ,eol-style = (external-format-eol-style (flexi-stream-external-format ,stream)) for ,i of-type fixnum from ,start below ,end - for ,char of-type character = (aref ,string ,i) - do (flet ((write-byte* (,byte ,dummy-sink) - (declare (ignore ,dummy-sink)) - (vector-push-extend ,byte ,sink))) - , at body)) - ,sink)))))) + for ,input-char of-type character = (aref ,string ,i) + do (if (eql ,input-char #\Newline) + (case ,eol-style + (:cr (write-char #\Return)) + (:crlf (write-char #\Return) + (write-char #\Newline)) + (t (write-char #\Newline))) + (write-char ,input-char))) + ,sink))))))) (define-char-writer ((stream flexi-latin-1-output-stream) char sink) (let ((octet (char-code char))) @@ -191,18 +200,20 @@ (write-byte* (ldb (byte 8 position) char-code) sink)) char) -(define-char-writer ((stream flexi-cr-mixin) char sink) +(defmethod char-to-octets ((stream flexi-cr-mixin) char sink) + (declare (optimize speed)) "The `base' method for all streams which need end-of-line conversion. Uses CALL-NEXT-METHOD to do the actual work of sending one or more characters to SINK." - (with-accessors ((external-format flexi-stream-external-format)) + (with-accessors + ((external-format flexi-stream-external-format)) stream (case char - (#\Newline + (#\Newline (case (external-format-eol-style external-format) (:cr (call-next-method stream #\Return sink)) (:crlf (call-next-method stream #\Return sink) - (call-next-method stream #\Linefeed sink)))) + (call-next-method stream #\Newline sink)))) (otherwise (call-next-method))) char)) From hhubner at common-lisp.net Wed May 14 12:59:12 2008 From: hhubner at common-lisp.net (hhubner at common-lisp.net) Date: Wed, 14 May 2008 08:59:12 -0400 (EDT) Subject: [flexi-streams-cvs] r18 - branches/hans Message-ID: <20080514125912.6BC6C29105@common-lisp.net> Author: hhubner Date: Wed May 14 08:59:11 2008 New Revision: 18 Modified: branches/hans/output.lisp Log: make sbcl happy Modified: branches/hans/output.lisp ============================================================================== --- branches/hans/output.lisp (original) +++ branches/hans/output.lisp Wed May 14 08:59:11 2008 @@ -93,7 +93,7 @@ (labels ((write-byte* (,byte ,dummy-sink) (declare (ignore ,dummy-sink)) (vector-push-extend ,byte ,sink)) - (write-char (,char) + (write-char* (,char) , at body)) (loop with ,eol-style = (external-format-eol-style (flexi-stream-external-format ,stream)) @@ -101,11 +101,11 @@ for ,input-char of-type character = (aref ,string ,i) do (if (eql ,input-char #\Newline) (case ,eol-style - (:cr (write-char #\Return)) - (:crlf (write-char #\Return) - (write-char #\Newline)) - (t (write-char #\Newline))) - (write-char ,input-char))) + (:cr (write-char* #\Return)) + (:crlf (write-char* #\Return) + (write-char* #\Newline)) + (t (write-char* #\Newline))) + (write-char* ,input-char))) ,sink))))))) (define-char-writer ((stream flexi-latin-1-output-stream) char sink) From eweitz at common-lisp.net Sat May 17 16:47:03 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 12:47:03 -0400 (EDT) Subject: [flexi-streams-cvs] r19 - branches/edi Message-ID: <20080517164703.5B8283700E@common-lisp.net> Author: eweitz Date: Sat May 17 12:47:02 2008 New Revision: 19 Removed: branches/edi/ Log: Remove From eweitz at common-lisp.net Sat May 17 16:49:33 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 12:49:33 -0400 (EDT) Subject: [flexi-streams-cvs] r20 - in branches/edi: . doc test Message-ID: <20080517164933.AB826392D7@common-lisp.net> Author: eweitz Date: Sat May 17 12:49:25 2008 New Revision: 20 Added: branches/edi/ branches/edi/CHANGELOG branches/edi/ascii.lisp branches/edi/code-pages.lisp branches/edi/conditions.lisp (contents, props changed) branches/edi/decode.lisp (contents, props changed) branches/edi/doc/ branches/edi/doc/foo.txt (contents, props changed) branches/edi/doc/index.html branches/edi/encode.lisp (contents, props changed) branches/edi/external-format.lisp branches/edi/flexi-streams.asd branches/edi/in-memory.lisp branches/edi/input.lisp branches/edi/iso-8859.lisp branches/edi/koi8-r.lisp branches/edi/lw-binary-stream.lisp branches/edi/output.lisp branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/stream.lisp branches/edi/strings.lisp branches/edi/test/ branches/edi/test/README (contents, props changed) branches/edi/test/hebrew_latin8_cr.txt branches/edi/test/hebrew_latin8_crlf.txt branches/edi/test/hebrew_latin8_lf.txt branches/edi/test/hebrew_utf8_cr.txt branches/edi/test/hebrew_utf8_crlf.txt branches/edi/test/hebrew_utf8_lf.txt branches/edi/test/kafka_cp1252_cr.txt branches/edi/test/kafka_cp1252_crlf.txt branches/edi/test/kafka_cp1252_lf.txt branches/edi/test/kafka_latin1_cr.txt branches/edi/test/kafka_latin1_crlf.txt branches/edi/test/kafka_latin1_lf.txt branches/edi/test/kafka_utf8_cr.txt branches/edi/test/kafka_utf8_crlf.txt branches/edi/test/kafka_utf8_lf.txt branches/edi/test/packages.lisp branches/edi/test/russian_koi8r_cr.txt branches/edi/test/russian_koi8r_crlf.txt branches/edi/test/russian_koi8r_lf.txt branches/edi/test/russian_utf8_cr.txt (contents, props changed) branches/edi/test/russian_utf8_crlf.txt (contents, props changed) branches/edi/test/russian_utf8_lf.txt (contents, props changed) branches/edi/test/test.lisp branches/edi/test/tilton_ascii_cr.txt branches/edi/test/tilton_ascii_crlf.txt branches/edi/test/tilton_ascii_lf.txt branches/edi/test/tilton_utf8_cr.txt branches/edi/test/tilton_utf8_crlf.txt branches/edi/test/tilton_utf8_lf.txt branches/edi/test/unicode_demo_ucs2_cr_be.txt (contents, props changed) branches/edi/test/unicode_demo_ucs2_cr_le.txt (contents, props changed) branches/edi/test/unicode_demo_ucs2_crlf_be.txt (contents, props changed) branches/edi/test/unicode_demo_ucs2_crlf_le.txt (contents, props changed) branches/edi/test/unicode_demo_ucs2_lf_be.txt (contents, props changed) branches/edi/test/unicode_demo_ucs2_lf_le.txt (contents, props changed) branches/edi/test/unicode_demo_ucs4_cr_be.txt (contents, props changed) branches/edi/test/unicode_demo_ucs4_cr_le.txt (contents, props changed) branches/edi/test/unicode_demo_ucs4_crlf_be.txt (contents, props changed) branches/edi/test/unicode_demo_ucs4_crlf_le.txt (contents, props changed) branches/edi/test/unicode_demo_ucs4_lf_be.txt (contents, props changed) branches/edi/test/unicode_demo_ucs4_lf_le.txt (contents, props changed) branches/edi/test/unicode_demo_utf8_cr.txt branches/edi/test/unicode_demo_utf8_crlf.txt branches/edi/test/unicode_demo_utf8_lf.txt branches/edi/util.lisp Log: First part of reorganisation Added: branches/edi/CHANGELOG ============================================================================== --- (empty file) +++ branches/edi/CHANGELOG Sat May 17 12:49:25 2008 @@ -0,0 +1,196 @@ +Version 0.14.0 +2007-12-30 +Some fixes for LispWorks (when the underlying stream is a character stream) +Optimized methods for UNREAD-CHAR% in case of 8-bit encodings +More tests + +Version 0.13.1 +2007-10-11 +Small fix for AllegroCL's "modern" mode + +Version 0.13.0 +2007-09-13 +Better optimizations for STREAM-WRITE-SEQUENCE (thanks to Anton Vodonosov) +Bugfix for STREAM-WRITE-BYTE + +Version 0.12.0 +2007-09-07 +Added "bound" for flexi input streams + +Version 0.11.2 +2007-04-06 +Fixed bug in STREAM-WRITE-STRING implementation (reported by quasi) + +Version 0.11.1 +2007-03-22 +More ugliness for a bit of output performance in special cases + +Version 0.11.0 +2007-03-09 +Re-factoring of how encoding errors are handled (patch by Anton Vodonosov) + +Version 0.10.3 +2007-02-19 +Fixed bug in UTF-16 output (patch by Stelian Ionescu) +Fixed *SUBSTITUTION-CHAR* example in docs + +Version 0.10.2 +2007-01-12 +Another fix - sigh... + +Version 0.10.1 +2007-01-11 +Fixed the last change (thanks to Red Daly) + +Version 0.10.0 +2007-01-10 +Added transformers to in-memory streams (thanks to Chris Dean) +Documentation fixes + +Version 0.9.1 +2006-12-27 +More performance improvements (thanks to Robert J. Macomber for SBCL hints) + +Version 0.9.0 +2006-12-27 +Complete re-factoring to improve performance and reduce consing (at least for LispWorks) +Added some tests +Added *PROVIDE-USE-VALUE-RESTART* +Added FLEXI-STREAM-POSITION-SPEC-ERROR condition + +Version 0.8.0 +2006-11-14 +Added USE-VALUE restart for STREAM-READ-CHAR (thanks to Anton Vodonosov) +Added *SUBSTITUTION-CHAR* + +Version 0.7.2 +2006-11-06 +Removed unnecessary CHECK-EOF-NO-HANG also for in-memory streams (see 0.5.8) + +Version 0.7.1 +2006-10-31 +Argh, missed the most important part... + +Version 0.7.0 +2006-10-31 +Added KOI8-R (thanks to Igor Plekhov) + +Version 0.6.6 +2006-10-06 +Made sure not to apply Gray stream generic function to underlying stream + +Version 0.6.5 +2006-10-06 +Optimized STREAM-WRITE-SEQUENCE and STREAM-READ-SEQUENCE for arrays of octets + +Version 0.6.4 +2006-10-05 +Made READ-BYTE/WRITE-BYTE the default behaviour, i.e. we only use the sequence functions for LW if necessary + +Version 0.6.3 +2006-10-02 +Fixed problems with CMUCL Gray streams implementation (reported by Ivan Toshkov) + +Version 0.6.2 +2006-09-23 +Added method for MAKE-LOAD-FORM which is needed for OpenMCL (reported by Robert Synnott, see Drakma mailing list) + +Version 0.6.1 +2006-09-15 +Switched FILE-POSITION implementation to TRIVIAL-GRAY-STREAMS (thanks to David Lichteblau) + +Version 0.6.0 +2006-09-13 +Implemented file positions for LispWorks + +Version 0.5.10 +2006-09-04 +Flexi streams can have binary element types now + +Version 0.5.9 +2006-09-01 +Added string functions + +Version 0.5.8 +2006-09-01 +CHECK-EOF-NO-HANG is not necessary +Updated LW links in documentation +Changed package handling in system definition (thanks to Christophe Rhodes) + +Version 0.5.7 +2006-06-29 +Removed incompatibility with AllegroCL, see mailing list archive for details + +Version 0.5.6 +2006-06-13 +Fixed Emacs mode lines (reported by Robert Goldman) + +Version 0.5.5 +2006-05-24 +Some small fixes for LW + +Version 0.5.4 +2006-05-18 +Workaround for CMUCL (thanks to Satyaki Das) + +Version 0.5.3 +2006-03-06 +Fixed more typos in stream.lisp +Added missing exports in packages.lisp + +Version 0.5.2 +2006-01-26 +Fixed typos in stream.lisp (thanks to James Bielman) + +Version 0.5.1 +2005-12-14 +Some bugfixes in output.lisp (thanks to Jan Idzikowski) + +Version 0.5.0 +2005-12-11 +Added in-memory streams +Exported types +Added specific conditions + +Version 0.4.1 +2005-12-05 +Updated docs + +Version 0.4.0 +2005-12-05 +Added US-ASCII encoding +Added *USE-REPLACEMENT-CHAR* + +Version 0.3.0 +2005-11-26 +Added UNREAD-BYTE and PEEK-BYTE + +Version 0.2.4 +2005-11-26 +WIN32:CODE-PAGE only for LispWorks + +Version 0.2.3 +2005-11-26 +Added STREAM-TERPRI to appease AllegroCL +Fixed typo in docs + +Version 0.2.2 +2005-11-26 +Patch to make class precendence list work in AllegroCL (David Lichteblau) + +Version 0.2.1 +2005-11-25 +Adapted to new TRIVIAL-GRAY-STREAMS API (David Lichteblau) +More changes for portability, specifically for SBCL (David Lichteblau) + +Version 0.2.0 +2005-11-25 +Portable version thanks to TRIVIAL-GRAY-STREAMS (David Lichteblau) + +Version 0.1.1 +2005-11-25 +Documentation enhancements + +Version 0.1.0 +2005-11-25 +Initial public release Added: branches/edi/ascii.lisp ============================================================================== --- (empty file) +++ branches/edi/ascii.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,35 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.8 2008/05/17 13:50:15 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defvar +ascii-table+ + #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533) + "An array enumerating the character codes for the US-ASCII +encoding.") Added: branches/edi/code-pages.lisp ============================================================================== --- (empty file) +++ branches/edi/code-pages.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,62 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.6 2008/05/17 13:50:15 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +;;; the following code was auto-generated with LWW + +(defvar +code-page-tables+ + '((437 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (720 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 233 226 132 224 134 231 234 235 232 239 238 141 142 143 144 1617 1618 244 164 1600 251 249 1569 1570 1571 1572 163 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1590 1591 1592 1593 1594 1601 181 1602 1603 1604 1605 1606 1607 1608 1609 1610 8801 1611 1612 1613 1614 1615 1616 8776 176 8729 183 8730 8319 178 9632 160)) + (737 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 931 932 933 934 935 936 937 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 965 966 967 968 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 969 940 941 942 970 943 972 973 971 974 902 904 905 906 908 910 911 177 8805 8804 938 939 247 8776 176 8729 183 8730 8319 178 9632 160)) + (775 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 262 252 233 257 228 291 229 263 322 275 342 343 299 377 196 197 201 230 198 333 246 290 162 346 347 214 220 248 163 216 215 164 256 298 243 379 380 378 8221 166 169 174 172 189 188 321 171 187 9617 9618 9619 9474 9508 260 268 280 278 9571 9553 9559 9565 302 352 9488 9492 9524 9516 9500 9472 9532 370 362 9562 9556 9577 9574 9568 9552 9580 381 261 269 281 279 303 353 371 363 382 9496 9484 9608 9604 9612 9616 9600 211 223 332 323 245 213 181 324 310 311 315 316 326 274 325 8217 173 177 8220 190 182 167 247 8222 176 8729 183 185 179 178 9632 160)) + (850 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 215 402 225 237 243 250 241 209 170 186 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 240 208 202 203 200 305 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 254 222 218 219 217 253 221 175 180 173 177 8215 190 182 167 247 184 176 168 183 185 179 178 9632 160)) + (852 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 367 263 231 322 235 336 337 238 377 196 262 201 313 314 244 246 317 318 346 347 214 220 356 357 321 215 269 225 237 243 250 260 261 381 382 280 281 172 378 268 351 171 187 9617 9618 9619 9474 9508 193 194 282 350 9571 9553 9559 9565 379 380 9488 9492 9524 9516 9500 9472 9532 258 259 9562 9556 9577 9574 9568 9552 9580 164 273 272 270 203 271 327 205 206 283 9496 9484 9608 9604 354 366 9600 211 223 212 323 324 328 352 353 340 218 341 368 253 221 355 180 173 733 731 711 728 167 247 184 176 168 729 369 344 345 9632 160)) + (855 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1106 1026 1107 1027 1105 1025 1108 1028 1109 1029 1110 1030 1111 1031 1112 1032 1113 1033 1114 1034 1115 1035 1116 1036 1118 1038 1119 1039 1102 1070 1098 1066 1072 1040 1073 1041 1094 1062 1076 1044 1077 1045 1092 1060 1075 1043 171 187 9617 9618 9619 9474 9508 1093 1061 1080 1048 9571 9553 9559 9565 1081 1049 9488 9492 9524 9516 9500 9472 9532 1082 1050 9562 9556 9577 9574 9568 9552 9580 164 1083 1051 1084 1052 1085 1053 1086 1054 1087 9496 9484 9608 9604 1055 1103 9600 1071 1088 1056 1089 1057 1090 1058 1091 1059 1078 1046 1074 1042 1100 1068 8470 173 1099 1067 1079 1047 1096 1064 1101 1069 1097 1065 1095 1063 167 9632 160)) + (857 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 305 196 197 201 230 198 244 246 242 251 249 304 214 220 248 163 216 350 351 225 237 243 250 241 209 286 287 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 186 170 202 203 200 65533 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 65533 215 218 219 217 236 255 175 180 173 177 65533 190 182 167 247 184 176 168 183 185 179 178 9632 160)) + (860 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 227 224 193 231 234 202 232 205 212 236 195 194 201 192 200 244 245 242 218 249 204 213 220 162 163 217 8359 211 225 237 243 250 241 209 170 186 191 210 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (861 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 208 240 222 196 197 201 230 198 244 246 254 251 221 253 214 220 248 163 216 8359 402 225 237 243 250 193 205 211 218 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (862 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (863 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 194 224 182 231 234 235 232 239 238 8215 192 167 201 200 202 244 203 207 251 249 164 212 220 162 163 217 219 402 166 180 243 250 168 184 179 175 206 8976 172 189 188 190 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (864 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 176 183 8729 8730 9618 9472 9474 9532 9508 9516 9500 9524 9488 9484 9492 9496 946 8734 966 177 189 188 8776 171 187 65271 65272 155 156 65275 65276 159 160 173 65154 163 164 65156 65533 65533 65166 65167 65173 65177 1548 65181 65185 65189 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 65233 1563 65201 65205 65209 1567 162 65152 65153 65155 65157 65226 65163 65165 65169 65171 65175 65179 65183 65187 65191 65193 65195 65197 65199 65203 65207 65211 65215 65217 65221 65227 65231 166 172 247 215 65225 1600 65235 65239 65243 65247 65251 65255 65259 65261 65263 65267 65213 65228 65230 65229 65249 65149 1617 65253 65257 65260 65264 65266 65232 65237 65269 65270 65245 65241 65265 9632 65533)) + (865 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 164 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) + (866 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1025 1105 1028 1108 1031 1111 1038 1118 176 8729 183 8730 8470 164 9632 160)) + (869 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 902 135 183 172 166 8216 8217 904 8213 905 906 938 908 147 148 910 939 169 911 178 179 940 163 941 942 943 970 912 972 973 913 914 915 916 917 918 919 189 920 921 171 187 9617 9618 9619 9474 9508 922 923 924 925 9571 9553 9559 9565 926 927 9488 9492 9524 9516 9500 9472 9532 928 929 9562 9556 9577 9574 9568 9552 9580 931 932 933 934 935 936 937 945 946 947 9496 9484 9608 9604 948 949 9600 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 900 173 177 965 966 967 167 968 901 176 168 969 971 944 974 9632 160)) + (1250 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 352 8249 346 356 381 377 144 8216 8217 8220 8221 8226 8211 8212 152 8482 353 8250 347 357 382 378 160 711 728 321 164 260 166 167 168 169 350 171 172 173 174 379 176 177 731 322 180 181 182 183 184 261 351 187 317 733 318 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) + (1251 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1026 1027 8218 1107 8222 8230 8224 8225 8364 8240 1033 8249 1034 1036 1035 1039 1106 8216 8217 8220 8221 8226 8211 8212 152 8482 1113 8250 1114 1116 1115 1119 160 1038 1118 1032 164 1168 166 167 1025 169 1028 171 172 173 174 1031 176 177 1030 1110 1169 181 182 183 1105 8470 1108 187 1112 1029 1109 1111 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103)) + (1252 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 381 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 382 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) + (1253 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 136 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 157 158 159 160 901 902 163 164 165 166 167 168 169 65533 171 172 173 174 8213 176 177 178 179 900 181 182 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) + (1254 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) + (1255 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 156 157 158 159 160 161 162 163 8362 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 191 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1520 1521 1522 1523 1524 65533 65533 65533 65533 65533 65533 65533 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) + (1256 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 1662 8218 402 8222 8230 8224 8225 710 8240 1657 8249 338 1670 1688 1672 1711 8216 8217 8220 8221 8226 8211 8212 1705 8482 1681 8250 339 8204 8205 1722 160 1548 162 163 164 165 166 167 168 169 1726 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 1563 187 188 189 190 1567 1729 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 215 1591 1592 1593 1594 1600 1601 1602 1603 224 1604 226 1605 1606 1607 1608 231 232 233 234 235 1609 1610 238 239 1611 1612 1613 1614 244 1615 1616 247 1617 249 1618 251 252 8206 8207 1746)) + (1257 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 138 8249 140 168 711 184 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 175 731 159 160 65533 162 163 164 65533 166 167 216 169 342 171 172 173 174 198 176 177 178 179 180 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 729)) + (1258 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 258 196 197 198 199 200 201 202 203 768 205 206 207 272 209 777 211 212 416 214 215 216 217 218 219 220 431 771 223 224 225 226 259 228 229 230 231 232 233 234 235 769 237 238 239 273 241 803 243 244 417 246 247 248 249 250 251 252 432 8363 255))) + "A list of 8-bit Windows code pages where each element is a +cons with the car being the ID of the code page and the cdr being +a vector enumerating the corresponding character codes.") Added: branches/edi/conditions.lisp ============================================================================== --- (empty file) +++ branches/edi/conditions.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,84 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.3 2008/05/17 15:56:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(define-condition flexi-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +flexi streams.")) + +(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) + () + (:documentation "Like FLEXI-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition flexi-stream-element-type-error (flexi-stream-error) + ((element-type :initarg :element-type + :reader flexi-stream-element-type-error-element-type)) + (:report (lambda (condition stream) + (format stream "Element type ~S not allowed." + (flexi-stream-element-type-error-element-type condition)))) + (:documentation "Errors of this type are signalled if the flexi +stream has a wrong element type.")) + +(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) + ((position-spec :initarg :position-spec + :reader flexi-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an +erroneous position spec is used in conjunction with +FILE-POSITION.")) + +;; TODO: stream might not be a stream... +(defun signal-encoding-error (flexi-stream format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +FLEXI-STREAM-ENCODING-ERROR." + (error 'flexi-stream-encoding-error + :format-control format-control + :format-arguments format-args + :stream flexi-stream)) + +(define-condition in-memory-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +IN-MEMORY streams.")) + +(define-condition in-memory-stream-closed-error (in-memory-stream-error) + () + (:report (lambda (condition stream) + (format stream "~S is closed." + (stream-error-stream condition)))) + (:documentation "An error that is signalled when someone is trying +to read from or write to a closed IN-MEMORY stream.")) + Added: branches/edi/decode.lisp ============================================================================== --- (empty file) +++ branches/edi/decode.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,151 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric char-to-octets (format char writer stream) + (:documentation "Converts the character CHAR to sequence of octets +and sends this sequence to SINK. STREAM will always be a flexi stream +which is used to determine how the character should be converted. +This function does all the work for STREAM-WRITE-CHAR in which case +SINK is the same as STREAM. It is also used in the implementation of +STREAM-WRITE-SEQUENCE below.")) + +(defmethod char-to-octets ((format flexi-latin-1-format) char writer stream) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 255) + (signal-encoding-error stream "~S is not a LATIN-1 character." char)) + (funcall writer octet)) + char) + +(defmethod char-to-octets ((format flexi-ascii-format) char writer stream) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 127) + (signal-encoding-error stream "~S is not an ASCII character." char)) + (funcall writer octet)) + char) + +(defmethod char-to-octets ((format flexi-8-bit-format) char writer stream) + (declare (optimize speed)) + (with-accessors ((encoding-hash external-format-encoding-hash)) + format + (let ((octet (gethash (char-code char) encoding-hash))) + (unless octet + (signal-encoding-error stream "~S is not in this encoding." char)) + (funcall writer octet)) + char)) + +(defmethod char-to-octets ((format flexi-utf-8-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (let ((char-code (char-code char))) + (tagbody + (cond ((< char-code #x80) + (funcall writer char-code) + (go zero)) + ((< char-code #x800) + (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (go one)) + ((< char-code #x10000) + (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (go two)) + ((< char-code #x200000) + (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code))) + (go three)) + ((< char-code #x4000000) + (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) + (go four)) + (t (funcall writer (logior #b11111100 (ldb (byte 1 30) char-code))))) + (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) + four + (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) + three + (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code))) + two + (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) + one + (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) + zero)) + char) + +(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 0) word)) + (funcall writer (ldb (byte 8 8) word)))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 8) word)) + (funcall writer (ldb (byte 8 0) word)))) + (declare (inline write-word) (dynamic-extent (function write-word))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (loop with char-code = (char-code char) + for position in '(0 8 16 24) do + (funcall writer (ldb (byte 8 position) char-code))) + char) + +(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (loop with char-code = (char-code char) + for position in '(24 16 8 0) do + (funcall writer (ldb (byte 8 position) char-code))) + char) + +(defmethod char-to-octets ((format flexi-cr-mixin) char writer stream) + "The `base' method for all formats which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of sending +one or more characters to SINK." + (declare (optimize speed)) + (case char + (#\Newline + (case (external-format-eol-style format) + (:cr (call-next-method format #\Return writer stream)) + (:crlf (call-next-method format #\Return writer stream) + (call-next-method format #\Linefeed writer stream)))) + (otherwise (call-next-method))) + char) Added: branches/edi/doc/foo.txt ============================================================================== Binary file. No diff available. Added: branches/edi/doc/index.html ============================================================================== --- (empty file) +++ branches/edi/doc/index.html Sat May 17 12:49:25 2008 @@ -0,0 +1,1044 @@ + + + + + + FLEXI-STREAMS - Flexible bivalent streams for Common Lisp + + + + + +

FLEXI-STREAMS - Flexible bivalent streams for Common Lisp

+ +
+
 

Abstract

+ +FLEXI-STREAMS implements "virtual" bivalent streams that can be +layered atop real binary or bivalent streams and that can be used to +read and write character data in various single- or multi-octet +encodings which can be changed on the fly. It also supplies +in-memory binary streams which are similar to string streams. +

+The library needs a Common Lisp implementation that +supports Gray +streams and relies on David +Lichteblau's trivial-gray-streams +to offer portability between different Lisps. +

+The code comes with +a BSD-style +license so you can basically do with it whatever you want. + +

+Download shortcut: http://weitz.de/files/flexi-streams.tar.gz. +

+ +
 

Contents

+
    +
  1. Example usage +
  2. Download and installation +
  3. Backward compatibility with version 0.10.3 and before +
  4. Support and mailing lists +
  5. The FLEXI-STREAMS dictionary +
      +
    1. External formats +
        +
      1. make-external-format +
      2. external-format-name +
      3. external-format-eol-style +
      4. external-format-little-endian +
      5. external-format-id +
      6. external-format-equal +
      7. *default-eol-style* +
      8. *default-little-endian* +
      +
    2. Flexi streams +
        +
      1. flexi-stream +
      2. flexi-input-stream +
      3. flexi-output-stream +
      4. flexi-io-stream +
      5. make-flexi-stream +
      6. flexi-stream-external-format +
      7. flexi-stream-element-type +
      8. flexi-stream-column +
      9. flexi-stream-position +
      10. flexi-stream-bound +
      11. flexi-stream-stream +
      12. unread-byte +
      13. peek-byte +
      14. *substitution-char* +
      15. octet +
      16. flexi-stream-error +
      17. flexi-stream-encoding-error +
      18. flexi-stream-element-type-error +
      19. flexi-stream-element-type-error-element-type +
      20. flexi-stream-position-spec-error +
      21. flexi-stream-position-spec-error-position-spec +
      +
    3. In-memory streams +
        +
      1. in-memory-stream +
      2. in-memory-input-stream +
      3. in-memory-output-stream +
      4. list-stream +
      5. vector-stream +
      6. make-in-memory-input-stream +
      7. make-in-memory-output-stream +
      8. get-output-stream-sequence +
      9. output-stream-sequence-length +
      10. with-input-from-sequence +
      11. with-output-to-sequence +
      12. in-memory-stream-error +
      13. in-memory-stream-closed-error +
      +
    4. Strings +
        +
      1. string-to-octets +
      2. octets-to-string +
      +
    +
  6. File positions +
  7. Acknowledgements +
+ +
 

Example usage

+ +The examples were created with LispWorks 4.4.6 pro on Windows. The following two functions create the same file: + +
+(defun foo (pathspec)
+  "With standard LispWorks streams."
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :supersede
+                       :external-format '(:utf-8 :eol-style :crlf))
+    (write-line "ÄÖÜ1" out))
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :append
+                       :external-format '(:latin-1 :eol-style :lf))
+    (write-line "ÄÖÜ2" out))
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :append
+                       :element-type 'octet)
+    (write-byte #xeb out)
+    (write-sequence #(#xa3 #xa4 #xa5) out))
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :append
+                       :external-format '(:unicode :little-endian nil :eol-style :crlf))
+    (write-line "ÄÖÜ3" out)))
+
+(defun bar (pathspec)
+  "With a flexi stream."
+  (with-open-file (out pathspec
+                       :direction :output
+                       :if-exists :supersede
+                       :external-format '(:latin-1 :eol-style :lf))
+    (setq out (make-flexi-stream out :external-format :utf-8))
+    (write-line "ÄÖÜ1" out)
+    (setf (flexi-stream-external-format out) '(:latin-1 :eol-style :lf))
+    (write-line "ÄÖÜ2" out) 
+    (write-byte #xeb out)
+    (write-sequence #(#xa3 #xa4 #xa5) out)
+    (setf (flexi-stream-external-format out) :ucs-2be)
+    (write-line "ÄÖÜ3" out)))
+
+ +

+And applying this function +

+(defun baz (pathspec)
+  (let (result)
+    (with-open-file (in pathspec :element-type 'octet)
+      (setq in (make-flexi-stream in :external-format :utf-8))
+      (push (read-line in) result)
+      (push (read-byte in) result)
+      (setf (flexi-stream-external-format in) '(:latin-1 :eol-style :lf))
+      (push (read-line in) result) 
+      (setf (flexi-stream-external-format in) :greek)
+      (push (read-char in) result)
+      (setf (flexi-stream-external-format in) :latin0)
+      (let ((string (make-string 3 :element-type 'character)))
+        (read-sequence string in)
+        (push string result))
+      (let ((octets (make-array 2 :element-type 'octet)))
+        (read-sequence octets in)
+        (push octets result))
+      (setf (flexi-stream-external-format in) :ucs-2be)
+      (push (read-line in) result))
+    (nreverse result)))
+
+to the file created above will yield the list +
+("ÄÖÜ1" 196 "ÖÜ2" #\λ "£€¥" #(0 196) "ÖÜ3")
+
+ +

+For more examples see the source code +of Drakma, Chunga, +or CL-WBXML. + +
 

Download and installation

+ +Before you try to install FLEXI-STREAMS, first check that in your Lisp +each character's +character +code is equal to +its Unicode code point and +that (CHAR-CODE #\Newline) +and (CHAR-CODE #\Linefeed) have the same +value (10). (This is the case for all relevant CL +implementations which were in use when this library was written. It +is not mandated by the ANSI standard, though.) +

+FLEXI-STREAMS together with this documentation can be downloaded from http://weitz.de/files/flexi-streams.tar.gz. The +current version is 0.14.0. +

+Before you install FLEXI-STREAMS you first need to +install the trivial-gray-streams library +unless you already have it. +

+FLEXI-STREAMS comes with a system definition for ASDF so you can install the library with +

+(asdf:oos 'asdf:load-op :flexi-streams)
+
+if you've unpacked it in a place where ASDF can find it. Installation +via asdf-install +should also be possible, and there's a port +to Gentoo Lisp thanks to +Matthew Kennedy. +

+You can run a test suite which tests some (but +not all) aspects of the library with +

+(asdf:oos 'asdf:test-op :flexi-streams)
+
+This might take a while... +

+Luís Oliveira maintains a darcs +repository of FLEXI-STREAMS +at http://common-lisp.net/~loliveira/ediware/. +

+A Mercurial +repository of older versions is available +at http://arcanes.fr.eu.org/~pierre/2007/02/weitz/ +thanks to Pierre Thierry. + + +
 
+

+Backward compatibility with version 0.10.3 and before

+ +Two special variables used in flexi-streams 0.10.3 and before were removed - +*PROVIDE-USE-VALUE-RESTART* and *USE-REPLACEMENT-CHAR*. + +

+The code now behaves as if +*PROVIDE-USE-VALUE-RESTART* is always T. +Instead of *USE-REPLACEMENT-CHAR*, you can use +*SUBSTITUTION-CHAR* or +invoke +a USE-VALUE +restart +when a FLEXI-STREAM-ENCODING-ERROR +is signalled. + +
 

Support and mailing lists

+ +For questions, bug reports, feature requests, improvements, or patches +please use the flexi-streams-devel +mailing list. If you want to be notified about future releases, +subscribe to the flexi-streams-announce +mailing list. These mailing lists were made available thanks to +the services of common-lisp.net. +

+If you want to send patches, please read this first. + + +
 

The FLEXI-STREAMS dictionary

+ +

External formats

+ +EXTERNAL-FORMAT objects are used to denote the external +formats of flexi streams. These objects are created using +the MAKE-EXTERNAL-FORMAT +function, and there are various +readers to query their attributes. Once such an object is +created it can't be changed. +

+An external format consists of a basic encoding +(like ISO 8859-1 +or UTF-8), a +definition how line endings are denoted - by a carriage return +character (ASCII 13), by a line feed character (ASCII 10), +or by both of these characters in a row -, and optionally (for +encodings that use units larger than 8 bits) information +about the endianess +of the encoding. +

+The following encodings are currently supported by FLEXI-STREAMS: +

+

+A couple of alternative names are allowed that are listed below: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
:UTF-8:UTF8
:UTF-16:UTF16
:UCS-2
:UCS2
:UNICODE
:UTF-32:UTF32
:UCS-4
:UCS4
:ISO-8859-1:LATIN-1
:LATIN1
:ISO-8859-2:LATIN-2
:LATIN2
:ISO-8859-3:LATIN-3
:LATIN3
:ISO-8859-4:LATIN-4
:LATIN4
:ISO-8859-5:CYRILLIC
:ISO-8859-6:ARABIC
:ISO-8859-7:GREEK
:ISO-8859-8:HEBREW
:ISO-8859-9:LATIN-5
:LATIN5
:ISO-8859-10:LATIN-6
:LATIN6
:ISO-8859-11:THAI
:ISO-8859-13:LATIN-7
:LATIN7
:ISO-8859-14:LATIN-8
:LATIN8
:ISO-8859-15:LATIN-9
:LATIN9
:LATIN-0
:LATIN0
:ISO-8859-16:LATIN-10
:LATIN10
:CODE-PAGE:CODEPAGE
WIN32:CODE-PAGE
(only on LWW)
:KOI8-R:KOI8R
:US-ASCII:ASCII
+

+(Note that we treat UCS-2 exactly like UTF-16 although there +are subtle +differences. Also note that even though we support encodings like +UTF-32 some Lisps only supports characters contained within +the Basic +Multilingual Plane (like LispWorks) or even less (like CMUCL), so +if other characters are read from a +flexi +stream, READ-CHAR +will try to be helpful and return the corresponding Unicode code point - +an integer - instead. This might lead to an error if you're using +functions +like READ-LINE, though.) + +

+Whenever a FLEXI-STREAMS function accepts an external format as one of +its arguments, you can provide either an EXTERNAL-FORMAT +object or a shortcut which can be a list or a symbol. The list +shortcuts have a syntax similar +to the +one used by LispWorks - the cars are the names of and encoding +and the cdrs of these lists correspond to the keyword arguments +to MAKE-EXTERNAL-FORMAT, so +for example +

(:latin-1 :eol-style :crlf)
+is equivalent to +
(make-external-format :latin-1 :eol-style :crlf)
The +symbol shortcuts are equivalent to +calling MAKE-EXTERNAL-FORMAT +without keyword arguments, i.e. +
:thai
+behaves like +
(make-external-format :thai)
+Finally, the following expansions are +available: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
:UCS-2LE(:UCS-2 :LITTLE-ENDIAN T)
:UCS-2BE(:UCS-2 :LITTLE-ENDIAN NIL)
:UCS-4LE(:UCS-4 :LITTLE-ENDIAN T)
:UCS-4BE(:UCS-4 :LITTLE-ENDIAN NIL)
:UTF-16LE(:UTF-16 :LITTLE-ENDIAN T)
:UTF-16BE(:UTF-16 :LITTLE-ENDIAN NIL)
:UTF-32LE(:UTF-32 :LITTLE-ENDIAN T)
:UTF-32BE(:UTF-32 :LITTLE-ENDIAN NIL)
:IBM437(:CODE-PAGE :ID 437)
:IBM850(:CODE-PAGE :ID 850)
:IBM852(:CODE-PAGE :ID 852)
:IBM855(:CODE-PAGE :ID 855)
:IBM857(:CODE-PAGE :ID 857)
:IBM860(:CODE-PAGE :ID 860)
:IBM861(:CODE-PAGE :ID 861)
:IBM862(:CODE-PAGE :ID 862)
:IBM863(:CODE-PAGE :ID 863)
:IBM864(:CODE-PAGE :ID 864)
:IBM865(:CODE-PAGE :ID 865)
:IBM866(:CODE-PAGE :ID 866)
:IBM869(:CODE-PAGE :ID 869)
:WINDOWS-1250(:CODE-PAGE :ID 1250)
:WINDOWS-1251(:CODE-PAGE :ID 1251)
:WINDOWS-1252(:CODE-PAGE :ID 1252)
:WINDOWS-1253(:CODE-PAGE :ID 1253)
:WINDOWS-1254(:CODE-PAGE :ID 1254)
:WINDOWS-1255(:CODE-PAGE :ID 1255)
:WINDOWS-1256(:CODE-PAGE :ID 1256)
:WINDOWS-1257(:CODE-PAGE :ID 1257)
:WINDOWS-1258(:CODE-PAGE :ID 1258)
+

+Note that if you provide a shortcut, it +will be converted to an EXTERNAL-FORMAT object first. +So, if you're concerned about efficiency, create these objects once and +re-use them. + +


[Function] +
make-external-format name &key eol-style little-endian id => external-format + +


Creates and returns +an EXTERNAL-FORMAT +object. name is a +symbol, eol-style is one of the +keywords :CR, :LF, or :CRLF, +and little-endian is +a generalized +boolean. The default value for eol-style is the value of *DEFAULT-EOL-STYLE* except for Windows code pages where it is :CRLF. The default value +for little-endian is the value of *DEFAULT-LITTLE-ENDIAN* - this value is ignored unless name denotes one of UTF-16 or UTF-32. +id must be an integer denoting a Windows code page +known by FLEXI-STREAMS if name +is :CODE-PAGE or WIN32:CODE-PAGE, otherwise +the value is ignored. See the section +about external formats for more info. +

+Examples (run on Windows): + +

+CL-USER 1 > (make-external-format :latin-1)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:ISO-8859-1 :EOL-STYLE :CRLF) 2067DA84>
+
+CL-USER 2 > (make-external-format :latin-1 :eol-style :lf)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:ISO-8859-1 :EOL-STYLE :LF) 2068B4D4>
+
+CL-USER 3 > (make-external-format :ibm437)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:CODE-PAGE :ID 437 :EOL-STYLE :CRLF) 2069B33C>
+
+CL-USER 4 > (make-external-format :ucs-2)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CRLF :LITTLE-ENDIAN T) 206B4F4C>
+
+CL-USER 5 > (make-external-format :ucs-2be)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CRLF :LITTLE-ENDIAN NIL) 2067DBE4>
+
+CL-USER 6 > (make-external-format :ucs-2be :eol-style :br)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :BR :LITTLE-ENDIAN NIL) 206B54AC>
+
+
+ +


[Readers] +
external-format-name external-format => name +
external-format-eol-style external-format => eol-style +
external-format-little-endian external-format => little-endian +
external-format-id external-format => id + +


+These methods can be used to query an EXTERNAL-FORMAT object for its attributes. +
+ +


[Functions] +
external-format-equal external-format-1 external-format-2 => generalized-boolean + +


+Checks whether the two external formats external-format-1 and external-format-2 are equivalent with respect to their effects on flexi streams. +

+Examples (run on Windows): + +

+CL-USER 1 > (make-external-format :ucs-4le)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-32 :EOL-STYLE :CRLF :LITTLE-ENDIAN T) 2067FB74>
+
+CL-USER 2 > (external-format-equal * (make-external-format :utf32 :little-endian t))
+T
+
+CL-USER 3 > (make-external-format :code-page :id 437)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:CODE-PAGE :ID 437 :EOL-STYLE :CRLF) 2069428C>
+
+CL-USER 4 > (external-format-equal * (make-external-format :ibm437))
+T
+
+ +
+ +


[Special variable] +
*default-eol-style* + +


+The default value for the eol-style keyword argument of MAKE-EXTERNAL-FORMAT. Its initial value is :CRLF on Windows and :LF on other operating systems. +
+ +


[Special variable] +
*default-little-endian* + +


+The default value for the little-endian keyword argument of MAKE-EXTERNAL-FORMAT. Its initial value corresponds to the endianess of the platform FLEXI-STREAMS is used on as revealed by the :LITTLE-ENDIAN feature. +
+ +

Flexi streams

+ +Flexi streams are the core of the FLEXI-STREAMS library. You +create them using the +function MAKE-FLEXI-STREAM which +takes an open binary stream (called the underlying stream) as its only required argument. +A binary stream in this context means that if it's an input +stream, you can read from it with +READ-BYTE +(or, as a workaround for LispWorks, you can at least apply +READ-SEQUENCE +to it where the sequence is an array of element +type OCTET), and similarly for +WRITE-BYTE +(WRITE-SEQUENCE +for LispWorks) +and output +streams. (Note that this specifically holds +for bivalent +streams like socket streams.) +

+A flexi stream behaves like an ordinary Lisp stream. It is an input +stream if the underlying binary stream is an input stream, and it is +an output stream when the underlying binary stream is an output +stream. You can write characters as well +as octets to an output flexi stream and similarly +you can read characters and octets from an input flexi stream. +

+A flexi stream always has an external +format associated with it which is deployed whenever you read +characters from the stream or write characters to it. You +can change the external +format while you use the stream. +

+Once you're using a flexi stream you should not read from or +write to the underlying stream directly anymore. +

+If +you close +a flexi stream, the underlying stream will also be closed. However, it +also suffices to close the underlying stream directly should you not +want to use the flexi stream anymore. So, the following usage +(where IN is implicitly closed at the end) is OK: +

+(with-open-file (in "/foo/bar/baz.txt")
+  (let ((flexi (make-flexi-stream in :external-format :hebrew)))
+    (read-line flexi)))
+
+

+Output flexi streams will try to keep track of +the column +they're in but you can also set the +column directly. This value will be incremented by one for each +character written to the stream and it will be set to 0 +if you send a #\Newline character. The column will be +set to NIL if an OCTET +is sent to the stream. Once the column is NIL it'll stay +like that unless it is explicitly set to another value. +

+Input flexi streams keep track of +their position within the stream. +This value is incremented by one for +each OCTET read from the stream, and +it is incremented by the number of octets actually read for each +character read from the stream. So, if the encoding is UTF-8, reading +the character #\ä (a-umlaut) will advance the position by two. +If the encoding is UTF-32 and the end-of-line style +is :CRLF, reading a #\Newline will advance +the position by eight. +

+You can also set the bound of an +input flexi stream. Initially it is NIL, but when it's +an integer and the +stream's position has gone beyond +this bound, the stream will behave as if no more input is available. +

+Caveat: You can +only unread +a character from a flexi stream if you haven't changed the external format after you read it. +

+Caveat: The underlying stream should either be a binary stream (i.e. have an element type that is a subtype of integer) or it should explicitly use an external format with :LF as its end-of-line style. Otherwise it might perform unwanted conversion of line endings on its own. (LispWorks does this even if you write binary data to the stream using WRITE-SEQUENCE.) + +


[Standard class] +
flexi-stream + +


+Every flexi stream returned by MAKE-FLEXI-STREAM is of this type which is a subtype of STREAM. +
+ +


[Standard class] +
flexi-input-stream + +


+A flexi stream is of this type if its underlying stream is an input stream. This is a subtype of FLEXI-STREAM. +
+ +


[Standard class] +
flexi-output-stream + +


+A flexi stream is of this type if its underlying stream is an output stream. This is a subtype of FLEXI-STREAM. +
+ +


[Standard class] +
flexi-io-stream + +


+A flexi stream is of this type if it is both a FLEXI-INPUT-STREAM as well as a FLEXI-OUTPUT-STREAM. +
+ +


[Function] +
make-flexi-stream stream &key external-format element-type column position bound => flexi-stream + +


+Creates and returns a flexi stream, i.e. an object of type FLEXI-STREAM. stream is the underlying Lisp stream. external-format is the initial external format to be used by the stream, the default is the value of evaluating (MAKE-EXTERNAL-FORMAT :LATIN1). element-type is the initial element type of the flexi stream the default of which is LW:SIMPLE-CHAR for LispWorks and CHARACTER otherwise. column is the initial column of the stream and should only be provided for output streams, the default is 0. position is the initial octet position of the stream and must only be provided for input streams, the default is 0. bound should be NIL (the default) or an integer and must only be provided for input streams. If the octet position of the stream has gone beyond this bound, the stream will behave as if no more input is available. See the section about flexi streams for more information. +
+ +


[Accessors] +
flexi-stream-external-format flexi-stream => external-format +
(setf (flexi-stream-external-format flexi-stream) external-format) +
flexi-stream-element-type flexi-stream => element-type +
(setf (flexi-stream-element-type flexi-stream) element-type) +
flexi-stream-column flexi-output-stream => column +
(setf (flexi-stream-column flexi-output-stream) column) +
flexi-stream-position flexi-input-stream => position +
(setf (flexi-stream-position flexi-input-stream) position) +
flexi-stream-bound flexi-input-stream => bound +
(setf (flexi-stream-bound flexi-input-stream) bound) + +


+These methods can be used to get and set the corresponding attributes of a flexi stream. +

+(SETF +FLEXI-STREAM-EXTERNAL-FORMAT) accepts keyword symbols +(names of external formats), lists +(which should be valid lists of parameters +to MAKE-EXTERNAL-FORMAT), or EXTERNAL-FORMAT objects: +

+CL-USER 1 > (setf (flexi-stream-external-format *my-stream*) :ucs-4le)
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-32 :EOL-STYLE :CRLF :LITTLE-ENDIAN T) 206920DC>
+
+CL-USER 2 > (setf (flexi-stream-external-format *my-stream*) '(:ucs-2be :eol-style :br))
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :BR :LITTLE-ENDIAN NIL) 20696934>
+
+CL-USER 3 > (setf (flexi-stream-external-format *my-stream*) (make-external-format :ibm437))
+#<FLEXI-STREAMS::EXTERNAL-FORMAT (:CODE-PAGE :ID 437 :EOL-STYLE :CRLF) 2068716C>
+
+
+ +


[Reader] +
flexi-stream-stream flexi-stream => stream + +


+This method returns the underlying stream of a flexi stream. +
+ +


[Generic function] +
unread-byte byte stream => nil + +


+Similar to UNREAD-CHAR in that it "unreads" the last octet from +stream which must be a flexi stream. Note that you can only call UNREAD-BYTE after a corresponding +READ-BYTE, not after READ-CHAR. +
+ +


[Generic function] +
peek-byte stream &optional peek-type eof-error-p eof-value => byte + +


+PEEK-BYTE is like PEEK-CHAR, i.e. it returns an octet from stream (which must be a flexi stream) +without actually removing it. If peek-type is NIL, the next octet is +returned, if peek-type is T, the next octet which is not 0 is +returned, if peek-type is an octet, the next octet which equals +peek-type is returned. eof-error-p and eof-value are interpreted as usual. +

+Note that the parameters aren't in the same order as with PEEK-CHAR because it doesn't make much sense to make stream an optional argument. +

+ +


[Special variable] +
*substitution-char* + +


+If this value is not NIL, it should be a character which is used +(as if by a USE-VALUE restart) whenever during reading an error of +type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise. + +
+CL-USER 1 > (defun foo ()
+              ;; not a valid UTF-8 sequence
+              (with-input-from-sequence (in '(#xe4 #xf6 #xfc))
+                (setq in (make-flexi-stream in :external-format :utf8))
+                (read-line in)))
+FOO
+
+CL-USER 2 > (foo)
+
+Error: Unexpected value #xF6 in UTF-8 sequence.
+  1 (continue) Specify a character to be used instead.
+  2 (abort) Return to level 0.
+  3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed,  or :? for other options
+
+CL-USER 3 : 1 > :c
+Type a character: x
+
+Error: End of file while in UTF-8 sequence.
+  1 (continue) Specify a character to be used instead.
+  2 (abort) Return to level 0.
+  3 Return to top loop level 0.
+
+Type :b for backtrace, :c <option number> to proceed,  or :? for other options
+
+CL-USER 4 : 1 > :c
+Type a character: y
+"xy"
+T
+
+CL-USER 5 > (handler-bind ((flexi-stream-encoding-error (lambda (condition)
+                                                          (use-value #\-))))
+              (foo))
+"--"
+T
+
+CL-USER 6 > (let ((*substitution-char* #\?))
+              (foo))
+"??"
+T
+
+
+ +


[Type] +
octet + +


+Just a shortcut for (UNSIGNED-BYTE 8). +
+ +


[Condition] +
flexi-stream-error + +


+All errors related to flexi streams are of this type. This is a subtype of STREAM-ERROR. +
+ +


[Condition] +
flexi-stream-encoding-error + +


+All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE +restart is provided. See also *SUBSTITUTION-CHAR* and example for it. FLEXI-STREAM-ENCODING-ERROR is a subtype of FLEXI-STREAM-ERROR. +
+ +


[Condition] +
flexi-stream-element-type-error + +


+All errors related to problems with the element type of flexi streams are of this type. This is a subtype of FLEXI-STREAM-ERROR and has an additional slot for the element type which can be accessed with FLEXI-STREAM-ELEMENT-TYPE-ERROR-ELEMENT-TYPE. +
+ +


[Reader] +
flexi-stream-element-type-error-element-type condition => element-type + +


+If condition is of type FLEXI-STREAM-ELEMENT-TYPE-ERROR, this function will return the offending element type. +
+ +


[Condition] +
flexi-stream-position-spec-error + +


Errors of this type are signalled if an erroneous +position spec is used in conjunction +with FILE-POSITION. This is a +subtype +of FLEXI-STREAM-ERROR +and has an additional slot for the position spec which can be accessed +with FLEXI-STREAM-POSITION-SPEC-ERROR-POSITION-SPEC. +
+ +


[Reader] +
flexi-stream-position-spec-error-position-spec condition => position-spec + +


+If condition is of type FLEXI-STREAM-POSITION-SPEC-ERROR, this function will return the offending position spec. +
+ +

In-memory streams

+ +The library also provides in-memory binary streams which are modeled after string streams and behave very similar only that they deal with octets instead of characters and the underlying data structure is not a string but either a list or a vector. These streams can obviously be used as the underlying streams for flexi streams. + +


[Standard class] +
in-memory-stream + +


+Every in-memory stream returned by MAKE-IN-MEMORY-INPUT-STREAM or MAKE-IN-MEMORY-OUTPUT-STREAM is of this type which is a subtype of STREAM. +
+ +


[Standard class] +
in-memory-input-stream + +


+Every in-memory stream returned by MAKE-IN-MEMORY-INPUT-STREAM is of this type which is a subtype of IN-MEMORY-STREAM. +
+ +


[Standard class] +
in-memory-output-stream + +


+Every in-memory stream returned by MAKE-IN-MEMORY-OUTPUT-STREAM is of this type which is a subtype of IN-MEMORY-STREAM. +
+ +


[Standard class] +
list-stream + +


+Every in-memory input stream is of this type if it reads from a list. +
+ +


[Standard class] +
vector-stream + +


+Every in-memory stream is of this type if it reads from or writes to a vector. +
+ +


[Generic function] +
make-in-memory-input-stream sequence &key start end transformer => in-memory-input-stream + +


+Returns a binary input stream (of type IN-MEMORY-INPUT-STREAM) which will supply, in order, the +octets in the subsequence of sequence bounded by start (the default is 0) and end (the default is the length of sequence). sequence must either be a list or a vector of octets. +Each octet returned will be transformed in turn by the optional +transformer function. +
+ +


[Function] +
make-in-memory-output-stream &key element-type transformer => in-memory-output-stream + +


+Returns a binary output stream (of type IN-MEMORY-OUTPUT-STREAM) which accepts objects of type element-type (a subtype of OCTET) and makes +available a sequence (see GET-OUTPUT-STREAM-SEQUENCE) that contains the octets that were actually +output. The octets stored will each be transformed by the optional transformer function. +
+ +


[Generic function] +
get-output-stream-sequence stream &key as-list => sequence + +


+Returns a vector containing, in order, all the octets that have +been output to the in-memory output stream stream. This operation clears any +octets on stream, so the vector contains only those octets which have +been output since the last call to GET-OUTPUT-STREAM-SEQUENCE or since +the creation of the stream, whichever occurred most recently. If +as-list is true the return value is coerced to a list. +
+ +


[Generic function] +
output-stream-sequence-length stream => length + +


Returns the current length of the underlying vector +of the in-memory output +stream stream, i.e. this is the length of the +sequence that GET-OUTPUT-STREAM-SEQUENCE would return if called at +this very moment. +
+ +


[Macro] +
with-input-from-sequence (var sequence &key start end transformer) statement* => result* + +


Creates an in-memory input +stream from the sequence sequence using the +parameters start and end +(see MAKE-IN-MEMORY-INPUT-STREAM), +binds var to this stream and then executes +the statement* forms. A +function transformer may optionally be specified +to transform the returned octets. The stream is automatically closed +on exit from +WITH-OUTPUT-TO-SEQUENCE, no matter whether the exit is normal or +abnormal. The return value of this macro is the return value of +the last statement of statement*. +
+ +


[Macro] +
with-output-to-sequence (var &key as-list element-type transformer) statement* => sequence + +


+Creates an in-memory output stream, binds var to this stream and +then executes the statement* forms. The stream stores +data of type element-type (a subtype of OCTET) which is (optionally) transformed by the +function transformer prior to storage. The stream is automatically closed on +exit from WITH-OUTPUT-TO-SEQUENCE, no matter whether the exit is +normal or abnormal. The return value of this macro is a vector (or a +list if as-list is true) containing the octets that were sent to the +stream within the body of the macro. +
+ +


[Condition] +
in-memory-stream-error + +


+All errors related to in-memory streams are of this type. This is a subtype of STREAM-ERROR. +
+ +


[Condition] +
in-memory-stream-closed-error + +


+An error of this type is signalled if one tries to read from or write to an in-memory stream which had already been closed. This is a subtype of IN-MEMORY-STREAM-ERROR. +
+ +

Strings

+ +This section collects a few convenience functions for strings conversions: + +


[Function] +
string-to-octets string &key external-format start end => vector + +


+ +Converts the Lisp string string from start to end to an array of +octets corresponding to the external format external-format. The defaults for +start and end +are 0 and NIL (meaning the length of the +vector). The default for external-format is the +value of +evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) + +
+ +


[Function] +
octets-to-string vector &key external-format start end => string + +


Converts the Lisp vector vector +of octets from start +to end to string using +the external +format external-format. The defaults for +start and end +are 0 and the length of the vector. The default +for external-format is the value of +evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) +
+ +
 

File positions

+ +For flexi streams as well +as for in-memory +streams, FILE-POSITION +will usually return NIL and do nothing when a second +argument is supplied. This is correct +w.r.t. the ANSI +standard, but not very helpful. However, even +with Gray +streams there is no portable way to implement a better +behaviour. +

+For LispWorks +and CLISP, +FILE-POSITION +for flexi streams will work as if the +function had been applied to the underlying stream, and +for in-memory streams it will try to do +something sensible if the underlying data structure is a vector +(i.e. not a list). Patches for other Common Lisp +implementations should be sent to +the trivial-gray-streams +maintainers. + +
 

Acknowledgements

+ +Thanks to David Lichteblau for numerous portability patches. Thanks +to Igor Plekhov for the KOI8-R code. Thanks to Anton Vodonosov for +numerous patches and additions. + +

+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.98 2007/12/29 23:15:27 edi Exp $ +

BACK TO MY HOMEPAGE + + + Added: branches/edi/encode.lisp ============================================================================== --- (empty file) +++ branches/edi/encode.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,237 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defun recover-from-encoding-error (stream format-control &rest format-args) + "Helper function used by the STREAM-READ-CHAR methods below to deal +with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and +returns its character code in this case. Otherwise signals a +FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this +function and provides a corresponding USE-VALUE restart." + (when *substitution-char* + (return-from recover-from-encoding-error (char-code *substitution-char*))) + (restart-case + (apply #'signal-encoding-error stream format-control format-args) + (use-value (char) + :report "Specify a character to be used instead." + :interactive (lambda () + (loop + (format *query-io* "Type a character: ") + (let ((line (read-line *query-io*))) + (when (= 1 (length line)) + (return (list (char line 0))))))) + (char-code char)))) + +(defmethod octets-to-char-code ((format flexi-latin-1-format) reader unreader stream) + (declare (ignore unreader stream)) + (or (funcall reader) :eof)) + +(defmethod octets-to-char-code ((format flexi-ascii-format) reader unreader stream) + (declare (ignore unreader)) + (let ((octet (or (funcall reader) + (return-from octets-to-char-code :eof)))) + (declare (type octet octet)) + (if (> octet 127) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + octet))) + +(defmethod octets-to-char-code ((format flexi-8-bit-format) reader unreader stream) + (declare (ignore unreader)) + (with-accessors ((decoding-table external-format-decoding-table)) + format + (let* ((octet (or (funcall reader) + (return-from octets-to-char-code :eof))) + (char-code (aref (the (simple-array * *) decoding-table) octet))) + (declare (type octet octet)) + (if (or (null char-code) + (= char-code 65533)) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + char-code)))) + +(defmethod octets-to-char-code ((format flexi-utf-8-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-8 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (let ((octet (read-next-byte))) + (declare (type octet octet)) + (multiple-value-bind (start count) + (cond ((zerop (logand octet #b10000000)) + (values octet 0)) + ((= #b11000000 (logand octet #b11100000)) + (values (logand octet #b00011111) 1)) + ((= #b11100000 (logand octet #b11110000)) + (values (logand octet #b00001111) 2)) + ((= #b11110000 (logand octet #b11111000)) + (values (logand octet #b00000111) 3)) + ((= #b11111000 (logand octet #b11111100)) + (values (logand octet #b00000011) 4)) + ((= #b11111100 (logand octet #b11111110)) + (values (logand octet #b00000001) 5)) + (t (return-from octets-to-char-code + (recover-from-encoding-error stream + "Unexpected value #x~X at start of UTF-8 sequence." + octet)))) + ;; note that we currently don't check for "overlong" + ;; sequences or other illegal values + (loop for result of-type (unsigned-byte 32) + = start then (+ (ash result 6) + (logand octet #b111111)) + repeat count + for octet of-type octet = (read-next-byte) + unless (= #b10000000 (logand octet #b11000000)) + do (return-from octets-to-char-code + (recover-from-encoding-error stream + "Unexpected value #x~X in UTF-8 sequence." octet)) + finally (return result))))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (the octet (read-next-byte)) + (ash (the octet (read-next-byte)) 8)))) + (declare (inline read-next-byte read-next-word) + (dynamic-extent (function read-next-byte) (function read-next-word))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word)))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (ash (the octet (read-next-byte)) 8) + (the octet (read-next-byte))))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word)))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader unreader stream) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 0 to 24 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 24 downto 0 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-cr-mixin) reader unreader stream) + "The `base' method for all streams which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of reading +one or more encoded characters." + (declare (optimize speed)) + (let ((char-code (call-next-method))) + (when (eq char-code :eof) + (return-from octets-to-char-code :eof)) + (with-accessors ((eol-style external-format-eol-style)) + format + (cond ((= char-code #.(char-code #\Return)) + (case eol-style + (:cr #.(char-code #\Newline)) + ;; in the case :CRLF we have to look ahead one character + (:crlf (let ((next-char-code (call-next-method))) + (case next-char-code + (#.(char-code #\Linefeed) + #.(char-code #\Newline)) + (:eof char-code) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise + (funcall unreader (code-char next-char-code)) + char-code)))))) + (t char-code))))) + Added: branches/edi/external-format.lisp ============================================================================== --- (empty file) +++ branches/edi/external-format.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,295 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.15 2008/05/17 16:38:24 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass external-format () + ((name :initarg :name + :reader external-format-name + :documentation "The name of the external format - a +keyword.") + (id :initarg :id + :initform nil + :reader external-format-id + :documentation "If the external format denotes a Windows +code page this ID specifies which one to use. Otherwise the +value is ignored \(and usually NIL).") + (little-endian :initarg :little-endian + :initform *default-little-endian* + :reader external-format-little-endian + :documentation "Whether multi-octet values are +read and written with the least significant octet first. For +8-bit encodings like :ISO-8859-1 this value is ignored.") + (eol-style :initarg :eol-style + :reader external-format-eol-style + :documentation "The character\(s) to or from which +a #\Newline will be translated - one of the keywords :CR, :LF, +or :CRLF.")) + (:documentation "EXTERNAL-FORMAT objects are used to denote +encodings for flexi streams.")) + +(defmethod make-load-form ((thing external-format) &optional environment) + "Defines a way to reconstruct external formats. Needed for OpenMCL." + (make-load-form-saving-slots thing :environment environment)) + +(defclass flexi-cr-mixin () + () + (:documentation "A mixin for external-formats which need +end-of-line conversion, i.e. for those where the end-of-line +designator is /not/ the single character #\Linefeed.")) + +(defclass flexi-8-bit-format (external-format) + ((encoding-hash :accessor external-format-encoding-hash) + (decoding-table :accessor external-format-decoding-table)) + (:documentation "The class for all flexi streams which use an 8-bit +encoding and thus need additional slots for the encoding/decoding +tables.")) + +(defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) + () + (:documentation "The class for all external formats which use an +8-bit encoding /and/ need end-of-line conversion.")) + +(defclass flexi-ascii-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +US-ASCCI encoding.")) + +(defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) + () + (:documentation "Special class for external formats which use the +US-ASCCI encoding /and/ need end-of-line conversion.")) + +(defclass flexi-latin-1-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-utf-32-le-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-32-be-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering /and/ need end-of-line +conversion.")) + +(defclass flexi-utf-16-le-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-be-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ need end-of-line +conversion.")) + +(defclass flexi-utf-8-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ need end-of-line conversion.")) + +(defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) + "Sets the fixed encoding/decoding tables for this particular +external format." + (declare (ignore initargs)) + (with-accessors ((encoding-hash external-format-encoding-hash) + (decoding-table flexi-stream-decoding-table) + (name external-format-name) + (id external-format-id)) + external-format + (multiple-value-setq (encoding-hash decoding-table) + (cond ((ascii-name-p name) + (values +ascii-hash+ +ascii-table+)) + ((koi8-r-name-p name) + (values +koi8-r-hash+ +koi8-r-table+)) + ((iso-8859-name-p name) + (values (cdr (assoc name +iso-8859-hashes+ :test #'eq)) + (cdr (assoc name +iso-8859-tables+ :test #'eq)))) + ((code-page-name-p name) + (values (cdr (assoc id +code-page-hashes+)) + (cdr (assoc id +code-page-tables+)))))))) + +(defun external-format-class-name (real-name eol-style little-endian) + (let ((crp (not (eq eol-style :lf)))) + (cond ((ascii-name-p real-name) + (if crp + 'flexi-cr-ascii-format + 'flexi-ascii-format)) + ((eq real-name :iso-8859-1) + (if crp + 'flexi-cr-latin-1-format + 'flexi-latin-1-format)) + ((or (koi8-r-name-p real-name) + (iso-8859-name-p real-name) + (code-page-name-p real-name)) + (if crp + 'flexi-cr-8-bit-format + 'flexi-8-bit-format)) + (t (case real-name + (:utf-8 (if crp + 'flexi-cr-utf-8-format + 'flexi-utf-8-format)) + (:utf-16 (if crp + (if little-endian + 'flexi-cr-utf-16-le-format + 'flexi-cr-utf-16-be-format) + (if little-endian + 'flexi-utf-16-le-format + 'flexi-utf-16-be-format))) + (:utf-32 (if crp + (if little-endian + 'flexi-cr-utf-32-le-format + 'flexi-cr-utf-32-be-format) + (if little-endian + 'flexi-utf-32-le-format + 'flexi-utf-32-be-format)))))))) + +(defun make-external-format% (name &key (little-endian *default-little-endian*) + id eol-style) + "Used internally by MAKE-EXTERNAL-FORMAT." + (let* ((real-name (normalize-external-format-name name)) + (initargs + (cond ((or (iso-8859-name-p real-name) + (koi8-r-name-p real-name) + (ascii-name-p real-name)) + (list :eol-style (or eol-style *default-eol-style*))) + ((code-page-name-p real-name) + (list :id (or (known-code-page-id-p id) + (error "Unknown code page ID ~S" id)) + ;; default EOL style for Windows code pages is :CRLF + :eol-style (or eol-style :crlf))) + (t (list :eol-style (or eol-style *default-eol-style*) + :little-endian little-endian))))) + (apply #'make-instance (external-format-class-name real-name eol-style little-endian) + :name real-name + initargs))) + +(defun make-external-format (name &rest args + &key (little-endian *default-little-endian*) + id eol-style) + "Creates and returns an external format object as specified. +NAME is a keyword like :LATIN1 or :UTF-8, LITTLE-ENDIAN specifies +the `endianess' of the external format and is ignored for 8-bit +encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF +which denote the end-of-line character \(sequence), ID is the ID +of a Windows code page \(and ignored for other encodings)." + (declare (ignore id little-endian)) + (let ((shortcut-args (cdr (assoc name +shortcut-map+)))) + (cond (shortcut-args + (apply #'make-external-format% + (append shortcut-args + `(:eol-style ,eol-style)))) + (t (apply #'make-external-format% name args))))) + +(defun external-format-equal (ef1 ef2) + "Checks whether two EXTERNAL-FORMAT objects denote the same +encoding." + (let* ((name1 (external-format-name ef1)) + (code-page-name-p (code-page-name-p name1))) + ;; they must habe the same canonical name + (and (eq name1 + (external-format-name ef2)) + ;; if both are code pages the IDs must be the same + (or (not code-page-name-p) + (eql (external-format-id ef1) + (external-format-id ef2))) + ;; for non-8-bit encodings the endianess must be the same + (or code-page-name-p + (ascii-name-p name1) + (koi8-r-name-p name1) + (iso-8859-name-p name1) + (eq name1 :utf-8) + (eq (not (external-format-little-endian ef1)) + (not (external-format-little-endian ef2)))) + ;; the EOL style must also be the same + (eq (external-format-eol-style ef1) + (external-format-eol-style ef2))))) + +(defun normalize-external-format (external-format) + "Returns a list which is a `normalized' representation of the +external format EXTERNAL-FORMAT. Used internally by +PRINT-OBJECT, for example. Basically, the result is argument +list that can be fed back to MAKE-EXTERNAL-FORMAT to create an +equivalent object." + (let ((name (external-format-name external-format)) + (eol-style (external-format-eol-style external-format))) + (cond ((or (ascii-name-p name) + (koi8-r-name-p name) + (iso-8859-name-p name) + (eq name :utf-8)) + (list name :eol-style eol-style)) + ((code-page-name-p name) + (list name + :id (external-format-id external-format) + :eol-style eol-style)) + (t (list name + :eol-style eol-style + :little-endian (external-format-little-endian external-format)))))) + +(defmethod print-object ((object external-format) stream) + "How an EXTERNAL-FORMAT object is rendered. Uses +NORMALIZE-EXTERNAL-FORMAT." + (print-unreadable-object (object stream :type t :identity t) + (prin1 (normalize-external-format object) stream))) \ No newline at end of file Added: branches/edi/flexi-streams.asd ============================================================================== --- (empty file) +++ branches/edi/flexi-streams.asd Sat May 17 12:49:25 2008 @@ -0,0 +1,69 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.60 2008/05/17 15:56:16 edi Exp $ + +;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :cl-user) + +(defpackage :flexi-streams-system + (:use :asdf :cl)) + +(in-package :flexi-streams-system) + +(defsystem :flexi-streams + :version "0.14.0" + :serial t + :components ((:file "packages") + (:file "ascii") + (:file "koi8-r") + (:file "iso-8859") + (:file "code-pages") + (:file "specials") + (:file "util") + (:file "external-format") + (:file "encode") + (:file "decode") + (:file "in-memory") + (:file "conditions") + (:file "stream") + #+:lispworks (:file "lw-binary-stream") + (:file "output") + (:file "input") + (:file "strings")) + :depends-on (:trivial-gray-streams)) + +(defsystem :flexi-streams-test + :components ((:module "test" + :serial t + :components ((:file "packages") + (:file "test")))) + :depends-on (:flexi-streams)) + +(defmethod perform ((o test-op) (c (eql (find-system 'flexi-streams)))) + (operate 'load-op 'flexi-streams-test) + (funcall (intern (symbol-name :run-tests) + (find-package :flexi-streams-test)))) Added: branches/edi/in-memory.lisp ============================================================================== --- (empty file) +++ branches/edi/in-memory.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,371 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.29 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass in-memory-stream (trivial-gray-stream-mixin) + ((transformer :initarg :transformer + :accessor in-memory-stream-transformer + :documentation "A function used to transform the +written/read octet to the value stored/retrieved in/from the +underlying vector.") + #+:cmu + (open-p :initform t + :accessor in-memory-stream-open-p + :documentation "For CMUCL we have to keep track of this +manually.")) + (:documentation "An IN-MEMORY-STREAM is a binary stream that reads +octets from or writes octets to a sequence in RAM.")) + +(defclass in-memory-input-stream (in-memory-stream fundamental-binary-input-stream) + () + (:documentation "An IN-MEMORY-INPUT-STREAM is a binary stream that +reads octets from a sequence in RAM.")) + +#+:cmu +(defmethod output-stream-p ((stream in-memory-input-stream)) + "Explicitly states whether this is an output stream." + (declare (optimize speed)) + nil) + +(defclass in-memory-output-stream (in-memory-stream fundamental-binary-output-stream) + () + (:documentation "An IN-MEMORY-OUTPUT-STREAM is a binary stream that +writes octets to a sequence in RAM.")) + +#+:cmu +(defmethod input-stream-p ((stream in-memory-output-stream)) + "Explicitly states whether this is an input stream." + (declare (optimize speed)) + nil) + +(defclass list-stream () + ((list :initarg :list + :accessor list-stream-list + :documentation "The underlying list of the stream.")) + (:documentation "A LIST-STREAM is a mixin for IN-MEMORY streams +where the underlying sequence is a list.")) + +(defclass vector-stream () + ((vector :initarg :vector + :accessor vector-stream-vector + :documentation "The underlying vector of the stream which +\(for output) must always be adjustable and have a fill pointer.")) + (:documentation "A VECTOR-STREAM is a mixin for IN-MEMORY streams +where the underlying sequence is a vector.")) + +(defclass list-input-stream (list-stream in-memory-input-stream) + () + (:documentation "A binary input stream that gets its data from an +associated list of octets.")) + +(defclass vector-input-stream (vector-stream in-memory-input-stream) + ((index :initarg :index + :accessor vector-stream-index + :type (integer 0 #.array-dimension-limit) + :documentation "An index into the underlying vector denoting +the current position.") + (end :initarg :end + :accessor vector-stream-end + :type (integer 0 #.array-dimension-limit) + :documentation "An index into the underlying vector denoting +the end of the available data.")) + (:documentation "A binary input stream that gets its data from an +associated vector of octets.")) + +(defclass vector-output-stream (vector-stream in-memory-output-stream) + () + (:documentation "A binary output stream that writes its data to an +associated vector.")) + +#+:cmu +(defmethod open-stream-p ((stream in-memory-stream)) + "Returns a true value if STREAM is open. See ANSI standard." + (declare (optimize speed)) + (in-memory-stream-open-p stream)) + +#+:cmu +(defmethod close ((stream in-memory-stream) &key abort) + "Closes the stream STREAM. See ANSI standard." + (declare (ignore abort) + (optimize speed)) + (prog1 + (in-memory-stream-open-p stream) + (setf (in-memory-stream-open-p stream) nil))) + +(defmethod check-if-open ((stream in-memory-stream)) + "Checks if STREAM is open and signals an error otherwise." + (declare (optimize speed)) + (unless (open-stream-p stream) + (error 'in-memory-stream-closed-error + :stream stream))) + +(defmethod stream-element-type ((stream in-memory-stream)) + "The element type is always OCTET by definition." + (declare (optimize speed)) + 'octet) + +(defmethod transform-octet ((stream in-memory-stream) octet) + "Applies the transformer of STREAM to octet and returns the result." + (funcall (or (in-memory-stream-transformer stream) + #'identity) octet)) + +(defmethod stream-read-byte ((stream list-input-stream)) + "Reads one byte by simply popping it off of the top of the list." + (declare (optimize speed)) + (check-if-open stream) + (transform-octet stream (or (pop (list-stream-list stream)) + (return-from stream-read-byte :eof)))) + +(defmethod stream-listen ((stream list-input-stream)) + "Checks whether list is not empty." + (declare (optimize speed)) + (check-if-open stream) + (list-stream-list stream)) + +(defmethod stream-read-sequence ((stream list-input-stream) sequence start end &key) + "Repeatedly pops elements from the list until it's empty." + (declare (optimize speed) (type (integer 0 *) start end)) + (loop for index from start below end + while (list-stream-list stream) + do (setf (elt sequence index) + (pop (list-stream-list stream))) + finally (return index))) + +(defmethod stream-read-byte ((stream vector-input-stream)) + "Reads one byte and increments INDEX pointer unless we're beyond +END pointer." + (declare (optimize speed)) + (check-if-open stream) + (let ((index (vector-stream-index stream))) + (cond ((< index (vector-stream-end stream)) + (incf (vector-stream-index stream)) + (transform-octet stream (aref (vector-stream-vector stream) index))) + (t :eof)))) + +(defmethod stream-listen ((stream vector-input-stream)) + "Checking whether INDEX is beyond END." + (declare (optimize speed)) + (check-if-open stream) + (< (vector-stream-index stream) (vector-stream-end stream))) + +(defmethod stream-read-sequence ((stream vector-input-stream) sequence start end &key) + "Traverses both sequences in parallel until the end of one of them +is reached." + (declare (optimize speed) (type (integer 0 *) start end)) + (loop with vector-end of-type (integer 0 #.array-dimension-limit) = (vector-stream-end stream) + with vector = (vector-stream-vector stream) + for index from start below end + for vector-index of-type (integer 0 #.array-dimension-limit) = (vector-stream-index stream) + while (< vector-index vector-end) + do (setf (elt sequence index) + (aref vector vector-index)) + (incf (vector-stream-index stream)) + finally (return index))) + +(defmethod stream-write-byte ((stream vector-output-stream) byte) + "Writes a byte \(octet) by extending the underlying vector." + (declare (optimize speed)) + (check-if-open stream) + (vector-push-extend (transform-octet stream byte) + (vector-stream-vector stream))) + +(defmethod stream-write-sequence ((stream vector-output-stream) sequence start end &key) + "Just calls VECTOR-PUSH-EXTEND repeatedly." + (declare (optimize speed) (type (integer 0 *) start end)) + (loop with vector = (vector-stream-vector stream) + for index from start below end + do (vector-push-extend (elt sequence index) vector)) + sequence) + +(defmethod stream-file-position ((stream vector-input-stream)) + "Simply returns the index into the underlying vector." + (declare (optimize speed)) + (vector-stream-index stream)) + +(defmethod (setf stream-file-position) (position-spec (stream vector-input-stream)) + "Sets the index into the underlying vector if POSITION-SPEC is acceptable." + (declare (optimize speed)) + (setf (vector-stream-index stream) + (case position-spec + (:start 0) + (:end (vector-stream-end stream)) + (otherwise + (unless (integerp position-spec) + (error 'flexi-stream-position-spec-error + :format-control "Unknown file position designator: ~S." + :format-arguments (list position-spec) + :position-spec position-spec)) + (unless (<= 0 position-spec (vector-stream-end stream)) + (error 'flexi-stream-position-spec-error + :format-control "File position designator ~S is out of bounds." + :format-arguments (list position-spec) + :position-spec position-spec)) + position-spec))) + position-spec) + +(defmethod stream-file-position ((stream vector-output-stream)) + "Simply returns the fill pointer of the underlying vector." + (declare (optimize speed)) + (fill-pointer (vector-stream-vector stream))) + +(defmethod (setf stream-file-position) (position-spec (stream vector-output-stream)) + "Sets the fill pointer underlying vector if POSITION-SPEC is +acceptable. Adjusts the vector if necessary." + (declare (optimize speed)) + (let* ((vector (vector-stream-vector stream)) + (total-size (array-total-size vector)) + (new-fill-pointer + (case position-spec + (:start 0) + (:end + (warn "File position designator :END doesn't really make sense for an output stream.") + total-size) + (otherwise + (unless (integerp position-spec) + (error 'flexi-stream-position-spec-error + :format-control "Unknown file position designator: ~S." + :format-arguments (list position-spec) + :position-spec position-spec)) + (unless (<= 0 position-spec array-total-size-limit) + (error 'flexi-stream-position-spec-error + :format-control "File position designator ~S is out of bounds." + :format-arguments (list position-spec) + :position-spec position-spec)) + position-spec)))) + (when (> new-fill-pointer total-size) + (adjust-array vector new-fill-pointer)) + (setf (fill-pointer vector) new-fill-pointer) + position-spec)) + +(defmethod make-in-memory-input-stream ((vector vector) &key (start 0) + (end (length vector)) + transformer) + "Returns a binary input stream which will supply, in order, the +octets in the subsequence of VECTOR bounded by START and END. +Each octet returned will be transformed in turn by the optional +TRANSFORMER function." + (declare (optimize speed)) + (make-instance 'vector-input-stream + :vector vector + :index start + :end end + :transformer transformer)) + +(defmethod make-in-memory-input-stream ((list list) &key (start 0) + (end (length list)) + transformer) + "Returns a binary input stream which will supply, in order, the +octets in the subsequence of LIST bounded by START and END. Each +octet returned will be transformed in turn by the optional +TRANSFORMER function." + (declare (optimize speed)) + (make-instance 'list-input-stream + :list (subseq list start end) + :transformer transformer)) + +(defun make-output-vector (&key (element-type 'octet)) + "Creates and returns an array which can be used as the underlying +vector for a VECTOR-OUTPUT-STREAM." + (declare (optimize speed)) + (make-array 0 :adjustable t + :fill-pointer 0 + :element-type element-type)) + +(defun make-in-memory-output-stream (&key (element-type 'octet) transformer) + "Returns a binary output stream which accepts objects of type +ELEMENT-TYPE \(a subtype of OCTET) and makes available a sequence +that contains the octes that were actually output. The octets +stored will each be transformed by the optional TRANSFORMER +function." + (declare (optimize speed)) + (make-instance 'vector-output-stream + :vector (make-output-vector :element-type element-type) + :transformer transformer)) + +(defmethod get-output-stream-sequence ((stream in-memory-output-stream) &key as-list) + "Returns a vector containing, in order, all the octets that have +been output to the IN-MEMORY stream STREAM. This operation clears any +octets on STREAM, so the vector contains only those octets which have +been output since the last call to GET-OUTPUT-STREAM-SEQUENCE or since +the creation of the stream, whichever occurred most recently. If +AS-LIST is true the return value is coerced to a list." + (declare (optimize speed)) + (prog1 + (if as-list + (coerce (vector-stream-vector stream) 'list) + (vector-stream-vector stream)) + (setf (vector-stream-vector stream) + (make-output-vector)))) + +(defmethod output-stream-sequence-length ((stream in-memory-output-stream)) + "Returns the current length of the underlying vector of the +IN-MEMORY output stream STREAM." + (declare (optimize speed)) + (length (the (simple-array * (*)) (vector-stream-vector stream)))) + +(defmacro with-input-from-sequence ((var sequence &key start end transformer) + &body body) + "Creates an IN-MEMORY input stream from SEQUENCE using the +parameters START and END, binds VAR to this stream and then +executes the code in BODY. A function TRANSFORMER may optionally +be specified to transform the returned octets. The stream is +automatically closed on exit from WITH-INPUT-FROM-SEQUENCE, no +matter whether the exit is normal or abnormal. The return value +of this macro is the return value of BODY." + (with-rebinding (sequence) + `(let (,var) + (unwind-protect + (progn + (setq ,var (make-in-memory-input-stream ,sequence + :start (or ,start 0) + :end (or ,end (length ,sequence)) + :transformer ,transformer)) + , at body) + (when ,var (close ,var)))))) + +(defmacro with-output-to-sequence ((var &key as-list (element-type ''octet) transformer) + &body body) + "Creates an IN-MEMORY output stream, binds VAR to this stream +and then executes the code in BODY. The stream stores data of +type ELEMENT-TYPE \(a subtype of OCTET) which is \(optionally) +transformed by the function TRANSFORMER prior to storage. The +stream is automatically closed on exit from +WITH-OUTPUT-TO-SEQUENCE, no matter whether the exit is normal or +abnormal. The return value of this macro is a vector \(or a list +if AS-LIST is true) containing the octets that were sent to the +stream within BODY." + `(let (,var) + (unwind-protect + (progn + (setq ,var (make-in-memory-output-stream :element-type ,element-type + :transformer ,transformer)) + , at body + (get-output-stream-sequence ,var :as-list ,as-list)) + (when ,var (close ,var))))) Added: branches/edi/input.lisp ============================================================================== --- (empty file) +++ branches/edi/input.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,288 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.57 2008/05/17 16:44:53 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +#-:lispworks +(defmethod read-byte* ((flexi-input-stream flexi-input-stream)) + "Reads one byte \(octet) from the underlying stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not +empty)." + (declare (optimize speed)) + ;; we're using S instead of STREAM here because of an + ;; issue with SBCL: + ;; + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (s flexi-stream-stream)) + flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) + (when (and bound + (>= position bound)) + (return-from read-byte* nil)) + (incf position) + (or (pop octet-stack) + (read-byte s nil nil) + (progn (decf position) nil)))) + +#+:lispworks +(defmethod read-byte* ((flexi-input-stream flexi-input-stream)) + "Reads one byte \(octet) from the underlying stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not +empty)." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) + (when (and bound + (>= position bound)) + (return-from read-byte* nil)) + (incf position) + (or (pop octet-stack) + ;; we use READ-SEQUENCE because READ-BYTE doesn't work with all + ;; bivalent streams in LispWorks + (let* ((buffer (make-array 1 :element-type 'octet)) + (new-position (read-sequence buffer stream))) + (cond ((zerop new-position) + (decf position) nil) + (t (aref buffer 0))))))) + +#+:lispworks +(defmethod read-byte* ((flexi-input-stream flexi-binary-input-stream)) + "Reads one byte \(octet) from the underlying stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty). +Optimized version \(only needed for LispWorks) in case the underlying +stream is binary." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) + (when (and bound + (>= position bound)) + (return-from read-byte* nil)) + (incf position) + (or (pop octet-stack) + (read-byte stream nil nil) + (progn (decf position) nil)))) + +(defmethod stream-clear-input ((flexi-input-stream flexi-input-stream)) + "Calls the corresponding method for the underlying input stream +and also clears the value of the OCTET-STACK slot." + (declare (optimize speed)) + ;; note that we don't reset the POSITION slot + (with-accessors ((octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (setq octet-stack nil) + (clear-input stream))) + +(defmethod stream-listen ((flexi-input-stream flexi-input-stream)) + "Calls the corresponding method for the underlying input stream +but first checks if \(old) input is available in the OCTET-STACK +slot." + (declare (optimize speed)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-input-stream + (when (and bound + (>= position bound)) + (return-from stream-listen nil)) + (or octet-stack (listen stream)))) + +(defmethod stream-read-byte ((stream flexi-input-stream)) + "Reads one byte \(octet) from the underlying stream." + (declare (optimize speed)) + ;; set LAST-CHAR-CODE slot to NIL because we can't UNREAD-CHAR after + ;; this operation + (with-accessors ((last-char-code flexi-stream-last-char-code) + (last-octet flexi-stream-last-octet)) + stream + (setq last-char-code nil) + (let ((octet (read-byte* stream))) + (setq last-octet octet) + (or octet :eof)))) + +(defun unread-char% (char flexi-input-stream) + "Used internally to put a character CHAR which was already read back +on the stream. Uses the OCTET-STACK slot and decrements the POSITION +slot accordingly." + (with-accessors ((position flexi-stream-position) + (octet-stack flexi-stream-octet-stack) + (external-format flexi-stream-external-format)) + flexi-input-stream + (let ((counter 0) octets-reversed) + (declare (integer position) + (fixnum counter)) + (char-to-octets external-format + char + (lambda (octet) + (incf counter) + (push octet octets-reversed)) + nil) + (decf position counter) + (setq octet-stack (nreconc octets-reversed octet-stack))))) + +(defmethod stream-read-char ((stream flexi-input-stream)) + (declare (optimize speed)) + ;; note that we do nothing for the :LF EOL style because we assume + ;; that #\Newline is the same as #\Linefeed in all Lisps which will + ;; use this library + (with-accessors ((external-format flexi-stream-external-format) + (last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) + stream + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (let ((char-code (octets-to-char-code external-format + (lambda () + (read-byte* stream)) + (lambda (char) + (unread-char% char stream)) + stream))) + ;; remember this character and its char code for UNREAD-CHAR + (setq last-char-code char-code) + (or (code-char char-code) char-code)))) + +(defmethod stream-read-char-no-hang ((stream flexi-input-stream)) + "Reads one character if the underlying stream has at least one +octet available." + (declare (optimize speed)) + ;; note that this may block for non-8-bit encodings - I think + ;; there's no easy way to handle this correctly + (and (stream-listen stream) + (stream-read-char stream))) + +(defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key) + "Reads enough input from STREAM to fill SEQUENCE from START to END. +If SEQUENCE is an array which can store octets we use READ-SEQUENCE to +fill it in one fell swoop, otherwise we iterate using +STREAM-READ-CHAR." + (declare (optimize speed) + (type (integer 0 *) start end)) + (with-accessors ((last-char-code flexi-stream-last-char-code) + (last-octet flexi-stream-last-octet) + (stream flexi-stream-stream) + (position flexi-stream-position) + (octet-stack flexi-stream-octet-stack)) + flexi-input-stream + (declare (integer position)) + (cond ((and (arrayp sequence) + (subtypep 'octet (array-element-type sequence))) + (setf last-char-code nil) + (let ((cursor start)) + (loop with stack = octet-stack + for continuep = (< cursor end) + for octet = (and continuep (pop stack)) + while octet + do (setf (aref sequence cursor) (the octet octet)) + (incf cursor)) + (let ((index + (read-sequence sequence stream :start cursor :end end))) + (incf position (- index start)) + (when (> index start) + (setq last-octet (aref sequence (1- index)))) + index))) + (t + (loop for index from start below end + for element = (stream-read-char flexi-input-stream) + until (eq element :eof) + do (setf (elt sequence index) element) + finally (return index)))))) + +(defmethod stream-unread-char ((stream flexi-input-stream) char) + "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM. +Makes sure CHAR will only be unread if it was the last character +read and if it was read with the same encoding that's currently +being used by the stream." + (declare (optimize speed)) + (with-accessors ((last-char-code flexi-stream-last-char-code)) + stream + (unless last-char-code + (error 'flexi-stream-simple-error + :format-control "No character to unread from this stream \(or external format has changed or last reading operation was binary).")) + (unless (= (char-code char) last-char-code) + (error 'flexi-stream-simple-error + :format-control "Last character read (~S) was different from ~S." + :format-arguments (list (code-char last-char-code) char))) + (unread-char% char stream) + (setq last-char-code nil) + nil)) + +(defmethod unread-byte (byte (flexi-input-stream flexi-input-stream)) + "Similar to UNREAD-CHAR in that it `unreads' the last octet from +STREAM. Note that you can only call UNREAD-BYTE after a corresponding +READ-BYTE." + (declare (optimize speed)) + (with-accessors ((last-octet flexi-stream-last-octet) + (octet-stack flexi-stream-octet-stack) + (position flexi-stream-position)) + flexi-input-stream + (unless last-octet + (error 'flexi-stream-simple-error + :format-control "No byte to unread from this stream \(or last reading operation read a character).")) + (unless (= byte last-octet) + (error 'flexi-stream-simple-error + :format-control "Last byte read was different from #x~X." + :format-arguments (list byte))) + (setq last-octet nil) + (decf (the integer position)) + (push byte octet-stack) + nil)) + +(defmethod peek-byte ((flexi-input-stream flexi-input-stream) + &optional peek-type (eof-error-p t) eof-value) + "PEEK-BYTE is like PEEK-CHAR, i.e. it returns an octet from +FLEXI-INPUT-STREAM without actually removing it. If PEEK-TYPE is NIL +the next octet is returned, if PEEK-TYPE is T, the next octet which is +not 0 is returned, if PEEK-TYPE is an octet, the next octet which +equals PEEK-TYPE is returned. EOF-ERROR-P and EOF-VALUE are +interpreted as usual." + (declare (optimize speed)) + (loop for octet = (read-byte flexi-input-stream eof-error-p eof-value) + until (cond ((null peek-type)) + ((eql octet eof-value)) + ((eq peek-type t) + (plusp octet)) + (t (= octet peek-type))) + finally (unless (eql octet eof-value) + (unread-byte octet flexi-input-stream)) + (return octet))) \ No newline at end of file Added: branches/edi/iso-8859.lisp ============================================================================== --- (empty file) +++ branches/edi/iso-8859.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,53 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.6 2008/05/17 13:50:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +;;; the following code was auto-generated from files which can be +;;; found at + +(defvar +iso-8859-tables+ + '((:iso-8859-1 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) + (:iso-8859-2 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 352 350 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 357 378 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) + (:iso-8859-3 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168 304 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305 351 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 202 203 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219 220 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236 237 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365 349 729)) + (:iso-8859-4 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 352 274 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 291 359 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 326 333 311 244 245 246 247 248 371 250 251 252 361 363 729)) + (:iso-8859-5 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 167 1118 1119)) + (:iso-8859-6 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533 65533 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 1567 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 65533 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) + (:iso-8859-7 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 168 169 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) + (:iso-8859-8 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) + (:iso-8859-9 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) + (:iso-8859-10 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 272 352 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 359 382 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 326 333 243 244 245 246 361 248 371 250 251 252 253 254 312)) + (:iso-8859-11 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 65533 65533 65533 65533)) + (:iso-8859-13 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216 169 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 8217)) + (:iso-8859-14 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 7808 169 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7809 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 217 218 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 253 375 255)) + (:iso-8859-15 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 169 170 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 187 338 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) + (:iso-8859-16 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353 169 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 537 187 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 204 205 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 538 223 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 273 324 242 243 244 337 246 347 369 249 250 251 252 281 539 255))) + "A list of the ISO-8859 encodings where each element is a cons +with the car being a keyword denoting the encoding and the cdr +being a vector enumerating the corresponding character codes.") Added: branches/edi/koi8-r.lisp ============================================================================== --- (empty file) +++ branches/edi/koi8-r.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,6 @@ +(in-package :flexi-streams) + +;; http://unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT +(defvar +koi8-r-table+ + #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 9472 9474 9484 9488 9492 9496 9500 9508 9516 9524 9532 9600 9604 9608 9612 9616 9617 9618 9619 8992 9632 8729 8730 8776 8804 8805 160 8993 176 178 183 247 9552 9553 9554 1105 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 1025 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 169 1102 1072 1073 1094 1076 1077 1092 1075 1093 1080 1081 1082 1083 1084 1085 1086 1087 1103 1088 1089 1090 1091 1078 1074 1100 1099 1079 1096 1101 1097 1095 1098 1070 1040 1041 1062 1044 1045 1060 1043 1061 1048 1049 1050 1051 1052 1053 1054 1055 1071 1056 1057 1058 1059 1046 1042 1068 1067 1047 1064 1069 1065 1063 1066) + "An array enumerating the character codes for the KOI8-R encoding.") Added: branches/edi/lw-binary-stream.lisp ============================================================================== --- (empty file) +++ branches/edi/lw-binary-stream.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,315 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.13 2008/05/17 14:21:20 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass flexi-binary-output-stream (flexi-output-stream) + () + (:documentation "This class is for output streams where the +underlying stream is binary. It exists solely for the purpose of +optimizing output on LispWorks. See WRITE-BYTE*.")) + +(defclass flexi-binary-input-stream (flexi-input-stream) + () + (:documentation "This class is for input streams where the +underlying stream is binary. It exists solely for the purpose of +optimizing input on LispWorks. See READ-BYTE*.")) + +(defclass flexi-binary-io-stream (flexi-binary-input-stream flexi-binary-output-stream flexi-io-stream) + () + (:documentation "This class is for bidirectional streams where the +underlying stream is binary. It exists solely for the purpose of +optimizing input and output on LispWorks. See READ-BYTE* and +WRITE-BYTE*.")) + +(defclass flexi-binary-8-bit-input-stream (flexi-8-bit-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-8-BIT-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-8-bit-input-stream (flexi-cr-mixin flexi-binary-8-bit-input-stream) + () + (:documentation "Like FLEXI-CR-8-BIT-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-ascii-input-stream (flexi-ascii-input-stream flexi-binary-8-bit-input-stream) + () + (:documentation "Like FLEXI-ASCII-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-ascii-input-stream (flexi-cr-mixin flexi-binary-ascii-input-stream) + () + (:documentation "Like FLEXI-CR-ASCII-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-latin-1-input-stream (flexi-latin-1-input-stream flexi-binary-8-bit-input-stream) + () + (:documentation "Like FLEXI-LATIN-1-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-latin-1-input-stream (flexi-cr-mixin flexi-binary-latin-1-input-stream) + () + (:documentation "Like FLEXI-CR-LATIN-1-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-le-input-stream (flexi-utf-32-le-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-32-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-binary-utf-32-le-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-be-input-stream (flexi-utf-32-be-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-32-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-binary-utf-32-be-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-le-input-stream (flexi-utf-16-le-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-16-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-binary-utf-16-le-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-LE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-be-input-stream (flexi-utf-16-be-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-16-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-binary-utf-16-be-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-BE-INPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-8-input-stream (flexi-utf-8-input-stream flexi-binary-input-stream) + () + (:documentation "Like FLEXI-UTF-8-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-8-input-stream (flexi-cr-mixin flexi-binary-utf-8-input-stream) + () + (:documentation "Like FLEXI-CR-UTF-8-INPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-8-bit-output-stream (flexi-8-bit-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-8-BIT-OUTPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-8-bit-output-stream (flexi-cr-mixin flexi-binary-8-bit-output-stream) + () + (:documentation "Like FLEXI-CR-8-BIT-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-ascii-output-stream (flexi-ascii-output-stream flexi-binary-8-bit-output-stream) + () + (:documentation "Like FLEXI-ASCII-OUTPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-ascii-output-stream (flexi-cr-mixin flexi-binary-ascii-output-stream) + () + (:documentation "Like FLEXI-CR-ASCII-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-latin-1-output-stream (flexi-latin-1-output-stream flexi-binary-8-bit-output-stream) + () + (:documentation "Like FLEXI-LATIN-1-OUTPUT-STREAM but optimized +for LispWorks binary streams.")) + +(defclass flexi-binary-cr-latin-1-output-stream (flexi-cr-mixin flexi-binary-latin-1-output-stream) + () + (:documentation "Like FLEXI-CR-LATIN-1-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-le-output-stream (flexi-utf-32-le-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-32-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-binary-utf-32-le-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-be-output-stream (flexi-utf-32-be-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-32-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-binary-utf-32-be-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-le-output-stream (flexi-utf-16-le-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-16-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-binary-utf-16-le-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-LE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-be-output-stream (flexi-utf-16-be-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-16-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-binary-utf-16-be-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-BE-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-8-output-stream (flexi-utf-8-output-stream flexi-binary-output-stream) + () + (:documentation "Like FLEXI-UTF-8-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-8-output-stream (flexi-cr-mixin flexi-binary-utf-8-output-stream) + () + (:documentation "Like FLEXI-CR-UTF-8-OUTPUT-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-8-bit-io-stream (flexi-binary-io-stream flexi-8-bit-io-stream) + () + (:documentation "Like FLEXI-8-BIT-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-8-bit-io-stream (flexi-cr-mixin flexi-binary-8-bit-io-stream) + () + (:documentation "Like FLEXI-CR-8-BIT-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-ascii-io-stream (flexi-ascii-io-stream flexi-binary-8-bit-io-stream) + () + (:documentation "Like FLEXI-ASCII-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-ascii-io-stream (flexi-cr-mixin flexi-binary-ascii-io-stream) + () + (:documentation "Like FLEXI-CR-ASCII-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-latin-1-io-stream (flexi-latin-1-io-stream flexi-binary-8-bit-io-stream) + () + (:documentation "Like FLEXI-LATIN-1-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-latin-1-io-stream (flexi-cr-mixin flexi-binary-latin-1-io-stream) + () + (:documentation "Like FLEXI-CR-LATIN-1-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-le-io-stream (flexi-utf-32-le-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-32-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-binary-utf-32-le-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-32-be-io-stream (flexi-utf-32-be-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-32-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-binary-utf-32-be-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-32-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-le-io-stream (flexi-utf-16-le-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-16-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-binary-utf-16-le-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-LE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-16-be-io-stream (flexi-utf-16-be-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-16-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-binary-utf-16-be-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-16-BE-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-utf-8-io-stream (flexi-utf-8-io-stream flexi-binary-io-stream) + () + (:documentation "Like FLEXI-UTF-8-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defclass flexi-binary-cr-utf-8-io-stream (flexi-cr-mixin flexi-binary-utf-8-io-stream) + () + (:documentation "Like FLEXI-CR-UTF-8-IO-STREAM but +optimized for LispWorks binary streams.")) + +(defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) + "Might change the class of FLEXI-STREAM for optimization purposes. +Only needed for LispWorks." + (declare (ignore initargs) + (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-stream + (when (subtypep (stream-element-type stream) 'octet) + (change-class flexi-stream + (typecase flexi-stream + (flexi-io-stream 'flexi-binary-io-stream) + (otherwise 'flexi-binary-output-stream)))))) + +(defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) + "Might change the class of FLEXI-STREAM for optimization purposes. +Only needed for LispWorks." + (declare (ignore initargs) + (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-stream + (when (subtypep (stream-element-type stream) 'octet) + (change-class flexi-stream + (typecase flexi-stream + (flexi-io-stream 'flexi-binary-io-stream) + (otherwise 'flexi-binary-input-stream)))))) Added: branches/edi/output.lisp ============================================================================== --- (empty file) +++ branches/edi/output.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,199 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.47 2008/05/17 16:40:33 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric write-byte* (byte sink) + (:documentation "Writes one byte \(octet) to the underlying stream +of SINK \(if SINK is a flexi stream) or adds the byte to the end of +SINK \(if SINK is an array with a fill pointer).")) + +#-:lispworks +(defmethod write-byte* (byte (sink flexi-output-stream)) + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + sink + (write-byte byte stream))) + +#+:lispworks +(defmethod write-byte* (byte (sink flexi-output-stream)) + (declare (optimize speed)) + ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all + ;; bivalent streams in LispWorks (4.4.6) + (with-accessors ((stream flexi-stream-stream)) + sink + (write-sequence (make-array 1 :element-type 'octet + :initial-element byte) + stream) + byte)) + +#+:lispworks +(defmethod write-byte* (byte (sink flexi-binary-output-stream)) + "Optimized version \(only needed for LispWorks) in case the +underlying stream is binary." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + sink + (write-byte byte stream))) + +(defmethod stream-write-char ((stream flexi-output-stream) char) + (declare (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format)) + stream + (char-to-octets external-format + char + (lambda (octet) + (write-byte* octet stream)) + stream))) + +(defmethod stream-write-char :after ((stream flexi-output-stream) char) + (declare (optimize speed)) + ;; update the column unless we're in the middle of the line and + ;; the current value is NIL + (with-accessors ((column flexi-stream-column)) + stream + (cond ((char= char #\Newline) (setq column 0)) + (column (incf (the integer column)))))) + +(defmethod stream-clear-output ((flexi-output-stream flexi-output-stream)) + "Simply calls the corresponding method for the underlying +output stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (clear-output stream))) + +(defmethod stream-finish-output ((flexi-output-stream flexi-output-stream)) + "Simply calls the corresponding method for the underlying +output stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (finish-output stream))) + +(defmethod stream-force-output ((flexi-output-stream flexi-output-stream)) + "Simply calls the corresponding method for the underlying +output stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (force-output stream))) + +(defmethod stream-line-column ((flexi-output-stream flexi-output-stream)) + "Returns the column stored in the COLUMN slot of the +FLEXI-OUTPUT-STREAM object STREAM." + (declare (optimize speed)) + (with-accessors ((column flexi-stream-column)) + flexi-output-stream + column)) + +(defmethod stream-write-byte ((flexi-output-stream flexi-output-stream) byte) + "Writes a byte \(octet) to the underlying stream." + (declare (optimize speed)) + (with-accessors ((column flexi-stream-column)) + flexi-output-stream + ;; set column to NIL because we don't know how to handle binary + ;; output mixed with character output + (setq column nil) + (write-byte* byte flexi-output-stream))) + +#+:allegro +(defmethod stream-terpri ((stream flexi-output-stream)) + "Writes a #\Newline character to the underlying stream." + (declare (optimize speed)) + ;; needed for AllegroCL - grrr... + (stream-write-char stream #\Newline)) + +(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) + "Writes all elements of the sequence SEQUENCE from START to END +to the underlying stream. The elements can be either octets or +characters. Characters are output according to the current +encoding \(external format) of the FLEXI-OUTPUT-STREAM object +STREAM." + (declare (optimize speed) + (type (integer 0 *) start end)) + (with-accessors ((stream flexi-stream-stream) + (column flexi-stream-column)) + flexi-output-stream + (cond ((and (arrayp sequence) + (subtypep (array-element-type sequence) 'octet)) + ;; set column to NIL because we don't know how to handle binary + ;; output mixed with character output + (setq column nil) + (write-sequence sequence stream :start start :end end)) + (t (loop for index from start below end + for element = (elt sequence index) + when (characterp element) do + (stream-write-char flexi-output-stream element) + else do + (stream-write-byte flexi-output-stream element)) + sequence)))) + +(defmethod stream-write-sequence ((stream flexi-output-stream) (sequence string) start end &key) + "Optimized method for the cases where SEQUENCE is a string. Fills +an internal buffer and uses repeated calls to WRITE-SEQUENCE to write +to the underlying stream." + (declare (optimize speed) + (type (integer 0 *) start end)) + ;; don't use this optimized method for bivalent character streams on + ;; LispWorks, as it currently gets confused by the fill pointer + #+:lispworks + (unless (typep stream 'flexi-binary-output-stream) + (return-from stream-write-sequence + (call-next-method))) + (let* ((buffer (make-array (+ +buffer-size+ 20) + :element-type '(unsigned-byte 8) + :fill-pointer 0)) + (last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (loop with format = (flexi-stream-external-format stream) + for index from start below end + do (char-to-octets format + (aref sequence index) + (lambda (octet) + (vector-push octet buffer)) + stream) + when (>= (fill-pointer buffer) +buffer-size+) do + (write-sequence buffer (flexi-stream-stream stream)) + (setf (fill-pointer buffer) 0) + finally (when (>= (fill-pointer buffer) 0) + (write-sequence buffer (flexi-stream-stream stream)))) + (setf (flexi-stream-column stream) + (cond (last-newline-pos (- end last-newline-pos 1)) + ((flexi-stream-column stream) + (+ (flexi-stream-column stream) (- end start)))))) + sequence) + +(defmethod stream-write-string ((stream flexi-output-stream) string + &optional (start 0) (end (length string))) + "Simply hands over to the optimized method for STREAM-WRITE-SEQUENCE." + (stream-write-sequence stream string start (or end (length string)))) Added: branches/edi/packages.lisp ============================================================================== --- (empty file) +++ branches/edi/packages.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,83 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.31 2008/05/17 13:50:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :cl-user) + +(unless (find-symbol (symbol-name :stream-file-position) :trivial-gray-streams) + (error "You need a newer version of TRIVIAL-GRAY-STREAMS.")) + +(defpackage :flexi-streams + (:use :cl :trivial-gray-streams) + (:nicknames :flex) + #+:lispworks + (:shadow :with-accessors) + (:export :*default-eol-style* + :*default-little-endian* + :*substitution-char* + :external-format-eol-style + :external-format-equal + :external-format-id + :external-format-little-endian + :external-format-name + :flexi-input-stream + :flexi-output-stream + :flexi-io-stream + :flexi-stream + :flexi-stream-bound + :flexi-stream-external-format + :flexi-stream-encoding-error + :flexi-stream-element-type + :flexi-stream-element-type-error + :flexi-stream-element-type-error-element-type + :flexi-stream-error + :flexi-stream-column + :flexi-stream-position + :flexi-stream-position-spec-error + :flexi-stream-position-spec-error-position-spec + :flexi-stream-stream + :get-output-stream-sequence + :in-memory-stream + :in-memory-stream-closed-error + :in-memory-stream-error + :in-memory-input-stream + :in-memory-output-stream + :list-stream + :make-external-format + :make-in-memory-input-stream + :make-in-memory-output-stream + :make-flexi-stream + :octet + :octets-to-string + :output-stream-sequence-length + :peek-byte + :string-to-octets + :unread-byte + :vector-stream + :with-input-from-sequence + :with-output-to-sequence)) Added: branches/edi/specials.lisp ============================================================================== --- (empty file) +++ branches/edi/specials.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,184 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.26 2008/05/17 13:50:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(deftype octet () + "A shortcut for \(UNSIGNED-BYTE 8)." + '(unsigned-byte 8)) + +(defvar +name-map+ + '((:utf8 . :utf-8) + (:utf16 . :utf-16) + (:ucs2 . :utf-16) + (:ucs-2 . :utf-16) + (:unicode . :utf-16) + (:utf32 . :utf-32) + (:ucs4 . :utf-32) + (:ucs-4 . :utf-32) + (:ascii . :us-ascii) + (:koi8r . :koi8-r) + (:latin-1 . :iso-8859-1) + (:latin1 . :iso-8859-1) + (:latin-2 . :iso-8859-2) + (:latin2 . :iso-8859-2) + (:latin-3 . :iso-8859-3) + (:latin3 . :iso-8859-3) + (:latin-4 . :iso-8859-4) + (:latin4 . :iso-8859-4) + (:cyrillic . :iso-8859-5) + (:arabic . :iso-8859-6) + (:greek . :iso-8859-7) + (:hebrew . :iso-8859-8) + (:latin-5 . :iso-8859-9) + (:latin5 . :iso-8859-9) + (:latin-6 . :iso-8859-10) + (:latin6 . :iso-8859-10) + (:thai . :iso-8859-11) + (:latin-7 . :iso-8859-13) + (:latin7 . :iso-8859-13) + (:latin-8 . :iso-8859-14) + (:latin8 . :iso-8859-14) + (:latin-9 . :iso-8859-15) + (:latin9 . :iso-8859-15) + (:latin-0 . :iso-8859-15) + (:latin0 . :iso-8859-15) + (:latin-10 . :iso-8859-16) + (:latin10 . :iso-8859-16) + (:codepage . :code-page) + #+(and :lispworks :win32) + (win32:code-page . :code-page)) + "An alist which mapes alternative names for external formats to +their canonical counterparts.") + +(defvar +shortcut-map+ + '((:ucs-2le . (:ucs-2 :little-endian t)) + (:ucs-2be . (:ucs-2 :little-endian nil)) + (:ucs-4le . (:ucs-4 :little-endian t)) + (:ucs-4be . (:ucs-4 :little-endian nil)) + (:utf-16le . (:utf-16 :little-endian t)) + (:utf-16be . (:utf-16 :little-endian nil)) + (:utf-32le . (:utf-32 :little-endian t)) + (:utf-32be . (:utf-32 :little-endian nil)) + (:ibm437 . (:code-page :id 437)) + (:ibm850 . (:code-page :id 850)) + (:ibm852 . (:code-page :id 852)) + (:ibm855 . (:code-page :id 855)) + (:ibm857 . (:code-page :id 857)) + (:ibm860 . (:code-page :id 860)) + (:ibm861 . (:code-page :id 861)) + (:ibm862 . (:code-page :id 862)) + (:ibm863 . (:code-page :id 863)) + (:ibm864 . (:code-page :id 864)) + (:ibm865 . (:code-page :id 865)) + (:ibm866 . (:code-page :id 866)) + (:ibm869 . (:code-page :id 869)) + (:windows-1250 . (:code-page :id 1250)) + (:windows-1251 . (:code-page :id 1251)) + (:windows-1252 . (:code-page :id 1252)) + (:windows-1253 . (:code-page :id 1253)) + (:windows-1254 . (:code-page :id 1254)) + (:windows-1255 . (:code-page :id 1255)) + (:windows-1256 . (:code-page :id 1256)) + (:windows-1257 . (:code-page :id 1257)) + (:windows-1258 . (:code-page :id 1258))) + "An alist which maps shortcuts for external formats to their +long forms.") + +(defvar *default-eol-style* + #+:win32 :crlf + #-:win32 :lf + "The end-of-line style used by external formats if none is +explicitly given. Depends on the OS the code is compiled on.") + +(defvar *default-little-endian* + #+:little-endian t + #-:little-endian nil + "Whether external formats are little-endian by default +\(i.e. unless explicitly specified). Depends on the platform +the code is compiled on.") + +(defvar *substitution-char* nil + "If this value is not NIL, it should be a character which is used +\(as if by a USE-VALUE restart) whenever during reading an error of +type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise.") + +(defun invert-table (table) + "`Inverts' an array which maps octets to character codes to a +hash tables which maps character codes to octets." + (let ((hash (make-hash-table))) + (loop for octet from 0 + for char-code across table + unless (= char-code 65533) + do (setf (gethash char-code hash) octet)) + hash)) + +(defvar +iso-8859-hashes+ + (loop for (name . table) in +iso-8859-tables+ + collect (cons name (invert-table table))) + "An alist which maps names for ISO-8859 encodings to hash +tables which map character codes to the corresponding octets.") + +(defvar +code-page-hashes+ + (loop for (id . table) in +code-page-tables+ + collect (cons id (invert-table table))) + "An alist which maps IDs of Windows code pages to hash tables +which map character codes to the corresponding octets.") + +(defvar +ascii-hash+ (invert-table +ascii-table+) + "A hash table which maps US-ASCII character codes to the +corresponding octets.") + +(defvar +koi8-r-hash+ (invert-table +koi8-r-table+) + "A hash table which maps KOI8-R character codes to the +corresponding octets.") + +(defconstant +buffer-size+ 8192 + "Size of buffers used for internal purposes.") + +(pushnew :flexi-streams *features*) + +;; stuff for Nikodemus Siivola's HYPERDOC +;; see +;; and +;; also used by LW-ADD-ONS + +(defvar *hyperdoc-base-uri* "http://weitz.de/flexi-streams/") + +(let ((exported-symbols-alist + (loop for symbol being the external-symbols of :flexi-streams + collect (cons symbol + (concatenate 'string + "#" + (string-downcase symbol)))))) + (defun hyperdoc-lookup (symbol type) + (declare (ignore type)) + (cdr (assoc symbol + exported-symbols-alist + :test #'eq)))) Added: branches/edi/stream.lisp ============================================================================== --- (empty file) +++ branches/edi/stream.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,236 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.57 2008/05/17 14:21:20 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass flexi-stream (trivial-gray-stream-mixin) + ((stream :initarg :stream + :reader flexi-stream-stream + :documentation "The actual stream that's used for +input and/or output. It must be capable of reading/writing +octets with READ-SEQUENCE and/or WRITE-SEQUENCE.") + (external-format :initform (make-external-format :iso-8859-1) + :initarg :flexi-stream-external-format + :accessor flexi-stream-external-format + :documentation "The encoding currently used +by this stream. Can be changed on the fly.") + (element-type :initform #+:lispworks 'lw:simple-char #-:lispworks 'character + :initarg :element-type + :accessor flexi-stream-element-type + :documentation "The element type of this stream.")) + (:documentation "A FLEXI-STREAM object is a stream that's +`layered' atop an existing binary/bivalent stream in order to +allow for multi-octet external formats. FLEXI-STREAM itself is a +mixin and should not be instantiated.")) + +(defun maybe-convert-external-format (external-format) + "Given an external format designator \(a keyword, a list, or an +EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT +object." + (typecase external-format + (symbol (make-external-format external-format)) + (list (apply #'make-external-format external-format)) + (otherwise external-format))) + +(defmethod initialize-instance :after ((flexi-stream flexi-stream) &rest initargs) + "Makes sure the EXTERNAL-FORMAT and ELEMENT-TYPE slots contain +reasonable values." + (declare (ignore initargs) + (optimize speed)) + (with-accessors ((external-format flexi-stream-external-format) + (element-type flexi-stream-element-type)) + flexi-stream + (unless (or (subtypep element-type 'character) + (subtypep element-type 'octet)) + (error 'flexi-stream-element-type-error + :element-type element-type + :stream flexi-stream)) + (setq external-format (maybe-convert-external-format external-format)))) + +(defmethod (setf flexi-stream-external-format) :around (new-value (flexi-stream flexi-stream)) + "Converts the new value to an EXTERNAL-FORMAT object if +necessary." + (call-next-method (maybe-convert-external-format new-value) flexi-stream)) + +(defmethod (setf flexi-stream-element-type) :before (new-value (flexi-stream flexi-stream)) + "Checks whether the new value makes sense before it is set." + (unless (or (subtypep new-value 'character) + (subtypep new-value 'octet)) + (error 'flexi-stream-element-type-error + :element-type new-value + :stream flexi-stream))) + +(defmethod stream-element-type ((stream flexi-stream)) + "Returns the element type that was provided by the creator of +the stream." + (declare (optimize speed)) + (flexi-stream-element-type stream)) + +(defmethod close ((stream flexi-stream) &key abort) + "Closes the flexi stream by closing the underlying `real' +stream." + (declare (optimize speed)) + (with-accessors ((stream flexi-stream-stream)) + stream + (cond ((open-stream-p stream) + (close stream :abort abort)) + (t nil)))) + +(defmethod open-stream-p ((stream flexi-stream)) + "A flexi stream is open if its underlying stream is open." + (declare (optimize speed)) + (open-stream-p (flexi-stream-stream stream))) + +(defmethod stream-file-position ((stream flexi-stream)) + "Dispatch to method for underlying stream." + (declare (optimize speed)) + (stream-file-position (flexi-stream-stream stream))) + +(defmethod (setf stream-file-position) (position-spec (stream flexi-stream)) + "Dispatch to method for underlying stream." + (declare (optimize speed)) + (setf (stream-file-position (flexi-stream-stream stream)) + position-spec)) + +(defclass flexi-output-stream (flexi-stream fundamental-binary-output-stream + fundamental-character-output-stream) + ((column :initform 0 + :accessor flexi-stream-column + :documentation "The current output column. A +non-negative integer or NIL.")) + (:documentation "A FLEXI-OUTPUT-STREAM is a FLEXI-STREAM that +can actually be instatiated and used for output. Don't use +MAKE-INSTANCE to create a new FLEXI-OUTPUT-STREAM but use +MAKE-FLEXI-STREAM instead.")) + +#+:cmu +(defmethod input-stream-p ((stream flexi-output-stream)) + "Explicitly states whether this is an input stream." + (declare (optimize speed)) + nil) + +(defclass flexi-input-stream (flexi-stream fundamental-binary-input-stream + fundamental-character-input-stream) + ((last-char-code :initform nil + :accessor flexi-stream-last-char-code + :documentation "This slot either holds NIL or the +last character \(code) read successfully. This is mainly used for +UNREAD-CHAR sanity checks.") + (last-octet :initform nil + :accessor flexi-stream-last-octet + :documentation "This slot either holds NIL or the last +octet read successfully from the stream using a `binary' operation +such as READ-BYTE. This is mainly used for UNREAD-BYTE sanity +checks.") + (octet-stack :initform nil + :accessor flexi-stream-octet-stack + :documentation "A small buffer which holds octets +that were already read from the underlying stream but not yet +used to produce characters. This is mainly used if we have to +look ahead for a CR/LF line ending.") + (position :initform 0 + :initarg :position + :type integer + :accessor flexi-stream-position + :documentation "The position within the stream where each +octet read counts as one.") + (bound :initform nil + :initarg :bound + :type (or null integer) + :accessor flexi-stream-bound + :documentation "When this is not NIL, it must be an integer +and the stream will behave as if no more data is available as soon as +POSITION is greater or equal than this value.")) + (:documentation "A FLEXI-INPUT-STREAM is a FLEXI-STREAM that +can actually be instatiated and used for input. Don't use +MAKE-INSTANCE to create a new FLEXI-INPUT-STREAM but use +MAKE-FLEXI-STREAM instead.")) + +#+:cmu +(defmethod output-stream-p ((stream flexi-input-stream)) + "Explicitly states whether this is an output stream." + (declare (optimize speed)) + nil) + +(defclass flexi-io-stream (flexi-input-stream flexi-output-stream) + () + (:documentation "A FLEXI-IO-STREAM is a FLEXI-STREAM that can +actually be instatiated and used for input and output. Don't use +MAKE-INSTANCE to create a new FLEXI-IO-STREAM but use +MAKE-FLEXI-STREAM instead.")) + +#+:cmu +(defmethod input-stream-p ((stream flexi-io-stream)) + "Explicitly states whether this is an input stream." + (declare (optimize speed)) + t) + +#+:cmu +(defmethod output-stream-p ((stream flexi-io-stream)) + "Explicitly states whether this is an output stream." + (declare (optimize speed)) + t) + +(defun make-flexi-stream (stream &rest args + &key (external-format (make-external-format :iso-8859-1)) + element-type column position bound) + "Creates and returns a new flexi stream. STREAM must be an open +binary or `bivalent' stream, i.e. it must be capable of +reading/writing octets with READ-SEQUENCE and/or WRITE-SEQUENCE. The +resulting flexi stream is an input stream if and only if STREAM is an +input stream. Likewise, it's an output stream if and only if STREAM +is an output stream. The default for ELEMENT-TYPE is LW:SIMPLE-CHAR +on LispWorks and CHARACTER on other Lisps. EXTERNAL-FORMAT must be an +EXTERNAL-FORMAT object or a symbol or a list denoting such an object. +COLUMN is the initial column of the stream which is either a +non-negative integer or NIL. The COLUMN argument must only be used +for output streams. POSITION \(only used for input streams) should be +an integer and it denotes the position the stream is in - it will be +increased by one for each octet read. BOUND \(only used for input +streams) should be NIL or an integer. If BOUND is not NIL and +POSITION has gone beyond BOUND, then the stream will behave as if no +more input is available." + ;; these arguments are ignored - they are only there to provide a + ;; meaningful parameter list for IDEs + (declare (ignore element-type column position bound)) + (unless (and (streamp stream) + (open-stream-p stream)) + (error "~S should have been an open stream." stream)) + (apply #'make-instance + ;; actual type depends on STREAM + (cond ((and (input-stream-p stream) + (output-stream-p stream)) + 'flexi-io-stream) + ((input-stream-p stream) + 'flexi-input-stream) + ((output-stream-p stream) + 'flexi-output-stream)) + :stream stream + :flexi-stream-external-format external-format + (sans args :external-format))) Added: branches/edi/strings.lisp ============================================================================== --- (empty file) +++ branches/edi/strings.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,56 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.5 2008/05/17 13:50:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defun string-to-octets (string &key (external-format (make-external-format :latin1)) + (start 0) end) + "Converts the Lisp string STRING from START to END to an array of +octets corresponding to the external format EXTERNAL-FORMAT." + (declare (optimize speed)) + (with-output-to-sequence (out) + (let ((flexi (make-flexi-stream out :external-format external-format))) + (write-string string flexi :start start :end end)))) + +(defun octets-to-string (vector &key (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) + "Converts the Lisp vector VECTOR of octets from START to END to +string using the external format EXTERNAL-FORMAT." + (declare (optimize speed)) + (with-input-from-sequence (in vector :start start :end end) + (let ((flexi (make-flexi-stream in :external-format external-format)) + (result (make-array (- end start) + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer result) + (read-sequence result flexi)) + result))) + + Added: branches/edi/test/README ============================================================================== --- (empty file) +++ branches/edi/test/README Sat May 17 12:49:25 2008 @@ -0,0 +1,4 @@ +The reference files in this directory were created/converted using a +mixture of GNU recode and the native internationalization facilities +of LispWorks and AllegroCL, i.e. we're not testing FLEXI-STREAMS +against files created by itself (which would be kind of useless). \ No newline at end of file Added: branches/edi/test/hebrew_latin8_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/hebrew_latin8_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +:???? ??? ????? ?? ????? ??? ?????? ? 1 ???? ???-?? ???? ???? ??? ???? ????? ? 2 :???? ???-?? ????? ????? ???? :???-???? ??? ??? ????? ????? ? 3 ????? ????? ???-?? ????-?? ????? ???? ? 4 :???? ???? ???? ??? ???? ??? ????? ??? ???? ????? ????? ? 5 :??? ??? ???-???? ???-???? ???? ???? ???? ???? ??? ????? ????? ? 6 :???? ??? ??? ????? ??? ???? ??? ????? ?????-?? ????? ???? ? 7 ????? ??? ??? ???? ???? ????? ???? :??-???? ???-???? ???-???? ???? ????? ????? ????? ? 8 :??? ??? ????-?? ????? ???? ???? ???? ????? ????? ? 9 :??-???? ????? ????? ??? ??? ???? ?????? ??? ????? ????? ????? ? 10 :???-?? ????? ???? ???? ??? ????? ??? ??? ???? ???? ????? ????? ?? 11 ????-?? ??-???? ??? ????? ??? ??? ??? ?? :??-???? ??? ?????? ??? ????? ??? ??? ???? ????? ?? 12 ????? ???? ?????? ??-???? ??? ???-??? :???-?? :????? ??? ???-???? ???-???? ?? 13 ?????? ????? ????? ???? ??? ????? ????? ?? 14 ???????? ???? ???? ????? ???? ???? ??? :????? ?????? ????-?? ????? ????? ????? ?????? ???? ?? 15 :??-???? ?????-?? ?????? ????? ???-?? ????? ???? ?? 16 ?????? ???? ?????-??? ???? ?????? ???? :??????? ??? ????? ????? ????? ????? ????? ??? ???? ?? 17 :????-?? ???? ???? ??? ??????? ?????? ???? ????? ?? 18 :???-?? ????? ???? ???? :????? ??? ???-???? ???-???? ?? 19 ???? ??? ??? ??? ???? ????? ????? ????? ? 20 :????? ???? ???-?? ????-?? ????? ???-?? ??? ?????? ??????-?? ????? ????? ?? 21 ??? ?????? ???? ???? ??? ????? ???? :???-?? ????? ???? ?????? ??? ???-?? ????? ???? ??? ???? ????? ??? ????? ?? 22 :???? ??? ????? ????? ????-?? :????? ??? ???-???? ???-???? ?? 23 ???? ????? ??? ??? ???? ???? ????? ????? ?? 24 :??-???? ????? ???-????? ???? ?????-??? ????? ???? ???-?? ????? ???? ?? 25 ????? ???? ?????? ????? ???-?? ??? ????? :???-?? ??????? ?????? ??? ???? ????? ????? ?? 26 ?????? ????? ????? ??? ???? ????? :????-?? ???? ????-???? ????-???? ??? ????? ???? ????? ????-?? ????? ????? ?? 27 :??? ??? ????? ??? ??? ???? ??? ????? ??? ????? ????? ??? ????? ?? 28 ????? ??? ???? ???? ????? ????-?? ????? :????-?? ????? ???-???? ????? ??? ???-??-?? ??? ???? ??? ????? ????? ?? 29 ??-??? ???-??-??? ????-?? ???-?? ??? ??? :????? ???? ??? ??? ??? ??-??? ???? ???? ????? ???-???? ???? ???-???? ? 30 ??? ???-??-?? ??? ??? ??-??? ????-?? :??-???? ????? ??? ???-???? ??? ???-??-?? ????? ???? ?? 31 :???? ??? ???-???? ?? \ No newline at end of file Added: branches/edi/test/hebrew_latin8_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/hebrew_latin8_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,68 @@ +:???? ??? ????? ?? ????? ??? ?????? ? 1 +???? ???-?? ???? ???? ??? ???? ????? ? 2 +:???? ???-?? ????? ????? ???? +:???-???? ??? ??? ????? ????? ? 3 +????? ????? ???-?? ????-?? ????? ???? ? 4 +:???? ???? ???? ??? +???? ??? ????? ??? ???? ????? ????? ? 5 +:??? ??? ???-???? ???-???? +???? ???? ???? ???? ??? ????? ????? ? 6 +:???? ??? ??? ????? +??? ???? ??? ????? ?????-?? ????? ???? ? 7 +????? ??? ??? ???? ???? ????? ???? +:??-???? +???-???? ???-???? ???? ????? ????? ????? ? 8 +:??? ??? +????-?? ????? ???? ???? ???? ????? ????? ? 9 +:??-???? ????? ????? ??? +??? ???? ?????? ??? ????? ????? ????? ? 10 +:???-?? ????? ???? ???? +??? ????? ??? ??? ???? ???? ????? ????? ?? 11 +????-?? ??-???? ??? ????? ??? ??? ??? ?? +:??-???? +??? ?????? ??? ????? ??? ??? ???? ????? ?? 12 +????? ???? ?????? ??-???? ??? ???-??? +:???-?? +:????? ??? ???-???? ???-???? ?? 13 +?????? ????? ????? ???? ??? ????? ????? ?? 14 +???????? ???? ???? ????? ???? ???? ??? +:????? ?????? +????-?? ????? ????? ????? ?????? ???? ?? 15 +:??-???? +?????-?? ?????? ????? ???-?? ????? ???? ?? 16 +?????? ???? ?????-??? ???? ?????? ???? +:??????? ??? ????? +????? ????? ????? ????? ??? ???? ?? 17 +:????-?? +???? ???? ??? ??????? ?????? ???? ????? ?? 18 +:???-?? ????? ???? ???? +:????? ??? ???-???? ???-???? ?? 19 +???? ??? ??? ??? ???? ????? ????? ????? ? 20 +:????? ???? ???-?? ????-?? ????? +???-?? ??? ?????? ??????-?? ????? ????? ?? 21 +??? ?????? ???? ???? ??? ????? ???? +:???-?? ????? ???? ?????? ??? ???-?? +????? ???? ??? ???? ????? ??? ????? ?? 22 +:???? ??? ????? ????? ????-?? +:????? ??? ???-???? ???-???? ?? 23 +???? ????? ??? ??? ???? ???? ????? ????? ?? 24 +:??-???? ????? ???-????? ???? +?????-??? ????? ???? ???-?? ????? ???? ?? 25 +????? ???? ?????? ????? ???-?? ??? ????? +:???-?? +??????? ?????? ??? ???? ????? ????? ?? 26 +?????? ????? ????? ??? ???? ????? +:????-?? ???? ????-???? ????-???? +??? ????? ???? ????? ????-?? ????? ????? ?? 27 +:??? ??? ????? ??? ??? +???? ??? ????? ??? ????? ????? ??? ????? ?? 28 +????? ??? ???? ???? ????? ????-?? ????? +:????-?? ????? ???-???? ????? +??? ???-??-?? ??? ???? ??? ????? ????? ?? 29 +??-??? ???-??-??? ????-?? ???-?? ??? ??? +:????? ???? ??? ??? ??? ??-??? +???? ???? ????? ???-???? ???? ???-???? ? 30 +??? ???-??-?? ??? ??? ??-??? ????-?? +:??-???? ????? +??? ???-???? ??? ???-??-?? ????? ???? ?? 31 +:???? ??? ???-???? ?? Added: branches/edi/test/hebrew_latin8_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/hebrew_latin8_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,68 @@ +:???? ??? ????? ?? ????? ??? ?????? ? 1 +???? ???-?? ???? ???? ??? ???? ????? ? 2 +:???? ???-?? ????? ????? ???? +:???-???? ??? ??? ????? ????? ? 3 +????? ????? ???-?? ????-?? ????? ???? ? 4 +:???? ???? ???? ??? +???? ??? ????? ??? ???? ????? ????? ? 5 +:??? ??? ???-???? ???-???? +???? ???? ???? ???? ??? ????? ????? ? 6 +:???? ??? ??? ????? +??? ???? ??? ????? ?????-?? ????? ???? ? 7 +????? ??? ??? ???? ???? ????? ???? +:??-???? +???-???? ???-???? ???? ????? ????? ????? ? 8 +:??? ??? +????-?? ????? ???? ???? ???? ????? ????? ? 9 +:??-???? ????? ????? ??? +??? ???? ?????? ??? ????? ????? ????? ? 10 +:???-?? ????? ???? ???? +??? ????? ??? ??? ???? ???? ????? ????? ?? 11 +????-?? ??-???? ??? ????? ??? ??? ??? ?? +:??-???? +??? ?????? ??? ????? ??? ??? ???? ????? ?? 12 +????? ???? ?????? ??-???? ??? ???-??? +:???-?? +:????? ??? ???-???? ???-???? ?? 13 +?????? ????? ????? ???? ??? ????? ????? ?? 14 +???????? ???? ???? ????? ???? ???? ??? +:????? ?????? +????-?? ????? ????? ????? ?????? ???? ?? 15 +:??-???? +?????-?? ?????? ????? ???-?? ????? ???? ?? 16 +?????? ???? ?????-??? ???? ?????? ???? +:??????? ??? ????? +????? ????? ????? ????? ??? ???? ?? 17 +:????-?? +???? ???? ??? ??????? ?????? ???? ????? ?? 18 +:???-?? ????? ???? ???? +:????? ??? ???-???? ???-???? ?? 19 +???? ??? ??? ??? ???? ????? ????? ????? ? 20 +:????? ???? ???-?? ????-?? ????? +???-?? ??? ?????? ??????-?? ????? ????? ?? 21 +??? ?????? ???? ???? ??? ????? ???? +:???-?? ????? ???? ?????? ??? ???-?? +????? ???? ??? ???? ????? ??? ????? ?? 22 +:???? ??? ????? ????? ????-?? +:????? ??? ???-???? ???-???? ?? 23 +???? ????? ??? ??? ???? ???? ????? ????? ?? 24 +:??-???? ????? ???-????? ???? +?????-??? ????? ???? ???-?? ????? ???? ?? 25 +????? ???? ?????? ????? ???-?? ??? ????? +:???-?? +??????? ?????? ??? ???? ????? ????? ?? 26 +?????? ????? ????? ??? ???? ????? +:????-?? ???? ????-???? ????-???? +??? ????? ???? ????? ????-?? ????? ????? ?? 27 +:??? ??? ????? ??? ??? +???? ??? ????? ??? ????? ????? ??? ????? ?? 28 +????? ??? ???? ???? ????? ????-?? ????? +:????-?? ????? ???-???? ????? +??? ???-??-?? ??? ???? ??? ????? ????? ?? 29 +??-??? ???-??-??? ????-?? ???-?? ??? ??? +:????? ???? ??? ??? ??? ??-??? +???? ???? ????? ???-???? ???? ???-???? ? 30 +??? ???-??-?? ??? ??? ??-??? ????-?? +:??-???? ????? +??? ???-???? ??? ???-??-?? ????? ???? ?? 31 +:???? ??? ???-???? ?? Added: branches/edi/test/hebrew_utf8_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/hebrew_utf8_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +:???????? ?????? ?????????? ???? ?????????? ?????? ???????????? ?? 1 ???????? ??????-???? ???????? ???????? ?????? ???????? ?????????? ?? 2 :???????? ??????-???? ?????????? ?????????? ???????? :??????-???????? ?????? ?????? ?????????? ?????????? ?? 3 ?????????? ?????????? ??????-???? ????????-???? ?????????? ???????? ?? 4 :???????? ???????? ???????? ?????? ???????? ?????? ?????????? ?????? ???????? ?????????? ?????????? ?? 5 :?????? ?????? ??????-???????? ??????-???????? ???????? ???????? ???????? ???????? ?????? ?????????? ?????????? ?? 6 :???????? ?????? ?????? ?????????? ?????? ???????? ?????? ?????????? ??????????-???? ?????????? ???????? ?? 7 ?????????? ?????? ?????? ???????? ???????? ?????????? ???????? :????-???????? ??????-???????? ??????-???????? ???????? ?????????? ?????????? ?????????? ?? 8 :?????? ?????? ????????-???? ?????????? ???????? ???????? ???????? ?????????? ?????????? ?? 9 :????-???????? ?????????? ?????????? ?????? ?????? ???????? ???????????? ?????? ?????????? ?????????? ?????????? ?? 10 :??????-???? ?????????? ???????? ???????? ?????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 11 ????????-???? ????-???????? ?????? ?????????? ?????? ?????? ?????? ???? :????-???????? ?????? ???????????? ?????? ?????????? ?????? ?????? ???????? ?????????? ???? 12 ?????????? ???????? ???????????? ????-???????? ?????? ??????-?????? :??????-???? :?????????? ?????? ??????-???????? ??????-???????? ???? 13 ???????????? ?????????? ?????????? ???????? ?????? ?????????? ?????????? ???? 14 ???????????????? ???????? ???????? ?????????? ???????? ???????? ?????? :?????????? ???????????? ????????-???? ?????????? ?????????? ?????????? ???????????? ???????? ???? 15 :????-???????? ??????????-???? ???????????? ?????????? ??????-???? ?????????? ???????? ???? 16 ???????????? ???????? ??????????-?????? ???????? ???????????? ???????? :?????????????? ?????? ?????????? ?????????? ?????????? ?????????? ?????????? ?????? ???????? ???? 17 :????????-???? ???????? ???????? ?????? ?????????????? ???????????? ???????? ?????????? ???? 18 :??????-???? ?????????? ???????? ???????? :?????????? ?????? ??????-???????? ??????-???????? ???? 19 ???????? ?????? ?????? ?????? ???????? ?????????? ?????????? ?????????? ?? 20 :?????????? ???????? ??????-???? ????????-???? ?????????? ??????-???? ?????? ???????????? ????????????-???? ?????????? ?????????? ???? 21 ?????? ???????????? ???????? ???????? ?????? ?????????? ???????? :??????-???? ?????????? ???????? ???????????? ?????? ??????-???? ?????????? ???????? ?????? ???????? ?????????? ?????? ?????????? ???? 22 :???????? ?????? ?????????? ?????????? ????????-???? :?????????? ?????? ??????-???????? ??????-???????? ???? 23 ???????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 24 :????-???????? ?????????? ??????-?????????? ???????? ??????????-?????? ?????????? ???????? ??????-???? ?????????? ???????? ???? 25 ?????????? ???????? ???????????? ?????????? ??????-???? ?????? ?????????? :??????-???? ?????????????? ???????????? ?????? ???????? ?????????? ?????????? ???? 26 ???????????? ?????????? ?????????? ?????? ???????? ?????????? :????????-???? ???????? ????????-???????? ????????-???????? ?????? ?????????? ???????? ?????????? ????????-???? ?????????? ?????????? ???? 27 :?????? ?????? ?????????? ?????? ?????? ???????? ?????? ?????????? ?????? ?????????? ?????????? ?????? ?????????? ???? 28 ?????????? ?????? ???????? ???????? ?????????? ????????-???? ?????????? :????????-???? ?????????? ??????-???????? ?????????? ?????? ??????-????-???? ?????? ???????? ?????? ?????????? ?????????? ???? 29 ????-?????? ??????-????-?????? ????????-???? ??????-???? ?????? ?????? :?????????? ???????? ?????? ?????? ?????? ????-?????? ???????? ???????? ?????????? ??????-???????? ???????? ??????-???????? ?? 30 ?????? ??????-????-???? ?????? ?????? ????-?????? ????????-???? :????-???????? ?????????? ?????? ??????-???????? ?????? ??????-????-???? ?????????? ???????? ???? 31 :???????? ?????? ??????-???????? ???? \ No newline at end of file Added: branches/edi/test/hebrew_utf8_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/hebrew_utf8_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,68 @@ +:???????? ?????? ?????????? ???? ?????????? ?????? ???????????? ?? 1 +???????? ??????-???? ???????? ???????? ?????? ???????? ?????????? ?? 2 +:???????? ??????-???? ?????????? ?????????? ???????? +:??????-???????? ?????? ?????? ?????????? ?????????? ?? 3 +?????????? ?????????? ??????-???? ????????-???? ?????????? ???????? ?? 4 +:???????? ???????? ???????? ?????? +???????? ?????? ?????????? ?????? ???????? ?????????? ?????????? ?? 5 +:?????? ?????? ??????-???????? ??????-???????? +???????? ???????? ???????? ???????? ?????? ?????????? ?????????? ?? 6 +:???????? ?????? ?????? ?????????? +?????? ???????? ?????? ?????????? ??????????-???? ?????????? ???????? ?? 7 +?????????? ?????? ?????? ???????? ???????? ?????????? ???????? +:????-???????? +??????-???????? ??????-???????? ???????? ?????????? ?????????? ?????????? ?? 8 +:?????? ?????? +????????-???? ?????????? ???????? ???????? ???????? ?????????? ?????????? ?? 9 +:????-???????? ?????????? ?????????? ?????? +?????? ???????? ???????????? ?????? ?????????? ?????????? ?????????? ?? 10 +:??????-???? ?????????? ???????? ???????? +?????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 11 +????????-???? ????-???????? ?????? ?????????? ?????? ?????? ?????? ???? +:????-???????? +?????? ???????????? ?????? ?????????? ?????? ?????? ???????? ?????????? ???? 12 +?????????? ???????? ???????????? ????-???????? ?????? ??????-?????? +:??????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 13 +???????????? ?????????? ?????????? ???????? ?????? ?????????? ?????????? ???? 14 +???????????????? ???????? ???????? ?????????? ???????? ???????? ?????? +:?????????? ???????????? +????????-???? ?????????? ?????????? ?????????? ???????????? ???????? ???? 15 +:????-???????? +??????????-???? ???????????? ?????????? ??????-???? ?????????? ???????? ???? 16 +???????????? ???????? ??????????-?????? ???????? ???????????? ???????? +:?????????????? ?????? ?????????? +?????????? ?????????? ?????????? ?????????? ?????? ???????? ???? 17 +:????????-???? +???????? ???????? ?????? ?????????????? ???????????? ???????? ?????????? ???? 18 +:??????-???? ?????????? ???????? ???????? +:?????????? ?????? ??????-???????? ??????-???????? ???? 19 +???????? ?????? ?????? ?????? ???????? ?????????? ?????????? ?????????? ?? 20 +:?????????? ???????? ??????-???? ????????-???? ?????????? +??????-???? ?????? ???????????? ????????????-???? ?????????? ?????????? ???? 21 +?????? ???????????? ???????? ???????? ?????? ?????????? ???????? +:??????-???? ?????????? ???????? ???????????? ?????? ??????-???? +?????????? ???????? ?????? ???????? ?????????? ?????? ?????????? ???? 22 +:???????? ?????? ?????????? ?????????? ????????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 23 +???????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 24 +:????-???????? ?????????? ??????-?????????? ???????? +??????????-?????? ?????????? ???????? ??????-???? ?????????? ???????? ???? 25 +?????????? ???????? ???????????? ?????????? ??????-???? ?????? ?????????? +:??????-???? +?????????????? ???????????? ?????? ???????? ?????????? ?????????? ???? 26 +???????????? ?????????? ?????????? ?????? ???????? ?????????? +:????????-???? ???????? ????????-???????? ????????-???????? +?????? ?????????? ???????? ?????????? ????????-???? ?????????? ?????????? ???? 27 +:?????? ?????? ?????????? ?????? ?????? +???????? ?????? ?????????? ?????? ?????????? ?????????? ?????? ?????????? ???? 28 +?????????? ?????? ???????? ???????? ?????????? ????????-???? ?????????? +:????????-???? ?????????? ??????-???????? ?????????? +?????? ??????-????-???? ?????? ???????? ?????? ?????????? ?????????? ???? 29 +????-?????? ??????-????-?????? ????????-???? ??????-???? ?????? ?????? +:?????????? ???????? ?????? ?????? ?????? ????-?????? +???????? ???????? ?????????? ??????-???????? ???????? ??????-???????? ?? 30 +?????? ??????-????-???? ?????? ?????? ????-?????? ????????-???? +:????-???????? ?????????? +?????? ??????-???????? ?????? ??????-????-???? ?????????? ???????? ???? 31 +:???????? ?????? ??????-???????? ???? Added: branches/edi/test/hebrew_utf8_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/hebrew_utf8_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,68 @@ +:???????? ?????? ?????????? ???? ?????????? ?????? ???????????? ?? 1 +???????? ??????-???? ???????? ???????? ?????? ???????? ?????????? ?? 2 +:???????? ??????-???? ?????????? ?????????? ???????? +:??????-???????? ?????? ?????? ?????????? ?????????? ?? 3 +?????????? ?????????? ??????-???? ????????-???? ?????????? ???????? ?? 4 +:???????? ???????? ???????? ?????? +???????? ?????? ?????????? ?????? ???????? ?????????? ?????????? ?? 5 +:?????? ?????? ??????-???????? ??????-???????? +???????? ???????? ???????? ???????? ?????? ?????????? ?????????? ?? 6 +:???????? ?????? ?????? ?????????? +?????? ???????? ?????? ?????????? ??????????-???? ?????????? ???????? ?? 7 +?????????? ?????? ?????? ???????? ???????? ?????????? ???????? +:????-???????? +??????-???????? ??????-???????? ???????? ?????????? ?????????? ?????????? ?? 8 +:?????? ?????? +????????-???? ?????????? ???????? ???????? ???????? ?????????? ?????????? ?? 9 +:????-???????? ?????????? ?????????? ?????? +?????? ???????? ???????????? ?????? ?????????? ?????????? ?????????? ?? 10 +:??????-???? ?????????? ???????? ???????? +?????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 11 +????????-???? ????-???????? ?????? ?????????? ?????? ?????? ?????? ???? +:????-???????? +?????? ???????????? ?????? ?????????? ?????? ?????? ???????? ?????????? ???? 12 +?????????? ???????? ???????????? ????-???????? ?????? ??????-?????? +:??????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 13 +???????????? ?????????? ?????????? ???????? ?????? ?????????? ?????????? ???? 14 +???????????????? ???????? ???????? ?????????? ???????? ???????? ?????? +:?????????? ???????????? +????????-???? ?????????? ?????????? ?????????? ???????????? ???????? ???? 15 +:????-???????? +??????????-???? ???????????? ?????????? ??????-???? ?????????? ???????? ???? 16 +???????????? ???????? ??????????-?????? ???????? ???????????? ???????? +:?????????????? ?????? ?????????? +?????????? ?????????? ?????????? ?????????? ?????? ???????? ???? 17 +:????????-???? +???????? ???????? ?????? ?????????????? ???????????? ???????? ?????????? ???? 18 +:??????-???? ?????????? ???????? ???????? +:?????????? ?????? ??????-???????? ??????-???????? ???? 19 +???????? ?????? ?????? ?????? ???????? ?????????? ?????????? ?????????? ?? 20 +:?????????? ???????? ??????-???? ????????-???? ?????????? +??????-???? ?????? ???????????? ????????????-???? ?????????? ?????????? ???? 21 +?????? ???????????? ???????? ???????? ?????? ?????????? ???????? +:??????-???? ?????????? ???????? ???????????? ?????? ??????-???? +?????????? ???????? ?????? ???????? ?????????? ?????? ?????????? ???? 22 +:???????? ?????? ?????????? ?????????? ????????-???? +:?????????? ?????? ??????-???????? ??????-???????? ???? 23 +???????? ?????????? ?????? ?????? ???????? ???????? ?????????? ?????????? ???? 24 +:????-???????? ?????????? ??????-?????????? ???????? +??????????-?????? ?????????? ???????? ??????-???? ?????????? ???????? ???? 25 +?????????? ???????? ???????????? ?????????? ??????-???? ?????? ?????????? +:??????-???? +?????????????? ???????????? ?????? ???????? ?????????? ?????????? ???? 26 +???????????? ?????????? ?????????? ?????? ???????? ?????????? +:????????-???? ???????? ????????-???????? ????????-???????? +?????? ?????????? ???????? ?????????? ????????-???? ?????????? ?????????? ???? 27 +:?????? ?????? ?????????? ?????? ?????? +???????? ?????? ?????????? ?????? ?????????? ?????????? ?????? ?????????? ???? 28 +?????????? ?????? ???????? ???????? ?????????? ????????-???? ?????????? +:????????-???? ?????????? ??????-???????? ?????????? +?????? ??????-????-???? ?????? ???????? ?????? ?????????? ?????????? ???? 29 +????-?????? ??????-????-?????? ????????-???? ??????-???? ?????? ?????? +:?????????? ???????? ?????? ?????? ?????? ????-?????? +???????? ???????? ?????????? ??????-???????? ???????? ??????-???????? ?? 30 +?????? ??????-????-???? ?????? ?????? ????-?????? ????????-???? +:????-???????? ?????????? +?????? ??????-???????? ?????? ??????-????-???? ?????????? ???????? ???? 31 +:???????? ?????? ??????-???????? ???? Added: branches/edi/test/kafka_cp1252_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_cp1252_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. ?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. ?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. \ No newline at end of file Added: branches/edi/test/kafka_cp1252_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_cp1252_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: branches/edi/test/kafka_cp1252_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_cp1252_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: branches/edi/test/kafka_latin1_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_latin1_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. ?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. ?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. \ No newline at end of file Added: branches/edi/test/kafka_latin1_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_latin1_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: branches/edi/test/kafka_latin1_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_latin1_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr?umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R?cken und sah, wenn er den Kopf ein wenig hob, seinen gew?lbten, braunen, von bogenf?rmigen Versteifungen geteilten Bauch, auf dessen H?he sich die Bettdecke, zum g?nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl?glich d?nnen Beine flimmerten ihm hilflos vor den Augen. + +?Was ist mit mir geschehen??, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W?nden. ?ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h?bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr?be Wetter - man h?rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ?Wie w?re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg??e?, dachte er, aber das war g?nzlich undurchf?hrbar, denn er war gew?hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw?rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R?ckenlage zur?ck. Er versuchte es wohl hundertmal, schlo? die Augen, um die zappelnden Beine nicht sehen zu m?ssen, und lie? erst ab, als er in der Seite einen noch nie gef?hlten, leichten, dumpfen Schmerz zu f?hlen begann. + +?Ach Gott?, dachte er, ?was f?r einen anstrengenden Beruf habe ich gew?hlt! Tag aus, Tag ein auf der Reise. Die gesch?ftlichen Aufregungen sind viel gr??er, als im eigentlichen Gesch?ft zu Hause, und au?erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl?sse, das unregelm??ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!? Er f?hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R?cken langsam n?her zum Bettpfosten, um den Kopf besser heben zu k?nnen; fand die juckende Stelle, die mit lauter kleinen wei?en P?nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur?ck, denn bei der Ber?hrung umwehten ihn K?lteschauer. + +Er glitt wieder in seine fr?here Lage zur?ck. ?Dies fr?hzeitige Aufstehen?, dachte er, ?macht einen ganz bl?dsinnig. Der Mensch mu? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur?ckgehe, um die erlangten Auftr?ge zu ?berschreiben, sitzen diese Herren erst beim Fr?hst?ck. Das sollte ich bei meinem Chef versuchen; ich w?rde auf der Stelle hinausfliegen. Wer wei? ?brigens, ob das nicht sehr gut f?r mich w?re. Wenn ich mich nicht wegen meiner Eltern zur?ckhielte, ich h?tte l?ngst gek?ndigt, ich w?re vor den Chef hin getreten und h?tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h?tte er fallen m?ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H?he herab mit dem Angestellten zu reden, der ?berdies wegen der Schwerh?rigkeit des Chefs ganz nahe herantreten mu?. Nun, die Hoffnung ist noch nicht g?nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d?rfte noch f?nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro?e Schnitt gemacht. Vorl?ufig allerdings mu? ich aufstehen, denn mein Zug f?hrt um f?nf.? + +Und er sah zur Weckuhr hin?ber, die auf dem Kasten tickte. ?Himmlischer Vater!?, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw?rts, es war sogar halb vor?ber, es n?herte sich schon dreiviertel. Sollte der Wecker nicht gel?utet haben? Man sah vom Bett aus, da? er auf vier Uhr richtig eingestellt war; gewi? hatte er auch gel?utet. Ja, aber war es m?glich, dieses m?belersch?tternde L?uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n?chste Zug ging um sieben Uhr; um den einzuholen, h?tte er sich unsinnig beeilen m?ssen, und die Kollektion war noch nicht eingepackt, und er selbst f?hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch?ftsdiener hatte beim F?nfuhrzug gewartet und die Meldung von seiner Vers?umnis l?ngst erstattet. Es war eine Kreatur des Chefs, ohne R?ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w?re aber ?u?erst peinlich und verd?chtig, denn Gregor war w?hrend seines f?nfj?hrigen Dienstes noch nicht einmal krank gewesen. Gewi? w?rde der Chef mit dem Krankenkassenarzt kommen, w?rde den Eltern wegen des faulen Sohnes Vorw?rfe machen und alle Einw?nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f?r den es ja ?berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h?tte er ?brigens in diesem Falle so ganz unrecht? Gregor f?hlte sich tats?chlich, abgesehen von einer nach dem langen Schlaf wirklich ?berfl?ssigen Schl?frigkeit, ganz wohl und hatte sogar einen besonders kr?ftigen Hunger. Added: branches/edi/test/kafka_utf8_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_utf8_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr??umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R??cken und sah, wenn er den Kopf ein wenig hob, seinen gew??lbten, braunen, von bogenf??rmigen Versteifungen geteilten Bauch, auf dessen H??he sich die Bettdecke, zum g??nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl??glich d??nnen Beine flimmerten ihm hilflos vor den Augen. ??Was ist mit mir geschehen???, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W??nden. ??ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h??bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa?? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. Gregors Blick richtete sich dann zum Fenster, und das tr??be Wetter - man h??rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ??Wie w??re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg????e??, dachte er, aber das war g??nzlich undurchf??hrbar, denn er war gew??hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw??rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R??ckenlage zur??ck. Er versuchte es wohl hundertmal, schlo?? die Augen, um die zappelnden Beine nicht sehen zu m??ssen, und lie?? erst ab, als er in der Seite einen noch nie gef??hlten, leichten, dumpfen Schmerz zu f??hlen begann. ??Ach Gott??, dachte er, ??was f??r einen anstrengenden Beruf habe ich gew??hlt! Tag aus, Tag ein auf der Reise. Die gesch??ftlichen Aufregungen sind viel gr????er, als im eigentlichen Gesch??ft zu Hause, und au??erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl??sse, das unregelm????ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!?? Er f??hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R??cken langsam n??her zum Bettpfosten, um den Kopf besser heben zu k??nnen; fand die juckende Stelle, die mit lauter kleinen wei??en P??nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur??ck, denn bei der Ber??hrung umwehten ihn K??lteschauer. Er glitt wieder in seine fr??here Lage zur??ck. ??Dies fr??hzeitige Aufstehen??, dachte er, ??macht einen ganz bl??dsinnig. Der Mensch mu?? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur??ckgehe, um die erlangten Auftr??ge zu ??berschreiben, sitzen diese Herren erst beim Fr??hst??ck. Das sollte ich bei meinem Chef versuchen; ich w??rde auf der Stelle hinausfliegen. Wer wei?? ??brigens, ob das nicht sehr gut f??r mich w??re. Wenn ich mich nicht wegen meiner Eltern zur??ckhielte, ich h??tte l??ngst gek??ndigt, ich w??re vor den Chef hin getreten und h??tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h??tte er fallen m??ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H??he herab mit dem Angestellten zu reden, der ??berdies wegen der Schwerh??rigkeit des Chefs ganz nahe herantreten mu??. Nun, die Hoffnung ist noch nicht g??nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d??rfte noch f??nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro??e Schnitt gemacht. Vorl??ufig allerdings mu?? ich aufstehen, denn mein Zug f??hrt um f??nf.?? Und er sah zur Weckuhr hin??ber, die auf dem Kasten tickte. ??Himmlischer Vater!??, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw??rts, es war sogar halb vor??ber, es n??herte sich schon dreiviertel. Sollte der Wecker nicht gel??utet haben? Man sah vom Bett aus, da?? er auf vier Uhr richtig eingestellt war; gewi?? hatte er auch gel??utet. Ja, aber war es m??glich, dieses m??belersch??tternde L??uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n??chste Zug ging um sieben Uhr; um den einzuholen, h??tte er sich unsinnig beeilen m??ssen, und die Kollektion war noch nicht eingepackt, und er selbst f??hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch??ftsdiener hatte beim F??nfuhrzug gewartet und die Meldung von seiner Vers??umnis l??ngst erstattet. Es war eine Kreatur des Chefs, ohne R??ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w??re aber ??u??erst peinlich und verd??chtig, denn Gregor war w??hrend seines f??nfj??hrigen Dienstes noch nicht einmal krank gewesen. Gewi?? w??rde der Chef mit dem Krankenkassenarzt kommen, w??rde den Eltern wegen des faulen Sohnes Vorw??rfe machen und alle Einw??nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f??r den es ja ??berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h??tte er ??brigens in diesem Falle so ganz unrecht? Gregor f??hlte sich tats??chlich, abgesehen von einer nach dem langen Schlaf wirklich ??berfl??ssigen Schl??frigkeit, ganz wohl und hatte sogar einen besonders kr??ftigen Hunger. \ No newline at end of file Added: branches/edi/test/kafka_utf8_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_utf8_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr??umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R??cken und sah, wenn er den Kopf ein wenig hob, seinen gew??lbten, braunen, von bogenf??rmigen Versteifungen geteilten Bauch, auf dessen H??he sich die Bettdecke, zum g??nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl??glich d??nnen Beine flimmerten ihm hilflos vor den Augen. + +??Was ist mit mir geschehen???, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W??nden. ??ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h??bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa?? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr??be Wetter - man h??rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ??Wie w??re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg????e??, dachte er, aber das war g??nzlich undurchf??hrbar, denn er war gew??hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw??rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R??ckenlage zur??ck. Er versuchte es wohl hundertmal, schlo?? die Augen, um die zappelnden Beine nicht sehen zu m??ssen, und lie?? erst ab, als er in der Seite einen noch nie gef??hlten, leichten, dumpfen Schmerz zu f??hlen begann. + +??Ach Gott??, dachte er, ??was f??r einen anstrengenden Beruf habe ich gew??hlt! Tag aus, Tag ein auf der Reise. Die gesch??ftlichen Aufregungen sind viel gr????er, als im eigentlichen Gesch??ft zu Hause, und au??erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl??sse, das unregelm????ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!?? Er f??hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R??cken langsam n??her zum Bettpfosten, um den Kopf besser heben zu k??nnen; fand die juckende Stelle, die mit lauter kleinen wei??en P??nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur??ck, denn bei der Ber??hrung umwehten ihn K??lteschauer. + +Er glitt wieder in seine fr??here Lage zur??ck. ??Dies fr??hzeitige Aufstehen??, dachte er, ??macht einen ganz bl??dsinnig. Der Mensch mu?? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur??ckgehe, um die erlangten Auftr??ge zu ??berschreiben, sitzen diese Herren erst beim Fr??hst??ck. Das sollte ich bei meinem Chef versuchen; ich w??rde auf der Stelle hinausfliegen. Wer wei?? ??brigens, ob das nicht sehr gut f??r mich w??re. Wenn ich mich nicht wegen meiner Eltern zur??ckhielte, ich h??tte l??ngst gek??ndigt, ich w??re vor den Chef hin getreten und h??tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h??tte er fallen m??ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H??he herab mit dem Angestellten zu reden, der ??berdies wegen der Schwerh??rigkeit des Chefs ganz nahe herantreten mu??. Nun, die Hoffnung ist noch nicht g??nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d??rfte noch f??nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro??e Schnitt gemacht. Vorl??ufig allerdings mu?? ich aufstehen, denn mein Zug f??hrt um f??nf.?? + +Und er sah zur Weckuhr hin??ber, die auf dem Kasten tickte. ??Himmlischer Vater!??, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw??rts, es war sogar halb vor??ber, es n??herte sich schon dreiviertel. Sollte der Wecker nicht gel??utet haben? Man sah vom Bett aus, da?? er auf vier Uhr richtig eingestellt war; gewi?? hatte er auch gel??utet. Ja, aber war es m??glich, dieses m??belersch??tternde L??uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n??chste Zug ging um sieben Uhr; um den einzuholen, h??tte er sich unsinnig beeilen m??ssen, und die Kollektion war noch nicht eingepackt, und er selbst f??hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch??ftsdiener hatte beim F??nfuhrzug gewartet und die Meldung von seiner Vers??umnis l??ngst erstattet. Es war eine Kreatur des Chefs, ohne R??ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w??re aber ??u??erst peinlich und verd??chtig, denn Gregor war w??hrend seines f??nfj??hrigen Dienstes noch nicht einmal krank gewesen. Gewi?? w??rde der Chef mit dem Krankenkassenarzt kommen, w??rde den Eltern wegen des faulen Sohnes Vorw??rfe machen und alle Einw??nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f??r den es ja ??berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h??tte er ??brigens in diesem Falle so ganz unrecht? Gregor f??hlte sich tats??chlich, abgesehen von einer nach dem langen Schlaf wirklich ??berfl??ssigen Schl??frigkeit, ganz wohl und hatte sogar einen besonders kr??ftigen Hunger. Added: branches/edi/test/kafka_utf8_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/kafka_utf8_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,11 @@ +Als Gregor Samsa eines Morgens aus unruhigen Tr??umen erwachte, fand er sich in seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem panzerartig harten R??cken und sah, wenn er den Kopf ein wenig hob, seinen gew??lbten, braunen, von bogenf??rmigen Versteifungen geteilten Bauch, auf dessen H??he sich die Bettdecke, zum g??nzlichen Niedergleiten bereit, kaum noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang kl??glich d??nnen Beine flimmerten ihm hilflos vor den Augen. + +??Was ist mit mir geschehen???, dachte er. Es war kein Traum. Sein Zimmer, ein richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier wohlbekannten W??nden. ??ber dem Tisch, auf dem eine auseinandergepackte Musterkollektion von Tuchwaren ausgebreitet war - Samsa war Reisender - hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift ausgeschnitten und in einem h??bschen, vergoldeten Rahmen untergebracht hatte. Es stellte eine Dame dar, die mit einem Pelzhut und einer Pelzboa versehen, aufrecht dasa?? und einen schweren Pelzmuff, in dem ihr ganzer Unterarm verschwunden war, dem Beschauer entgegenhob. + +Gregors Blick richtete sich dann zum Fenster, und das tr??be Wetter - man h??rte Regentropfen auf das Fensterblech aufschlagen - machte ihn ganz melancholisch. ??Wie w??re es, wenn ich noch ein wenig weiterschliefe und alle Narrheiten verg????e??, dachte er, aber das war g??nzlich undurchf??hrbar, denn er war gew??hnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem gegenw??rtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich auch auf die rechte Seite warf, immer wieder schaukelte er in die R??ckenlage zur??ck. Er versuchte es wohl hundertmal, schlo?? die Augen, um die zappelnden Beine nicht sehen zu m??ssen, und lie?? erst ab, als er in der Seite einen noch nie gef??hlten, leichten, dumpfen Schmerz zu f??hlen begann. + +??Ach Gott??, dachte er, ??was f??r einen anstrengenden Beruf habe ich gew??hlt! Tag aus, Tag ein auf der Reise. Die gesch??ftlichen Aufregungen sind viel gr????er, als im eigentlichen Gesch??ft zu Hause, und au??erdem ist mir noch diese Plage des Reisens auferlegt, die Sorgen um die Zuganschl??sse, das unregelm????ige, schlechte Essen, ein immer wechselnder, nie andauernder, nie herzlich werdender menschlicher Verkehr. Der Teufel soll das alles holen!?? Er f??hlte ein leichtes Jucken oben auf dem Bauch; schob sich auf dem R??cken langsam n??her zum Bettpfosten, um den Kopf besser heben zu k??nnen; fand die juckende Stelle, die mit lauter kleinen wei??en P??nktchen besetzt war, die er nicht zu beurteilen verstand; und wollte mit einem Bein die Stelle betasten, zog es aber gleich zur??ck, denn bei der Ber??hrung umwehten ihn K??lteschauer. + +Er glitt wieder in seine fr??here Lage zur??ck. ??Dies fr??hzeitige Aufstehen??, dachte er, ??macht einen ganz bl??dsinnig. Der Mensch mu?? seinen Schlaf haben. Andere Reisende leben wie Haremsfrauen. Wenn ich zum Beispiel im Laufe des Vormittags ins Gasthaus zur??ckgehe, um die erlangten Auftr??ge zu ??berschreiben, sitzen diese Herren erst beim Fr??hst??ck. Das sollte ich bei meinem Chef versuchen; ich w??rde auf der Stelle hinausfliegen. Wer wei?? ??brigens, ob das nicht sehr gut f??r mich w??re. Wenn ich mich nicht wegen meiner Eltern zur??ckhielte, ich h??tte l??ngst gek??ndigt, ich w??re vor den Chef hin getreten und h??tte ihm meine Meinung von Grund des Herzens aus gesagt. Vom Pult h??tte er fallen m??ssen! Es ist auch eine sonderbare Art, sich auf das Pult zu setzen und von der H??he herab mit dem Angestellten zu reden, der ??berdies wegen der Schwerh??rigkeit des Chefs ganz nahe herantreten mu??. Nun, die Hoffnung ist noch nicht g??nzlich aufgegeben; habe ich einmal das Geld beisammen, um die Schuld der Eltern an ihn abzuzahlen - es d??rfte noch f??nf bis sechs Jahre dauern - , mache ich die Sache unbedingt. Dann wird der gro??e Schnitt gemacht. Vorl??ufig allerdings mu?? ich aufstehen, denn mein Zug f??hrt um f??nf.?? + +Und er sah zur Weckuhr hin??ber, die auf dem Kasten tickte. ??Himmlischer Vater!??, dachte er. Es war halb sieben Uhr, und die Zeiger gingen ruhig vorw??rts, es war sogar halb vor??ber, es n??herte sich schon dreiviertel. Sollte der Wecker nicht gel??utet haben? Man sah vom Bett aus, da?? er auf vier Uhr richtig eingestellt war; gewi?? hatte er auch gel??utet. Ja, aber war es m??glich, dieses m??belersch??tternde L??uten ruhig zu verschlafen? Nun, ruhig hatte er ja nicht geschlafen, aber wahrscheinlich desto fester. Was aber sollte er jetzt tun? Der n??chste Zug ging um sieben Uhr; um den einzuholen, h??tte er sich unsinnig beeilen m??ssen, und die Kollektion war noch nicht eingepackt, und er selbst f??hlte sich durchaus nicht besonders frisch und beweglich. Und selbst wenn er den Zug einholte, ein Donnerwetter des Chefs war nicht zu vermeiden, denn der Gesch??ftsdiener hatte beim F??nfuhrzug gewartet und die Meldung von seiner Vers??umnis l??ngst erstattet. Es war eine Kreatur des Chefs, ohne R??ckgrat und Verstand. Wie nun, wenn er sich krank meldete? Das w??re aber ??u??erst peinlich und verd??chtig, denn Gregor war w??hrend seines f??nfj??hrigen Dienstes noch nicht einmal krank gewesen. Gewi?? w??rde der Chef mit dem Krankenkassenarzt kommen, w??rde den Eltern wegen des faulen Sohnes Vorw??rfe machen und alle Einw??nde durch den Hinweis auf den Krankenkassenarzt abschneiden, f??r den es ja ??berhaupt nur ganz gesunde, aber arbeitsscheue Menschen gibt. Und h??tte er ??brigens in diesem Falle so ganz unrecht? Gregor f??hlte sich tats??chlich, abgesehen von einer nach dem langen Schlaf wirklich ??berfl??ssigen Schl??frigkeit, ganz wohl und hatte sogar einen besonders kr??ftigen Hunger. Added: branches/edi/test/packages.lisp ============================================================================== --- (empty file) +++ branches/edi/test/packages.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,34 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.6 2008/05/17 16:38:26 edi Exp $ + +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :cl-user) + +(defpackage :flexi-streams-test + (:use :cl :flexi-streams) + (:export :run-tests)) Added: branches/edi/test/russian_koi8r_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/russian_koi8r_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +????????????????? ?????? ?? ??????? ????????????? ??????????? ?? Unicode, ??????? ????????? 10-12 ????? 1997 ???? ? ?????? ? ????????. ??????????? ??????? ??????? ???? ????????? ?? ???????? ??????????? ????????? ? Unicode, ??????????? ? ???????????????????, ?????????? ? ?????????? Unicode ? ????????? ???????????? ???????? ? ??????????? ???????????, ???????, ??????? ? ???????????? ???????????? ????????. \ No newline at end of file Added: branches/edi/test/russian_koi8r_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/russian_koi8r_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,6 @@ +????????????????? ?????? ?? ??????? ????????????? ??????????? ?? +Unicode, ??????? ????????? 10-12 ????? 1997 ???? ? ?????? ? ????????. +??????????? ??????? ??????? ???? ????????? ?? ???????? ??????????? +????????? ? Unicode, ??????????? ? ???????????????????, ?????????? ? +?????????? Unicode ? ????????? ???????????? ???????? ? ??????????? +???????????, ???????, ??????? ? ???????????? ???????????? ????????. Added: branches/edi/test/russian_koi8r_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/russian_koi8r_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,6 @@ +????????????????? ?????? ?? ??????? ????????????? ??????????? ?? +Unicode, ??????? ????????? 10-12 ????? 1997 ???? ? ?????? ? ????????. +??????????? ??????? ??????? ???? ????????? ?? ???????? ??????????? +????????? ? Unicode, ??????????? ? ???????????????????, ?????????? ? +?????????? Unicode ? ????????? ???????????? ???????? ? ??????????? +???????????, ???????, ??????? ? ???????????? ???????????? ????????. Added: branches/edi/test/russian_utf8_cr.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/russian_utf8_crlf.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/russian_utf8_lf.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/test.lisp ============================================================================== --- (empty file) +++ branches/edi/test/test.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,390 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.20 2008/05/17 13:50:18 edi Exp $ + +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams-test) + +(defvar *this-file* (load-time-value + (or #.*compile-file-pathname* *load-pathname*)) + "The pathname of the file \(`test.lisp') where this variable was +defined.") + +#+:lispworks +(defun get-env-variable-as-directory (name) + (lw:when-let (string (lw:environment-variable name)) + (when (plusp (length string)) + (cond ((find (char string (1- (length string))) "\\/" :test #'char=) string) + (t (lw:string-append string "/")))))) + +(defvar *tmp-dir* + (load-time-value + (merge-pathnames "odd-streams-test/" + #+:allegro (system:temporary-directory) + #+:lispworks (pathname (or (get-env-variable-as-directory "TEMP") + (get-env-variable-as-directory "TMP") + #+:win32 "C:/" + #-:win32 "/tmp/")) + #-(or :allegro :lispworks) #p"/tmp/")) + "The pathname of a temporary directory used for testing.") + +(defvar *test-files* + '(("kafka" (:utf8 :latin1 :cp1252)) + ("tilton" (:utf8 :ascii)) + ("hebrew" (:utf8 :latin8)) + ("russian" (:utf8 :koi8r)) + ("unicode_demo" (:utf8 :ucs2 :ucs4))) + "A list of test files where each entry consists of the name +prefix and a list of encodings.") + +(defvar *test-success-counter* 0 + "Counts the number of successful tests.") + +(defun create-file-variants (file-name symbol) + "For a name suffix FILE-NAME and a symbol SYMBOL denoting an +encoding returns a list of pairs where the car is a full file +name and the cdr is the corresponding external format. This list +contains all possible variants w.r.t. to line-end conversion and +endianness." + (let ((args (ecase symbol + (:ascii '(:ascii)) + (:latin1 '(:latin-1)) + (:latin8 '(:hebrew)) + (:cp1252 '(:code-page :id 1252)) + (:koi8r '(:koi8-r)) + (:utf8 '(:utf-8)) + (:ucs2 '(:utf-16)) + (:ucs4 '(:utf-32)))) + (endianp (member symbol '(:ucs2 :ucs4)))) + (loop for little-endian in (if endianp '(t nil) '(t)) + for endian-suffix in (if endianp '("_le" "_be") '("")) + nconc (loop for eol-style in '(:lf :cr :crlf) + collect (cons (format nil "~A_~(~A~)_~(~A~)~A.txt" + file-name symbol eol-style endian-suffix) + (apply #'make-external-format + (append args `(:eol-style ,eol-style + :little-endian ,little-endian)))))))) + +(defun create-test-combinations (file-name symbols &optional simplep) + "For a name suffix FILE-NAME and a list of symbols SYMBOLS denoting +different encodings of the corresponding file returns a list of lists +which can be used as arglists for COMPARE-FILES. If SIMPLEP is true, +a list which can be used for the string tests below is returned." + (let ((file-variants (loop for symbol in symbols + nconc (create-file-variants file-name symbol)))) + (loop for (name-in . external-format-in) in file-variants + when simplep + collect (list name-in external-format-in) + else + nconc (loop for (name-out . external-format-out) in file-variants + collect (list name-in external-format-in name-out external-format-out))))) + +(defun file-equal (file1 file2) + "Returns a true value iff FILE1 and FILE2 have the same +contents \(viewed as binary files)." + (with-open-file (stream1 file1 :element-type 'octet) + (with-open-file (stream2 file2 :element-type 'octet) + (and (= (file-length stream1) (file-length stream2)) + (loop for byte1 = (read-byte stream1 nil nil) + for byte2 = (read-byte stream2 nil nil) + while (and byte1 byte2) + always (= byte1 byte2)))))) + +(defun copy-stream (stream-in external-format-in stream-out external-format-out) + "Copies the contents of the binary stream STREAM-IN to the +binary stream STREAM-OUT using flexi streams - STREAM-IN is read +with the external format EXTERNAL-FORMAT-IN and STREAM-OUT is +written with EXTERNAL-FORMAT-OUT." + (let ((in (make-flexi-stream stream-in :external-format external-format-in)) + (out (make-flexi-stream stream-out :external-format external-format-out))) + (loop for line = (read-line in nil nil) + while line + do (write-line line out)))) + +(defun copy-file (path-in external-format-in path-out external-format-out direction-out direction-in) + "Copies the contents of the file denoted by the pathname +PATH-IN to the file denoted by the pathname PATH-OUT using flexi +streams - STREAM-IN is read with the external format +EXTERNAL-FORMAT-IN and STREAM-OUT is written with +EXTERNAL-FORMAT-OUT. The input file is opened with +the :DIRECTION keyword argument DIRECTION-IN, the output file is +opened with the :DIRECTION keyword argument DIRECTION-OUT." + (with-open-file (in path-in + :element-type 'octet + :direction direction-in + :if-does-not-exist :error + :if-exists :overwrite) + (with-open-file (out path-out + :element-type 'octet + :direction direction-out + :if-does-not-exist :create + :if-exists :supersede) + (copy-stream in external-format-in out external-format-out)))) + +#+:lispworks +(defun copy-file-lw (path-in external-format-in path-out external-format-out direction-out direction-in) + "Same as COPY-FILE, but uses character streams instead of +binary streams. Only used to test LispWorks-specific behaviour." + (with-open-file (in path-in + :external-format '(:latin-1 :eol-style :lf) + :element-type 'base-char + :direction direction-in + :if-does-not-exist :error + :if-exists :overwrite) + (with-open-file (out path-out + :external-format '(:latin-1 :eol-style :lf) + :element-type 'base-char + :direction direction-out + :direction :output + :if-does-not-exist :create + :if-exists :supersede) + (copy-stream in external-format-in out external-format-out)))) + +(defun compare-files (path-in external-format-in path-out external-format-out) + "Copies the contents of the file (in the `test') denoted by the +relative pathname PATH-IN to the file (in a temporary directory) +denoted by the relative pathname PATH-OUT using flexi streams - +STREAM-IN is read with the external format EXTERNAL-FORMAT-IN and +STREAM-OUT is written with EXTERNAL-FORMAT-OUT. The resulting +file is compared with an existing file in the `test' directory to +check if the outcome is as expected. Uses various variants of +the :DIRECTION keyword when opening the files." + (let ((full-path-in (merge-pathnames path-in *this-file*)) + (full-path-out (ensure-directories-exist + (merge-pathnames path-out *tmp-dir*))) + (full-path-orig (merge-pathnames path-out *this-file*))) + (dolist (direction-out '(:output :io)) + (dolist (direction-in '(:input :io)) + (format *error-output* "Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (flex::normalize-external-format external-format-in) direction-in + (flex::normalize-external-format external-format-out) direction-out) + (copy-file full-path-in external-format-in + full-path-out external-format-out + direction-out direction-in) + (cond ((file-equal full-path-out full-path-orig) + (incf *test-success-counter*)) + (t (format *error-output* " Test failed!!!~%"))) + (terpri *error-output*) + #+:lispworks + (format *error-output* "LW-Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (flex::normalize-external-format external-format-in) direction-in + (flex::normalize-external-format external-format-out) direction-out) + #+:lispworks + (copy-file-lw full-path-in external-format-in + full-path-out external-format-out + direction-out direction-in) + #+:lispworks + (cond ((file-equal full-path-out full-path-orig) + (incf *test-success-counter*)) + (t (format *error-output* " Test failed!!!~%"))) + #+:lispworks + (terpri *error-output*))))) + +(defun file-as-octet-vector (pathspec) + "Returns the contents of the file denoted by PATHSPEC as a vector of +octets." + (with-open-file (in pathspec :element-type 'octet) + (let ((vector (make-array (file-length in) :element-type 'octet))) + (read-sequence vector in) + vector))) + +(defun file-as-string (pathspec external-format) + "Reads the contents of the file denoted by PATHSPEC using the +external format EXTERNAL-FORMAT and returns the result as a string." + (with-open-file (in pathspec :element-type 'octet) + (let* ((number-of-octets (file-length in)) + (in (make-flexi-stream in :external-format external-format)) + (string (make-array number-of-octets + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer string) (read-sequence string in)) + string))) + +(defmacro with-test ((test-description) &body body) + "Defines a test. Two utilities are available inside of the body of +the maco: The function FAIL, and the macro CHECK. FAIL, the lowest +level utility, marks the test defined by WITH-TEST as failed. CHECK +checks whether its argument is true, otherwise it calls FAIL. If +during evaluation of the specified expression any condition is +signalled, this is also considered a failure. + +WITH-TEST prints reports while the tests run. It also increments +*TEST-SUCCESS-COUNT* if a test completes successfully." + (flex::with-unique-names (successp) + `(let ((,successp t)) + (flet ((fail (format-str &rest format-args) + (setf ,successp nil) + (apply #'format *error-output* format-str format-args))) + (macrolet ((check (expression) + `(handler-case + (unless ,expression + (fail "Expression ~S failed.~%" ',expression)) + (condition (c) + (fail "Expression ~S failed signaling condition of type ~A: ~A.~%" + ',expression (type-of c) c))))) + (format *error-output* "Test ~S~%" ,test-description) + , at body + (if ,successp + (incf *test-success-counter*) + (format *error-output* " Test failed!!!~%")) + (terpri *error-output*) + (terpri *error-output*)) + ,successp)))) + +(defun string-test (pathspec external-format) + "Tests whether conversion from strings to octets and vice versa +using the external format EXTERNAL-FORMAT works as expected, using the +contents of the file denoted by PATHSPEC as test data and assuming +that the stream conversion functions work." + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (octets-vector (file-as-octet-vector full-path)) + (octets-list (coerce octets-vector 'list)) + (string (file-as-string full-path external-format))) + (with-test ((format nil "String tests with format ~S." + (flex::normalize-external-format external-format))) + (check (string= (octets-to-string octets-vector :external-format external-format) string)) + (check (string= (octets-to-string octets-list :external-format external-format) string)) + (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) + +(defmacro using-values ((&rest values) &body body) + "Executes BODY and feeds an element from VALUES to the USE-VALUE +restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals +an error when there are more or less FLEXI-STREAM-ENCODING-ERRORs than +there are elements in VALUES." + (flex::with-unique-names (value-stack condition-counter) + `(let ((,value-stack ',values) + (,condition-counter 0)) + (handler-bind ((flexi-stream-encoding-error + #'(lambda (c) + (declare (ignore c)) + (unless ,value-stack + (error "Too many FLEXI-STREAM-ENCODING-ERRORs signalled, expected only ~A." + ,(length values))) + (incf ,condition-counter) + (use-value (pop ,value-stack))))) + (prog1 (progn , at body) + (when ,value-stack + (error "~A FLEXI-STREAM-ENCODING-ERRORs signalled, but ~A were expected." + ,condition-counter ,(length values)))))))) + +(defun read-flexi-line (sequence external-format) + "Creates and returns a string from the octet sequence SEQUENCE using +the external format EXTERNAL-FORMAT." + (with-input-from-sequence (in sequence) + (setq in (make-flexi-stream in :external-format external-format)) + (read-line in))) + +(defun encoding-error-handling-test () + "Tests several possible encoding errors and how they are handled." + (with-test ("Handling of encoding errors.") + ;; handling of EOF in the middle of CRLF + (check (string= #.(string #\Return) + (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) + (let ((*substitution-char* #\?)) + ;; :ASCII doesn't have characters with char codes > 127 + (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) + ;; :WINDOWS-1253 doesn't have a characters with codes 170 and 210 + (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253))) + ;; not a valid UTF-8 sequence + (check (string= "??" (read-flexi-line `(#xe4 #xf6 #xfc) :utf8))) + ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 + (check (string= "??" (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + (let ((*substitution-char* nil)) + ;; :ASCII doesn't have characters with char codes > 127 + (check (string= "abc" (using-values (#\b #\c) + (read-flexi-line `(,(char-code #\a) 128 200) :ascii)))) + ;; :WINDOWS-1253 encoding doesn't have a characters with codes 170 and 210 + (check (string= "axy" (using-values (#\x #\y) + (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))) + ;; not a valid UTF-8 sequence + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#xe4 #xf6 #xfc) :utf8)))) + ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + ;; only one byte + (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16le)))) + ;; two bytes, but value of resulting word suggests that another word follows + (check (string= "R" (using-values (#\R) (read-flexi-line `(#x01 #xd8) :utf-16le)))) + ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff + (check (string= "T" (using-values (#\T) (read-flexi-line `(#x01 #xd8 #xff #xdb) :utf-16le)))) + ;; the same as for little endian above, but using inverse order of bytes in words + (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16be)))) + (check (string= "R" (using-values (#\R) (read-flexi-line `(#xd8 #x01) :utf-16be)))) + (check (string= "T" (using-values (#\T) (read-flexi-line `(#xd8 #x01 #xdb #xff) :utf-16be)))) + ;; the only case when error is signalled for UTF-32 is at end of file + ;; in the middle of 4-byte sequence, both for big and little endian + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32le)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line `(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32be)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) + +(defun unread-char-test () + "Tests whether UNREAD-CHAR behaves as expected." + (with-test ("UNREAD-CHAR behaviour.") + (flet ((test-one-file (file-name external-format) + (with-open-file (in (merge-pathnames file-name *this-file*) + :element-type 'flex:octet) + (setq in (make-flexi-stream in :external-format external-format)) + (loop repeat 300 + for char = (read-char in) + do (unread-char char in) + (check (char= (read-char in) char)))))) + (loop for (file-name symbols) in *test-files* + do (loop for symbol in symbols + do (loop for (file-name . external-format) in (create-file-variants file-name symbol) + do (test-one-file file-name external-format))))))) + +(defun run-tests () + "Applies COMPARE-FILES to all test scenarios created with +CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, +and shows simple statistics at the end." + (let* ((*test-success-counter* 0) + (compare-files-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols))) + (no-tests (* 4 (length compare-files-args-list)))) + #+:lispworks + (setq no-tests (* 2 no-tests)) + (dolist (args compare-files-args-list) + (apply 'compare-files args)) + (let ((string-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length string-test-args-list)) + (dolist (args string-test-args-list) + (apply 'string-test args))) + (incf no-tests) + (encoding-error-handling-test) + (incf no-tests) + (unread-char-test) + (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" + (= no-tests *test-success-counter*) (- no-tests *test-success-counter*) no-tests))) + Added: branches/edi/test/tilton_ascii_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/tilton_ascii_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +Programmers who lock onto a design decision and cling to it in the face of contradictory new information -- well, that's almost everyone in my experience, so I better not say what I think of them or people will start saying bad things about me on c.l.l. -- Ken Tilton % This reminds me of the NYC cabby who accepted a fare to Chicago. When they got there and could not find the friend who was supposed to pay the fare he just laughed and said he should have known. -- Ken Tilton % >> Actually, I believe that Aikido, Jazz and Lisp are different appearances >> of the same thing. Yes, the Tao. /Everything/ is a different appearance of the tao. -- Ken Tilton "Ken, I went to the library and read up on Buddhism, and believe me, you are no Buddhist." -- Kenny's mom % That absolutely terrifies the herd-following, lockstep-marching, mainstream-saluting cowards that obediently dash out or online to scoop up books on The Latest Thing. They learn and use atrocities like Java, C++, XML, and even Python for the security it gives them and then sit there slaving away miserably, tediously, joylously paying off mortgages and supporting ungrateful teenagers who despise them, only to look out the double-sealed thermo-pane windows of their central-heated, sound-proofed, dead-bolted, suffocating little nests into the howling gale thinking "what do they know that I do not know?" when they see us under a lean-to hunched over our laptops to shield them from the rain laughing our asses off as we write great code between bong hits.... what was the question? -- Ken Tilton % Shut up! (That last phrase has four or more syllables if pronounced as intended.) -- Ken Tilton % Nonsense. You'll be using it for the GUI, not protein-folding. -- Ken Tilton (responding to a comment that LTK was slow because it was based on TK) % Continuations certainly are clever, but if we learned anything from the rejection of the cover art for "Smell the Glove", it is that "there is a fine line between stupid... and clever". -- Ken Tilton % Ah, there's no place like academia for dispassionate, intellectually honest discussion of new ideas on their merits. Thank god for tenure giving your bold antagonist the protection they needed to shout down your iconoclastic..... hang on... -- Ken Tilton % Whoever objected must be in my killfile, ... -- Ken Tilton % >From memory (but I think I have it right): "But Jesus said, Suffer captured variables, and forbid them not, to come unto thine macro bodies: for of such is are DSLs made." -- Ken Tilton Can I get an Amen? % Awareness of defect is the first step to recovery. -- Ken Tilton % You made a bad analogy (there are no good ones, but you found a new low) ... -- Ken Tilton % Yes, it is true that Kent Pitman was raised by a closet full of Lisp Machines, but the exception only proves the rule. -- Ken Tilton (in a postscript after positing that computer languages are not learned in infancy) % I suggest you try bartender's school to support yourself, start programming for fun again. -- Ken Tilton (responding to a comment that 98% of anything to do with computers was not interesting code) % You could add four lanes to my carpal tunnel and I still could not write all the code I am dying to write. -- Ken Tilton % Neutrality? I want to bury other languages, not have a gateway to them. -- Ken Tilton % Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" Simon: "Hunh? My puppy /always/ gives me companionship." -- Ken Tilton (on how he was understood by a native english speaker) % \ No newline at end of file Added: branches/edi/test/tilton_ascii_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/tilton_ascii_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: branches/edi/test/tilton_ascii_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/tilton_ascii_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: branches/edi/test/tilton_utf8_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/tilton_utf8_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ +Programmers who lock onto a design decision and cling to it in the face of contradictory new information -- well, that's almost everyone in my experience, so I better not say what I think of them or people will start saying bad things about me on c.l.l. -- Ken Tilton % This reminds me of the NYC cabby who accepted a fare to Chicago. When they got there and could not find the friend who was supposed to pay the fare he just laughed and said he should have known. -- Ken Tilton % >> Actually, I believe that Aikido, Jazz and Lisp are different appearances >> of the same thing. Yes, the Tao. /Everything/ is a different appearance of the tao. -- Ken Tilton "Ken, I went to the library and read up on Buddhism, and believe me, you are no Buddhist." -- Kenny's mom % That absolutely terrifies the herd-following, lockstep-marching, mainstream-saluting cowards that obediently dash out or online to scoop up books on The Latest Thing. They learn and use atrocities like Java, C++, XML, and even Python for the security it gives them and then sit there slaving away miserably, tediously, joylously paying off mortgages and supporting ungrateful teenagers who despise them, only to look out the double-sealed thermo-pane windows of their central-heated, sound-proofed, dead-bolted, suffocating little nests into the howling gale thinking "what do they know that I do not know?" when they see us under a lean-to hunched over our laptops to shield them from the rain laughing our asses off as we write great code between bong hits.... what was the question? -- Ken Tilton % Shut up! (That last phrase has four or more syllables if pronounced as intended.) -- Ken Tilton % Nonsense. You'll be using it for the GUI, not protein-folding. -- Ken Tilton (responding to a comment that LTK was slow because it was based on TK) % Continuations certainly are clever, but if we learned anything from the rejection of the cover art for "Smell the Glove", it is that "there is a fine line between stupid... and clever". -- Ken Tilton % Ah, there's no place like academia for dispassionate, intellectually honest discussion of new ideas on their merits. Thank god for tenure giving your bold antagonist the protection they needed to shout down your iconoclastic..... hang on... -- Ken Tilton % Whoever objected must be in my killfile, ... -- Ken Tilton % >From memory (but I think I have it right): "But Jesus said, Suffer captured variables, and forbid them not, to come unto thine macro bodies: for of such is are DSLs made." -- Ken Tilton Can I get an Amen? % Awareness of defect is the first step to recovery. -- Ken Tilton % You made a bad analogy (there are no good ones, but you found a new low) ... -- Ken Tilton % Yes, it is true that Kent Pitman was raised by a closet full of Lisp Machines, but the exception only proves the rule. -- Ken Tilton (in a postscript after positing that computer languages are not learned in infancy) % I suggest you try bartender's school to support yourself, start programming for fun again. -- Ken Tilton (responding to a comment that 98% of anything to do with computers was not interesting code) % You could add four lanes to my carpal tunnel and I still could not write all the code I am dying to write. -- Ken Tilton % Neutrality? I want to bury other languages, not have a gateway to them. -- Ken Tilton % Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" Simon: "Hunh? My puppy /always/ gives me companionship." -- Ken Tilton (on how he was understood by a native english speaker) % \ No newline at end of file Added: branches/edi/test/tilton_utf8_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/tilton_utf8_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: branches/edi/test/tilton_utf8_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/tilton_utf8_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,96 @@ +Programmers who lock onto a design decision and cling to it in the face of +contradictory new information -- well, that's almost everyone in my +experience, so I better not say what I think of them or people will start +saying bad things about me on c.l.l. + -- Ken Tilton +% +This reminds me of the NYC cabby who accepted a fare to Chicago. When +they got there and could not find the friend who was supposed to pay the +fare he just laughed and said he should have known. + -- Ken Tilton +% +>> Actually, I believe that Aikido, Jazz and Lisp are different appearances +>> of the same thing. +Yes, the Tao. /Everything/ is a different appearance of the tao. + -- Ken Tilton + +"Ken, I went to the library and read up on Buddhism, and believe me, you +are no Buddhist." + -- Kenny's mom +% +That absolutely terrifies the herd-following, lockstep-marching, +mainstream-saluting cowards that obediently dash out or online to +scoop up books on The Latest Thing. They learn and use atrocities like +Java, C++, XML, and even Python for the security it gives them and +then sit there slaving away miserably, tediously, joylously paying off +mortgages and supporting ungrateful teenagers who despise them, only +to look out the double-sealed thermo-pane windows of their +central-heated, sound-proofed, dead-bolted, suffocating little nests +into the howling gale thinking "what do they know that I do not know?" +when they see us under a lean-to hunched over our laptops to shield +them from the rain laughing our asses off as we write great code +between bong hits.... what was the question? + -- Ken Tilton +% +Shut up! (That last phrase has four or more syllables if pronounced as +intended.) + -- Ken Tilton +% +Nonsense. You'll be using it for the GUI, not protein-folding. + -- Ken Tilton + (responding to a comment that LTK was slow because it + was based on TK) +% +Continuations certainly are clever, but if we learned anything from the +rejection of the cover art for "Smell the Glove", it is that "there is a +fine line between stupid... and clever". + -- Ken Tilton +% +Ah, there's no place like academia for dispassionate, intellectually +honest discussion of new ideas on their merits. Thank god for tenure +giving your bold antagonist the protection they needed to shout down +your iconoclastic..... hang on... + -- Ken Tilton +% +Whoever objected must be in my killfile, ... + -- Ken Tilton +% +From memory (but I think I have it right): + +"But Jesus said, Suffer captured variables, and forbid them not, to come +unto thine macro bodies: for of such is are DSLs made." + -- Ken Tilton + +Can I get an Amen? +% +Awareness of defect is the first step to recovery. + -- Ken Tilton +% +You made a bad analogy (there are no good ones, but you found a new +low) ... + -- Ken Tilton +% +Yes, it is true that Kent Pitman was raised by a closet full of Lisp +Machines, but the exception only proves the rule. + -- Ken Tilton + (in a postscript after positing that computer + languages are not learned in infancy) +% +I suggest you try bartender's school to support yourself, start +programming for fun again. + -- Ken Tilton + (responding to a comment that 98% of anything to do + with computers was not interesting code) +% +You could add four lanes to my carpal tunnel and I still could not +write all the code I am dying to write. + -- Ken Tilton +% +Neutrality? I want to bury other languages, not have a gateway to them. + -- Ken Tilton +% +Ken: "Cute puppy. Did you get it for companionship or to pick up chicks?" +Simon: "Hunh? My puppy /always/ gives me companionship." + -- Ken Tilton + (on how he was understood by a native english speaker) +% Added: branches/edi/test/unicode_demo_ucs2_cr_be.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs2_cr_le.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs2_crlf_be.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs2_crlf_le.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs2_lf_be.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs2_lf_le.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs4_cr_be.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs4_cr_le.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs4_crlf_be.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs4_crlf_le.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs4_lf_be.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_ucs4_lf_le.txt ============================================================================== Binary file. No diff available. Added: branches/edi/test/unicode_demo_utf8_cr.txt ============================================================================== --- (empty file) +++ branches/edi/test/unicode_demo_utf8_cr.txt Sat May 17 12:49:25 2008 @@ -0,0 +1 @@ + UTF-8 encoded sample plain-text file ???????????????????????????????????????????????????????????????????????????????????????????????????????????? Markus Kuhn [??ma??k??s ku??n] ??? 2002-07-25 The ASCII compatible UTF-8 encoding used in this plain-text file is defined in Unicode, ISO 10646-1, and RFC 2279. Using Unicode/UTF-8, you can write in emails and source code things such as Mathematics and sciences: ??? E???da = Q, n ??? ???, ??? f(i) = ??? g(i), ??????????????????????????????????????? ????????????a??+b?? ????????? ???x??????: ???x??? = ?????????x???, ?? ??? ???? = ??(???? ??? ??), ??????????????????????????? ????????? ???????????? c??? ????????? ??? ??? ?????? ??? ??? ??? ??? ??? ??? ??? ???, ????????? ????????? ????????? ??? ????????? ??? < a ??? b ??? c ??? d ??? ??? ??? (???A??? ??? ???B???), ????????? ??? ????????? ????????? ???a???-b???????????? 2H??? + O??? ??? 2H???O, R = 4.7 k??, ??? 200 mm ?????????i=1 ????????? Linguistics and dictionaries: ??i ??nt????n??????n??l f????n??t??k ??so??si??e????n Y [????psil??n], Yen [j??n], Yoga [??jo??g??] APL: ((V???V)=??????V)/V???,V ???????????????????????????????????? Nicer typography in plain text files: ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? ??? ??? ??? ??? ???single??? and ???double??? quotes ??? ??? ??? ??? ??? Curly apostrophes: ???We???ve been here??? ??? ??? ??? ??? ??? Latin-1 apostrophe and accents: '??` ??? ??? ??? ??? ??? ???deutsche??? ???Anf??hrungszeichen??? ??? ??? ??? ??? ??? ???, ???, ???, ???, 3???4, ???, ???5/+5, ???, ??? ??? ??? ??? ??? ??? ASCII safety test: 1lI|, 0OD, 8B ??? ??? ????????????????????????????????? ??? ??? ??? the euro symbol: ??? 14.95 ??? ??? ??? ??? ????????????????????????????????? ??? ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? Combining characters: STARG????TE SG-1, a = v?? = r??, a??? ??? b??? Greek (in Polytonic): The Greek anthem: ????? ??????????????? ???????? ??????? ????????? ??????? ??????????????? ??????? ???????????????, ????? ??????????????? ???????? ??????? ??????? ??????? ????? ??????? ??????????????? ????? ?????. ?????????? ????? ??????????????? ????????????????? ??????? ?????????????????? ????? ?????????? ??????? ??????? ??????????? ???????????????????????? ???????????, ??? ???????????, ??????????????????????! From a speech of Demosthenes in the 4th century BC: ?????????? ???????????? ?????????????????????? ?????? ?????????????????????, ??? ????????????? ????????????????????, ????????? ????? ??????? ????? ????????????????? ?????????????????? ??????? ????????? ????????? ????????? ????????????? ??????? ?????????????? ????????? ??????? ??????? ????????????? ????????? ??????? ????????????????????????? ????????????????? ???????? ???????????????????????, ????? ????? ?????????????????? ??????? ??????????? ???????????????????, ?????????? ????????? ????? ???????????????????? ???????????? ????????????????? ??????????? ??????????????????? ?????????. ???????????? ??????? ????????? ?????? ????????????????? ????? ????? ??????????????? ????????????????? ??? ??????? ??????????????????, ????????? ????? ???????????????????????, ?????????? ??????? ??????????? ??????????????????????? ?????????? ??????????????????????. ???????? ?????, ??????? ??????? ????????? ?????????? ????? ??????????? ??????? ????? ???????????? ??????????? ???????????????? ??????? ????????????????? ?????????????????????????, ??????? ?????????? ???????????????? ??????????? ???????? ?????????? ???????, ????? ??????????? ??????????????? ???????????? ???????????????????? ??????? ????????????? ??????????????????? ???????????? ?????????????? ??????????????????? ?????????? ??????????? ??????? ?????????????, ????????? ????????? ??????????????????? ???????????????. ???????? ??????? ??????????? ??????????????? ???????????????, ????????? ??????? ????????? ??????? ????????? ???????????????????????? ?????? ??????? ????? ????????????? ???????????????? ????????????????? ????????? ????? ??????? ???????????? ???????????? ????????????????????, ??????????????? ???????????????? ????????? ??????? ????????????????? ?????????????????? ??????????????????? ???????????. ???????????????????????, ????? ???????????????????????? Georgian: From a Unicode conference invitation: ?????????????????? ?????????????????? ????????????????????? ????????????????????????????????? Unicode-?????? ??????????????? ???????????????????????????????????? ??????????????????????????????????????? ?????????????????????????????????, ????????????????????? ?????????????????????????????? 10-12 ???????????????, ???. ?????????????????????, ??????????????????????????????. ????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? ?????????????????????????????? ???????????? ???????????????????????? ??????????????????????????? ??????????????????????????? ?????? Unicode-???, ????????????????????????????????????????????????????????? ?????? ?????????????????????????????????, Unicode-?????? ?????????????????????????????? ??????????????????????????? ??????????????????????????????, ?????? ????????????????????????????????? ?????????????????????????????????, ???????????????????????????, ??????????????????????????? ???????????????????????????????????? ?????? ???????????????????????????????????? ????????????????????????????????? ??????????????????????????????. Russian: From a Unicode conference invitation: ?????????????????????????????????? ???????????? ???? ?????????????? ?????????????????????????? ?????????????????????? ???? Unicode, ?????????????? ?????????????????? 10-12 ?????????? 1997 ???????? ?? ???????????? ?? ????????????????. ?????????????????????? ?????????????? ?????????????? ???????? ?????????????????? ???? ???????????????? ?????????????????????? ?????????????????? ?? Unicode, ?????????????????????? ?? ??????????????????????????????????????, ???????????????????? ?? ???????????????????? Unicode ?? ?????????????????? ???????????????????????? ???????????????? ?? ?????????????????????? ??????????????????????, ??????????????, ?????????????? ?? ???????????????????????? ???????????????????????? ????????????????. Thai (UCS Level 2): Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese classic 'San Gua'): [----------------------------|------------------------] ??? ?????????????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??? (The above is a two-column text. If combining characters are handled correctly, the lines of the second column should be aligned with the | character above.) Ethiopian: Proverbs in the Amharic language: ????????? ??????????????? ????????? ?????????????????? ?????? ????????? ?????????????????? ?????????????????? ?????? ???????????? ???????????? ????????? ?????? ???????????? ?????? ???????????? ????????? ?????????????????? ????????? ???????????? ????????? ?????????????????? ????????? ????????? ?????? ???????????? ??????????????? ?????????????????? ?????? ???????????? ??????????????? ???????????? ??????????????? ?????? ???????????? ???????????? ???????????? ?????? ??????????????? ????????? ????????? ???????????? ???????????????????????? ???????????? ?????????????????? ????????? ??????????????? ?????????????????? ??????????????? ????????? ???????????? ????????? ???????????? ??????????????? ?????? ??????????????? ????????? ??????????????? ????????? ???????????? ???????????? ????????? ?????? ??????????????? ??????????????? ????????? ?????? ???????????? ????????? ???????????? ???????????? ????????? ???????????? ????????? ???????????? ?????? ????????? ???????????? ?????????????????? ??????????????? ??????????????? ?????? ???????????? Runes: ?????? ???????????? ????????? ?????? ???????????? ?????? ????????? ??????????????? ????????????????????????????????? ????????? ?????? ???????????? (Old English, which transcribed into Latin reads 'He cwaeth that he bude thaem lande northweardum with tha Westsae.' and means 'He said that he lived in the northern land near the Western Sea.') Braille: ???????????? ????????? ????????????????????? ????????? ??????????????? ????????? ??????????????? ?????? ???????????? ???????????? ????????? ?????? ?????? ???????????? ?????????????????? ???????????? ???????????? ?????? ?????????????????? ?????? ????????? ?????????????????? ????????? ??????????????? ?????? ?????? ??????????????????????????? ?????? ??????????????? ?????? ??????????????????????????? ????????? ?????? ???????????? ?????????????????? ????????????????????? ??????????????? ????????? ????????? ??????????????????????????? ???????????? ????????? ???????????? ???????????? ????????????????????? ????????? ?????????????????? ?????? ???????????? ?????? ????????? ????????? ???????????? ????????? ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? ???????????? ??? ??????????????? ???????????? ?????? ????????? ????????? ??? ???????????? ?????? ?????? ?????? ???????????????????????? ????????? ????????? ?????? ?????????????????????????????? ???????????? ???????????? ??? ?????????????????????????????? ??? ???????????? ???????????? ????????? ?????????????????? ????????????????????? ?????? ??????????????? ??? ?????????????????????????????? ?????? ?????? ?????????????????? ??????????????? ?????? ?????????????????????????????? ??? ?????? ?????????????????? ????????? ?????? ?????????????????? ?????? ?????? ???????????????????????? ?????? ??? ?????? ????????????????????? ????????? ?????? ???????????????????????? ??????????????? ???????????? ????????? ?????????????????? ????????? ?????? ?????? ???????????????????????? ???????????? ???????????? ?????? ???????????? ????????????????????? ??????????????? ?????? ?????? ????????????????????? ??????????????????????????????????????? ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? (The first couple of paragraphs of "A Christmas Carol" by Dickens) Compact font selection example text: ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 abcdefghijklmnopqrstuvwxyz ?????????????????????? ???????????????????????????????????????????? ???????????????????? ???????????????????? ???????????????????????? ??????????????? ???????????????????????? ????????????????????????????????????? Greetings in various languages: Hello world, ????????????????? ???????????, ??????????????? Box drawing alignment tests: ??? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??????????????????????????? ????????? ??? ????????????????????? ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? ?????? ??? ?????? ?????? ?????? ????????????????????? ????????????????????? ????????????????????? ???????????? ???????????? ??? ???????????? ??? ??? ????????????????????? ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ?????????????????? ???????????? ??? ???????????? ??? ???????????????????????? ?????????????????? \ No newline at end of file Added: branches/edi/test/unicode_demo_utf8_crlf.txt ============================================================================== --- (empty file) +++ branches/edi/test/unicode_demo_utf8_crlf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,212 @@ + +UTF-8 encoded sample plain-text file +???????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Markus Kuhn [??ma??k??s ku??n] ??? 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ??? E???da = Q, n ??? ???, ??? f(i) = ??? g(i), ??????????????????????????????????????? + ????????????a??+b?? ????????? + ???x??????: ???x??? = ?????????x???, ?? ??? ???? = ??(???? ??? ??), ??????????????????????????? ????????? + ???????????? c??? ????????? + ??? ??? ?????? ??? ??? ??? ??? ??? ??? ??? ???, ????????? ????????? + ????????? ??? ????????? + ??? < a ??? b ??? c ??? d ??? ??? ??? (???A??? ??? ???B???), ????????? ??? ????????? + ????????? ???a???-b???????????? + 2H??? + O??? ??? 2H???O, R = 4.7 k??, ??? 200 mm ?????????i=1 ????????? + +Linguistics and dictionaries: + + ??i ??nt????n??????n??l f????n??t??k ??so??si??e????n + Y [????psil??n], Yen [j??n], Yoga [??jo??g??] + +APL: + + ((V???V)=??????V)/V???,V ???????????????????????????????????? + +Nicer typography in plain text files: + + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + ??? ??? + ??? ??? ???single??? and ???double??? quotes ??? + ??? ??? + ??? ??? Curly apostrophes: ???We???ve been here??? ??? + ??? ??? + ??? ??? Latin-1 apostrophe and accents: '??` ??? + ??? ??? + ??? ??? ???deutsche??? ???Anf??hrungszeichen??? ??? + ??? ??? + ??? ??? ???, ???, ???, ???, 3???4, ???, ???5/+5, ???, ??? ??? + ??? ??? + ??? ??? ASCII safety test: 1lI|, 0OD, 8B ??? + ??? ????????????????????????????????? ??? + ??? ??? the euro symbol: ??? 14.95 ??? ??? ??? + ??? ????????????????????????????????? ??? + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Combining characters: + + STARG????TE SG-1, a = v?? = r??, a??? ??? b??? + +Greek (in Polytonic): + + The Greek anthem: + + ????? ??????????????? ???????? ??????? ????????? + ??????? ??????????????? ??????? ???????????????, + ????? ??????????????? ???????? ??????? ??????? + ??????? ????? ??????? ??????????????? ????? ?????. + + ?????????? ????? ??????????????? ????????????????? + ??????? ?????????????????? ????? ?????????? + ??????? ??????? ??????????? ???????????????????????? + ???????????, ??? ???????????, ??????????????????????! + + From a speech of Demosthenes in the 4th century BC: + + ?????????? ???????????? ?????????????????????? ?????? ?????????????????????, ??? ????????????? ????????????????????, + ????????? ????? ??????? ????? ????????????????? ?????????????????? ??????? ????????? ????????? ????????? + ????????????? ??????? ?????????????? ????????? ??????? ??????? ????????????? ????????? ??????? + ????????????????????????? ????????????????? ???????? ???????????????????????, ????? ????? ?????????????????? + ??????? ??????????? ???????????????????, ?????????? ????????? ????? ???????????????????? ???????????? + ????????????????? ??????????? ??????????????????? ?????????. ???????????? ??????? ????????? ?????? ????????????????? + ????? ????? ??????????????? ????????????????? ??? ??????? ??????????????????, ????????? ????? ???????????????????????, + ?????????? ??????? ??????????? ??????????????????????? ?????????? ??????????????????????. ???????? ?????, ??????? ??????? + ????????? ?????????? ????? ??????????? ??????? ????? ???????????? ??????????? ???????????????? ??????? ????????????????? + ?????????????????????????, ??????? ?????????? ???????????????? ??????????? ???????? ?????????? ???????, ????? ??????????? + ??????????????? ???????????? ???????????????????? ??????? ????????????? ??????????????????? ???????????? ?????????????? + ??????????????????? ?????????? ??????????? ??????? ?????????????, ????????? ????????? ??????????????????? + ???????????????. ???????? ??????? ??????????? ??????????????? ???????????????, ????????? ??????? ????????? ??????? + ????????? ???????????????????????? ?????? ??????? ????? ????????????? ???????????????? ????????????????? ????????? ????? + ??????? ???????????? ???????????? ????????????????????, ??????????????? ???????????????? ????????? ??????? + ????????????????? ?????????????????? ??????????????????? ???????????. + + ???????????????????????, ????? ???????????????????????? + +Georgian: + + From a Unicode conference invitation: + + ?????????????????? ?????????????????? ????????????????????? ????????????????????????????????? Unicode-?????? ??????????????? ???????????????????????????????????? + ??????????????????????????????????????? ?????????????????????????????????, ????????????????????? ?????????????????????????????? 10-12 ???????????????, + ???. ?????????????????????, ??????????????????????????????. ????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? + ?????????????????????????????? ???????????? ???????????????????????? ??????????????????????????? ??????????????????????????? ?????? Unicode-???, + ????????????????????????????????????????????????????????? ?????? ?????????????????????????????????, Unicode-?????? ?????????????????????????????? + ??????????????????????????? ??????????????????????????????, ?????? ????????????????????????????????? ?????????????????????????????????, ???????????????????????????, + ??????????????????????????? ???????????????????????????????????? ?????? ???????????????????????????????????? ????????????????????????????????? ??????????????????????????????. + +Russian: + + From a Unicode conference invitation: + + ?????????????????????????????????? ???????????? ???? ?????????????? ?????????????????????????? ?????????????????????? ???? + Unicode, ?????????????? ?????????????????? 10-12 ?????????? 1997 ???????? ?? ???????????? ?? ????????????????. + ?????????????????????? ?????????????? ?????????????? ???????? ?????????????????? ???? ???????????????? ?????????????????????? + ?????????????????? ?? Unicode, ?????????????????????? ?? ??????????????????????????????????????, ???????????????????? ?? + ???????????????????? Unicode ?? ?????????????????? ???????????????????????? ???????????????? ?? ?????????????????????? + ??????????????????????, ??????????????, ?????????????? ?? ???????????????????????? ???????????????????????? ????????????????. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ??? ?????????????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? + ???????????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??? + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ????????? ??????????????? ????????? ?????????????????? + ?????? ????????? ?????????????????? ?????????????????? + ?????? ???????????? ???????????? ????????? + ?????? ???????????? ?????? ???????????? ????????? ?????????????????? + ????????? ???????????? ????????? ?????????????????? + ????????? ????????? ?????? ???????????? + ??????????????? ?????????????????? + ?????? ???????????? ??????????????? ???????????? ??????????????? + ?????? ???????????? ???????????? ???????????? + ?????? ??????????????? ????????? ????????? ???????????? ???????????????????????? + ???????????? ?????????????????? ????????? ??????????????? ?????????????????? + ??????????????? ????????? ???????????? ????????? ???????????? ??????????????? + ?????? ??????????????? ????????? ??????????????? + ????????? ???????????? ???????????? ????????? ?????? ??????????????? + ??????????????? ????????? ?????? ???????????? ????????? ???????????? + ???????????? ????????? ???????????? ????????? + ???????????? ?????? ????????? ???????????? ?????????????????? + ??????????????? ??????????????? ?????? ???????????? + +Runes: + + ?????? ???????????? ????????? ?????? ???????????? ?????? ????????? ??????????????? ????????????????????????????????? ????????? ?????? ???????????? + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ???????????? ????????? ????????????????????? ????????? + + ??????????????? ????????? ??????????????? ?????? ???????????? ???????????? ????????? ?????? ?????? ???????????? + ?????????????????? ???????????? ???????????? ?????? ?????????????????? ?????? ????????? ?????????????????? ????????? + ??????????????? ?????? ?????? ??????????????????????????? ?????? ??????????????? ?????? ??????????????????????????? + ????????? ?????? ???????????? ?????????????????? ????????????????????? ??????????????? ????????? ????????? + ??????????????????????????? ???????????? ????????? ???????????? ???????????? ????????????????????? ????????? ?????????????????? ?????? + ???????????? ?????? ????????? ????????? ???????????? ????????? + + ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + ???????????? ??? ??????????????? ???????????? ?????? ????????? ????????? ??? ???????????? ?????? ?????? + ?????? ???????????????????????? ????????? ????????? ?????? ?????????????????????????????? ???????????? ???????????? + ??? ?????????????????????????????? ??? ???????????? ???????????? ????????? ?????????????????? ????????????????????? ?????? + ??????????????? ??? ?????????????????????????????? ?????? ?????? ?????????????????? ??????????????? ?????? ?????????????????????????????? + ??? ?????? ?????????????????? ????????? ?????? ?????????????????? ?????? ?????? ???????????????????????? + ?????? ??? ?????? ????????????????????? ????????? ?????? ???????????????????????? ??????????????? + ???????????? ????????? ?????????????????? ????????? ?????? ?????? ???????????????????????? ???????????? ???????????? ?????? + ???????????? ????????????????????? ??????????????? ?????? ?????? ????????????????????? ??????????????????????????????????????? ????????? + ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz ?????????????????????? + ???????????????????????????????????????????? ???????????????????? ???????????????????? + ???????????????????????? ??????????????? ???????????????????????? ????????????????????????????????????? + +Greetings in various languages: + + Hello world, ????????????????? ???????????, ??????????????? + +Box drawing alignment tests: ??? + ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??????????????????????????? ????????? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ?????? ??? ?????? ?????? ?????? ????????????????????? ????????????????????? ????????????????????? ???????????? ???????????? ??? ???????????? ??? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ?????????????????? ???????????? ??? ???????????? ??? ???????????????????????? + ?????????????????? Added: branches/edi/test/unicode_demo_utf8_lf.txt ============================================================================== --- (empty file) +++ branches/edi/test/unicode_demo_utf8_lf.txt Sat May 17 12:49:25 2008 @@ -0,0 +1,212 @@ + +UTF-8 encoded sample plain-text file +???????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Markus Kuhn [??ma??k??s ku??n] ??? 2002-07-25 + + +The ASCII compatible UTF-8 encoding used in this plain-text file +is defined in Unicode, ISO 10646-1, and RFC 2279. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and sciences: + + ??? E???da = Q, n ??? ???, ??? f(i) = ??? g(i), ??????????????????????????????????????? + ????????????a??+b?? ????????? + ???x??????: ???x??? = ?????????x???, ?? ??? ???? = ??(???? ??? ??), ??????????????????????????? ????????? + ???????????? c??? ????????? + ??? ??? ?????? ??? ??? ??? ??? ??? ??? ??? ???, ????????? ????????? + ????????? ??? ????????? + ??? < a ??? b ??? c ??? d ??? ??? ??? (???A??? ??? ???B???), ????????? ??? ????????? + ????????? ???a???-b???????????? + 2H??? + O??? ??? 2H???O, R = 4.7 k??, ??? 200 mm ?????????i=1 ????????? + +Linguistics and dictionaries: + + ??i ??nt????n??????n??l f????n??t??k ??so??si??e????n + Y [????psil??n], Yen [j??n], Yoga [??jo??g??] + +APL: + + ((V???V)=??????V)/V???,V ???????????????????????????????????? + +Nicer typography in plain text files: + + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + ??? ??? + ??? ??? ???single??? and ???double??? quotes ??? + ??? ??? + ??? ??? Curly apostrophes: ???We???ve been here??? ??? + ??? ??? + ??? ??? Latin-1 apostrophe and accents: '??` ??? + ??? ??? + ??? ??? ???deutsche??? ???Anf??hrungszeichen??? ??? + ??? ??? + ??? ??? ???, ???, ???, ???, 3???4, ???, ???5/+5, ???, ??? ??? + ??? ??? + ??? ??? ASCII safety test: 1lI|, 0OD, 8B ??? + ??? ????????????????????????????????? ??? + ??? ??? the euro symbol: ??? 14.95 ??? ??? ??? + ??? ????????????????????????????????? ??? + ???????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????? + +Combining characters: + + STARG????TE SG-1, a = v?? = r??, a??? ??? b??? + +Greek (in Polytonic): + + The Greek anthem: + + ????? ??????????????? ???????? ??????? ????????? + ??????? ??????????????? ??????? ???????????????, + ????? ??????????????? ???????? ??????? ??????? + ??????? ????? ??????? ??????????????? ????? ?????. + + ?????????? ????? ??????????????? ????????????????? + ??????? ?????????????????? ????? ?????????? + ??????? ??????? ??????????? ???????????????????????? + ???????????, ??? ???????????, ??????????????????????! + + From a speech of Demosthenes in the 4th century BC: + + ?????????? ???????????? ?????????????????????? ?????? ?????????????????????, ??? ????????????? ????????????????????, + ????????? ????? ??????? ????? ????????????????? ?????????????????? ??????? ????????? ????????? ????????? + ????????????? ??????? ?????????????? ????????? ??????? ??????? ????????????? ????????? ??????? + ????????????????????????? ????????????????? ???????? ???????????????????????, ????? ????? ?????????????????? + ??????? ??????????? ???????????????????, ?????????? ????????? ????? ???????????????????? ???????????? + ????????????????? ??????????? ??????????????????? ?????????. ???????????? ??????? ????????? ?????? ????????????????? + ????? ????? ??????????????? ????????????????? ??? ??????? ??????????????????, ????????? ????? ???????????????????????, + ?????????? ??????? ??????????? ??????????????????????? ?????????? ??????????????????????. ???????? ?????, ??????? ??????? + ????????? ?????????? ????? ??????????? ??????? ????? ???????????? ??????????? ???????????????? ??????? ????????????????? + ?????????????????????????, ??????? ?????????? ???????????????? ??????????? ???????? ?????????? ???????, ????? ??????????? + ??????????????? ???????????? ???????????????????? ??????? ????????????? ??????????????????? ???????????? ?????????????? + ??????????????????? ?????????? ??????????? ??????? ?????????????, ????????? ????????? ??????????????????? + ???????????????. ???????? ??????? ??????????? ??????????????? ???????????????, ????????? ??????? ????????? ??????? + ????????? ???????????????????????? ?????? ??????? ????? ????????????? ???????????????? ????????????????? ????????? ????? + ??????? ???????????? ???????????? ????????????????????, ??????????????? ???????????????? ????????? ??????? + ????????????????? ?????????????????? ??????????????????? ???????????. + + ???????????????????????, ????? ???????????????????????? + +Georgian: + + From a Unicode conference invitation: + + ?????????????????? ?????????????????? ????????????????????? ????????????????????????????????? Unicode-?????? ??????????????? ???????????????????????????????????? + ??????????????????????????????????????? ?????????????????????????????????, ????????????????????? ?????????????????????????????? 10-12 ???????????????, + ???. ?????????????????????, ??????????????????????????????. ????????????????????????????????? ???????????????????????? ??????????????? ???????????????????????? + ?????????????????????????????? ???????????? ???????????????????????? ??????????????????????????? ??????????????????????????? ?????? Unicode-???, + ????????????????????????????????????????????????????????? ?????? ?????????????????????????????????, Unicode-?????? ?????????????????????????????? + ??????????????????????????? ??????????????????????????????, ?????? ????????????????????????????????? ?????????????????????????????????, ???????????????????????????, + ??????????????????????????? ???????????????????????????????????? ?????? ???????????????????????????????????? ????????????????????????????????? ??????????????????????????????. + +Russian: + + From a Unicode conference invitation: + + ?????????????????????????????????? ???????????? ???? ?????????????? ?????????????????????????? ?????????????????????? ???? + Unicode, ?????????????? ?????????????????? 10-12 ?????????? 1997 ???????? ?? ???????????? ?? ????????????????. + ?????????????????????? ?????????????? ?????????????? ???????? ?????????????????? ???? ???????????????? ?????????????????????? + ?????????????????? ?? Unicode, ?????????????????????? ?? ??????????????????????????????????????, ???????????????????? ?? + ???????????????????? Unicode ?? ?????????????????? ???????????????????????? ???????????????? ?? ?????????????????????? + ??????????????????????, ??????????????, ?????????????? ?? ???????????????????????? ???????????????????????? ????????????????. + +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ??? ?????????????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ???????????????????????????????????????????????????????????????????????????????????? + ???????????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????????????????? + ??????????????????????????????????????????????????????????????????????????? ?????????????????????????????????????????????????????????????????????????????? + ?????????????????????????????????????????????????????????????????? ????????????????????????????????????????????????????????????????????? ??? + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ????????? ??????????????? ????????? ?????????????????? + ?????? ????????? ?????????????????? ?????????????????? + ?????? ???????????? ???????????? ????????? + ?????? ???????????? ?????? ???????????? ????????? ?????????????????? + ????????? ???????????? ????????? ?????????????????? + ????????? ????????? ?????? ???????????? + ??????????????? ?????????????????? + ?????? ???????????? ??????????????? ???????????? ??????????????? + ?????? ???????????? ???????????? ???????????? + ?????? ??????????????? ????????? ????????? ???????????? ???????????????????????? + ???????????? ?????????????????? ????????? ??????????????? ?????????????????? + ??????????????? ????????? ???????????? ????????? ???????????? ??????????????? + ?????? ??????????????? ????????? ??????????????? + ????????? ???????????? ???????????? ????????? ?????? ??????????????? + ??????????????? ????????? ?????? ???????????? ????????? ???????????? + ???????????? ????????? ???????????? ????????? + ???????????? ?????? ????????? ???????????? ?????????????????? + ??????????????? ??????????????? ?????? ???????????? + +Runes: + + ?????? ???????????? ????????? ?????? ???????????? ?????? ????????? ??????????????? ????????????????????????????????? ????????? ?????? ???????????? + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ???????????? ????????? ????????????????????? ????????? + + ??????????????? ????????? ??????????????? ?????? ???????????? ???????????? ????????? ?????? ?????? ???????????? + ?????????????????? ???????????? ???????????? ?????? ?????????????????? ?????? ????????? ?????????????????? ????????? + ??????????????? ?????? ?????? ??????????????????????????? ?????? ??????????????? ?????? ??????????????????????????? + ????????? ?????? ???????????? ?????????????????? ????????????????????? ??????????????? ????????? ????????? + ??????????????????????????? ???????????? ????????? ???????????? ???????????? ????????????????????? ????????? ?????????????????? ?????? + ???????????? ?????? ????????? ????????? ???????????? ????????? + + ????????? ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + ???????????? ??? ??????????????? ???????????? ?????? ????????? ????????? ??? ???????????? ?????? ?????? + ?????? ???????????????????????? ????????? ????????? ?????? ?????????????????????????????? ???????????? ???????????? + ??? ?????????????????????????????? ??? ???????????? ???????????? ????????? ?????????????????? ????????????????????? ?????? + ??????????????? ??? ?????????????????????????????? ?????? ?????? ?????????????????? ??????????????? ?????? ?????????????????????????????? + ??? ?????? ?????????????????? ????????? ?????? ?????????????????? ?????? ?????? ???????????????????????? + ?????? ??? ?????? ????????????????????? ????????? ?????? ???????????????????????? ??????????????? + ???????????? ????????? ?????????????????? ????????? ?????? ?????? ???????????????????????? ???????????? ???????????? ?????? + ???????????? ????????????????????? ??????????????? ?????? ?????? ????????????????????? ??????????????????????????????????????? ????????? + ??????????????? ????????? ?????? ???????????? ?????? ??? ?????????????????????????????? + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz ?????????????????????? + ???????????????????????????????????????????? ???????????????????? ???????????????????? + ???????????????????????? ??????????????? ???????????????????????? ????????????????????????????????????? + +Greetings in various languages: + + Hello world, ????????????????? ???????????, ??????????????? + +Box drawing alignment tests: ??? + ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????? ??????????????????????????? ????????? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????? ??? ??? ????????? ????????? ??? ????????????????????? + ?????? ??? ?????? ?????? ?????? ????????????????????? ????????????????????? ????????????????????? ???????????? ???????????? ??? ???????????? ??? ??? ????????????????????? + ????????? ????????? ?????? ?????? ?????? ??? ?????? ?????? ??? ?????? ?????? ??? ?????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ???????????????????????? ??? ??? ??? ??? ??? ??? ??? + ????????????????????? ????????????????????? ????????????????????? ????????????????????? ????????????????????? ?????????????????? ???????????? ??? ???????????? ??? ???????????????????????? + ?????????????????? Added: branches/edi/util.lisp ============================================================================== --- (empty file) +++ branches/edi/util.lisp Sat May 17 12:49:25 2008 @@ -0,0 +1,166 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.14 2008/05/17 13:50:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +#+:lispworks +(eval-when (:compile-toplevel :load-toplevel :execute) + (import 'lw:with-unique-names)) + +#-:lispworks +(defmacro with-unique-names ((&rest bindings) &body body) + "Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form* + +Executes a series of forms with each VAR bound to a fresh, +uninterned symbol. The uninterned symbol is as if returned by a call +to GENSYM with the string denoted by X - or, if X is not supplied, the +string denoted by VAR - as argument. + +The variable bindings created are lexical unless special declarations +are specified. The scopes of the name bindings and declarations do not +include the Xs. + +The forms are evaluated in order, and the values of all but the last +are discarded \(that is, the body is an implicit PROGN)." + ;; reference implementation posted to comp.lang.lisp as + ;; by Vebjorn Ljosa - see also + ;; + `(let ,(mapcar #'(lambda (binding) + (check-type binding (or cons symbol)) + (if (consp binding) + (destructuring-bind (var x) binding + (check-type var symbol) + `(,var (gensym ,(etypecase x + (symbol (symbol-name x)) + (character (string x)) + (string x))))) + `(,binding (gensym ,(symbol-name binding))))) + bindings) + , at body)) + +#+:lispworks +(eval-when (:compile-toplevel :load-toplevel :execute) + (setf (macro-function 'with-rebinding) + (macro-function 'lw:rebinding))) + +#-:lispworks +(defmacro with-rebinding (bindings &body body) + "WITH-REBINDING ( { var | (var prefix) }* ) form* + +Evaluates a series of forms in the lexical environment that is +formed by adding the binding of each VAR to a fresh, uninterned +symbol, and the binding of that fresh, uninterned symbol to VAR's +original value, i.e., its value in the current lexical environment. + +The uninterned symbol is created as if by a call to GENSYM with the +string denoted by PREFIX - or, if PREFIX is not supplied, the string +denoted by VAR - as argument. + +The forms are evaluated in order, and the values of all but the last +are discarded \(that is, the body is an implicit PROGN)." + ;; reference implementation posted to comp.lang.lisp as + ;; by Vebjorn Ljosa - see also + ;; + (loop for binding in bindings + for var = (if (consp binding) (car binding) binding) + for name = (gensym) + collect `(,name ,var) into renames + collect ``(,,var ,,name) into temps + finally (return `(let ,renames + (with-unique-names ,bindings + `(let (,, at temps) + ,, at body)))))) + +(defun normalize-external-format-name (name) + "Converts NAME \(a symbol) to a `canonical' name for an +external format, e.g. :LATIN1 will be converted to :ISO-8859-1. +Also checks if there is an external format with that name and +signals an error otherwise." + (let ((real-name (or (cdr (assoc name +name-map+ + :test #'eq)) + name))) + (unless (find real-name +name-map+ + :test #'eq + :key #'cdr) + (error "~S is not known to be a name for an external format." name)) + real-name)) + +(defun ascii-name-p (name) + "Checks whether NAME is the keyword :ASCII." + (eq name :us-ascii)) + +(defun koi8-r-name-p (name) + "Checks whether NAME is the keyword :KOI8-R." + (eq name :koi8-r)) + +(defun code-page-name-p (name) + "Checks whether NAME is the keyword :CODE-PAGE." + (eq name :code-page)) + +(defun iso-8859-name-p (name) + "Checks whether NAME \(a keyword) names one of the known +ISO-8859 encodings." + (find name +iso-8859-tables+ :key #'car)) + +(defun known-code-page-id-p (id) + "Checks whether ID \(a number) denotes one of the known Windows +code pages." + (and (find id +code-page-tables+ :key #'car) + id)) + +#+:lispworks +(defun sans (plist &rest keys) + "Returns PLIST with keyword arguments from KEYS removed." + (sys::remove-properties plist keys)) + +#-:lispworks +(defun sans (plist &rest keys) + "Returns PLIST with keyword arguments from KEYS removed." + ;; stolen from Usenet posting <3247672165664225 at naggum.no> by Erik + ;; Naggum + (let ((sans ())) + (loop + (let ((tail (nth-value 2 (get-properties plist keys)))) + ;; this is how it ends + (unless tail + (return (nreconc sans plist))) + ;; copy all the unmatched keys + (loop until (eq plist tail) do + (push (pop plist) sans) + (push (pop plist) sans)) + ;; skip the matched key + (setq plist (cddr plist)))))) + +#+:lispworks +(defmacro with-accessors (slot-entries instance &body body) + "For LispWorks, we prefer SLOT-VALUE over accessors for better +performance." + `(with-slots ,(mapcar #'car slot-entries) + ,instance + , at body)) \ No newline at end of file From eweitz at common-lisp.net Sat May 17 22:27:24 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 18:27:24 -0400 (EDT) Subject: [flexi-streams-cvs] r21 - branches/edi Message-ID: <20080517222724.C568233079@common-lisp.net> Author: eweitz Date: Sat May 17 18:27:24 2008 New Revision: 21 Removed: branches/edi/ Log: Start again... From eweitz at common-lisp.net Sat May 17 22:28:41 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 18:28:41 -0400 (EDT) Subject: [flexi-streams-cvs] r22 - branches/edi Message-ID: <20080517222841.63803340A6@common-lisp.net> Author: eweitz Date: Sat May 17 18:28:41 2008 New Revision: 22 Added: branches/edi/ - copied from r21, trunk/ Log: Copy trunk to branch so we can see the diffs From eweitz at common-lisp.net Sat May 17 22:31:14 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 18:31:14 -0400 (EDT) Subject: [flexi-streams-cvs] r23 - in branches/edi: . test Message-ID: <20080517223114.3A7277C073@common-lisp.net> Author: eweitz Date: Sat May 17 18:31:08 2008 New Revision: 23 Added: branches/edi/conditions.lisp (contents, props changed) branches/edi/decode.lisp (contents, props changed) branches/edi/encode.lisp (contents, props changed) Modified: branches/edi/ascii.lisp branches/edi/code-pages.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd branches/edi/in-memory.lisp branches/edi/input.lisp branches/edi/iso-8859.lisp branches/edi/lw-binary-stream.lisp branches/edi/output.lisp branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/stream.lisp branches/edi/strings.lisp branches/edi/test/packages.lisp branches/edi/test/test.lisp branches/edi/util.lisp Log: Start of reorg - this time as a diff from trunk Modified: branches/edi/ascii.lisp ============================================================================== --- branches/edi/ascii.lisp (original) +++ branches/edi/ascii.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.7 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.8 2008/05/17 13:50:15 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions Modified: branches/edi/code-pages.lisp ============================================================================== --- branches/edi/code-pages.lisp (original) +++ branches/edi/code-pages.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.6 2008/05/17 13:50:15 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions Added: branches/edi/conditions.lisp ============================================================================== --- (empty file) +++ branches/edi/conditions.lisp Sat May 17 18:31:08 2008 @@ -0,0 +1,84 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.3 2008/05/17 15:56:16 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(define-condition flexi-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +flexi streams.")) + +(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) + () + (:documentation "Like FLEXI-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition flexi-stream-element-type-error (flexi-stream-error) + ((element-type :initarg :element-type + :reader flexi-stream-element-type-error-element-type)) + (:report (lambda (condition stream) + (format stream "Element type ~S not allowed." + (flexi-stream-element-type-error-element-type condition)))) + (:documentation "Errors of this type are signalled if the flexi +stream has a wrong element type.")) + +(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) + ((position-spec :initarg :position-spec + :reader flexi-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an +erroneous position spec is used in conjunction with +FILE-POSITION.")) + +;; TODO: stream might not be a stream... +(defun signal-encoding-error (flexi-stream format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +FLEXI-STREAM-ENCODING-ERROR." + (error 'flexi-stream-encoding-error + :format-control format-control + :format-arguments format-args + :stream flexi-stream)) + +(define-condition in-memory-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +IN-MEMORY streams.")) + +(define-condition in-memory-stream-closed-error (in-memory-stream-error) + () + (:report (lambda (condition stream) + (format stream "~S is closed." + (stream-error-stream condition)))) + (:documentation "An error that is signalled when someone is trying +to read from or write to a closed IN-MEMORY stream.")) + Added: branches/edi/decode.lisp ============================================================================== --- (empty file) +++ branches/edi/decode.lisp Sat May 17 18:31:08 2008 @@ -0,0 +1,151 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric char-to-octets (format char writer stream) + (:documentation "Converts the character CHAR to sequence of octets +and sends this sequence to SINK. STREAM will always be a flexi stream +which is used to determine how the character should be converted. +This function does all the work for STREAM-WRITE-CHAR in which case +SINK is the same as STREAM. It is also used in the implementation of +STREAM-WRITE-SEQUENCE below.")) + +(defmethod char-to-octets ((format flexi-latin-1-format) char writer stream) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 255) + (signal-encoding-error stream "~S is not a LATIN-1 character." char)) + (funcall writer octet)) + char) + +(defmethod char-to-octets ((format flexi-ascii-format) char writer stream) + (declare (optimize speed)) + (let ((octet (char-code char))) + (when (> octet 127) + (signal-encoding-error stream "~S is not an ASCII character." char)) + (funcall writer octet)) + char) + +(defmethod char-to-octets ((format flexi-8-bit-format) char writer stream) + (declare (optimize speed)) + (with-accessors ((encoding-hash external-format-encoding-hash)) + format + (let ((octet (gethash (char-code char) encoding-hash))) + (unless octet + (signal-encoding-error stream "~S is not in this encoding." char)) + (funcall writer octet)) + char)) + +(defmethod char-to-octets ((format flexi-utf-8-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (let ((char-code (char-code char))) + (tagbody + (cond ((< char-code #x80) + (funcall writer char-code) + (go zero)) + ((< char-code #x800) + (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (go one)) + ((< char-code #x10000) + (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (go two)) + ((< char-code #x200000) + (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code))) + (go three)) + ((< char-code #x4000000) + (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) + (go four)) + (t (funcall writer (logior #b11111100 (ldb (byte 1 30) char-code))))) + (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) + four + (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) + three + (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code))) + two + (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) + one + (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) + zero)) + char) + +(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 0) word)) + (funcall writer (ldb (byte 8 8) word)))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 8) word)) + (funcall writer (ldb (byte 8 0) word)))) + (declare (inline write-word) (dynamic-extent (function write-word))) + (let ((char-code (char-code char))) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) + char) + +(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (loop with char-code = (char-code char) + for position in '(0 8 16 24) do + (funcall writer (ldb (byte 8 position) char-code))) + char) + +(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer stream) + (declare (ignore stream) (optimize speed)) + (loop with char-code = (char-code char) + for position in '(24 16 8 0) do + (funcall writer (ldb (byte 8 position) char-code))) + char) + +(defmethod char-to-octets ((format flexi-cr-mixin) char writer stream) + "The `base' method for all formats which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of sending +one or more characters to SINK." + (declare (optimize speed)) + (case char + (#\Newline + (case (external-format-eol-style format) + (:cr (call-next-method format #\Return writer stream)) + (:crlf (call-next-method format #\Return writer stream) + (call-next-method format #\Linefeed writer stream)))) + (otherwise (call-next-method))) + char) Added: branches/edi/encode.lisp ============================================================================== --- (empty file) +++ branches/edi/encode.lisp Sat May 17 18:31:08 2008 @@ -0,0 +1,237 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defun recover-from-encoding-error (stream format-control &rest format-args) + "Helper function used by the STREAM-READ-CHAR methods below to deal +with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and +returns its character code in this case. Otherwise signals a +FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this +function and provides a corresponding USE-VALUE restart." + (when *substitution-char* + (return-from recover-from-encoding-error (char-code *substitution-char*))) + (restart-case + (apply #'signal-encoding-error stream format-control format-args) + (use-value (char) + :report "Specify a character to be used instead." + :interactive (lambda () + (loop + (format *query-io* "Type a character: ") + (let ((line (read-line *query-io*))) + (when (= 1 (length line)) + (return (list (char line 0))))))) + (char-code char)))) + +(defmethod octets-to-char-code ((format flexi-latin-1-format) reader unreader stream) + (declare (ignore unreader stream)) + (or (funcall reader) :eof)) + +(defmethod octets-to-char-code ((format flexi-ascii-format) reader unreader stream) + (declare (ignore unreader)) + (let ((octet (or (funcall reader) + (return-from octets-to-char-code :eof)))) + (declare (type octet octet)) + (if (> octet 127) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + octet))) + +(defmethod octets-to-char-code ((format flexi-8-bit-format) reader unreader stream) + (declare (ignore unreader)) + (with-accessors ((decoding-table external-format-decoding-table)) + format + (let* ((octet (or (funcall reader) + (return-from octets-to-char-code :eof))) + (char-code (aref (the (simple-array * *) decoding-table) octet))) + (declare (type octet octet)) + (if (or (null char-code) + (= char-code 65533)) + (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + char-code)))) + +(defmethod octets-to-char-code ((format flexi-utf-8-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-8 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (let ((octet (read-next-byte))) + (declare (type octet octet)) + (multiple-value-bind (start count) + (cond ((zerop (logand octet #b10000000)) + (values octet 0)) + ((= #b11000000 (logand octet #b11100000)) + (values (logand octet #b00011111) 1)) + ((= #b11100000 (logand octet #b11110000)) + (values (logand octet #b00001111) 2)) + ((= #b11110000 (logand octet #b11111000)) + (values (logand octet #b00000111) 3)) + ((= #b11111000 (logand octet #b11111100)) + (values (logand octet #b00000011) 4)) + ((= #b11111100 (logand octet #b11111110)) + (values (logand octet #b00000001) 5)) + (t (return-from octets-to-char-code + (recover-from-encoding-error stream + "Unexpected value #x~X at start of UTF-8 sequence." + octet)))) + ;; note that we currently don't check for "overlong" + ;; sequences or other illegal values + (loop for result of-type (unsigned-byte 32) + = start then (+ (ash result 6) + (logand octet #b111111)) + repeat count + for octet of-type octet = (read-next-byte) + unless (= #b10000000 (logand octet #b11000000)) + do (return-from octets-to-char-code + (recover-from-encoding-error stream + "Unexpected value #x~X in UTF-8 sequence." octet)) + finally (return result))))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (the octet (read-next-byte)) + (ash (the octet (read-next-byte)) 8)))) + (declare (inline read-next-byte read-next-word) + (dynamic-extent (function read-next-byte) (function read-next-word))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word)))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (labels ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t))) + (read-next-word () + (+ (ash (the octet (read-next-byte)) 8) + (the octet (read-next-byte))))) + (let ((word (read-next-word))) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word)))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader unreader stream) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 0 to 24 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader unreader stream) + (declare (ignore unreader)) + (let (first-octet-seen) + (flet ((read-next-byte () + (prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error stream + "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (loop for count from 24 downto 0 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-cr-mixin) reader unreader stream) + "The `base' method for all streams which need end-of-line +conversion. Uses CALL-NEXT-METHOD to do the actual work of reading +one or more encoded characters." + (declare (optimize speed)) + (let ((char-code (call-next-method))) + (when (eq char-code :eof) + (return-from octets-to-char-code :eof)) + (with-accessors ((eol-style external-format-eol-style)) + format + (cond ((= char-code #.(char-code #\Return)) + (case eol-style + (:cr #.(char-code #\Newline)) + ;; in the case :CRLF we have to look ahead one character + (:crlf (let ((next-char-code (call-next-method))) + (case next-char-code + (#.(char-code #\Linefeed) + #.(char-code #\Newline)) + (:eof char-code) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise + (funcall unreader (code-char next-char-code)) + char-code)))))) + (t char-code))))) + Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.11 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.15 2008/05/17 16:38:24 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -58,6 +58,154 @@ "Defines a way to reconstruct external formats. Needed for OpenMCL." (make-load-form-saving-slots thing :environment environment)) +(defclass flexi-cr-mixin () + () + (:documentation "A mixin for external-formats which need +end-of-line conversion, i.e. for those where the end-of-line +designator is /not/ the single character #\Linefeed.")) + +(defclass flexi-8-bit-format (external-format) + ((encoding-hash :accessor external-format-encoding-hash) + (decoding-table :accessor external-format-decoding-table)) + (:documentation "The class for all flexi streams which use an 8-bit +encoding and thus need additional slots for the encoding/decoding +tables.")) + +(defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) + () + (:documentation "The class for all external formats which use an +8-bit encoding /and/ need end-of-line conversion.")) + +(defclass flexi-ascii-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +US-ASCCI encoding.")) + +(defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) + () + (:documentation "Special class for external formats which use the +US-ASCCI encoding /and/ need end-of-line conversion.")) + +(defclass flexi-latin-1-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ need end-of-line conversion.")) + +(defclass flexi-utf-32-le-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-32-be-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering /and/ need end-of-line +conversion.")) + +(defclass flexi-utf-16-le-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ need +end-of-line conversion.")) + +(defclass flexi-utf-16-be-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ need end-of-line +conversion.")) + +(defclass flexi-utf-8-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ need end-of-line conversion.")) + +(defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) + "Sets the fixed encoding/decoding tables for this particular +external format." + (declare (ignore initargs)) + (with-accessors ((encoding-hash external-format-encoding-hash) + (decoding-table flexi-stream-decoding-table) + (name external-format-name) + (id external-format-id)) + external-format + (multiple-value-setq (encoding-hash decoding-table) + (cond ((ascii-name-p name) + (values +ascii-hash+ +ascii-table+)) + ((koi8-r-name-p name) + (values +koi8-r-hash+ +koi8-r-table+)) + ((iso-8859-name-p name) + (values (cdr (assoc name +iso-8859-hashes+ :test #'eq)) + (cdr (assoc name +iso-8859-tables+ :test #'eq)))) + ((code-page-name-p name) + (values (cdr (assoc id +code-page-hashes+)) + (cdr (assoc id +code-page-tables+)))))))) + +(defun external-format-class-name (real-name eol-style little-endian) + (let ((crp (not (eq eol-style :lf)))) + (cond ((ascii-name-p real-name) + (if crp + 'flexi-cr-ascii-format + 'flexi-ascii-format)) + ((eq real-name :iso-8859-1) + (if crp + 'flexi-cr-latin-1-format + 'flexi-latin-1-format)) + ((or (koi8-r-name-p real-name) + (iso-8859-name-p real-name) + (code-page-name-p real-name)) + (if crp + 'flexi-cr-8-bit-format + 'flexi-8-bit-format)) + (t (case real-name + (:utf-8 (if crp + 'flexi-cr-utf-8-format + 'flexi-utf-8-format)) + (:utf-16 (if crp + (if little-endian + 'flexi-cr-utf-16-le-format + 'flexi-cr-utf-16-be-format) + (if little-endian + 'flexi-utf-16-le-format + 'flexi-utf-16-be-format))) + (:utf-32 (if crp + (if little-endian + 'flexi-cr-utf-32-le-format + 'flexi-cr-utf-32-be-format) + (if little-endian + 'flexi-utf-32-le-format + 'flexi-utf-32-be-format)))))))) + (defun make-external-format% (name &key (little-endian *default-little-endian*) id eol-style) "Used internally by MAKE-EXTERNAL-FORMAT." @@ -74,7 +222,7 @@ :eol-style (or eol-style :crlf))) (t (list :eol-style (or eol-style *default-eol-style*) :little-endian little-endian))))) - (apply #'make-instance 'external-format + (apply #'make-instance (external-format-class-name real-name eol-style little-endian) :name real-name initargs))) Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sat May 17 18:31:08 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.58 2007/12/29 23:15:26 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.60 2008/05/17 15:56:16 edi Exp $ ;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. @@ -45,7 +45,10 @@ (:file "specials") (:file "util") (:file "external-format") + (:file "encode") + (:file "decode") (:file "in-memory") + (:file "conditions") (:file "stream") #+:lispworks (:file "lw-binary-stream") (:file "output") Modified: branches/edi/in-memory.lisp ============================================================================== --- branches/edi/in-memory.lisp (original) +++ branches/edi/in-memory.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.26 2007/12/29 21:17:05 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.29 2008/05/17 16:35:58 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -104,19 +104,6 @@ (:documentation "A binary output stream that writes its data to an associated vector.")) -(define-condition in-memory-stream-error (stream-error) - () - (:documentation "Superclass for all errors related to -IN-MEMORY streams.")) - -(define-condition in-memory-stream-closed-error (in-memory-stream-error) - () - (:report (lambda (condition stream) - (format stream "~S is closed." - (stream-error-stream condition)))) - (:documentation "An error that is signalled when someone is trying -to read from or write to a closed IN-MEMORY stream.")) - #+:cmu (defmethod open-stream-p ((stream in-memory-stream)) "Returns a true value if STREAM is open. See ANSI standard." @@ -382,14 +369,3 @@ , at body (get-output-stream-sequence ,var :as-list ,as-list)) (when ,var (close ,var))))) - -(declaim (inline translate-char)) -(defun translate-char (char-code external-format) - "Returns a list of octets which correspond to the -representation of the character with character code CHAR-CODE -when sent to a flexi stream with external format EXTERNAL-FORMAT. -Used internally by UNREAD-CHAR%. See also STRING-TO-OCTETS." - (declare (optimize speed)) - (with-output-to-sequence (list :as-list t) - (let ((stream (make-flexi-stream list :external-format external-format))) - (write-char (code-char char-code) stream)))) \ No newline at end of file Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.51 2007/12/29 22:58:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.57 2008/05/17 16:44:53 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -140,344 +140,47 @@ (setq last-octet octet) (or octet :eof)))) -(defgeneric unread-char% (char-code flexi-input-stream) - (:documentation "Used internally to put a character denoted by the -character code CHAR-CODE which was already read back on the stream. -Uses the OCTET-STACK slot and decrements the POSITION slot -accordingly.")) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-input-stream)) - "The default method which is un-optimized and uses TRANSLATE-CHAR to -figure out which octets to put on the octet stack." - (declare (optimize speed) (inline translate-char)) +(defun unread-char% (char flexi-input-stream) + "Used internally to put a character CHAR which was already read back +on the stream. Uses the OCTET-STACK slot and decrements the POSITION +slot accordingly." (with-accessors ((position flexi-stream-position) (octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format)) flexi-input-stream - (declare (integer position)) - (let ((octets-read (translate-char char-code external-format))) - (decf position (length octets-read)) - (setq octet-stack (append octets-read octet-stack))))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-latin-1-input-stream)) - "For ISO-8859-1 we can simply put the character code itself on the -octet stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push char-code octet-stack))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-ascii-input-stream)) - "For ASCII we can simply put the character code itself on the octet -stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push char-code octet-stack))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-8-bit-input-stream)) - "For 8-bit encodings we just have to put one octet on the octet -stack which we can look up in the encoding hash." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (encoding-hash flexi-stream-encoding-hash)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push (gethash char-code encoding-hash) octet-stack))) - -(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) - (flexi-input-stream flexi-cr-8-bit-input-stream)) - "A kind of `safety net' for the optimized 8-bit versions of -UNREAD-CHAR% which checks for the single case where more than one -octet has to be put on the octet stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (external-format flexi-stream-external-format)) - flexi-input-stream - (declare (integer position)) - ;; note that below we use the knowledge that in all 8-bit encodings - ;; #\Return and #\Linefeed are mapped to the same octets - (case (external-format-eol-style external-format) - (:crlf - (decf position 2) - (push #.(char-code #\Linefeed) octet-stack) - (push #.(char-code #\Return) octet-stack)) - (otherwise - (decf position) - (push #.(char-code #\Return) octet-stack))))) - -#+:lispworks -(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) - (flexi-input-stream flexi-binary-cr-8-bit-input-stream)) - "A kind of `safety net' for the optimized 8-bit versions of -UNREAD-CHAR% which checks for the single case where more than one -octet has to be put on the octet stack. - -This method \(identical to the one defined directly above) exists only -for LispWorks' \"binary\" streams and must be there due to the -slightly clunky class hierarchy." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (external-format flexi-stream-external-format)) - flexi-input-stream - (declare (integer position)) - ;; note that below we use the knowledge that in all 8-bit encodings - ;; #\Return and #\Linefeed are mapped to the same octets - (case (external-format-eol-style external-format) - (:crlf - (decf position 2) - (push #.(char-code #\Linefeed) octet-stack) - (push #.(char-code #\Return) octet-stack)) - (otherwise - (decf position) - (push #.(char-code #\Return) octet-stack))))) - -(defmacro define-char-reader ((stream-var stream-class) &body body) - "Helper macro to define methods for STREAM-READ-CHAR. Defines a -method for the class STREAM-CLASS using the variable STREAM-VAR and -the code body BODY wrapped with some standard code common to all -methods defined here. The return value of BODY is a character code. -In case of encoding problems, BODY must return the value returned by -\(RECOVER-FROM-ENCODING-ERROR ...)." - (with-unique-names (char-code body-fn) - `(defmethod stream-read-char ((,stream-var ,stream-class)) - "This method was generated with the DEFINE-CHAR-READER macro." - (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library - (with-accessors ((last-octet flexi-stream-last-octet) - (last-char-code flexi-stream-last-char-code)) - ,stream-var - ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after - ;; this operation - (setq last-octet nil) - (let ((,char-code - (flet ((,body-fn () , at body)) - (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) - (,body-fn)))) - ;; remember this character and the current external format - ;; for UNREAD-CHAR - (setq last-char-code ,char-code) - (or (code-char ,char-code) ,char-code)))))) - -(defun recover-from-encoding-error (flexi-stream format-control &rest format-args) - "Helper function used by the STREAM-READ-CHAR methods below to deal -with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and -returns its character code in this case. Otherwise signals a -FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this -function and provides a corresponding USE-VALUE restart." - (when *substitution-char* - (return-from recover-from-encoding-error (char-code *substitution-char*))) - (restart-case - (apply #'signal-encoding-error flexi-stream format-control format-args) - (use-value (char) - :report "Specify a character to be used instead." - :interactive (lambda () - (loop - (format *query-io* "Type a character: ") - (let ((line (read-line *query-io*))) - (when (= 1 (length line)) - (return (list (char line 0))))))) - (char-code char)))) - -(define-char-reader (stream flexi-latin-1-input-stream) - (or (read-byte* stream) - (return-from stream-read-char :eof))) - -(define-char-reader (stream flexi-ascii-input-stream) - (let ((octet (or (read-byte* stream) - (return-from stream-read-char :eof)))) - (declare (type octet octet)) - (if (> octet 127) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) - octet))) - -(define-char-reader (stream flexi-8-bit-input-stream) - (with-accessors ((encoding-table flexi-stream-encoding-table)) + (let ((counter 0) octets-reversed) + (declare (integer position) + (fixnum counter)) + (char-to-octets external-format + char + (lambda (octet) + (incf counter) + (push octet octets-reversed)) + nil) + (decf position counter) + (setq octet-stack (nreconc octets-reversed octet-stack))))) + +(defmethod stream-read-char ((stream flexi-input-stream)) + (declare (optimize speed)) + ;; note that we do nothing for the :LF EOL style because we assume + ;; that #\Newline is the same as #\Linefeed in all Lisps which will + ;; use this library + (with-accessors ((external-format flexi-stream-external-format) + (last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) stream - (let* ((octet (or (read-byte* stream) - (return-from stream-read-char :eof))) - (char-code (aref (the (simple-array * *) encoding-table) octet))) - (declare (type octet octet)) - (if (or (null char-code) - (= char-code 65533)) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) - char-code)))) - -(define-char-reader (stream flexi-utf-8-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-8 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (let ((octet (read-next-byte))) - (declare (type octet octet)) - (multiple-value-bind (start count) - (cond ((zerop (logand octet #b10000000)) - (values octet 0)) - ((= #b11000000 (logand octet #b11100000)) - (values (logand octet #b00011111) 1)) - ((= #b11100000 (logand octet #b11110000)) - (values (logand octet #b00001111) 2)) - ((= #b11110000 (logand octet #b11111000)) - (values (logand octet #b00000111) 3)) - ((= #b11111000 (logand octet #b11111100)) - (values (logand octet #b00000011) 4)) - ((= #b11111100 (logand octet #b11111110)) - (values (logand octet #b00000001) 5)) - (t (return-from body - (recover-from-encoding-error stream - "Unexpected value #x~X at start of UTF-8 sequence." - octet)))) - ;; note that we currently don't check for "overlong" - ;; sequences or other illegal values - (loop for result of-type (unsigned-byte 32) - = start then (+ (ash result 6) - (logand octet #b111111)) - repeat count - for octet of-type octet = (read-next-byte) - unless (= #b10000000 (logand octet #b11000000)) - do (return-from body - (recover-from-encoding-error stream - "Unexpected value #x~X in UTF-8 sequence." octet)) - finally (return result)))))))) - -(define-char-reader (stream flexi-utf-16-le-input-stream) - (block body - (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t))) - (read-next-word () - (+ (the octet (read-next-byte)) - (ash (the octet (read-next-byte)) 8)))) - (declare (inline read-next-byte read-next-word) - (dynamic-extent (function read-next-byte) (function read-next-word))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from body - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word))))))) - -(define-char-reader (stream flexi-utf-16-be-input-stream) - (block body - (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t))) - (read-next-word () - (+ (ash (the octet (read-next-byte)) 8) - (the octet (read-next-byte))))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from body - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word))))))) - -(define-char-reader (stream flexi-utf-32-le-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 0 to 24 by 8 - for octet of-type octet = (read-next-byte) - sum (ash octet count)))))) - -(define-char-reader (stream flexi-utf-32-be-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 24 downto 0 by 8 - for octet of-type octet = (read-next-byte) - sum (ash octet count)))))) - -(defmethod stream-read-char ((stream flexi-cr-mixin)) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of -reading one or more characters from the stream." - (declare (optimize speed)) - (let ((char (call-next-method))) - (when (eq char :eof) - (return-from stream-read-char :eof)) - (with-accessors ((external-format flexi-stream-external-format) - (last-char-code flexi-stream-last-char-code)) - stream - (when (eql char #\Return) - (case (external-format-eol-style external-format) - (:cr (setq char #\Newline - last-char-code #.(char-code #\Newline))) - ;; in the case :CRLF we have to look ahead one character - (:crlf (let ((next-char (call-next-method))) - (case next-char - (#\Linefeed - (setq char #\Newline - last-char-code #.(char-code #\Newline))) - (:eof) - ;; if the character we peeked at wasn't a - ;; linefeed character we push its - ;; constituents back onto our internal - ;; octet stack - (otherwise (unread-char% (char-code next-char) stream))))))) - char))) + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (let ((char-code (octets-to-char-code external-format + (lambda () + (read-byte* stream)) + (lambda (char) + (unread-char% char stream)) + stream))) + ;; remember this character and its char code for UNREAD-CHAR + (setq last-char-code char-code) + (or (code-char char-code) char-code)))) (defmethod stream-read-char-no-hang ((stream flexi-input-stream)) "Reads one character if the underlying stream has at least one @@ -540,7 +243,7 @@ (error 'flexi-stream-simple-error :format-control "Last character read (~S) was different from ~S." :format-arguments (list (code-char last-char-code) char))) - (unread-char% last-char-code stream) + (unread-char% char stream) (setq last-char-code nil) nil)) Modified: branches/edi/iso-8859.lisp ============================================================================== --- branches/edi/iso-8859.lisp (original) +++ branches/edi/iso-8859.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.6 2008/05/17 13:50:16 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions Modified: branches/edi/lw-binary-stream.lisp ============================================================================== --- branches/edi/lw-binary-stream.lisp (original) +++ branches/edi/lw-binary-stream.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.10 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.13 2008/05/17 14:21:20 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -286,131 +286,7 @@ (defclass flexi-binary-cr-utf-8-io-stream (flexi-cr-mixin flexi-binary-utf-8-io-stream) () (:documentation "Like FLEXI-CR-UTF-8-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defmethod set-class ((stream flexi-binary-input-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-input-stream - 'flexi-binary-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-input-stream - 'flexi-binary-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-input-stream - 'flexi-binary-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-input-stream - 'flexi-binary-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-input-stream - 'flexi-binary-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-input-stream - 'flexi-binary-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-input-stream - 'flexi-binary-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-input-stream - 'flexi-binary-utf-32-be-input-stream)))))))))) - -(defmethod set-class ((stream flexi-binary-output-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-output-stream - 'flexi-binary-ascii-output-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-output-stream - 'flexi-binary-latin-1-output-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-output-stream - 'flexi-binary-8-bit-output-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-output-stream - 'flexi-binary-utf-8-output-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-output-stream - 'flexi-binary-cr-utf-16-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-output-stream - 'flexi-binary-utf-16-be-output-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-output-stream - 'flexi-binary-cr-utf-32-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-output-stream - 'flexi-binary-utf-32-be-output-stream)))))))))) - -(defmethod set-class ((stream flexi-binary-io-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-io-stream - 'flexi-binary-ascii-io-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-io-stream - 'flexi-binary-latin-1-io-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-io-stream - 'flexi-binary-8-bit-io-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-io-stream - 'flexi-binary-utf-8-io-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-io-stream - 'flexi-binary-cr-utf-16-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-io-stream - 'flexi-binary-utf-16-be-io-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-io-stream - 'flexi-binary-cr-utf-32-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-io-stream - 'flexi-binary-utf-32-be-io-stream)))))))))) - +optimized for LispWorks binary streams.")) (defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. @@ -423,8 +299,7 @@ (change-class flexi-stream (typecase flexi-stream (flexi-io-stream 'flexi-binary-io-stream) - (otherwise 'flexi-binary-output-stream))) - (set-class flexi-stream)))) + (otherwise 'flexi-binary-output-stream)))))) (defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. @@ -437,5 +312,4 @@ (change-class flexi-stream (typecase flexi-stream (flexi-io-stream 'flexi-binary-io-stream) - (otherwise 'flexi-binary-input-stream))) - (set-class flexi-stream)))) + (otherwise 'flexi-binary-input-stream)))))) Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.44 2007/12/29 22:23:23 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.47 2008/05/17 16:40:33 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -62,137 +62,15 @@ sink (write-byte byte stream))) -(defmethod write-byte* (byte (sink array)) - (declare (optimize speed)) - (vector-push byte sink)) - -(defgeneric char-to-octets (stream char sink) - (:documentation "Converts the character CHAR to sequence of octets -and sends this sequence to SINK. STREAM will always be a flexi stream -which is used to determine how the character should be converted. -This function does all the work for STREAM-WRITE-CHAR in which case -SINK is the same as STREAM. It is also used in the implementation of -STREAM-WRITE-SEQUENCE below.")) - (defmethod stream-write-char ((stream flexi-output-stream) char) (declare (optimize speed)) - (char-to-octets stream char stream)) - -(defmethod char-to-octets ((stream flexi-latin-1-output-stream) char sink) - (declare (optimize speed)) - (let ((octet (char-code char))) - (when (> octet 255) - (signal-encoding-error stream "~S is not a LATIN-1 character." char)) - (write-byte* octet sink)) - char) - -(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) - (declare (optimize speed)) - (let ((octet (char-code char))) - (when (> octet 127) - (signal-encoding-error stream "~S is not an ASCII character." char)) - (write-byte* octet sink)) - char) - -(defmethod char-to-octets ((stream flexi-8-bit-output-stream) char sink) - (declare (optimize speed)) - (with-accessors ((encoding-hash flexi-stream-encoding-hash)) - stream - (let ((octet (gethash (char-code char) encoding-hash))) - (unless octet - (signal-encoding-error stream "~S is not in this encoding." char)) - (write-byte* octet sink)) - char)) - -(defmethod char-to-octets ((stream flexi-utf-8-output-stream) char sink) - (declare (optimize speed)) - (let ((char-code (char-code char))) - (tagbody - (cond ((< char-code #x80) - (write-byte* char-code sink) - (go zero)) - ((< char-code #x800) - (write-byte* (logior #b11000000 (ldb (byte 5 6) char-code)) sink) - (go one)) - ((< char-code #x10000) - (write-byte* (logior #b11100000 (ldb (byte 4 12) char-code)) sink) - (go two)) - ((< char-code #x200000) - (write-byte* (logior #b11110000 (ldb (byte 3 18) char-code)) sink) - (go three)) - ((< char-code #x4000000) - (write-byte* (logior #b11111000 (ldb (byte 2 24) char-code)) sink) - (go four)) - (t (write-byte* (logior #b11111100 (ldb (byte 1 30) char-code)) sink))) - (write-byte* (logior #b10000000 (ldb (byte 6 24) char-code)) sink) - four - (write-byte* (logior #b10000000 (ldb (byte 6 18) char-code)) sink) - three - (write-byte* (logior #b10000000 (ldb (byte 6 12) char-code)) sink) - two - (write-byte* (logior #b10000000 (ldb (byte 6 6) char-code)) sink) - one - (write-byte* (logior #b10000000 (ldb (byte 6 0) char-code)) sink) - zero)) - char) - -(defmethod char-to-octets ((stream flexi-utf-16-le-output-stream) char sink) - (declare (optimize speed)) - (flet ((write-word (word) - (write-byte* (ldb (byte 8 0) word) sink) - (write-byte* (ldb (byte 8 8) word) sink))) - (declare (inline write-word) (dynamic-extent (function write-word))) - (let ((char-code (char-code char))) - (cond ((< char-code #x10000) - (write-word char-code)) - (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) - -(defmethod char-to-octets ((stream flexi-utf-16-be-output-stream) char sink) - (declare (optimize speed)) - (flet ((write-word (word) - (write-byte* (ldb (byte 8 8) word) sink) - (write-byte* (ldb (byte 8 0) word) sink))) - (declare (inline write-word) (dynamic-extent (function write-word))) - (let ((char-code (char-code char))) - (cond ((< char-code #x10000) - (write-word char-code)) - (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) - -(defmethod char-to-octets ((stream flexi-utf-32-le-output-stream) char sink) - (declare (optimize speed)) - (loop with char-code = (char-code char) - for position in '(0 8 16 24) do - (write-byte* (ldb (byte 8 position) char-code) sink)) - char) - -(defmethod char-to-octets ((stream flexi-utf-32-be-output-stream) char sink) - (declare (optimize speed)) - (loop with char-code = (char-code char) - for position in '(24 16 8 0) do - (write-byte* (ldb (byte 8 position) char-code) sink)) - char) - -(defmethod char-to-octets ((stream flexi-cr-mixin) char sink) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of sending -one or more characters to SINK." - (declare (optimize speed)) (with-accessors ((external-format flexi-stream-external-format)) stream - (case char - (#\Newline - (case (external-format-eol-style external-format) - (:cr (call-next-method stream #\Return sink)) - (:crlf (call-next-method stream #\Return sink) - (call-next-method stream #\Linefeed sink)))) - (otherwise (call-next-method))) - char)) + (char-to-octets external-format + char + (lambda (octet) + (write-byte* octet stream)) + stream))) (defmethod stream-write-char :after ((stream flexi-output-stream) char) (declare (optimize speed)) @@ -297,8 +175,13 @@ :start start :end end :from-end t))) - (loop for index from start below end - do (char-to-octets stream (aref sequence index) buffer) + (loop with format = (flexi-stream-external-format stream) + for index from start below end + do (char-to-octets format + (aref sequence index) + (lambda (octet) + (vector-push octet buffer)) + stream) when (>= (fill-pointer buffer) +buffer-size+) do (write-sequence buffer (flexi-stream-stream stream)) (setf (fill-pointer buffer) 0) Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.30 2007/10/11 20:23:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.31 2008/05/17 13:50:16 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.25 2007/12/29 21:17:06 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.26 2008/05/17 13:50:16 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions Modified: branches/edi/stream.lisp ============================================================================== --- branches/edi/stream.lisp (original) +++ branches/edi/stream.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.53 2007/12/29 22:26:04 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.57 2008/05/17 14:21:20 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -49,45 +49,6 @@ allow for multi-octet external formats. FLEXI-STREAM itself is a mixin and should not be instantiated.")) -(define-condition flexi-stream-error (stream-error) - () - (:documentation "Superclass for all errors related to -flexi streams.")) - -(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) - () - (:documentation "Like FLEXI-STREAM-ERROR but with formatting -capabilities.")) - -(define-condition flexi-stream-element-type-error (flexi-stream-error) - ((element-type :initarg :element-type - :reader flexi-stream-element-type-error-element-type)) - (:report (lambda (condition stream) - (format stream "Element type ~S not allowed." - (flexi-stream-element-type-error-element-type condition)))) - (:documentation "Errors of this type are signalled if the flexi -stream has a wrong element type.")) - -(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) - () - (:documentation "Errors of this type are signalled if there is an -encoding problem.")) - -(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) - ((position-spec :initarg :position-spec - :reader flexi-stream-position-spec-error-position-spec)) - (:documentation "Errors of this type are signalled if an -erroneous position spec is used in conjunction with -FILE-POSITION.")) - -(defun signal-encoding-error (flexi-stream format-control &rest format-args) - "Convenience function similar to ERROR to signal conditions of type -FLEXI-STREAM-ENCODING-ERROR." - (error 'flexi-stream-encoding-error - :format-control format-control - :format-arguments format-args - :stream flexi-stream)) - (defun maybe-convert-external-format (external-format) "Given an external format designator \(a keyword, a list, or an EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT @@ -110,9 +71,7 @@ (error 'flexi-stream-element-type-error :element-type element-type :stream flexi-stream)) - (setq external-format (maybe-convert-external-format external-format))) - ;; set actual class and maybe contents of 8-bit encoding slots - (set-class flexi-stream)) + (setq external-format (maybe-convert-external-format external-format)))) (defmethod (setf flexi-stream-external-format) :around (new-value (flexi-stream flexi-stream)) "Converts the new value to an EXTERNAL-FORMAT object if @@ -226,461 +185,6 @@ MAKE-INSTANCE to create a new FLEXI-IO-STREAM but use MAKE-FLEXI-STREAM instead.")) -(defclass flexi-cr-mixin () - () - (:documentation "A mixin for flexi streams which need -end-of-line conversion, i.e. for those where the end-of-line -designator is /not/ the single character #\Linefeed.")) - -(defclass flexi-8-bit-stream (flexi-stream) - ((encoding-hash :accessor flexi-stream-encoding-hash)) - (:documentation "The class for all flexi streams which use an 8-bit -encoding and thus need an additional slot for the encoding hash.")) - -(defclass flexi-8-bit-input-stream (flexi-input-stream flexi-8-bit-stream) - ((encoding-table :accessor flexi-stream-encoding-table)) - (:documentation "The class for all flexi input streams which use an -8-bit encoding and thus need an additional slot for the encoding -table.")) - -(defclass flexi-cr-8-bit-input-stream (flexi-cr-mixin flexi-8-bit-input-stream) - () - (:documentation "The class for all flexi input streams which -use an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-input-stream (flexi-8-bit-input-stream) - () - (:documentation "Special class for flexi input streams which -use the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-input-stream (flexi-cr-mixin flexi-ascii-input-stream) - () - (:documentation "Special class for flexi input streams which -use the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-input-stream (flexi-8-bit-input-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-input-stream (flexi-cr-mixin flexi-latin-1-input-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-utf-32-le-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-32-be-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-utf-32-be-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-utf-16-le-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-16-be-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-utf-16-be-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-input-stream (flexi-cr-mixin flexi-utf-8-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-8-bit-output-stream (flexi-output-stream flexi-8-bit-stream) - () - (:documentation "The class for all flexi output streams which use an -8-bit encoding.")) - -(defclass flexi-cr-8-bit-output-stream (flexi-cr-mixin flexi-8-bit-output-stream) - () - (:documentation "The class for all flexi output streams which -use an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-output-stream (flexi-8-bit-output-stream) - () - (:documentation "Special class for flexi output streams which -use the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-output-stream (flexi-cr-mixin flexi-ascii-output-stream) - () - (:documentation "Special class for flexi output streams which -use the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-output-stream (flexi-8-bit-output-stream) - () - (:documentation "Special class for flexi output streams which -use the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-output-stream (flexi-cr-mixin flexi-latin-1-output-stream) - () - (:documentation "Special class for flexi output streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-utf-32-le-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-32-be-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-utf-32-be-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-utf-16-le-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-16-be-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-utf-16-be-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-output-stream (flexi-cr-mixin flexi-utf-8-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-8-bit-io-stream (flexi-8-bit-input-stream flexi-8-bit-output-stream flexi-io-stream) - () - (:documentation "The class for all flexi I/O streams which use an -8-bit encoding.")) - -(defclass flexi-cr-8-bit-io-stream (flexi-cr-mixin flexi-8-bit-io-stream) - () - (:documentation "The class for all flexi I/O streams which use -an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-io-stream (flexi-ascii-input-stream flexi-ascii-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-io-stream (flexi-cr-mixin flexi-ascii-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-io-stream (flexi-latin-1-input-stream flexi-latin-1-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-io-stream (flexi-cr-mixin flexi-latin-1-io-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-io-stream (flexi-utf-32-le-input-stream - flexi-utf-32-le-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-utf-32-le-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-32-be-io-stream (flexi-utf-32-be-input-stream - flexi-utf-32-be-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-utf-32-be-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-io-stream (flexi-utf-16-le-input-stream - flexi-utf-16-le-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-utf-16-le-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-be-io-stream (flexi-utf-16-be-input-stream - flexi-utf-16-be-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-utf-16-be-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-io-stream (flexi-utf-8-input-stream flexi-utf-8-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-io-stream (flexi-cr-mixin flexi-utf-8-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defmethod (setf flexi-stream-external-format) :after (new-value (stream flexi-stream)) - "After we've changed the external format of a flexi stream, we -might have to change its actual class and maybe also the contents -of its 8-bit encoding slots." - (declare (ignore new-value) - (optimize speed)) - ;; note that it's potentially dangerous to call SET-CLASS from - ;; within a method, see for example this thread: - ;; - (set-class stream)) - -(defmethod set-class ((stream flexi-input-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-input-stream - 'flexi-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-input-stream - 'flexi-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-input-stream - 'flexi-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-input-stream - 'flexi-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-input-stream - 'flexi-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-input-stream - 'flexi-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-input-stream - 'flexi-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-input-stream - 'flexi-utf-32-be-input-stream)))))))))) - -(defmethod set-class ((stream flexi-output-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-output-stream - 'flexi-ascii-output-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-output-stream - 'flexi-latin-1-output-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-output-stream - 'flexi-8-bit-output-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-output-stream - 'flexi-utf-8-output-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-output-stream - 'flexi-cr-utf-16-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-output-stream - 'flexi-utf-16-be-output-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-output-stream - 'flexi-cr-utf-32-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-output-stream - 'flexi-utf-32-be-output-stream)))))))))) - -(defmethod set-class ((stream flexi-io-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-io-stream - 'flexi-ascii-io-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-io-stream - 'flexi-latin-1-io-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-io-stream - 'flexi-8-bit-io-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-io-stream - 'flexi-utf-8-io-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-io-stream - 'flexi-cr-utf-16-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-io-stream - 'flexi-utf-16-be-io-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-io-stream - 'flexi-cr-utf-32-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-io-stream - 'flexi-utf-32-be-io-stream)))))))))) - -(defmethod set-class :after ((stream flexi-stream)) - "After we've changed the actual class of a flexi stream we may -have to set the contents of the 8-bit enconding slots as well." - (declare (optimize speed)) - (set-encoding-hash stream) - (set-encoding-table stream)) - -(defgeneric set-encoding-hash (stream) - (:method (stream)) - (:documentation "Sets the value of the ENCODING-HASH slot of -STREAM if necessary. The default method does nothing.")) - -(defgeneric set-encoding-table (stream) - (:method (stream)) - (:documentation "Sets the value of the ENCODING-TABLE slot of -STREAM if necessary. The default method does nothing.")) - -(defmethod set-encoding-hash ((stream flexi-8-bit-stream)) - "Sets the value of the ENCODING-HASH slot of STREAM depending -on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format) - (encoding-hash flexi-stream-encoding-hash)) - stream - (let ((external-format-name (external-format-name external-format))) - (setq encoding-hash - (cond ((ascii-name-p external-format-name) +ascii-hash+) - ((koi8-r-name-p external-format-name) +koi8-r-hash+) - ((iso-8859-name-p external-format-name) - (cdr (assoc external-format-name +iso-8859-hashes+ :test #'eq))) - ((code-page-name-p external-format-name) - (cdr (assoc (external-format-id external-format) +code-page-hashes+)))))))) - -(defmethod set-encoding-table ((stream flexi-8-bit-input-stream)) - "Sets the value of the ENCODING-TABLE slot of STREAM depending -on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format) - (encoding-table flexi-stream-encoding-table)) - stream - (let ((external-format-name (external-format-name external-format))) - (setq encoding-table - (cond ((ascii-name-p external-format-name) +ascii-table+) - ((koi8-r-name-p external-format-name) +koi8-r-table+) - ((iso-8859-name-p external-format-name) - (cdr (assoc external-format-name +iso-8859-tables+ :test #'eq))) - ((code-page-name-p external-format-name) - (cdr (assoc (external-format-id external-format) +code-page-tables+)))))))) - #+:cmu (defmethod input-stream-p ((stream flexi-io-stream)) "Explicitly states whether this is an input stream." Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.4 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.5 2008/05/17 13:50:16 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions Modified: branches/edi/test/packages.lisp ============================================================================== --- branches/edi/test/packages.lisp (original) +++ branches/edi/test/packages.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.4 2007/01/01 23:47:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.6 2008/05/17 16:38:26 edi Exp $ -;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -30,4 +30,5 @@ (in-package :cl-user) (defpackage :flexi-streams-test - (:use :cl :flexi-streams)) + (:use :cl :flexi-streams) + (:export :run-tests)) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.17 2007/12/29 22:58:44 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.20 2008/05/17 13:50:18 edi Exp $ -;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -89,13 +89,17 @@ (append args `(:eol-style ,eol-style :little-endian ,little-endian)))))))) -(defun create-test-combinations (file-name symbols) - "For a name suffix FILE-NAME and a list of symbols SYMBOLS -denoting different encodings of the corresponding file returns a -list of lists which can be used as arglists for COMPARE-FILES." +(defun create-test-combinations (file-name symbols &optional simplep) + "For a name suffix FILE-NAME and a list of symbols SYMBOLS denoting +different encodings of the corresponding file returns a list of lists +which can be used as arglists for COMPARE-FILES. If SIMPLEP is true, +a list which can be used for the string tests below is returned." (let ((file-variants (loop for symbol in symbols nconc (create-file-variants file-name symbol)))) (loop for (name-in . external-format-in) in file-variants + when simplep + collect (list name-in external-format-in) + else nconc (loop for (name-out . external-format-out) in file-variants collect (list name-in external-format-in name-out external-format-out))))) @@ -200,6 +204,27 @@ #+:lispworks (terpri *error-output*))))) +(defun file-as-octet-vector (pathspec) + "Returns the contents of the file denoted by PATHSPEC as a vector of +octets." + (with-open-file (in pathspec :element-type 'octet) + (let ((vector (make-array (file-length in) :element-type 'octet))) + (read-sequence vector in) + vector))) + +(defun file-as-string (pathspec external-format) + "Reads the contents of the file denoted by PATHSPEC using the +external format EXTERNAL-FORMAT and returns the result as a string." + (with-open-file (in pathspec :element-type 'octet) + (let* ((number-of-octets (file-length in)) + (in (make-flexi-stream in :external-format external-format)) + (string (make-array number-of-octets + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer string) (read-sequence string in)) + string))) + (defmacro with-test ((test-description) &body body) "Defines a test. Two utilities are available inside of the body of the maco: The function FAIL, and the macro CHECK. FAIL, the lowest @@ -231,6 +256,21 @@ (terpri *error-output*)) ,successp)))) +(defun string-test (pathspec external-format) + "Tests whether conversion from strings to octets and vice versa +using the external format EXTERNAL-FORMAT works as expected, using the +contents of the file denoted by PATHSPEC as test data and assuming +that the stream conversion functions work." + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (octets-vector (file-as-octet-vector full-path)) + (octets-list (coerce octets-vector 'list)) + (string (file-as-string full-path external-format))) + (with-test ((format nil "String tests with format ~S." + (flex::normalize-external-format external-format))) + (check (string= (octets-to-string octets-vector :external-format external-format) string)) + (check (string= (octets-to-string octets-list :external-format external-format) string)) + (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) + (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals @@ -262,6 +302,9 @@ (defun encoding-error-handling-test () "Tests several possible encoding errors and how they are handled." (with-test ("Handling of encoding errors.") + ;; handling of EOF in the middle of CRLF + (check (string= #.(string #\Return) + (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) (let ((*substitution-char* #\?)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) @@ -326,13 +369,18 @@ CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, and shows simple statistics at the end." (let* ((*test-success-counter* 0) - (args-list (loop for (file-name symbols) in *test-files* - nconc (create-test-combinations file-name symbols))) - (no-tests (* 4 (length args-list)))) + (compare-files-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols))) + (no-tests (* 4 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - (dolist (args args-list) - (apply #'compare-files args)) + (dolist (args compare-files-args-list) + (apply 'compare-files args)) + (let ((string-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length string-test-args-list)) + (dolist (args string-test-args-list) + (apply 'string-test args))) (incf no-tests) (encoding-error-handling-test) (incf no-tests) Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Sat May 17 18:31:08 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.13 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.14 2008/05/17 13:50:16 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions From eweitz at common-lisp.net Sun May 18 00:32:41 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 20:32:41 -0400 (EDT) Subject: [flexi-streams-cvs] r24 - branches/edi Message-ID: <20080518003241.DC7BC18@common-lisp.net> Author: eweitz Date: Sat May 17 20:32:41 2008 New Revision: 24 Added: branches/edi/decode.lisp.temp - copied unchanged from r23, branches/edi/encode.lisp Removed: branches/edi/encode.lisp Log: Cleaning up wrong names From eweitz at common-lisp.net Sun May 18 00:33:05 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 20:33:05 -0400 (EDT) Subject: [flexi-streams-cvs] r25 - branches/edi Message-ID: <20080518003305.8CEF218@common-lisp.net> Author: eweitz Date: Sat May 17 20:33:05 2008 New Revision: 25 Added: branches/edi/encode.lisp - copied unchanged from r24, branches/edi/decode.lisp Removed: branches/edi/decode.lisp Log: Cleaning up wrong names, part 2 From eweitz at common-lisp.net Sun May 18 00:33:31 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 20:33:31 -0400 (EDT) Subject: [flexi-streams-cvs] r26 - branches/edi Message-ID: <20080518003331.3219018@common-lisp.net> Author: eweitz Date: Sat May 17 20:33:30 2008 New Revision: 26 Added: branches/edi/decode.lisp - copied unchanged from r25, branches/edi/decode.lisp.temp Removed: branches/edi/decode.lisp.temp Log: Cleaning up wrong names, part 3 From eweitz at common-lisp.net Sun May 18 01:23:54 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 17 May 2008 21:23:54 -0400 (EDT) Subject: [flexi-streams-cvs] r27 - in branches/edi: . test Message-ID: <20080518012354.7A8CC7A01B@common-lisp.net> Author: eweitz Date: Sat May 17 21:23:53 2008 New Revision: 27 Modified: branches/edi/decode.lisp branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/specials.lisp branches/edi/stream.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: New implementation for string functions Passes all tests Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.4 2008/05/18 00:35:33 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -210,28 +210,26 @@ sum (ash octet count))))) (defmethod octets-to-char-code ((format flexi-cr-mixin) reader unreader stream) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of reading -one or more encoded characters." (declare (optimize speed)) (let ((char-code (call-next-method))) - (when (eq char-code :eof) - (return-from octets-to-char-code :eof)) - (with-accessors ((eol-style external-format-eol-style)) - format - (cond ((= char-code #.(char-code #\Return)) - (case eol-style - (:cr #.(char-code #\Newline)) - ;; in the case :CRLF we have to look ahead one character - (:crlf (let ((next-char-code (call-next-method))) - (case next-char-code - (#.(char-code #\Linefeed) - #.(char-code #\Newline)) - (:eof char-code) - ;; if the character we peeked at wasn't a - ;; linefeed character we unread its constituents - (otherwise - (funcall unreader (code-char next-char-code)) - char-code)))))) - (t char-code))))) + (case char-code + (#.(char-code #\Return) #.(char-code #\Newline)) + (:eof :eof) + (otherwise char-code)))) + +(defmethod octets-to-char-code ((format flexi-crlf-mixin) reader unreader stream) + (declare (optimize speed)) + (let ((char-code (call-next-method))) + (case char-code + (#.(char-code #\Return) + (let ((next-char-code (call-next-method))) + (case next-char-code + (#.(char-code #\Linefeed) #.(char-code #\Newline)) + (:eof char-code) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise (funcall unreader (code-char next-char-code)) + char-code)))) + (:eof :eof) + (t char-code)))) Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.2 2008/05/17 16:35:58 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.4 2008/05/18 00:35:33 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -42,16 +42,14 @@ (let ((octet (char-code char))) (when (> octet 255) (signal-encoding-error stream "~S is not a LATIN-1 character." char)) - (funcall writer octet)) - char) + (funcall writer octet))) (defmethod char-to-octets ((format flexi-ascii-format) char writer stream) (declare (optimize speed)) (let ((octet (char-code char))) (when (> octet 127) (signal-encoding-error stream "~S is not an ASCII character." char)) - (funcall writer octet)) - char) + (funcall writer octet))) (defmethod char-to-octets ((format flexi-8-bit-format) char writer stream) (declare (optimize speed)) @@ -60,8 +58,7 @@ (let ((octet (gethash (char-code char) encoding-hash))) (unless octet (signal-encoding-error stream "~S is not in this encoding." char)) - (funcall writer octet)) - char)) + (funcall writer octet)))) (defmethod char-to-octets ((format flexi-utf-8-format) char writer stream) (declare (ignore stream) (optimize speed)) @@ -92,8 +89,7 @@ (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) one (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) - zero)) - char) + zero))) (defmethod char-to-octets ((format flexi-utf-16-le-format) char writer stream) (declare (ignore stream) (optimize speed)) @@ -105,8 +101,7 @@ (write-word char-code)) (t (decf char-code #x10000) (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) (defmethod char-to-octets ((format flexi-utf-16-be-format) char writer stream) (declare (ignore stream) (optimize speed)) @@ -119,33 +114,29 @@ (write-word char-code)) (t (decf char-code #x10000) (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) (defmethod char-to-octets ((format flexi-utf-32-le-format) char writer stream) (declare (ignore stream) (optimize speed)) (loop with char-code = (char-code char) for position in '(0 8 16 24) do - (funcall writer (ldb (byte 8 position) char-code))) - char) + (funcall writer (ldb (byte 8 position) char-code)))) (defmethod char-to-octets ((format flexi-utf-32-be-format) char writer stream) (declare (ignore stream) (optimize speed)) (loop with char-code = (char-code char) for position in '(24 16 8 0) do - (funcall writer (ldb (byte 8 position) char-code))) - char) + (funcall writer (ldb (byte 8 position) char-code)))) (defmethod char-to-octets ((format flexi-cr-mixin) char writer stream) - "The `base' method for all formats which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of sending -one or more characters to SINK." (declare (optimize speed)) - (case char - (#\Newline - (case (external-format-eol-style format) - (:cr (call-next-method format #\Return writer stream)) - (:crlf (call-next-method format #\Return writer stream) - (call-next-method format #\Linefeed writer stream)))) - (otherwise (call-next-method))) - char) + (if (char= char #\Newline) + (call-next-method format #\Return writer stream) + (call-next-method))) + +(defmethod char-to-octets ((format flexi-crlf-mixin) char writer stream) + (declare (optimize speed)) + (cond ((char= char #\Newline) + (call-next-method format #\Return writer stream) + (call-next-method format #\Linefeed writer stream)) + (t (call-next-method)))) Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.15 2008/05/17 16:38:24 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.17 2008/05/18 00:34:19 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -60,9 +60,13 @@ (defclass flexi-cr-mixin () () - (:documentation "A mixin for external-formats which need -end-of-line conversion, i.e. for those where the end-of-line -designator is /not/ the single character #\Linefeed.")) + (:documentation "A mixin for external-formats where the end-of-line +designator is #\Return.")) + +(defclass flexi-crlf-mixin () + () + (:documentation "A mixin for external-formats where the end-of-line +designator is the sequence #\Return #\Linefeed.")) (defclass flexi-8-bit-format (external-format) ((encoding-hash :accessor external-format-encoding-hash) @@ -72,9 +76,10 @@ tables.")) (defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) - () - (:documentation "The class for all external formats which use an -8-bit encoding /and/ need end-of-line conversion.")) + ()) + +(defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format) + ()) (defclass flexi-ascii-format (flexi-8-bit-format) () @@ -82,9 +87,10 @@ US-ASCCI encoding.")) (defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) - () - (:documentation "Special class for external formats which use the -US-ASCCI encoding /and/ need end-of-line conversion.")) + ()) + +(defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format) + ()) (defclass flexi-latin-1-format (flexi-8-bit-format) () @@ -92,53 +98,64 @@ ISO-8859-1 encoding.")) (defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) + ()) + +(defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-format) + ()) + +(defclass flexi-utf-32-format (external-format) () - (:documentation "Special class for external formats which use the -ISO-8859-1 encoding /and/ need end-of-line conversion.")) + (:documentation "Abstract class for external formats which use the +UTF-32 encoding.")) -(defclass flexi-utf-32-le-format (external-format) +(defclass flexi-utf-32-le-format (flexi-utf-32-format) () (:documentation "Special class for external formats which use the UTF-32 encoding with little-endian byte ordering.")) (defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) - () - (:documentation "Special class for external formats which use the -UTF-32 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) + ()) -(defclass flexi-utf-32-be-format (external-format) +(defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-format) + ()) + +(defclass flexi-utf-32-be-format (flexi-utf-32-format) () (:documentation "Special class for external formats which use the UTF-32 encoding with big-endian byte ordering.")) (defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) + ()) + +(defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-format) + ()) + +(defclass flexi-utf-16-format (external-format) () - (:documentation "Special class for external formats which use the -UTF-32 encoding with big-endian byte ordering /and/ need end-of-line -conversion.")) + (:documentation "Abstract class for external formats which use the +UTF-16 encoding.")) -(defclass flexi-utf-16-le-format (external-format) +(defclass flexi-utf-16-le-format (flexi-utf-16-format) () (:documentation "Special class for external formats which use the UTF-16 encoding with little-endian byte ordering.")) (defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) - () - (:documentation "Special class for external formats which use the -UTF-16 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) + ()) -(defclass flexi-utf-16-be-format (external-format) +(defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-format) + ()) + +(defclass flexi-utf-16-be-format (flexi-utf-16-format) () (:documentation "Special class for external formats which use the UTF-16 encoding with big-endian byte ordering.")) (defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) - () - (:documentation "Special class for external formats which use the -UTF-16 encoding with big-endian byte ordering /and/ need end-of-line -conversion.")) + ()) + +(defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-format) + ()) (defclass flexi-utf-8-format (external-format) () @@ -146,9 +163,10 @@ UTF-8 encoding.")) (defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) - () - (:documentation "Special class for external formats which use the -UTF-8 encoding /and/ need end-of-line conversion.")) + ()) + +(defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format) + ()) (defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) "Sets the fixed encoding/decoding tables for this particular @@ -171,40 +189,50 @@ (values (cdr (assoc id +code-page-hashes+)) (cdr (assoc id +code-page-tables+)))))))) -(defun external-format-class-name (real-name eol-style little-endian) - (let ((crp (not (eq eol-style :lf)))) - (cond ((ascii-name-p real-name) - (if crp - 'flexi-cr-ascii-format - 'flexi-ascii-format)) - ((eq real-name :iso-8859-1) - (if crp - 'flexi-cr-latin-1-format - 'flexi-latin-1-format)) - ((or (koi8-r-name-p real-name) - (iso-8859-name-p real-name) - (code-page-name-p real-name)) - (if crp - 'flexi-cr-8-bit-format - 'flexi-8-bit-format)) - (t (case real-name - (:utf-8 (if crp - 'flexi-cr-utf-8-format - 'flexi-utf-8-format)) - (:utf-16 (if crp - (if little-endian - 'flexi-cr-utf-16-le-format - 'flexi-cr-utf-16-be-format) - (if little-endian - 'flexi-utf-16-le-format - 'flexi-utf-16-be-format))) - (:utf-32 (if crp - (if little-endian - 'flexi-cr-utf-32-le-format - 'flexi-cr-utf-32-be-format) - (if little-endian - 'flexi-utf-32-le-format - 'flexi-utf-32-be-format)))))))) +(defun external-format-class-name (real-name &key eol-style little-endian id) + (declare (ignore id)) + (cond ((ascii-name-p real-name) + (ecase eol-style + (:lf 'flexi-ascii-format) + (:cr 'flexi-cr-ascii-format) + (:crlf 'flexi-crlf-ascii-format))) + ((eq real-name :iso-8859-1) + (ecase eol-style + (:lf 'flexi-latin-1-format) + (:cr 'flexi-cr-latin-1-format) + (:crlf 'flexi-crlf-latin-1-format))) + ((or (koi8-r-name-p real-name) + (iso-8859-name-p real-name) + (code-page-name-p real-name)) + (ecase eol-style + (:lf 'flexi-8-bit-format) + (:cr 'flexi-cr-8-bit-format) + (:crlf 'flexi-crlf-8-bit-format))) + (t (ecase real-name + (:utf-8 (ecase eol-style + (:lf 'flexi-utf-8-format) + (:cr 'flexi-cr-utf-8-format) + (:crlf 'flexi-crlf-utf-8-format))) + (:utf-16 (ecase eol-style + (:lf (if little-endian + 'flexi-utf-16-le-format + 'flexi-utf-16-be-format)) + (:cr (if little-endian + 'flexi-cr-utf-16-le-format + 'flexi-cr-utf-16-be-format)) + (:crlf (if little-endian + 'flexi-crlf-utf-16-le-format + 'flexi-crlf-utf-16-be-format)))) + (:utf-32 (ecase eol-style + (:lf (if little-endian + 'flexi-utf-32-le-format + 'flexi-utf-32-be-format)) + (:cr (if little-endian + 'flexi-cr-utf-32-le-format + 'flexi-cr-utf-32-be-format)) + (:crlf (if little-endian + 'flexi-crlf-utf-32-le-format + 'flexi-crlf-utf-32-be-format)))))))) (defun make-external-format% (name &key (little-endian *default-little-endian*) id eol-style) @@ -222,7 +250,7 @@ :eol-style (or eol-style :crlf))) (t (list :eol-style (or eol-style *default-eol-style*) :little-endian little-endian))))) - (apply #'make-instance (external-format-class-name real-name eol-style little-endian) + (apply #'make-instance (apply #'external-format-class-name real-name initargs) :name real-name initargs))) @@ -242,6 +270,15 @@ (append shortcut-args `(:eol-style ,eol-style)))) (t (apply #'make-external-format% name args))))) + +(defun maybe-convert-external-format (external-format) + "Given an external format designator \(a keyword, a list, or an +EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT +object." + (typecase external-format + (symbol (make-external-format external-format)) + (list (apply #'make-external-format external-format)) + (otherwise external-format))) (defun external-format-equal (ef1 ef2) "Checks whether two EXTERNAL-FORMAT objects denote the same @@ -292,4 +329,21 @@ "How an EXTERNAL-FORMAT object is rendered. Uses NORMALIZE-EXTERNAL-FORMAT." (print-unreadable-object (object stream :type t :identity t) - (prin1 (normalize-external-format object) stream))) \ No newline at end of file + (prin1 (normalize-external-format object) stream))) + +(defgeneric encoding-factor (format)) + +(defmethod encoding-factor ((format flexi-8-bit-format)) + 1) + +(defmethod encoding-factor ((format flexi-utf-8-format)) + 1.05) + +(defmethod encoding-factor ((format flexi-utf-16-format)) + 2.0) + +(defmethod encoding-factor ((format flexi-utf-32-format)) + 4) + +(defmethod encoding-factor ((format flexi-crlf-mixin)) + (* 1.02 (call-next-method))) \ No newline at end of file Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.26 2008/05/17 13:50:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.27 2008/05/18 01:21:34 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -33,6 +33,12 @@ "A shortcut for \(UNSIGNED-BYTE 8)." '(unsigned-byte 8)) +(deftype char* () + "Convenience shortcut to paper over the difference between LispWorks +and the other Lisps." + #+:lispworks 'lw:simple-char + #-:lispworks 'character) + (defvar +name-map+ '((:utf8 . :utf-8) (:utf16 . :utf-16) Modified: branches/edi/stream.lisp ============================================================================== --- branches/edi/stream.lisp (original) +++ branches/edi/stream.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.57 2008/05/17 14:21:20 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.59 2008/05/18 01:21:34 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -40,7 +40,7 @@ :accessor flexi-stream-external-format :documentation "The encoding currently used by this stream. Can be changed on the fly.") - (element-type :initform #+:lispworks 'lw:simple-char #-:lispworks 'character + (element-type :initform 'char* :initarg :element-type :accessor flexi-stream-element-type :documentation "The element type of this stream.")) @@ -49,15 +49,6 @@ allow for multi-octet external formats. FLEXI-STREAM itself is a mixin and should not be instantiated.")) -(defun maybe-convert-external-format (external-format) - "Given an external format designator \(a keyword, a list, or an -EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT -object." - (typecase external-format - (symbol (make-external-format external-format)) - (list (apply #'make-external-format external-format)) - (otherwise external-format))) - (defmethod initialize-instance :after ((flexi-stream flexi-stream) &rest initargs) "Makes sure the EXTERNAL-FORMAT and ELEMENT-TYPE slots contain reasonable values." Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.5 2008/05/17 13:50:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.8 2008/05/18 01:21:34 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,28 +29,80 @@ (in-package :flexi-streams) -(defun string-to-octets (string &key (external-format (make-external-format :latin1)) - (start 0) end) +(defun string-to-octets (string &key + (external-format (make-external-format :latin1)) + (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of octets corresponding to the external format EXTERNAL-FORMAT." - (declare (optimize speed)) - (with-output-to-sequence (out) - (let ((flexi (make-flexi-stream out :external-format external-format))) - (write-string string flexi :start start :end end)))) - -(defun octets-to-string (vector &key (external-format (make-external-format :latin1)) - (start 0) (end (length vector))) + (setq external-format (maybe-convert-external-format external-format)) + (let ((factor (encoding-factor external-format)) + (length (- end start))) + (etypecase factor + (float + (let ((octets (make-array (round (* factor length)) + :element-type 'octet + :fill-pointer 0 + :adjustable t))) + (loop for i from start below end + do (char-to-octets external-format + (char string i) + (lambda (octet) + (vector-push-extend octet octets)) + nil)) + octets)) + (integer + (let ((octets (make-array (* factor length) + :element-type 'octet))) + (loop with j = 0 + for i from start below end + do (char-to-octets external-format + (char string i) + (lambda (octet) + (setf (aref octets j) octet) + (incf j)) + nil)) + octets))))) + +(defun octets-to-string (vector &key + (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) "Converts the Lisp vector VECTOR of octets from START to END to string using the external format EXTERNAL-FORMAT." - (declare (optimize speed)) - (with-input-from-sequence (in vector :start start :end end) - (let ((flexi (make-flexi-stream in :external-format external-format)) - (result (make-array (- end start) - :element-type #+:lispworks 'lw:simple-char - #-:lispworks 'character - :fill-pointer t))) - (setf (fill-pointer result) - (read-sequence result flexi)) - result))) - - + (setq external-format (maybe-convert-external-format external-format)) + (let ((factor (encoding-factor external-format)) + (length (- end start)) + (i start)) + (flet ((next-char () + (code-char + (octets-to-char-code external-format + (lambda () + (when (>= i end) + ;; TODO... + (error "End of data.")) + (prog1 + (aref vector i) + (incf i))) + (lambda (char) + (char-to-octets external-format + char + (lambda (octet) + (declare (ignore octet)) + (decf i)) + nil)) + nil)))) + (etypecase factor + (float + (let ((string (make-array (round (/ length factor)) + :element-type 'char* + :fill-pointer 0 + :adjustable t))) + (loop while (< i end) + do (vector-push-extend (next-char) string) + finally (return string)))) + (integer + (let* ((string-length (/ length factor)) + (string (make-array string-length + :element-type 'char*))) + (loop for j from 0 below string-length + do (setf (char string j) (next-char)) + finally (return string)))))))) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 17 21:23:53 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.20 2008/05/17 13:50:18 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.21 2008/05/18 01:21:36 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -263,12 +263,10 @@ that the stream conversion functions work." (let* ((full-path (merge-pathnames pathspec *this-file*)) (octets-vector (file-as-octet-vector full-path)) - (octets-list (coerce octets-vector 'list)) (string (file-as-string full-path external-format))) (with-test ((format nil "String tests with format ~S." (flex::normalize-external-format external-format))) (check (string= (octets-to-string octets-vector :external-format external-format) string)) - (check (string= (octets-to-string octets-list :external-format external-format) string)) (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) (defmacro using-values ((&rest values) &body body) From eweitz at common-lisp.net Sun May 18 14:01:13 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 18 May 2008 10:01:13 -0400 (EDT) Subject: [flexi-streams-cvs] r28 - branches/edi Message-ID: <20080518140113.AFCC41C003@common-lisp.net> Author: eweitz Date: Sun May 18 10:01:12 2008 New Revision: 28 Modified: branches/edi/strings.lisp Log: Reduce consing Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 18 10:01:12 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.8 2008/05/18 01:21:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.9 2008/05/18 13:59:13 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -36,31 +36,33 @@ octets corresponding to the external format EXTERNAL-FORMAT." (setq external-format (maybe-convert-external-format external-format)) (let ((factor (encoding-factor external-format)) - (length (- end start))) + (length (- end start))) (etypecase factor (float (let ((octets (make-array (round (* factor length)) :element-type 'octet :fill-pointer 0 :adjustable t))) - (loop for i from start below end - do (char-to-octets external-format - (char string i) - (lambda (octet) - (vector-push-extend octet octets)) - nil)) + (flet ((writer (octet) + (vector-push-extend octet octets))) + (loop for i from start below end + do (char-to-octets external-format + (char string i) + #'writer + nil))) octets)) (integer (let ((octets (make-array (* factor length) - :element-type 'octet))) - (loop with j = 0 - for i from start below end - do (char-to-octets external-format - (char string i) - (lambda (octet) - (setf (aref octets j) octet) - (incf j)) - nil)) + :element-type 'octet)) + (j 0)) + (flet ((writer (octet) + (setf (aref octets j) octet) + (incf j))) + (loop for i from start below end do + (char-to-octets external-format + (char string i) + #'writer + nil))) octets))))) (defun octets-to-string (vector &key @@ -72,24 +74,27 @@ (let ((factor (encoding-factor external-format)) (length (- end start)) (i start)) - (flet ((next-char () - (code-char - (octets-to-char-code external-format - (lambda () - (when (>= i end) - ;; TODO... - (error "End of data.")) - (prog1 - (aref vector i) - (incf i))) - (lambda (char) - (char-to-octets external-format - char - (lambda (octet) - (declare (ignore octet)) - (decf i)) - nil)) - nil)))) + (labels ((reader () + (when (>= i end) + ;; TODO... + (error "End of data.")) + (prog1 + (aref vector i) + (incf i))) + (pseudo-writer (octet) + (declare (ignore octet)) + (decf i)) + (unreader (char) + (char-to-octets external-format + char + #'pseudo-writer + nil)) + (next-char () + (code-char + (octets-to-char-code external-format + #'reader + #'unreader + nil)))) (etypecase factor (float (let ((string (make-array (round (/ length factor)) From eweitz at common-lisp.net Sun May 18 14:59:45 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 18 May 2008 10:59:45 -0400 (EDT) Subject: [flexi-streams-cvs] r29 - in branches/edi: . doc test Message-ID: <20080518145945.1485A7E0B2@common-lisp.net> Author: eweitz Date: Sun May 18 10:59:44 2008 New Revision: 29 Modified: branches/edi/doc/index.html branches/edi/specials.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Some optimization Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sun May 18 10:59:44 2008 @@ -977,8 +977,8 @@ Converts the Lisp string string from start to end to an array of octets corresponding to the external format external-format. The defaults for start and end -are 0 and NIL (meaning the length of the -vector). The default for external-format is the +are 0 and the length of the +string. The default for external-format is the value of evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) @@ -986,15 +986,15 @@


[Function] -
octets-to-string vector &key external-format start end => string +
octets-to-string sequence &key external-format start end => string -


Converts the Lisp vector vector +

Converts the Lisp sequence sequence of octets from start to end to string using the external format external-format. The defaults for start and end -are 0 and the length of the vector. The default +are 0 and the length of the sequence. The default for external-format is the value of evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) @@ -1037,7 +1037,7 @@ numerous patches and additions.

-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.98 2007/12/29 23:15:27 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.100 2008/05/18 14:59:02 edi Exp $

BACK TO MY HOMEPAGE Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Sun May 18 10:59:44 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.27 2008/05/18 01:21:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.28 2008/05/18 14:59:00 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,6 +29,15 @@ (in-package :flexi-streams) +(defvar *standard-optimize-settings* + '(optimize + speed + (safety 0) + (space 0) + (debug 1) + (compilation-speed 0)) + "The standard optimize settings used by most declaration expressions.") + (deftype octet () "A shortcut for \(UNSIGNED-BYTE 8)." '(unsigned-byte 8)) Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 18 10:59:44 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.9 2008/05/18 13:59:13 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.10 2008/05/18 14:59:00 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -34,6 +34,8 @@ (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of octets corresponding to the external format EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) (let ((factor (encoding-factor external-format)) (length (- end start))) @@ -45,7 +47,7 @@ :adjustable t))) (flet ((writer (octet) (vector-push-extend octet octets))) - (loop for i from start below end + (loop for i of-type fixnum from start below end do (char-to-octets external-format (char string i) #'writer @@ -55,33 +57,58 @@ (let ((octets (make-array (* factor length) :element-type 'octet)) (j 0)) + (declare (fixnum j)) (flet ((writer (octet) - (setf (aref octets j) octet) + (setf #+:lispworks (sys:typed-aref '(unsigned-byte 8) octets j) + #-:lispworks (aref octets j) + octet) (incf j))) - (loop for i from start below end do + (loop for i of-type fixnum from start below end do (char-to-octets external-format (char string i) #'writer nil))) octets))))) -(defun octets-to-string (vector &key - (external-format (make-external-format :latin1)) - (start 0) (end (length vector))) - "Converts the Lisp vector VECTOR of octets from START to END to +(defun octets-to-string (sequence &key + (external-format (make-external-format :latin1)) + (start 0) (end (length sequence))) + "Converts the Lisp sequence SEQUENCE of octets from START to END to string using the external format EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) - (let ((factor (encoding-factor external-format)) - (length (- end start)) - (i start)) - (labels ((reader () - (when (>= i end) - ;; TODO... - (error "End of data.")) - (prog1 - (aref vector i) - (incf i))) - (pseudo-writer (octet) + (let* ((factor (encoding-factor external-format)) + (length (- end start)) + (i start) + (reader (etypecase sequence + #+:lispworks + ((array octet *) + (lambda () + (when (>= i end) + ;; TODO... + (error "End of data.")) + (prog1 + (sys:typed-aref '(unsigned-byte 8) sequence i) + (incf i)))) + ((array * *) + (lambda () + (when (>= i end) + ;; TODO... + (error "End of data.")) + (prog1 + (aref sequence i) + (incf i)))) + (list + (lambda () + (when (>= i end) + ;; TODO... + (error "End of data.")) + (prog1 + (nth i sequence) + (incf i))))))) + (declare (fixnum i)) + (labels ((pseudo-writer (octet) (declare (ignore octet)) (decf i)) (unreader (char) @@ -92,7 +119,7 @@ (next-char () (code-char (octets-to-char-code external-format - #'reader + reader #'unreader nil)))) (etypecase factor @@ -108,6 +135,7 @@ (let* ((string-length (/ length factor)) (string (make-array string-length :element-type 'char*))) - (loop for j from 0 below string-length - do (setf (char string j) (next-char)) + (declare (fixnum string-length)) + (loop for j of-type fixnum from 0 below string-length + do (setf (schar string j) (next-char)) finally (return string)))))))) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sun May 18 10:59:44 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.21 2008/05/18 01:21:36 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.22 2008/05/18 14:59:04 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -263,10 +263,12 @@ that the stream conversion functions work." (let* ((full-path (merge-pathnames pathspec *this-file*)) (octets-vector (file-as-octet-vector full-path)) + (octets-list (coerce octets-vector 'list)) (string (file-as-string full-path external-format))) (with-test ((format nil "String tests with format ~S." (flex::normalize-external-format external-format))) (check (string= (octets-to-string octets-vector :external-format external-format) string)) + (check (string= (octets-to-string octets-list :external-format external-format) string)) (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) (defmacro using-values ((&rest values) &body body) From eweitz at common-lisp.net Sun May 18 22:27:40 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 18 May 2008 18:27:40 -0400 (EDT) Subject: [flexi-streams-cvs] r30 - branches/edi Message-ID: <20080518222740.C4DF77E0AD@common-lisp.net> Author: eweitz Date: Sun May 18 18:27:36 2008 New Revision: 30 Added: branches/edi/mapping.lisp (contents, props changed) Modified: branches/edi/ascii.lisp branches/edi/code-pages.lisp branches/edi/conditions.lisp branches/edi/decode.lisp branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd branches/edi/input.lisp branches/edi/iso-8859.lisp branches/edi/koi8-r.lisp branches/edi/output.lisp branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/strings.lisp branches/edi/util.lisp Log: More optimizations Passes tests Modified: branches/edi/ascii.lisp ============================================================================== --- branches/edi/ascii.lisp (original) +++ branches/edi/ascii.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.8 2008/05/17 13:50:15 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.9 2008/05/18 21:32:15 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,7 +29,8 @@ (in-package :flexi-streams) -(defvar +ascii-table+ - #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533) +(defconstant +ascii-table+ + ;; currently not used, but we leave it in here just in case... + (make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) "An array enumerating the character codes for the US-ASCII encoding.") Modified: branches/edi/code-pages.lisp ============================================================================== --- branches/edi/code-pages.lisp (original) +++ branches/edi/code-pages.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.6 2008/05/17 13:50:15 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.7 2008/05/18 21:32:15 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -31,32 +31,32 @@ ;;; the following code was auto-generated with LWW -(defvar +code-page-tables+ - '((437 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (720 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 233 226 132 224 134 231 234 235 232 239 238 141 142 143 144 1617 1618 244 164 1600 251 249 1569 1570 1571 1572 163 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1590 1591 1592 1593 1594 1601 181 1602 1603 1604 1605 1606 1607 1608 1609 1610 8801 1611 1612 1613 1614 1615 1616 8776 176 8729 183 8730 8319 178 9632 160)) - (737 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 931 932 933 934 935 936 937 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 965 966 967 968 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 969 940 941 942 970 943 972 973 971 974 902 904 905 906 908 910 911 177 8805 8804 938 939 247 8776 176 8729 183 8730 8319 178 9632 160)) - (775 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 262 252 233 257 228 291 229 263 322 275 342 343 299 377 196 197 201 230 198 333 246 290 162 346 347 214 220 248 163 216 215 164 256 298 243 379 380 378 8221 166 169 174 172 189 188 321 171 187 9617 9618 9619 9474 9508 260 268 280 278 9571 9553 9559 9565 302 352 9488 9492 9524 9516 9500 9472 9532 370 362 9562 9556 9577 9574 9568 9552 9580 381 261 269 281 279 303 353 371 363 382 9496 9484 9608 9604 9612 9616 9600 211 223 332 323 245 213 181 324 310 311 315 316 326 274 325 8217 173 177 8220 190 182 167 247 8222 176 8729 183 185 179 178 9632 160)) - (850 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 215 402 225 237 243 250 241 209 170 186 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 240 208 202 203 200 305 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 254 222 218 219 217 253 221 175 180 173 177 8215 190 182 167 247 184 176 168 183 185 179 178 9632 160)) - (852 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 367 263 231 322 235 336 337 238 377 196 262 201 313 314 244 246 317 318 346 347 214 220 356 357 321 215 269 225 237 243 250 260 261 381 382 280 281 172 378 268 351 171 187 9617 9618 9619 9474 9508 193 194 282 350 9571 9553 9559 9565 379 380 9488 9492 9524 9516 9500 9472 9532 258 259 9562 9556 9577 9574 9568 9552 9580 164 273 272 270 203 271 327 205 206 283 9496 9484 9608 9604 354 366 9600 211 223 212 323 324 328 352 353 340 218 341 368 253 221 355 180 173 733 731 711 728 167 247 184 176 168 729 369 344 345 9632 160)) - (855 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1106 1026 1107 1027 1105 1025 1108 1028 1109 1029 1110 1030 1111 1031 1112 1032 1113 1033 1114 1034 1115 1035 1116 1036 1118 1038 1119 1039 1102 1070 1098 1066 1072 1040 1073 1041 1094 1062 1076 1044 1077 1045 1092 1060 1075 1043 171 187 9617 9618 9619 9474 9508 1093 1061 1080 1048 9571 9553 9559 9565 1081 1049 9488 9492 9524 9516 9500 9472 9532 1082 1050 9562 9556 9577 9574 9568 9552 9580 164 1083 1051 1084 1052 1085 1053 1086 1054 1087 9496 9484 9608 9604 1055 1103 9600 1071 1088 1056 1089 1057 1090 1058 1091 1059 1078 1046 1074 1042 1100 1068 8470 173 1099 1067 1079 1047 1096 1064 1101 1069 1097 1065 1095 1063 167 9632 160)) - (857 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 305 196 197 201 230 198 244 246 242 251 249 304 214 220 248 163 216 350 351 225 237 243 250 241 209 286 287 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 186 170 202 203 200 65533 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 65533 215 218 219 217 236 255 175 180 173 177 65533 190 182 167 247 184 176 168 183 185 179 178 9632 160)) - (860 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 227 224 193 231 234 202 232 205 212 236 195 194 201 192 200 244 245 242 218 249 204 213 220 162 163 217 8359 211 225 237 243 250 241 209 170 186 191 210 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (861 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 208 240 222 196 197 201 230 198 244 246 254 251 221 253 214 220 248 163 216 8359 402 225 237 243 250 193 205 211 218 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (862 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (863 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 194 224 182 231 234 235 232 239 238 8215 192 167 201 200 202 244 203 207 251 249 164 212 220 162 163 217 219 402 166 180 243 250 168 184 179 175 206 8976 172 189 188 190 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (864 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 176 183 8729 8730 9618 9472 9474 9532 9508 9516 9500 9524 9488 9484 9492 9496 946 8734 966 177 189 188 8776 171 187 65271 65272 155 156 65275 65276 159 160 173 65154 163 164 65156 65533 65533 65166 65167 65173 65177 1548 65181 65185 65189 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 65233 1563 65201 65205 65209 1567 162 65152 65153 65155 65157 65226 65163 65165 65169 65171 65175 65179 65183 65187 65191 65193 65195 65197 65199 65203 65207 65211 65215 65217 65221 65227 65231 166 172 247 215 65225 1600 65235 65239 65243 65247 65251 65255 65259 65261 65263 65267 65213 65228 65230 65229 65249 65149 1617 65253 65257 65260 65264 65266 65232 65237 65269 65270 65245 65241 65265 9632 65533)) - (865 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 164 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (866 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1025 1105 1028 1108 1031 1111 1038 1118 176 8729 183 8730 8470 164 9632 160)) - (869 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 902 135 183 172 166 8216 8217 904 8213 905 906 938 908 147 148 910 939 169 911 178 179 940 163 941 942 943 970 912 972 973 913 914 915 916 917 918 919 189 920 921 171 187 9617 9618 9619 9474 9508 922 923 924 925 9571 9553 9559 9565 926 927 9488 9492 9524 9516 9500 9472 9532 928 929 9562 9556 9577 9574 9568 9552 9580 931 932 933 934 935 936 937 945 946 947 9496 9484 9608 9604 948 949 9600 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 900 173 177 965 966 967 167 968 901 176 168 969 971 944 974 9632 160)) - (1250 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 352 8249 346 356 381 377 144 8216 8217 8220 8221 8226 8211 8212 152 8482 353 8250 347 357 382 378 160 711 728 321 164 260 166 167 168 169 350 171 172 173 174 379 176 177 731 322 180 181 182 183 184 261 351 187 317 733 318 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) - (1251 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1026 1027 8218 1107 8222 8230 8224 8225 8364 8240 1033 8249 1034 1036 1035 1039 1106 8216 8217 8220 8221 8226 8211 8212 152 8482 1113 8250 1114 1116 1115 1119 160 1038 1118 1032 164 1168 166 167 1025 169 1028 171 172 173 174 1031 176 177 1030 1110 1169 181 182 183 1105 8470 1108 187 1112 1029 1109 1111 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103)) - (1252 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 381 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 382 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) - (1253 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 136 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 157 158 159 160 901 902 163 164 165 166 167 168 169 65533 171 172 173 174 8213 176 177 178 179 900 181 182 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) - (1254 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) - (1255 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 156 157 158 159 160 161 162 163 8362 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 191 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1520 1521 1522 1523 1524 65533 65533 65533 65533 65533 65533 65533 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) - (1256 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 1662 8218 402 8222 8230 8224 8225 710 8240 1657 8249 338 1670 1688 1672 1711 8216 8217 8220 8221 8226 8211 8212 1705 8482 1681 8250 339 8204 8205 1722 160 1548 162 163 164 165 166 167 168 169 1726 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 1563 187 188 189 190 1567 1729 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 215 1591 1592 1593 1594 1600 1601 1602 1603 224 1604 226 1605 1606 1607 1608 231 232 233 234 235 1609 1610 238 239 1611 1612 1613 1614 244 1615 1616 247 1617 249 1618 251 252 8206 8207 1746)) - (1257 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 138 8249 140 168 711 184 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 175 731 159 160 65533 162 163 164 65533 166 167 216 169 342 171 172 173 174 198 176 177 178 179 180 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 729)) - (1258 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 258 196 197 198 199 200 201 202 203 768 205 206 207 272 209 777 211 212 416 214 215 216 217 218 219 220 431 771 223 224 225 226 259 228 229 230 231 232 233 234 235 769 237 238 239 273 241 803 243 244 417 246 247 248 249 250 251 252 432 8363 255))) +(defconstant +code-page-tables+ + `((437 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (720 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 233 226 132 224 134 231 234 235 232 239 238 141 142 143 144 1617 1618 244 164 1600 251 249 1569 1570 1571 1572 163 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1590 1591 1592 1593 1594 1601 181 1602 1603 1604 1605 1606 1607 1608 1609 1610 8801 1611 1612 1613 1614 1615 1616 8776 176 8729 183 8730 8319 178 9632 160))) + (737 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 931 932 933 934 935 936 937 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 965 966 967 968 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 969 940 941 942 970 943 972 973 971 974 902 904 905 906 908 910 911 177 8805 8804 938 939 247 8776 176 8729 183 8730 8319 178 9632 160))) + (775 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 262 252 233 257 228 291 229 263 322 275 342 343 299 377 196 197 201 230 198 333 246 290 162 346 347 214 220 248 163 216 215 164 256 298 243 379 380 378 8221 166 169 174 172 189 188 321 171 187 9617 9618 9619 9474 9508 260 268 280 278 9571 9553 9559 9565 302 352 9488 9492 9524 9516 9500 9472 9532 370 362 9562 9556 9577 9574 9568 9552 9580 381 261 269 281 279 303 353 371 363 382 9496 9484 9608 9604 9612 9616 9600 211 223 332 323 245 213 181 324 310 311 315 316 326 274 325 8217 173 177 8220 190 182 167 247 8222 176 8729 183 185 179 178 9632 160))) + (850 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 215 402 225 237 243 250 241 209 170 186 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 240 208 202 203 200 305 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 254 222 218 219 217 253 221 175 180 173 177 8215 190 182 167 247 184 176 168 183 185 179 178 9632 160))) + (852 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 367 263 231 322 235 336 337 238 377 196 262 201 313 314 244 246 317 318 346 347 214 220 356 357 321 215 269 225 237 243 250 260 261 381 382 280 281 172 378 268 351 171 187 9617 9618 9619 9474 9508 193 194 282 350 9571 9553 9559 9565 379 380 9488 9492 9524 9516 9500 9472 9532 258 259 9562 9556 9577 9574 9568 9552 9580 164 273 272 270 203 271 327 205 206 283 9496 9484 9608 9604 354 366 9600 211 223 212 323 324 328 352 353 340 218 341 368 253 221 355 180 173 733 731 711 728 167 247 184 176 168 729 369 344 345 9632 160))) + (855 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1106 1026 1107 1027 1105 1025 1108 1028 1109 1029 1110 1030 1111 1031 1112 1032 1113 1033 1114 1034 1115 1035 1116 1036 1118 1038 1119 1039 1102 1070 1098 1066 1072 1040 1073 1041 1094 1062 1076 1044 1077 1045 1092 1060 1075 1043 171 187 9617 9618 9619 9474 9508 1093 1061 1080 1048 9571 9553 9559 9565 1081 1049 9488 9492 9524 9516 9500 9472 9532 1082 1050 9562 9556 9577 9574 9568 9552 9580 164 1083 1051 1084 1052 1085 1053 1086 1054 1087 9496 9484 9608 9604 1055 1103 9600 1071 1088 1056 1089 1057 1090 1058 1091 1059 1078 1046 1074 1042 1100 1068 8470 173 1099 1067 1079 1047 1096 1064 1101 1069 1097 1065 1095 1063 167 9632 160))) + (857 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 305 196 197 201 230 198 244 246 242 251 249 304 214 220 248 163 216 350 351 225 237 243 250 241 209 286 287 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 186 170 202 203 200 65533 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 65533 215 218 219 217 236 255 175 180 173 177 65533 190 182 167 247 184 176 168 183 185 179 178 9632 160))) + (860 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 227 224 193 231 234 202 232 205 212 236 195 194 201 192 200 244 245 242 218 249 204 213 220 162 163 217 8359 211 225 237 243 250 241 209 170 186 191 210 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (861 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 208 240 222 196 197 201 230 198 244 246 254 251 221 253 214 220 248 163 216 8359 402 225 237 243 250 193 205 211 218 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (862 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (863 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 194 224 182 231 234 235 232 239 238 8215 192 167 201 200 202 244 203 207 251 249 164 212 220 162 163 217 219 402 166 180 243 250 168 184 179 175 206 8976 172 189 188 190 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (864 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 176 183 8729 8730 9618 9472 9474 9532 9508 9516 9500 9524 9488 9484 9492 9496 946 8734 966 177 189 188 8776 171 187 65271 65272 155 156 65275 65276 159 160 173 65154 163 164 65156 65533 65533 65166 65167 65173 65177 1548 65181 65185 65189 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 65233 1563 65201 65205 65209 1567 162 65152 65153 65155 65157 65226 65163 65165 65169 65171 65175 65179 65183 65187 65191 65193 65195 65197 65199 65203 65207 65211 65215 65217 65221 65227 65231 166 172 247 215 65225 1600 65235 65239 65243 65247 65251 65255 65259 65261 65263 65267 65213 65228 65230 65229 65249 65149 1617 65253 65257 65260 65264 65266 65232 65237 65269 65270 65245 65241 65265 9632 65533))) + (865 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 164 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (866 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1025 1105 1028 1108 1031 1111 1038 1118 176 8729 183 8730 8470 164 9632 160))) + (869 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 902 135 183 172 166 8216 8217 904 8213 905 906 938 908 147 148 910 939 169 911 178 179 940 163 941 942 943 970 912 972 973 913 914 915 916 917 918 919 189 920 921 171 187 9617 9618 9619 9474 9508 922 923 924 925 9571 9553 9559 9565 926 927 9488 9492 9524 9516 9500 9472 9532 928 929 9562 9556 9577 9574 9568 9552 9580 931 932 933 934 935 936 937 945 946 947 9496 9484 9608 9604 948 949 9600 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 900 173 177 965 966 967 167 968 901 176 168 969 971 944 974 9632 160))) + (1250 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 352 8249 346 356 381 377 144 8216 8217 8220 8221 8226 8211 8212 152 8482 353 8250 347 357 382 378 160 711 728 321 164 260 166 167 168 169 350 171 172 173 174 379 176 177 731 322 180 181 182 183 184 261 351 187 317 733 318 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729))) + (1251 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1026 1027 8218 1107 8222 8230 8224 8225 8364 8240 1033 8249 1034 1036 1035 1039 1106 8216 8217 8220 8221 8226 8211 8212 152 8482 1113 8250 1114 1116 1115 1119 160 1038 1118 1032 164 1168 166 167 1025 169 1028 171 172 173 174 1031 176 177 1030 1110 1169 181 182 183 1105 8470 1108 187 1112 1029 1109 1111 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103))) + (1252 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 381 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 382 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255))) + (1253 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 136 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 157 158 159 160 901 902 163 164 165 166 167 168 169 65533 171 172 173 174 8213 176 177 178 179 900 181 182 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533))) + (1254 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255))) + (1255 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 156 157 158 159 160 161 162 163 8362 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 191 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1520 1521 1522 1523 1524 65533 65533 65533 65533 65533 65533 65533 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533))) + (1256 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 1662 8218 402 8222 8230 8224 8225 710 8240 1657 8249 338 1670 1688 1672 1711 8216 8217 8220 8221 8226 8211 8212 1705 8482 1681 8250 339 8204 8205 1722 160 1548 162 163 164 165 166 167 168 169 1726 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 1563 187 188 189 190 1567 1729 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 215 1591 1592 1593 1594 1600 1601 1602 1603 224 1604 226 1605 1606 1607 1608 231 232 233 234 235 1609 1610 238 239 1611 1612 1613 1614 244 1615 1616 247 1617 249 1618 251 252 8206 8207 1746))) + (1257 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 138 8249 140 168 711 184 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 175 731 159 160 65533 162 163 164 65533 166 167 216 169 342 171 172 173 174 198 176 177 178 179 180 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 729))) + (1258 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 258 196 197 198 199 200 201 202 203 768 205 206 207 272 209 777 211 212 416 214 215 216 217 218 219 220 431 771 223 224 225 226 259 228 229 230 231 232 233 234 235 769 237 238 239 273 241 803 243 244 417 246 247 248 249 250 251 252 432 8363 255)))) "A list of 8-bit Windows code pages where each element is a cons with the car being the ID of the code page and the cdr being a vector enumerating the corresponding character codes.") Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.3 2008/05/17 15:56:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.4 2008/05/18 20:34:52 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -61,12 +61,13 @@ FILE-POSITION.")) ;; TODO: stream might not be a stream... -(defun signal-encoding-error (flexi-stream format-control &rest format-args) +(defun signal-encoding-error (format-control &rest format-args) "Convenience function similar to ERROR to signal conditions of type FLEXI-STREAM-ENCODING-ERROR." (error 'flexi-stream-encoding-error :format-control format-control :format-arguments format-args + #+(or) #+(or) :stream flexi-stream)) (define-condition in-memory-stream-error (stream-error) Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.4 2008/05/18 00:35:33 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.7 2008/05/18 22:22:30 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,16 +29,16 @@ (in-package :flexi-streams) -(defun recover-from-encoding-error (stream format-control &rest format-args) - "Helper function used by the STREAM-READ-CHAR methods below to deal -with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and -returns its character code in this case. Otherwise signals a +(defun recover-from-encoding-error (format-control &rest format-args) + "Helper function used by OCTETS-TO-CHAR-CODE below to deal with +encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns +its character code in this case. Otherwise signals a FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this function and provides a corresponding USE-VALUE restart." (when *substitution-char* (return-from recover-from-encoding-error (char-code *substitution-char*))) (restart-case - (apply #'signal-encoding-error stream format-control format-args) + (apply #'signal-encoding-error format-control format-args) (use-value (char) :report "Specify a character to be used instead." :interactive (lambda () @@ -49,45 +49,59 @@ (return (list (char line 0))))))) (char-code char)))) -(defmethod octets-to-char-code ((format flexi-latin-1-format) reader unreader stream) - (declare (ignore unreader stream)) +(defgeneric octets-to-char-code (format reader) + (declare #.*standard-optimize-settings*) + (:documentation "Converts a sequence of octets to a character code +\(which is returned) using the external format FORMAT. The sequence +is obtained by calling the function \(which must be a functional +object) READER with no arguments which should return one octet per +call. + +The special variables *CURRENT-STREAM* and *CURRENT-UNREADER* must be +bound correctly whenever this function is called.")) + +(defmethod octets-to-char-code ((format flexi-latin-1-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (or (funcall reader) :eof)) -(defmethod octets-to-char-code ((format flexi-ascii-format) reader unreader stream) - (declare (ignore unreader)) +(defmethod octets-to-char-code ((format flexi-ascii-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (let ((octet (or (funcall reader) (return-from octets-to-char-code :eof)))) (declare (type octet octet)) (if (> octet 127) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + (recover-from-encoding-error "No character which corresponds to octet #x~X." octet) octet))) -(defmethod octets-to-char-code ((format flexi-8-bit-format) reader unreader stream) - (declare (ignore unreader)) +(defmethod octets-to-char-code ((format flexi-8-bit-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (with-accessors ((decoding-table external-format-decoding-table)) format (let* ((octet (or (funcall reader) (return-from octets-to-char-code :eof))) - (char-code (aref (the (simple-array * *) decoding-table) octet))) + (char-code (aref (the (simple-array char-code-integer *) decoding-table) octet))) (declare (type octet octet)) (if (or (null char-code) - (= char-code 65533)) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) + (= (the char-code-integer char-code) 65533)) + (recover-from-encoding-error "No character which corresponds to octet #x~X." octet) char-code)))) -(defmethod octets-to-char-code ((format flexi-utf-8-format) reader unreader stream) - (declare (ignore unreader)) +(defmethod octets-to-char-code ((format flexi-utf-8-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (funcall reader) - (cond (first-octet-seen - (return-from octets-to-char-code - (recover-from-encoding-error stream - "End of file while in UTF-8 sequence."))) - (t (return-from octets-to-char-code :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error "End of file while in UTF-8 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) (let ((octet (read-next-byte))) (declare (type octet octet)) (multiple-value-bind (start count) @@ -104,121 +118,126 @@ ((= #b11111100 (logand octet #b11111110)) (values (logand octet #b00000001) 5)) (t (return-from octets-to-char-code - (recover-from-encoding-error stream - "Unexpected value #x~X at start of UTF-8 sequence." + (recover-from-encoding-error "Unexpected value #x~X at start of UTF-8 sequence." octet)))) + (declare (fixnum count)) ;; note that we currently don't check for "overlong" ;; sequences or other illegal values (loop for result of-type (unsigned-byte 32) - = start then (+ (ash result 6) + = start then (+ (ash (the (unsigned-byte 26) result) 6) (logand octet #b111111)) repeat count for octet of-type octet = (read-next-byte) unless (= #b10000000 (logand octet #b11000000)) do (return-from octets-to-char-code - (recover-from-encoding-error stream - "Unexpected value #x~X in UTF-8 sequence." octet)) + (recover-from-encoding-error "Unexpected value #x~X in UTF-8 sequence." octet)) finally (return result))))))) -(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader unreader stream) - (declare (ignore unreader)) +(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (funcall reader) - (cond (first-octet-seen - (return-from octets-to-char-code - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from octets-to-char-code :eof)))) - (setq first-octet-seen t))) - (read-next-word () + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (flet ((read-next-word () (+ (the octet (read-next-byte)) (ash (the octet (read-next-byte)) 8)))) - (declare (inline read-next-byte read-next-word) - (dynamic-extent (function read-next-byte) (function read-next-word))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from octets-to-char-code - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word)))))) - -(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader unreader stream) - (declare (ignore unreader)) + (let ((word (read-next-word))) + (declare (type (unsigned-byte 16) word)) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (declare (type (unsigned-byte 16) next-word)) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word))))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (funcall reader) - (cond (first-octet-seen - (return-from octets-to-char-code - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from octets-to-char-code :eof)))) - (setq first-octet-seen t))) - (read-next-word () + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error "End of file while in UTF-16 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (flet ((read-next-word () (+ (ash (the octet (read-next-byte)) 8) (the octet (read-next-byte))))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from octets-to-char-code - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word)))))) - -(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader unreader stream) + (let ((word (read-next-word))) + (declare (type (unsigned-byte 16) word)) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (declare (type (unsigned-byte 16) next-word)) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word))))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (funcall reader) - (cond (first-octet-seen - (return-from octets-to-char-code - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from octets-to-char-code :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 0 to 24 by 8 + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (loop for count of-type fixnum from 0 to 24 by 8 for octet of-type octet = (read-next-byte) sum (ash octet count))))) -(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader unreader stream) - (declare (ignore unreader)) +(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader) + (declare #.*standard-optimize-settings*) + (declare (function reader)) (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (funcall reader) - (cond (first-octet-seen - (return-from octets-to-char-code - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from octets-to-char-code :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 24 downto 0 by 8 + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error "End of file while in UTF-32 sequence."))) + (t (return-from octets-to-char-code :eof)))) + (setq first-octet-seen t)))) + (loop for count of-type fixnum from 24 downto 0 by 8 for octet of-type octet = (read-next-byte) sum (ash octet count))))) -(defmethod octets-to-char-code ((format flexi-cr-mixin) reader unreader stream) - (declare (optimize speed)) +(defmethod octets-to-char-code ((format flexi-cr-mixin) reader) + (declare #.*standard-optimize-settings*) (let ((char-code (call-next-method))) (case char-code (#.(char-code #\Return) #.(char-code #\Newline)) (:eof :eof) (otherwise char-code)))) -(defmethod octets-to-char-code ((format flexi-crlf-mixin) reader unreader stream) - (declare (optimize speed)) +(defmethod octets-to-char-code ((format flexi-crlf-mixin) reader) + (declare #.*standard-optimize-settings*) + (declare (function *current-unreader*)) (let ((char-code (call-next-method))) (case char-code (#.(char-code #\Return) @@ -228,7 +247,7 @@ (:eof char-code) ;; if the character we peeked at wasn't a ;; linefeed character we unread its constituents - (otherwise (funcall unreader (code-char next-char-code)) + (otherwise (funcall *current-unreader* (code-char next-char-code)) char-code)))) (:eof :eof) (t char-code)))) Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.4 2008/05/18 00:35:33 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.7 2008/05/18 22:22:30 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,39 +29,46 @@ (in-package :flexi-streams) -(defgeneric char-to-octets (format char writer stream) - (:documentation "Converts the character CHAR to sequence of octets -and sends this sequence to SINK. STREAM will always be a flexi stream -which is used to determine how the character should be converted. -This function does all the work for STREAM-WRITE-CHAR in which case -SINK is the same as STREAM. It is also used in the implementation of -STREAM-WRITE-SEQUENCE below.")) - -(defmethod char-to-octets ((format flexi-latin-1-format) char writer stream) - (declare (optimize speed)) +(defgeneric char-to-octets (format char writer) + (declare #.*standard-optimize-settings*) + (:documentation "Converts the character CHAR to a sequence of octets +using the external format FORMAT. The conversion is performed by +calling the unary function \(which must be a functional object) WRITER +repeatedly each octet. The return value of this function is +unspecified. + +The special variable *CURRENT-STREAM* must be bound correctly whenever +this function is called.")) + +(defmethod char-to-octets ((format flexi-latin-1-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) (let ((octet (char-code char))) (when (> octet 255) - (signal-encoding-error stream "~S is not a LATIN-1 character." char)) + (signal-encoding-error "~S (code ~A) is not a LATIN-1 character." char octet)) (funcall writer octet))) -(defmethod char-to-octets ((format flexi-ascii-format) char writer stream) - (declare (optimize speed)) +(defmethod char-to-octets ((format flexi-ascii-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) (let ((octet (char-code char))) (when (> octet 127) - (signal-encoding-error stream "~S is not an ASCII character." char)) + (signal-encoding-error "~S (code ~A) is not an ASCII character." char octet)) (funcall writer octet))) -(defmethod char-to-octets ((format flexi-8-bit-format) char writer stream) - (declare (optimize speed)) +(defmethod char-to-octets ((format flexi-8-bit-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) (with-accessors ((encoding-hash external-format-encoding-hash)) format (let ((octet (gethash (char-code char) encoding-hash))) (unless octet - (signal-encoding-error stream "~S is not in this encoding." char)) + (signal-encoding-error "~S (code ~A) is not in this encoding." char octet)) (funcall writer octet)))) -(defmethod char-to-octets ((format flexi-utf-8-format) char writer stream) - (declare (ignore stream) (optimize speed)) +(defmethod char-to-octets ((format flexi-utf-8-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) (let ((char-code (char-code char))) (tagbody (cond ((< char-code #x80) @@ -79,7 +86,7 @@ ((< char-code #x4000000) (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) (go four)) - (t (funcall writer (logior #b11111100 (ldb (byte 1 30) char-code))))) + (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100)))) (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) four (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) @@ -91,52 +98,63 @@ (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) zero))) -(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer stream) - (declare (ignore stream) (optimize speed)) +(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) (flet ((write-word (word) (funcall writer (ldb (byte 8 0) word)) (funcall writer (ldb (byte 8 8) word)))) (let ((char-code (char-code char))) + (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) (t (decf char-code #x10000) (write-word (logior #xd800 (ldb (byte 10 10) char-code))) (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) -(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer stream) - (declare (ignore stream) (optimize speed)) +(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) (flet ((write-word (word) (funcall writer (ldb (byte 8 8) word)) (funcall writer (ldb (byte 8 0) word)))) - (declare (inline write-word) (dynamic-extent (function write-word))) (let ((char-code (char-code char))) + (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) (t (decf char-code #x10000) (write-word (logior #xd800 (ldb (byte 10 10) char-code))) (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) -(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer stream) - (declare (ignore stream) (optimize speed)) - (loop with char-code = (char-code char) - for position in '(0 8 16 24) do - (funcall writer (ldb (byte 8 position) char-code)))) - -(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer stream) - (declare (ignore stream) (optimize speed)) - (loop with char-code = (char-code char) - for position in '(24 16 8 0) do - (funcall writer (ldb (byte 8 position) char-code)))) - -(defmethod char-to-octets ((format flexi-cr-mixin) char writer stream) - (declare (optimize speed)) +(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) + (let ((char-code (char-code char))) + (funcall writer (ldb (byte 8 0) char-code)) + (funcall writer (ldb (byte 8 8) char-code)) + (funcall writer (ldb (byte 8 16) char-code)) + (funcall writer (ldb (byte 8 24) char-code)))) + +(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char) (function writer)) + (let ((char-code (char-code char))) + (funcall writer (ldb (byte 8 24) char-code)) + (funcall writer (ldb (byte 8 16) char-code)) + (funcall writer (ldb (byte 8 8) char-code)) + (funcall writer (ldb (byte 8 0) char-code)))) + +(defmethod char-to-octets ((format flexi-cr-mixin) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char)) (if (char= char #\Newline) - (call-next-method format #\Return writer stream) + (call-next-method format #\Return writer) (call-next-method))) -(defmethod char-to-octets ((format flexi-crlf-mixin) char writer stream) - (declare (optimize speed)) +(defmethod char-to-octets ((format flexi-crlf-mixin) char writer) + (declare #.*standard-optimize-settings*) + (declare (character char)) (cond ((char= char #\Newline) - (call-next-method format #\Return writer stream) - (call-next-method format #\Linefeed writer stream)) + (call-next-method format #\Return writer) + (call-next-method format #\Linefeed writer)) (t (call-next-method)))) Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.17 2008/05/18 00:34:19 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.18 2008/05/18 15:54:34 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -52,7 +52,8 @@ a #\Newline will be translated - one of the keywords :CR, :LF, or :CRLF.")) (:documentation "EXTERNAL-FORMAT objects are used to denote -encodings for flexi streams.")) +encodings for flexi streams or for the string functions defined in +strings.lisp.")) (defmethod make-load-form ((thing external-format) &optional environment) "Defines a way to reconstruct external formats. Needed for OpenMCL." @@ -76,21 +77,31 @@ tables.")) (defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) - ()) + () + (:documentation "Special class for external formats which use an +8-bit encoding /and/ have #\Return as the line-end character.")) (defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format) - ()) + () + (:documentation "Special class for external formats which use an +8-bit encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) (defclass flexi-ascii-format (flexi-8-bit-format) () (:documentation "Special class for external formats which use the -US-ASCCI encoding.")) +US-ASCII encoding.")) (defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) - ()) + () + (:documentation "Special class for external formats which use the +US-ASCII encoding /and/ have #\Return as the line-end character.")) (defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format) - ()) + () + (:documentation "Special class for external formats which use the +US-ASCII encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) (defclass flexi-latin-1-format (flexi-8-bit-format) () @@ -98,10 +109,15 @@ ISO-8859-1 encoding.")) (defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) - ()) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ have #\Return as the line-end character.")) (defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-format) - ()) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) (defclass flexi-utf-32-format (external-format) () @@ -114,10 +130,16 @@ UTF-32 encoding with little-endian byte ordering.")) (defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ have #\Return +as the line-end character.")) (defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ have the +sequence #\Return #\Linefeed as the line-end character.")) (defclass flexi-utf-32-be-format (flexi-utf-32-format) () @@ -125,10 +147,16 @@ UTF-32 encoding with big-endian byte ordering.")) (defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering /and/ have #\Return as +the line-end character.")) (defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-format) - ()) + () + (:documentation "Special class for external formats which use the +the UTF-32 encoding with big-endian byte ordering /and/ have the +sequence #\Return #\Linefeed as the line-end character.")) (defclass flexi-utf-16-format (external-format) () @@ -141,10 +169,16 @@ UTF-16 encoding with little-endian byte ordering.")) (defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ have #\Return +as the line-end character.")) (defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ have the +sequence #\Return #\Linefeed as the line-end character.")) (defclass flexi-utf-16-be-format (flexi-utf-16-format) () @@ -152,10 +186,16 @@ UTF-16 encoding with big-endian byte ordering.")) (defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ have #\Return as +the line-end character.")) (defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ have the sequence +#\Return #\Linefeed as the line-end character.")) (defclass flexi-utf-8-format (external-format) () @@ -163,14 +203,20 @@ UTF-8 encoding.")) (defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ have #\Return as the line-end character.")) (defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format) - ()) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) (defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) "Sets the fixed encoding/decoding tables for this particular external format." + (declare #.*standard-optimize-settings*) (declare (ignore initargs)) (with-accessors ((encoding-hash external-format-encoding-hash) (decoding-table flexi-stream-decoding-table) @@ -190,6 +236,9 @@ (cdr (assoc id +code-page-tables+)))))))) (defun external-format-class-name (real-name &key eol-style little-endian id) + "Given the initargs for a general external format returns the name +\(a symbol) of the most specific subclass matching these arguments." + (declare #.*standard-optimize-settings*) (declare (ignore id)) (cond ((ascii-name-p real-name) (ecase eol-style @@ -236,7 +285,10 @@ (defun make-external-format% (name &key (little-endian *default-little-endian*) id eol-style) - "Used internally by MAKE-EXTERNAL-FORMAT." + "Used internally by MAKE-EXTERNAL-FORMAT to default some of the +keywords arguments and to determine the right subclass of +EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) (let* ((real-name (normalize-external-format-name name)) (initargs (cond ((or (iso-8859-name-p real-name) @@ -263,6 +315,8 @@ encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF which denote the end-of-line character \(sequence), ID is the ID of a Windows code page \(and ignored for other encodings)." + (declare #.*standard-optimize-settings*) + ;; the keyword arguments are only there for arglist display in the IDE (declare (ignore id little-endian)) (let ((shortcut-args (cdr (assoc name +shortcut-map+)))) (cond (shortcut-args @@ -275,14 +329,15 @@ "Given an external format designator \(a keyword, a list, or an EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT object." + (declare #.*standard-optimize-settings*) (typecase external-format (symbol (make-external-format external-format)) (list (apply #'make-external-format external-format)) (otherwise external-format))) (defun external-format-equal (ef1 ef2) - "Checks whether two EXTERNAL-FORMAT objects denote the same -encoding." + "Checks whether two EXTERNAL-FORMAT objects denote the same encoding." + (declare #.*standard-optimize-settings*) (let* ((name1 (external-format-name ef1)) (code-page-name-p (code-page-name-p name1))) ;; they must habe the same canonical name @@ -306,10 +361,10 @@ (defun normalize-external-format (external-format) "Returns a list which is a `normalized' representation of the -external format EXTERNAL-FORMAT. Used internally by -PRINT-OBJECT, for example. Basically, the result is argument -list that can be fed back to MAKE-EXTERNAL-FORMAT to create an -equivalent object." +external format EXTERNAL-FORMAT. Used internally by PRINT-OBJECT, for +example. Basically, the result is an argument list that can be fed +back to MAKE-EXTERNAL-FORMAT to create an equivalent object." + (declare #.*standard-optimize-settings*) (let ((name (external-format-name external-format)) (eol-style (external-format-eol-style external-format))) (cond ((or (ascii-name-p name) @@ -331,19 +386,45 @@ (print-unreadable-object (object stream :type t :identity t) (prin1 (normalize-external-format object) stream))) -(defgeneric encoding-factor (format)) +(defgeneric encoding-factor (format) + (:documentation "Given an external format FORMAT, returns a factor +which denotes the octets to characters ratio to expect when +encoding/decoding. If the returned value is an integer, the factor is +assumed to be exact. If it is a float, the factor is supposed to be +based on heuristics and usually not exact. + +This factor is used in string.lisp.") + (declare #.*standard-optimize-settings*)) (defmethod encoding-factor ((format flexi-8-bit-format)) + (declare #.*standard-optimize-settings*) + ;; 8-bit encodings map octets to characters in an exact one-to-one + ;; fashion 1) (defmethod encoding-factor ((format flexi-utf-8-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-8 characters can be anything from one to six octets, but we + ;; assume that the "overhead" is only about 5 percent - this + ;; estimate is obviously very much dependant on the content 1.05) (defmethod encoding-factor ((format flexi-utf-16-format)) + (declare #.*standard-optimize-settings*) + ;; usually one character maps to two octets, but characters with + ;; code points above #x10000 map to four octets - we assume that we + ;; usually don't see these characters but of course have to return a + ;; float 2.0) (defmethod encoding-factor ((format flexi-utf-32-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-32 always matches every character to four octets 4) (defmethod encoding-factor ((format flexi-crlf-mixin)) + (declare #.*standard-optimize-settings*) + ;; if the sequence #\Return #\Linefeed is the line-end marker, this + ;; obviously makes encodings potentially longer and definitely makes + ;; the estimate unexact (* 1.02 (call-next-method))) \ No newline at end of file Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.60 2008/05/17 15:56:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.62 2008/05/18 20:34:52 edi Exp $ ;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. @@ -38,17 +38,18 @@ :version "0.14.0" :serial t :components ((:file "packages") + (:file "mapping") (:file "ascii") (:file "koi8-r") (:file "iso-8859") (:file "code-pages") (:file "specials") (:file "util") + (:file "conditions") (:file "external-format") (:file "encode") (:file "decode") (:file "in-memory") - (:file "conditions") (:file "stream") #+:lispworks (:file "lw-binary-stream") (:file "output") Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.57 2008/05/17 16:44:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.59 2008/05/18 21:39:40 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -148,15 +148,15 @@ (octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format)) flexi-input-stream - (let ((counter 0) octets-reversed) + (let ((*current-stream* flexi-input-stream) + (counter 0) octets-reversed) (declare (integer position) (fixnum counter)) (char-to-octets external-format char (lambda (octet) (incf counter) - (push octet octets-reversed)) - nil) + (push octet octets-reversed))) (decf position counter) (setq octet-stack (nreconc octets-reversed octet-stack))))) @@ -172,12 +172,12 @@ ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after ;; this operation (setq last-octet nil) - (let ((char-code (octets-to-char-code external-format - (lambda () - (read-byte* stream)) - (lambda (char) - (unread-char% char stream)) - stream))) + (let* ((*current-unreader* (lambda (char) + (unread-char% char stream))) + (*current-stream* stream) + (char-code (octets-to-char-code external-format + (lambda () + (read-byte* stream))))) ;; remember this character and its char code for UNREAD-CHAR (setq last-char-code char-code) (or (code-char char-code) char-code)))) Modified: branches/edi/iso-8859.lisp ============================================================================== --- branches/edi/iso-8859.lisp (original) +++ branches/edi/iso-8859.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.6 2008/05/17 13:50:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.7 2008/05/18 21:32:15 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -32,22 +32,22 @@ ;;; the following code was auto-generated from files which can be ;;; found at -(defvar +iso-8859-tables+ - '((:iso-8859-1 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) - (:iso-8859-2 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 352 350 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 357 378 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) - (:iso-8859-3 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168 304 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305 351 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 202 203 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219 220 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236 237 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365 349 729)) - (:iso-8859-4 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 352 274 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 291 359 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 326 333 311 244 245 246 247 248 371 250 251 252 361 363 729)) - (:iso-8859-5 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 167 1118 1119)) - (:iso-8859-6 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533 65533 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 1567 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 65533 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) - (:iso-8859-7 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 168 169 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) - (:iso-8859-8 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) - (:iso-8859-9 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) - (:iso-8859-10 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 272 352 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 359 382 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 326 333 243 244 245 246 361 248 371 250 251 252 253 254 312)) - (:iso-8859-11 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 65533 65533 65533 65533)) - (:iso-8859-13 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216 169 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 8217)) - (:iso-8859-14 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 7808 169 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7809 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 217 218 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 253 375 255)) - (:iso-8859-15 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 169 170 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 187 338 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) - (:iso-8859-16 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353 169 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 537 187 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 204 205 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 538 223 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 273 324 242 243 244 337 246 347 369 249 250 251 252 281 539 255))) +(defconstant +iso-8859-tables+ + `((:iso-8859-1 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255))) + (:iso-8859-2 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 352 350 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 357 378 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729))) + (:iso-8859-3 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168 304 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305 351 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 202 203 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219 220 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236 237 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365 349 729))) + (:iso-8859-4 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 352 274 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 291 359 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 326 333 311 244 245 246 247 248 371 250 251 252 361 363 729))) + (:iso-8859-5 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 167 1118 1119))) + (:iso-8859-6 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533 65533 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 1567 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 65533 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533))) + (:iso-8859-7 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 168 169 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533))) + (:iso-8859-8 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533))) + (:iso-8859-9 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255))) + (:iso-8859-10 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 272 352 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 359 382 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 326 333 243 244 245 246 361 248 371 250 251 252 253 254 312))) + (:iso-8859-11 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 65533 65533 65533 65533))) + (:iso-8859-13 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216 169 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 8217))) + (:iso-8859-14 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 7808 169 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7809 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 217 218 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 253 375 255))) + (:iso-8859-15 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 169 170 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 187 338 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255))) + (:iso-8859-16 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353 169 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 537 187 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 204 205 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 538 223 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 273 324 242 243 244 337 246 347 369 249 250 251 252 281 539 255)))) "A list of the ISO-8859 encodings where each element is a cons with the car being a keyword denoting the encoding and the cdr being a vector enumerating the corresponding character codes.") Modified: branches/edi/koi8-r.lisp ============================================================================== --- branches/edi/koi8-r.lisp (original) +++ branches/edi/koi8-r.lisp Sun May 18 18:27:36 2008 @@ -1,6 +1,36 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/koi8-r.lisp,v 1.2 2008/05/18 21:32:15 edi Exp $ + +;;; Copyright (c) 2006, Igor Plekhov. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + (in-package :flexi-streams) ;; http://unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT -(defvar +koi8-r-table+ - #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 9472 9474 9484 9488 9492 9496 9500 9508 9516 9524 9532 9600 9604 9608 9612 9616 9617 9618 9619 8992 9632 8729 8730 8776 8804 8805 160 8993 176 178 183 247 9552 9553 9554 1105 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 1025 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 169 1102 1072 1073 1094 1076 1077 1092 1075 1093 1080 1081 1082 1083 1084 1085 1086 1087 1103 1088 1089 1090 1091 1078 1074 1100 1099 1079 1096 1101 1097 1095 1098 1070 1040 1041 1062 1044 1045 1060 1043 1061 1048 1049 1050 1051 1052 1053 1054 1055 1071 1056 1057 1058 1059 1046 1042 1068 1067 1047 1064 1069 1065 1063 1066) +(defconstant +koi8-r-table+ + (make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 9472 9474 9484 9488 9492 9496 9500 9508 9516 9524 9532 9600 9604 9608 9612 9616 9617 9618 9619 8992 9632 8729 8730 8776 8804 8805 160 8993 176 178 183 247 9552 9553 9554 1105 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 1025 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 169 1102 1072 1073 1094 1076 1077 1092 1075 1093 1080 1081 1082 1083 1084 1085 1086 1087 1103 1088 1089 1090 1091 1078 1074 1100 1099 1079 1096 1101 1097 1095 1098 1070 1040 1041 1062 1044 1045 1060 1043 1061 1048 1049 1050 1051 1052 1053 1054 1055 1071 1056 1057 1058 1059 1046 1042 1068 1067 1047 1064 1069 1065 1063 1066)) "An array enumerating the character codes for the KOI8-R encoding.") Added: branches/edi/mapping.lisp ============================================================================== --- (empty file) +++ branches/edi/mapping.lisp Sun May 18 18:27:36 2008 @@ -0,0 +1,67 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.15 2008/05/18 15:54:34 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(deftype octet () + "A shortcut for \(UNSIGNED-BYTE 8)." + '(unsigned-byte 8)) + +(deftype char* () + "Convenience shortcut to paper over the difference between LispWorks +and the other Lisps." + #+:lispworks 'lw:simple-char + #-:lispworks 'character) + +(deftype char-code-integer () + "The type of integers which can be returned by the function CHAR-CODE." + '(integer 0 #.(1- char-code-limit))) + +(defmacro defconstant (name value &optional doc) + "Make sure VALUE is evaluated only once \(to appease SBCL)." + `(cl:defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value) + ,@(when doc (list doc)))) + +(defun invert-table (table) + "`Inverts' an array which maps octets to character codes to a hash +table which maps character codes to octets." + (let ((hash (make-hash-table))) + (loop for octet from 0 + for char-code across table + unless (= char-code 65533) + do (setf (gethash char-code hash) octet)) + hash)) + +(defun make-decoding-table (list) + "Creates and returns an array which contains the elements in the +list LIST and has an element type that's suitable for character +codes." + (make-array (length list) + :element-type 'char-code-integer + :initial-contents list)) \ No newline at end of file Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.47 2008/05/17 16:40:33 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.49 2008/05/18 22:22:30 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -66,11 +66,11 @@ (declare (optimize speed)) (with-accessors ((external-format flexi-stream-external-format)) stream - (char-to-octets external-format - char - (lambda (octet) - (write-byte* octet stream)) - stream))) + (let ((*current-stream* stream)) + (char-to-octets external-format + char + (lambda (octet) + (write-byte* octet stream)))))) (defmethod stream-write-char :after ((stream flexi-output-stream) char) (declare (optimize speed)) @@ -155,6 +155,7 @@ (stream-write-byte flexi-output-stream element)) sequence)))) +#+(or) (defmethod stream-write-sequence ((stream flexi-output-stream) (sequence string) start end &key) "Optimized method for the cases where SEQUENCE is a string. Fills an internal buffer and uses repeated calls to WRITE-SEQUENCE to write @@ -174,14 +175,14 @@ :test #'char= :start start :end end - :from-end t))) + :from-end t)) + (*current-stream* stream)) (loop with format = (flexi-stream-external-format stream) for index from start below end do (char-to-octets format (aref sequence index) (lambda (octet) - (vector-push octet buffer)) - stream) + (vector-push octet buffer))) when (>= (fill-pointer buffer) +buffer-size+) do (write-sequence buffer (flexi-stream-stream stream)) (setf (fill-pointer buffer) 0) Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.31 2008/05/17 13:50:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.32 2008/05/18 21:32:15 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -34,9 +34,9 @@ (defpackage :flexi-streams (:use :cl :trivial-gray-streams) - (:nicknames :flex) - #+:lispworks - (:shadow :with-accessors) + (:nicknames :flex) + (:shadow #+:lispworks :with-accessors + :defconstant) (:export :*default-eol-style* :*default-little-endian* :*substitution-char* Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.28 2008/05/18 14:59:00 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.30 2008/05/18 21:32:15 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -38,15 +38,20 @@ (compilation-speed 0)) "The standard optimize settings used by most declaration expressions.") -(deftype octet () - "A shortcut for \(UNSIGNED-BYTE 8)." - '(unsigned-byte 8)) - -(deftype char* () - "Convenience shortcut to paper over the difference between LispWorks -and the other Lisps." - #+:lispworks 'lw:simple-char - #-:lispworks 'character) +(defvar *current-stream* nil + "The `stream' that is currently read from or written to. Not +necessarily a stream, can be any source or sink, like an array or a +list. Mainly used for error reporting. + +Must be bound to a suitable value when OCTETS-TO-CHAR-CODE or +CHAR-TO-OCTETS are called.") + +(defvar *current-unreader* nil + "A unary function which might be called to `unread' a character +\(i.e. the sequence of octets it represents). + +Used by the function OCTETS-TO-CHAR-CODE and must always be bound to a +suitable functional object when this function is called.") (defvar +name-map+ '((:utf8 . :utf-8) @@ -144,33 +149,23 @@ \(as if by a USE-VALUE restart) whenever during reading an error of type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise.") -(defun invert-table (table) - "`Inverts' an array which maps octets to character codes to a -hash tables which maps character codes to octets." - (let ((hash (make-hash-table))) - (loop for octet from 0 - for char-code across table - unless (= char-code 65533) - do (setf (gethash char-code hash) octet)) - hash)) - -(defvar +iso-8859-hashes+ +(defconstant +iso-8859-hashes+ (loop for (name . table) in +iso-8859-tables+ collect (cons name (invert-table table))) "An alist which maps names for ISO-8859 encodings to hash tables which map character codes to the corresponding octets.") -(defvar +code-page-hashes+ +(defconstant +code-page-hashes+ (loop for (id . table) in +code-page-tables+ collect (cons id (invert-table table))) "An alist which maps IDs of Windows code pages to hash tables which map character codes to the corresponding octets.") -(defvar +ascii-hash+ (invert-table +ascii-table+) +(defconstant +ascii-hash+ (invert-table +ascii-table+) "A hash table which maps US-ASCII character codes to the corresponding octets.") -(defvar +koi8-r-hash+ (invert-table +koi8-r-table+) +(defconstant +koi8-r-hash+ (invert-table +koi8-r-table+) "A hash table which maps KOI8-R character codes to the corresponding octets.") Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.10 2008/05/18 14:59:00 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.12 2008/05/18 22:22:30 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -38,7 +38,8 @@ (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) (let ((factor (encoding-factor external-format)) - (length (- end start))) + (length (- end start)) + (*current-stream* string)) (etypecase factor (float (let ((octets (make-array (round (* factor length)) @@ -50,24 +51,19 @@ (loop for i of-type fixnum from start below end do (char-to-octets external-format (char string i) - #'writer - nil))) + #'writer))) octets)) (integer - (let ((octets (make-array (* factor length) - :element-type 'octet)) + (let ((octets (make-array (* factor length) :element-type 'octet)) (j 0)) (declare (fixnum j)) (flet ((writer (octet) - (setf #+:lispworks (sys:typed-aref '(unsigned-byte 8) octets j) - #-:lispworks (aref octets j) - octet) + (setf (aref (the (array octet *) octets) j) octet) (incf j))) (loop for i of-type fixnum from start below end do (char-to-octets external-format (char string i) - #'writer - nil))) + #'writer))) octets))))) (defun octets-to-string (sequence &key @@ -82,14 +78,13 @@ (length (- end start)) (i start) (reader (etypecase sequence - #+:lispworks ((array octet *) (lambda () (when (>= i end) - ;; TODO... + ;; TODO... -> NIL? (error "End of data.")) (prog1 - (sys:typed-aref '(unsigned-byte 8) sequence i) + (aref (the (array octet *) sequence) i) (incf i)))) ((array * *) (lambda () @@ -106,22 +101,17 @@ (error "End of data.")) (prog1 (nth i sequence) - (incf i))))))) + (incf i)))))) + (*current-stream* sequence) + (*current-unreader* (lambda (char) + (char-to-octets external-format + char + (lambda (octet) + (declare (ignore octet)) + (decf i)))))) (declare (fixnum i)) - (labels ((pseudo-writer (octet) - (declare (ignore octet)) - (decf i)) - (unreader (char) - (char-to-octets external-format - char - #'pseudo-writer - nil)) - (next-char () - (code-char - (octets-to-char-code external-format - reader - #'unreader - nil)))) + (flet ((next-char () + (code-char (octets-to-char-code external-format reader)))) (etypecase factor (float (let ((string (make-array (round (/ length factor)) Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Sun May 18 18:27:36 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.14 2008/05/17 13:50:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.16 2008/05/18 20:34:53 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -108,6 +108,7 @@ (unless (find real-name +name-map+ :test #'eq :key #'cdr) + ;; TODO... (error "~S is not known to be a name for an external format." name)) real-name)) @@ -161,6 +162,8 @@ (defmacro with-accessors (slot-entries instance &body body) "For LispWorks, we prefer SLOT-VALUE over accessors for better performance." + ;; note that we assume that the variables have the same names as the + ;; slots `(with-slots ,(mapcar #'car slot-entries) ,instance , at body)) \ No newline at end of file From eweitz at common-lisp.net Mon May 19 08:01:37 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Mon, 19 May 2008 04:01:37 -0400 (EDT) Subject: [flexi-streams-cvs] r31 - in branches/edi: . doc test Message-ID: <20080519080137.ADF7B1900D@common-lisp.net> Author: eweitz Date: Mon May 19 04:01:35 2008 New Revision: 31 Modified: branches/edi/conditions.lisp branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/encode.lisp branches/edi/in-memory.lisp branches/edi/input.lisp branches/edi/lw-binary-stream.lisp branches/edi/output.lisp branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/stream.lisp branches/edi/strings.lisp branches/edi/test/test.lisp branches/edi/util.lisp Log: Fix condition hierarchy Passes tests Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.4 2008/05/18 20:34:52 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.5 2008/05/19 07:57:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -31,8 +31,8 @@ (define-condition flexi-stream-error (stream-error) () - (:documentation "Superclass for all errors related to -flexi streams.")) + (:documentation "Superclass for all errors related to flexi +streams.")) (define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) () @@ -48,33 +48,16 @@ (:documentation "Errors of this type are signalled if the flexi stream has a wrong element type.")) -(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) - () - (:documentation "Errors of this type are signalled if there is an -encoding problem.")) - -(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) - ((position-spec :initarg :position-spec - :reader flexi-stream-position-spec-error-position-spec)) - (:documentation "Errors of this type are signalled if an -erroneous position spec is used in conjunction with -FILE-POSITION.")) - -;; TODO: stream might not be a stream... -(defun signal-encoding-error (format-control &rest format-args) - "Convenience function similar to ERROR to signal conditions of type -FLEXI-STREAM-ENCODING-ERROR." - (error 'flexi-stream-encoding-error - :format-control format-control - :format-arguments format-args - #+(or) #+(or) - :stream flexi-stream)) - (define-condition in-memory-stream-error (stream-error) () (:documentation "Superclass for all errors related to IN-MEMORY streams.")) +(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition) + () + (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting +capabilities.")) + (define-condition in-memory-stream-closed-error (in-memory-stream-error) () (:report (lambda (condition stream) @@ -83,3 +66,33 @@ (:documentation "An error that is signalled when someone is trying to read from or write to a closed IN-MEMORY stream.")) +(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error) + ((position-spec :initarg :position-spec + :reader in-memory-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an erroneous +position spec is used in conjunction with FILE-POSITION.")) + +(define-condition external-format-error () + ((external-format :initarg :external-format + :initform nil + :reader external-format-error-external-format)) + (:documentation "Superclass for all errors related to external +formats.")) + +(define-condition external-format-simple-error (external-format-error simple-condition) + () + (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting +capabilities.")) + +(define-condition external-format-encoding-error (external-format-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(defun signal-encoding-error (external-format format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +EXTERNAL-FORMAT-ENCODING-ERROR." + (error 'external-format-encoding-error + :format-control format-control + :format-arguments format-args + :external-format external-format)) Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.7 2008/05/18 22:22:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.9 2008/05/19 07:57:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,16 +29,16 @@ (in-package :flexi-streams) -(defun recover-from-encoding-error (format-control &rest format-args) +(defun recover-from-encoding-error (external-format format-control &rest format-args) "Helper function used by OCTETS-TO-CHAR-CODE below to deal with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns -its character code in this case. Otherwise signals a -FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this +its character code in this case. Otherwise signals an +EXTERNAL-FORMAT-ENCODING-ERROR as determined by the arguments to this function and provides a corresponding USE-VALUE restart." (when *substitution-char* (return-from recover-from-encoding-error (char-code *substitution-char*))) (restart-case - (apply #'signal-encoding-error format-control format-args) + (apply #'signal-encoding-error external-format format-control format-args) (use-value (char) :report "Specify a character to be used instead." :interactive (lambda () @@ -72,7 +72,8 @@ (return-from octets-to-char-code :eof)))) (declare (type octet octet)) (if (> octet 127) - (recover-from-encoding-error "No character which corresponds to octet #x~X." octet) + (recover-from-encoding-error format + "No character which corresponds to octet #x~X." octet) octet))) (defmethod octets-to-char-code ((format flexi-8-bit-format) reader) @@ -86,7 +87,8 @@ (declare (type octet octet)) (if (or (null char-code) (= (the char-code-integer char-code) 65533)) - (recover-from-encoding-error "No character which corresponds to octet #x~X." octet) + (recover-from-encoding-error format + "No character which corresponds to octet #x~X." octet) char-code)))) (defmethod octets-to-char-code ((format flexi-utf-8-format) reader) @@ -99,7 +101,8 @@ (or (funcall reader) (cond (first-octet-seen (return-from octets-to-char-code - (recover-from-encoding-error "End of file while in UTF-8 sequence."))) + (recover-from-encoding-error format + "End of data while in UTF-8 sequence."))) (t (return-from octets-to-char-code :eof)))) (setq first-octet-seen t)))) (let ((octet (read-next-byte))) @@ -118,7 +121,8 @@ ((= #b11111100 (logand octet #b11111110)) (values (logand octet #b00000001) 5)) (t (return-from octets-to-char-code - (recover-from-encoding-error "Unexpected value #x~X at start of UTF-8 sequence." + (recover-from-encoding-error format + "Unexpected value #x~X at start of UTF-8 sequence." octet)))) (declare (fixnum count)) ;; note that we currently don't check for "overlong" @@ -130,7 +134,8 @@ for octet of-type octet = (read-next-byte) unless (= #b10000000 (logand octet #b11000000)) do (return-from octets-to-char-code - (recover-from-encoding-error "Unexpected value #x~X in UTF-8 sequence." octet)) + (recover-from-encoding-error format + "Unexpected value #x~X in UTF-8 sequence." octet)) finally (return result))))))) (defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader) @@ -143,7 +148,8 @@ (or (funcall reader) (cond (first-octet-seen (return-from octets-to-char-code - (recover-from-encoding-error "End of file while in UTF-16 sequence."))) + (recover-from-encoding-error format + "End of data while in UTF-16 sequence."))) (t (return-from octets-to-char-code :eof)))) (setq first-octet-seen t)))) (flet ((read-next-word () @@ -156,7 +162,8 @@ (declare (type (unsigned-byte 16) next-word)) (unless (<= #xdc00 next-word #xdfff) (return-from octets-to-char-code - (recover-from-encoding-error "Unexpected UTF-16 word #x~X following #x~X." + (recover-from-encoding-error format + "Unexpected UTF-16 word #x~X following #x~X." next-word word))) (+ (ash (logand #b1111111111 word) 10) (logand #b1111111111 next-word) @@ -173,7 +180,8 @@ (or (funcall reader) (cond (first-octet-seen (return-from octets-to-char-code - (recover-from-encoding-error "End of file while in UTF-16 sequence."))) + (recover-from-encoding-error format + "End of data while in UTF-16 sequence."))) (t (return-from octets-to-char-code :eof)))) (setq first-octet-seen t)))) (flet ((read-next-word () @@ -186,7 +194,8 @@ (declare (type (unsigned-byte 16) next-word)) (unless (<= #xdc00 next-word #xdfff) (return-from octets-to-char-code - (recover-from-encoding-error "Unexpected UTF-16 word #x~X following #x~X." + (recover-from-encoding-error format + "Unexpected UTF-16 word #x~X following #x~X." next-word word))) (+ (ash (logand #b1111111111 word) 10) (logand #b1111111111 next-word) @@ -203,7 +212,8 @@ (or (funcall reader) (cond (first-octet-seen (return-from octets-to-char-code - (recover-from-encoding-error "End of file while in UTF-32 sequence."))) + (recover-from-encoding-error format + "End of data while in UTF-32 sequence."))) (t (return-from octets-to-char-code :eof)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 0 to 24 by 8 @@ -220,7 +230,8 @@ (or (funcall reader) (cond (first-octet-seen (return-from octets-to-char-code - (recover-from-encoding-error "End of file while in UTF-32 sequence."))) + (recover-from-encoding-error format + "End of data while in UTF-32 sequence."))) (t (return-from octets-to-char-code :eof)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 24 downto 0 by 8 Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Mon May 19 04:01:35 2008 @@ -56,7 +56,6 @@

  1. Example usage
  2. Download and installation -
  3. Backward compatibility with version 0.10.3 and before
  4. Support and mailing lists
  5. The FLEXI-STREAMS dictionary
      @@ -70,6 +69,7 @@
    1. external-format-equal
    2. *default-eol-style*
    3. *default-little-endian* +
    4. external-format-encoding-error
  6. Flexi streams
      @@ -89,11 +89,8 @@
    1. *substitution-char*
    2. octet
    3. flexi-stream-error -
    4. flexi-stream-encoding-error
    5. flexi-stream-element-type-error
    6. flexi-stream-element-type-error-element-type -
    7. flexi-stream-position-spec-error -
    8. flexi-stream-position-spec-error-position-spec
  7. In-memory streams
      @@ -110,6 +107,8 @@
    1. with-output-to-sequence
    2. in-memory-stream-error
    3. in-memory-stream-closed-error +
    4. in-memory-stream-position-spec-error +
    5. in-memory-stream-position-spec-error-position-spec
  8. Strings
      @@ -256,27 +255,6 @@ href="http://arcanes.fr.eu.org/~pierre/2007/02/weitz/">http://arcanes.fr.eu.org/~pierre/2007/02/weitz/ thanks to Pierre Thierry. - -
       
      -

      -Backward compatibility with version 0.10.3 and before

      - -Two special variables used in flexi-streams 0.10.3 and before were removed - -*PROVIDE-USE-VALUE-RESTART* and *USE-REPLACEMENT-CHAR*. - -

      -The code now behaves as if -*PROVIDE-USE-VALUE-RESTART* is always T. -Instead of *USE-REPLACEMENT-CHAR*, you can use -*SUBSTITUTION-CHAR* or -invoke -a USE-VALUE -restart -when a FLEXI-STREAM-ENCODING-ERROR -is signalled. -
       

      Support and mailing lists

      For questions, bug reports, feature requests, improvements, or patches @@ -542,6 +520,32 @@ The default value for the little-endian keyword argument of MAKE-EXTERNAL-FORMAT. Its initial value corresponds to the endianess of the platform FLEXI-STREAMS is used on as revealed by the :LITTLE-ENDIAN feature.
+


[Condition] +
external-format-error + +


+All errors related to external formats are of this type. +There's a slot for the external format which can be accessed with EXTERNAL-FORMAT-ERROR-EXTERNAL-FORMAT. +
+ +


[Reader] +
external-format-error-external-format condition => external-format + +


If condition is of +type EXTERNAL-FORMAT-ERROR, +this function will return the associated external format. Note that +there are errors which happen during the creation of external formats +where this method returns NIL. +
+ +


[Condition] +
external-format-encoding-error + +


+All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE +restart is provided. See also *SUBSTITUTION-CHAR* and example for it. EXTERNAL-FORMAT-ENCODING-ERROR is a subtype of EXTERNAL-FORMAT-ERROR. +
+

Flexi streams

Flexi streams are the core of the FLEXI-STREAMS library. You @@ -736,7 +740,7 @@

If this value is not NIL, it should be a character which is used (as if by a USE-VALUE restart) whenever during reading an error of -type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise. +type EXTERNAL-FORMAT-ENCODING-ERROR would have been signalled otherwise.
 CL-USER 1 > (defun foo ()
@@ -770,7 +774,7 @@
 "xy"
 T
 
-CL-USER 5 > (handler-bind ((flexi-stream-encoding-error (lambda (condition)
+CL-USER 5 > (handler-bind ((external-format-encoding-error (lambda (condition)
                                                           (use-value #\-))))
               (foo))
 "--"
@@ -798,14 +802,6 @@
 


[Condition] -
flexi-stream-encoding-error - -


-All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE -restart is provided. See also *SUBSTITUTION-CHAR* and example for it. FLEXI-STREAM-ENCODING-ERROR is a subtype of FLEXI-STREAM-ERROR. -
- -


[Condition]
flexi-stream-element-type-error


@@ -819,26 +815,6 @@ If condition is of type FLEXI-STREAM-ELEMENT-TYPE-ERROR, this function will return the offending element type.
-


[Condition] -
flexi-stream-position-spec-error - -


Errors of this type are signalled if an erroneous -position spec is used in conjunction -with FILE-POSITION. This is a -subtype -of FLEXI-STREAM-ERROR -and has an additional slot for the position spec which can be accessed -with FLEXI-STREAM-POSITION-SPEC-ERROR-POSITION-SPEC. -
- -


[Reader] -
flexi-stream-position-spec-error-position-spec condition => position-spec - -


-If condition is of type FLEXI-STREAM-POSITION-SPEC-ERROR, this function will return the offending position spec. -
-

In-memory streams

The library also provides in-memory binary streams which are modeled after string streams and behave very similar only that they deal with octets instead of characters and the underlying data structure is not a string but either a list or a vector. These streams can obviously be used as the underlying streams for flexi streams. @@ -965,6 +941,25 @@ An error of this type is signalled if one tries to read from or write to an in-memory stream which had already been closed. This is a subtype of IN-MEMORY-STREAM-ERROR.
+


[Condition] +
in-memory-stream-position-spec-error + +


Errors of this type are signalled if an erroneous +position spec is used in conjunction +with FILE-POSITION. This is a +subtype +of IN-MEMORY-STREAM-ERROR +and has an additional slot for the position spec which can be accessed +with IN-MEMORY-STREAM-POSITION-SPEC-ERROR-POSITION-SPEC. +
+ +


[Reader] +
in-memory-stream-position-spec-error-position-spec condition => position-spec + +


+If condition is of type IN-MEMORY-STREAM-POSITION-SPEC-ERROR, this function will return the offending position spec. +
+

Strings

This section collects a few convenience functions for strings conversions: @@ -1037,7 +1032,7 @@ numerous patches and additions.

-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.100 2008/05/18 14:59:02 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.102 2008/05/19 07:57:10 edi Exp $

BACK TO MY HOMEPAGE Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.7 2008/05/18 22:22:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.8 2008/05/19 07:57:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -45,7 +45,7 @@ (declare (character char) (function writer)) (let ((octet (char-code char))) (when (> octet 255) - (signal-encoding-error "~S (code ~A) is not a LATIN-1 character." char octet)) + (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet)) (funcall writer octet))) (defmethod char-to-octets ((format flexi-ascii-format) char writer) @@ -53,7 +53,7 @@ (declare (character char) (function writer)) (let ((octet (char-code char))) (when (> octet 127) - (signal-encoding-error "~S (code ~A) is not an ASCII character." char octet)) + (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet)) (funcall writer octet))) (defmethod char-to-octets ((format flexi-8-bit-format) char writer) @@ -63,7 +63,7 @@ format (let ((octet (gethash (char-code char) encoding-hash))) (unless octet - (signal-encoding-error "~S (code ~A) is not in this encoding." char octet)) + (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet)) (funcall writer octet)))) (defmethod char-to-octets ((format flexi-utf-8-format) char writer) Modified: branches/edi/in-memory.lisp ============================================================================== --- branches/edi/in-memory.lisp (original) +++ branches/edi/in-memory.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.29 2008/05/17 16:35:58 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.31 2008/05/19 07:57:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -107,163 +107,194 @@ #+:cmu (defmethod open-stream-p ((stream in-memory-stream)) "Returns a true value if STREAM is open. See ANSI standard." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (in-memory-stream-open-p stream)) #+:cmu (defmethod close ((stream in-memory-stream) &key abort) "Closes the stream STREAM. See ANSI standard." - (declare (ignore abort) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore abort)) (prog1 (in-memory-stream-open-p stream) (setf (in-memory-stream-open-p stream) nil))) (defmethod check-if-open ((stream in-memory-stream)) "Checks if STREAM is open and signals an error otherwise." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (unless (open-stream-p stream) (error 'in-memory-stream-closed-error :stream stream))) (defmethod stream-element-type ((stream in-memory-stream)) "The element type is always OCTET by definition." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) 'octet) (defmethod transform-octet ((stream in-memory-stream) octet) "Applies the transformer of STREAM to octet and returns the result." + (declare #.*standard-optimize-settings*) (funcall (or (in-memory-stream-transformer stream) #'identity) octet)) (defmethod stream-read-byte ((stream list-input-stream)) "Reads one byte by simply popping it off of the top of the list." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (transform-octet stream (or (pop (list-stream-list stream)) - (return-from stream-read-byte :eof)))) + (with-accessors ((list list-stream-list)) + stream + (transform-octet stream (or (pop list) (return-from stream-read-byte :eof))))) (defmethod stream-listen ((stream list-input-stream)) "Checks whether list is not empty." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (list-stream-list stream)) + (with-accessors ((list list-stream-list)) + stream + list)) (defmethod stream-read-sequence ((stream list-input-stream) sequence start end &key) "Repeatedly pops elements from the list until it's empty." - (declare (optimize speed) (type (integer 0 *) start end)) - (loop for index from start below end - while (list-stream-list stream) - do (setf (elt sequence index) - (pop (list-stream-list stream))) - finally (return index))) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((list list-stream-list)) + stream + (loop for index of-type fixnum from start below end + while list + do (setf (elt sequence index) (pop list)) + finally (return index)))) (defmethod stream-read-byte ((stream vector-input-stream)) "Reads one byte and increments INDEX pointer unless we're beyond END pointer." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (let ((index (vector-stream-index stream))) - (cond ((< index (vector-stream-end stream)) - (incf (vector-stream-index stream)) - (transform-octet stream (aref (vector-stream-vector stream) index))) - (t :eof)))) + (with-accessors ((index vector-stream-index) + (end vector-stream-end) + (vector vector-stream-vector)) + stream + (let ((current-index index)) + (declare (fixnum current-index)) + (cond ((< current-index (the fixnum end)) + (incf (the fixnum index)) + (transform-octet stream (aref vector current-index))) + (t :eof))))) (defmethod stream-listen ((stream vector-input-stream)) "Checking whether INDEX is beyond END." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (< (vector-stream-index stream) (vector-stream-end stream))) + (with-accessors ((index vector-stream-index) + (end vector-stream-end)) + stream + (< (the fixnum index) (the fixnum end)))) (defmethod stream-read-sequence ((stream vector-input-stream) sequence start end &key) "Traverses both sequences in parallel until the end of one of them is reached." - (declare (optimize speed) (type (integer 0 *) start end)) - (loop with vector-end of-type (integer 0 #.array-dimension-limit) = (vector-stream-end stream) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (loop with vector-end of-type fixnum = (vector-stream-end stream) with vector = (vector-stream-vector stream) - for index from start below end - for vector-index of-type (integer 0 #.array-dimension-limit) = (vector-stream-index stream) + for index of-type fixnum from start below end + for vector-index of-type fixnum = (vector-stream-index stream) while (< vector-index vector-end) do (setf (elt sequence index) (aref vector vector-index)) - (incf (vector-stream-index stream)) + (incf (the fixnum (vector-stream-index stream))) finally (return index))) (defmethod stream-write-byte ((stream vector-output-stream) byte) "Writes a byte \(octet) by extending the underlying vector." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (vector-push-extend (transform-octet stream byte) - (vector-stream-vector stream))) + (with-accessors ((vector vector-stream-vector)) + stream + (vector-push-extend (transform-octet stream byte) vector))) (defmethod stream-write-sequence ((stream vector-output-stream) sequence start end &key) "Just calls VECTOR-PUSH-EXTEND repeatedly." - (declare (optimize speed) (type (integer 0 *) start end)) - (loop with vector = (vector-stream-vector stream) - for index from start below end - do (vector-push-extend (elt sequence index) vector)) - sequence) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((vector vector-stream-vector)) + stream + (loop for index of-type fixnum from start below end + do (vector-push-extend (elt sequence index) vector)) + sequence)) (defmethod stream-file-position ((stream vector-input-stream)) "Simply returns the index into the underlying vector." - (declare (optimize speed)) - (vector-stream-index stream)) + (declare #.*standard-optimize-settings*) + (with-accessors ((index vector-stream-index)) + stream + index)) (defmethod (setf stream-file-position) (position-spec (stream vector-input-stream)) "Sets the index into the underlying vector if POSITION-SPEC is acceptable." - (declare (optimize speed)) - (setf (vector-stream-index stream) - (case position-spec - (:start 0) - (:end (vector-stream-end stream)) - (otherwise - (unless (integerp position-spec) - (error 'flexi-stream-position-spec-error - :format-control "Unknown file position designator: ~S." - :format-arguments (list position-spec) - :position-spec position-spec)) - (unless (<= 0 position-spec (vector-stream-end stream)) - (error 'flexi-stream-position-spec-error - :format-control "File position designator ~S is out of bounds." - :format-arguments (list position-spec) - :position-spec position-spec)) - position-spec))) - position-spec) - -(defmethod stream-file-position ((stream vector-output-stream)) - "Simply returns the fill pointer of the underlying vector." - (declare (optimize speed)) - (fill-pointer (vector-stream-vector stream))) - -(defmethod (setf stream-file-position) (position-spec (stream vector-output-stream)) - "Sets the fill pointer underlying vector if POSITION-SPEC is -acceptable. Adjusts the vector if necessary." - (declare (optimize speed)) - (let* ((vector (vector-stream-vector stream)) - (total-size (array-total-size vector)) - (new-fill-pointer + (declare #.*standard-optimize-settings*) + (with-accessors ((index vector-stream-index) + (end vector-stream-end)) + stream + (setq index (case position-spec (:start 0) - (:end - (warn "File position designator :END doesn't really make sense for an output stream.") - total-size) + (:end end) (otherwise (unless (integerp position-spec) - (error 'flexi-stream-position-spec-error + (error 'in-memory-stream-position-spec-error :format-control "Unknown file position designator: ~S." :format-arguments (list position-spec) + :stream stream :position-spec position-spec)) - (unless (<= 0 position-spec array-total-size-limit) - (error 'flexi-stream-position-spec-error + (unless (<= 0 position-spec end) + (error 'in-memory-stream-position-spec-error :format-control "File position designator ~S is out of bounds." :format-arguments (list position-spec) + :stream stream :position-spec position-spec)) - position-spec)))) - (when (> new-fill-pointer total-size) - (adjust-array vector new-fill-pointer)) - (setf (fill-pointer vector) new-fill-pointer) + position-spec))) position-spec)) +(defmethod stream-file-position ((stream vector-output-stream)) + "Simply returns the fill pointer of the underlying vector." + (declare #.*standard-optimize-settings*) + (with-accessors ((vector vector-stream-vector)) + stream + (fill-pointer vector))) + +(defmethod (setf stream-file-position) (position-spec (stream vector-output-stream)) + "Sets the fill pointer underlying vector if POSITION-SPEC is +acceptable. Adjusts the vector if necessary." + (declare #.*standard-optimize-settings*) + (with-accessors ((vector vector-stream-vector)) + stream + (let* ((total-size (array-total-size vector)) + (new-fill-pointer + (case position-spec + (:start 0) + (:end + (warn "File position designator :END doesn't really make sense for an output stream.") + total-size) + (otherwise + (unless (integerp position-spec) + (error 'in-memory-stream-position-spec-error + :format-control "Unknown file position designator: ~S." + :format-arguments (list position-spec) + :stream stream + :position-spec position-spec)) + (unless (<= 0 position-spec array-total-size-limit) + (error 'in-memory-stream-position-spec-error + :format-control "File position designator ~S is out of bounds." + :format-arguments (list position-spec) + :stream stream + :position-spec position-spec)) + position-spec)))) + (declare (fixnum total-size new-fill-pointer)) + (when (> new-fill-pointer total-size) + (adjust-array vector new-fill-pointer)) + (setf (fill-pointer vector) new-fill-pointer) + position-spec))) + (defmethod make-in-memory-input-stream ((vector vector) &key (start 0) (end (length vector)) transformer) @@ -271,7 +302,7 @@ octets in the subsequence of VECTOR bounded by START and END. Each octet returned will be transformed in turn by the optional TRANSFORMER function." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-instance 'vector-input-stream :vector vector :index start @@ -285,7 +316,7 @@ octets in the subsequence of LIST bounded by START and END. Each octet returned will be transformed in turn by the optional TRANSFORMER function." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-instance 'list-input-stream :list (subseq list start end) :transformer transformer)) @@ -293,7 +324,7 @@ (defun make-output-vector (&key (element-type 'octet)) "Creates and returns an array which can be used as the underlying vector for a VECTOR-OUTPUT-STREAM." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-array 0 :adjustable t :fill-pointer 0 :element-type element-type)) @@ -304,7 +335,7 @@ that contains the octes that were actually output. The octets stored will each be transformed by the optional TRANSFORMER function." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-instance 'vector-output-stream :vector (make-output-vector :element-type element-type) :transformer transformer)) @@ -316,19 +347,23 @@ been output since the last call to GET-OUTPUT-STREAM-SEQUENCE or since the creation of the stream, whichever occurred most recently. If AS-LIST is true the return value is coerced to a list." - (declare (optimize speed)) - (prog1 - (if as-list - (coerce (vector-stream-vector stream) 'list) - (vector-stream-vector stream)) - (setf (vector-stream-vector stream) - (make-output-vector)))) + (declare #.*standard-optimize-settings*) + (with-accessors ((vector vector-stream-vector)) + stream + (prog1 + (if as-list + (coerce vector 'list) + vector) + (setq vector + (make-output-vector))))) (defmethod output-stream-sequence-length ((stream in-memory-output-stream)) "Returns the current length of the underlying vector of the IN-MEMORY output stream STREAM." (declare (optimize speed)) - (length (the (simple-array * (*)) (vector-stream-vector stream)))) + (with-accessors ((vector vector-stream-vector)) + stream + (length (the (simple-array * (*)) vector)))) (defmacro with-input-from-sequence ((var sequence &key start end transformer) &body body) Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.59 2008/05/18 21:39:40 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.60 2008/05/19 07:57:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -148,8 +148,7 @@ (octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format)) flexi-input-stream - (let ((*current-stream* flexi-input-stream) - (counter 0) octets-reversed) + (let ((counter 0) octets-reversed) (declare (integer position) (fixnum counter)) (char-to-octets external-format @@ -174,7 +173,6 @@ (setq last-octet nil) (let* ((*current-unreader* (lambda (char) (unread-char% char stream))) - (*current-stream* stream) (char-code (octets-to-char-code external-format (lambda () (read-byte* stream))))) Modified: branches/edi/lw-binary-stream.lisp ============================================================================== --- branches/edi/lw-binary-stream.lisp (original) +++ branches/edi/lw-binary-stream.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.13 2008/05/17 14:21:20 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.14 2008/05/18 23:13:59 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -48,251 +48,11 @@ optimizing input and output on LispWorks. See READ-BYTE* and WRITE-BYTE*.")) -(defclass flexi-binary-8-bit-input-stream (flexi-8-bit-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-8-BIT-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-8-bit-input-stream (flexi-cr-mixin flexi-binary-8-bit-input-stream) - () - (:documentation "Like FLEXI-CR-8-BIT-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-ascii-input-stream (flexi-ascii-input-stream flexi-binary-8-bit-input-stream) - () - (:documentation "Like FLEXI-ASCII-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-ascii-input-stream (flexi-cr-mixin flexi-binary-ascii-input-stream) - () - (:documentation "Like FLEXI-CR-ASCII-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-latin-1-input-stream (flexi-latin-1-input-stream flexi-binary-8-bit-input-stream) - () - (:documentation "Like FLEXI-LATIN-1-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-latin-1-input-stream (flexi-cr-mixin flexi-binary-latin-1-input-stream) - () - (:documentation "Like FLEXI-CR-LATIN-1-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-le-input-stream (flexi-utf-32-le-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-32-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-binary-utf-32-le-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-be-input-stream (flexi-utf-32-be-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-32-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-binary-utf-32-be-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-le-input-stream (flexi-utf-16-le-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-16-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-binary-utf-16-le-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-be-input-stream (flexi-utf-16-be-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-16-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-binary-utf-16-be-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-8-input-stream (flexi-utf-8-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-8-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-8-input-stream (flexi-cr-mixin flexi-binary-utf-8-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-8-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-8-bit-output-stream (flexi-8-bit-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-8-BIT-OUTPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-8-bit-output-stream (flexi-cr-mixin flexi-binary-8-bit-output-stream) - () - (:documentation "Like FLEXI-CR-8-BIT-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-ascii-output-stream (flexi-ascii-output-stream flexi-binary-8-bit-output-stream) - () - (:documentation "Like FLEXI-ASCII-OUTPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-ascii-output-stream (flexi-cr-mixin flexi-binary-ascii-output-stream) - () - (:documentation "Like FLEXI-CR-ASCII-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-latin-1-output-stream (flexi-latin-1-output-stream flexi-binary-8-bit-output-stream) - () - (:documentation "Like FLEXI-LATIN-1-OUTPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-latin-1-output-stream (flexi-cr-mixin flexi-binary-latin-1-output-stream) - () - (:documentation "Like FLEXI-CR-LATIN-1-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-le-output-stream (flexi-utf-32-le-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-32-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-binary-utf-32-le-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-be-output-stream (flexi-utf-32-be-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-32-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-binary-utf-32-be-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-le-output-stream (flexi-utf-16-le-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-16-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-binary-utf-16-le-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-be-output-stream (flexi-utf-16-be-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-16-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-binary-utf-16-be-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-8-output-stream (flexi-utf-8-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-8-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-8-output-stream (flexi-cr-mixin flexi-binary-utf-8-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-8-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-8-bit-io-stream (flexi-binary-io-stream flexi-8-bit-io-stream) - () - (:documentation "Like FLEXI-8-BIT-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-8-bit-io-stream (flexi-cr-mixin flexi-binary-8-bit-io-stream) - () - (:documentation "Like FLEXI-CR-8-BIT-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-ascii-io-stream (flexi-ascii-io-stream flexi-binary-8-bit-io-stream) - () - (:documentation "Like FLEXI-ASCII-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-ascii-io-stream (flexi-cr-mixin flexi-binary-ascii-io-stream) - () - (:documentation "Like FLEXI-CR-ASCII-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-latin-1-io-stream (flexi-latin-1-io-stream flexi-binary-8-bit-io-stream) - () - (:documentation "Like FLEXI-LATIN-1-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-latin-1-io-stream (flexi-cr-mixin flexi-binary-latin-1-io-stream) - () - (:documentation "Like FLEXI-CR-LATIN-1-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-le-io-stream (flexi-utf-32-le-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-32-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-binary-utf-32-le-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-be-io-stream (flexi-utf-32-be-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-32-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-binary-utf-32-be-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-le-io-stream (flexi-utf-16-le-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-16-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-binary-utf-16-le-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-be-io-stream (flexi-utf-16-be-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-16-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-binary-utf-16-be-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-8-io-stream (flexi-utf-8-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-8-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-8-io-stream (flexi-cr-mixin flexi-binary-utf-8-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-8-IO-STREAM but -optimized for LispWorks binary streams.")) - (defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. Only needed for LispWorks." - (declare (ignore initargs) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) (with-accessors ((stream flexi-stream-stream)) flexi-stream (when (subtypep (stream-element-type stream) 'octet) @@ -304,8 +64,8 @@ (defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. Only needed for LispWorks." - (declare (ignore initargs) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) (with-accessors ((stream flexi-stream-stream)) flexi-stream (when (subtypep (stream-element-type stream) 'octet) Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.49 2008/05/18 22:22:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.50 2008/05/19 07:57:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -66,11 +66,10 @@ (declare (optimize speed)) (with-accessors ((external-format flexi-stream-external-format)) stream - (let ((*current-stream* stream)) - (char-to-octets external-format - char - (lambda (octet) - (write-byte* octet stream)))))) + (char-to-octets external-format + char + (lambda (octet) + (write-byte* octet stream))))) (defmethod stream-write-char :after ((stream flexi-output-stream) char) (declare (optimize speed)) @@ -155,7 +154,6 @@ (stream-write-byte flexi-output-stream element)) sequence)))) -#+(or) (defmethod stream-write-sequence ((stream flexi-output-stream) (sequence string) start end &key) "Optimized method for the cases where SEQUENCE is a string. Fills an internal buffer and uses repeated calls to WRITE-SEQUENCE to write @@ -168,15 +166,14 @@ (unless (typep stream 'flexi-binary-output-stream) (return-from stream-write-sequence (call-next-method))) - (let* ((buffer (make-array (+ +buffer-size+ 20) - :element-type '(unsigned-byte 8) - :fill-pointer 0)) - (last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t)) - (*current-stream* stream)) + (let ((buffer (make-array (+ +buffer-size+ 20) + :element-type '(unsigned-byte 8) + :fill-pointer 0)) + (last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) (loop with format = (flexi-stream-external-format stream) for index from start below end do (char-to-octets format Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.32 2008/05/18 21:32:15 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.33 2008/05/19 07:57:08 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -41,6 +41,9 @@ :*default-little-endian* :*substitution-char* :external-format-eol-style + :external-format-error + :external-format-error-external-format + :external-format-encoding-error :external-format-equal :external-format-id :external-format-little-endian @@ -51,20 +54,19 @@ :flexi-stream :flexi-stream-bound :flexi-stream-external-format - :flexi-stream-encoding-error :flexi-stream-element-type :flexi-stream-element-type-error :flexi-stream-element-type-error-element-type :flexi-stream-error :flexi-stream-column :flexi-stream-position - :flexi-stream-position-spec-error - :flexi-stream-position-spec-error-position-spec :flexi-stream-stream :get-output-stream-sequence :in-memory-stream :in-memory-stream-closed-error :in-memory-stream-error + :in-memory-stream-position-spec-error + :in-memory-stream-position-spec-error-position-spec :in-memory-input-stream :in-memory-output-stream :list-stream Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.30 2008/05/18 21:32:15 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.31 2008/05/19 07:57:08 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -38,14 +38,6 @@ (compilation-speed 0)) "The standard optimize settings used by most declaration expressions.") -(defvar *current-stream* nil - "The `stream' that is currently read from or written to. Not -necessarily a stream, can be any source or sink, like an array or a -list. Mainly used for error reporting. - -Must be bound to a suitable value when OCTETS-TO-CHAR-CODE or -CHAR-TO-OCTETS are called.") - (defvar *current-unreader* nil "A unary function which might be called to `unread' a character \(i.e. the sequence of octets it represents). Modified: branches/edi/stream.lisp ============================================================================== --- branches/edi/stream.lisp (original) +++ branches/edi/stream.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.59 2008/05/18 01:21:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.60 2008/05/18 23:14:00 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -52,8 +52,8 @@ (defmethod initialize-instance :after ((flexi-stream flexi-stream) &rest initargs) "Makes sure the EXTERNAL-FORMAT and ELEMENT-TYPE slots contain reasonable values." - (declare (ignore initargs) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) (with-accessors ((external-format flexi-stream-external-format) (element-type flexi-stream-element-type)) flexi-stream @@ -67,10 +67,12 @@ (defmethod (setf flexi-stream-external-format) :around (new-value (flexi-stream flexi-stream)) "Converts the new value to an EXTERNAL-FORMAT object if necessary." + (declare #.*standard-optimize-settings*) (call-next-method (maybe-convert-external-format new-value) flexi-stream)) (defmethod (setf flexi-stream-element-type) :before (new-value (flexi-stream flexi-stream)) "Checks whether the new value makes sense before it is set." + (declare #.*standard-optimize-settings*) (unless (or (subtypep new-value 'character) (subtypep new-value 'octet)) (error 'flexi-stream-element-type-error @@ -80,13 +82,15 @@ (defmethod stream-element-type ((stream flexi-stream)) "Returns the element type that was provided by the creator of the stream." - (declare (optimize speed)) - (flexi-stream-element-type stream)) + (declare #.*standard-optimize-settings*) + (with-accessors ((element-type flexi-stream-element-type)) + stream + element-type)) (defmethod close ((stream flexi-stream) &key abort) "Closes the flexi stream by closing the underlying `real' stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) stream (cond ((open-stream-p stream) @@ -95,19 +99,24 @@ (defmethod open-stream-p ((stream flexi-stream)) "A flexi stream is open if its underlying stream is open." - (declare (optimize speed)) - (open-stream-p (flexi-stream-stream stream))) + (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + stream + (open-stream-p stream))) (defmethod stream-file-position ((stream flexi-stream)) "Dispatch to method for underlying stream." - (declare (optimize speed)) - (stream-file-position (flexi-stream-stream stream))) + (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + stream + (stream-file-position stream))) (defmethod (setf stream-file-position) (position-spec (stream flexi-stream)) "Dispatch to method for underlying stream." - (declare (optimize speed)) - (setf (stream-file-position (flexi-stream-stream stream)) - position-spec)) + (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + stream + (setf (stream-file-position stream) position-spec))) (defclass flexi-output-stream (flexi-stream fundamental-binary-output-stream fundamental-character-output-stream) @@ -123,7 +132,7 @@ #+:cmu (defmethod input-stream-p ((stream flexi-output-stream)) "Explicitly states whether this is an input stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) nil) (defclass flexi-input-stream (flexi-stream fundamental-binary-input-stream @@ -166,7 +175,7 @@ #+:cmu (defmethod output-stream-p ((stream flexi-input-stream)) "Explicitly states whether this is an output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) nil) (defclass flexi-io-stream (flexi-input-stream flexi-output-stream) @@ -179,13 +188,13 @@ #+:cmu (defmethod input-stream-p ((stream flexi-io-stream)) "Explicitly states whether this is an input stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) t) #+:cmu (defmethod output-stream-p ((stream flexi-io-stream)) "Explicitly states whether this is an output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) t) (defun make-flexi-stream (stream &rest args @@ -207,6 +216,7 @@ streams) should be NIL or an integer. If BOUND is not NIL and POSITION has gone beyond BOUND, then the stream will behave as if no more input is available." + (declare #.*standard-optimize-settings*) ;; these arguments are ignored - they are only there to provide a ;; meaningful parameter list for IDEs (declare (ignore element-type column position bound)) Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.12 2008/05/18 22:22:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.14 2008/05/19 07:57:08 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -38,8 +38,7 @@ (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) (let ((factor (encoding-factor external-format)) - (length (- end start)) - (*current-stream* string)) + (length (- end start))) (etypecase factor (float (let ((octets (make-array (round (* factor length)) @@ -47,6 +46,7 @@ :fill-pointer 0 :adjustable t))) (flet ((writer (octet) + ;; TODO: do this manually (vector-push-extend octet octets))) (loop for i of-type fixnum from start below end do (char-to-octets external-format @@ -102,13 +102,11 @@ (prog1 (nth i sequence) (incf i)))))) - (*current-stream* sequence) - (*current-unreader* (lambda (char) - (char-to-octets external-format - char - (lambda (octet) - (declare (ignore octet)) - (decf i)))))) + (*current-unreader* (flet ((pseudo-writer (octet) + (declare (ignore octet)) + (decf i))) + (lambda (char) + (char-to-octets external-format char #'pseudo-writer))))) (declare (fixnum i)) (flet ((next-char () (code-char (octets-to-char-code external-format reader)))) @@ -119,6 +117,7 @@ :fill-pointer 0 :adjustable t))) (loop while (< i end) + ;; TODO: do this manually do (vector-push-extend (next-char) string) finally (return string)))) (integer Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.22 2008/05/18 14:59:04 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.25 2008/05/19 07:57:12 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -256,11 +256,40 @@ (terpri *error-output*)) ,successp)))) +(defun old-string-to-octets (string &key + (external-format (make-external-format :latin1)) + (start 0) end) + "The old version of STRING-TO-OCTETS. We can use it to test +in-memory streams." + (declare (optimize speed)) + (with-output-to-sequence (out) + (let ((flexi (make-flexi-stream out :external-format external-format))) + (write-string string flexi :start start :end end)))) + +(defun old-octets-to-string (vector &key + (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) + "The old version of OCTETS-TO-STRING. We can use it to test +in-memory streams." + (declare (optimize speed)) + (with-input-from-sequence (in vector :start start :end end) + (let ((flexi (make-flexi-stream in :external-format external-format)) + (result (make-array (- end start) + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer result) + (read-sequence result flexi)) + result))) + (defun string-test (pathspec external-format) "Tests whether conversion from strings to octets and vice versa using the external format EXTERNAL-FORMAT works as expected, using the contents of the file denoted by PATHSPEC as test data and assuming -that the stream conversion functions work." +that the stream conversion functions work. + +Also tests with the old versions of the conversion functions in order +to test in-memory streams." (let* ((full-path (merge-pathnames pathspec *this-file*)) (octets-vector (file-as-octet-vector full-path)) (octets-list (coerce octets-vector 'list)) @@ -269,27 +298,30 @@ (flex::normalize-external-format external-format))) (check (string= (octets-to-string octets-vector :external-format external-format) string)) (check (string= (octets-to-string octets-list :external-format external-format) string)) - (check (equalp (string-to-octets string :external-format external-format) octets-vector))))) + (check (equalp (string-to-octets string :external-format external-format) octets-vector)) + (check (string= (old-octets-to-string octets-vector :external-format external-format) string)) + (check (string= (old-octets-to-string octets-list :external-format external-format) string)) + (check (equalp (old-string-to-octets string :external-format external-format) octets-vector))))) (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE -restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals -an error when there are more or less FLEXI-STREAM-ENCODING-ERRORs than -there are elements in VALUES." +restart each time a EXTERNAL-FORMAT-ENCODING-ERROR is signalled. +Signals an error when there are more or less +EXTERNAL-FORMAT-ENCODING-ERRORs than there are elements in VALUES." (flex::with-unique-names (value-stack condition-counter) `(let ((,value-stack ',values) (,condition-counter 0)) - (handler-bind ((flexi-stream-encoding-error + (handler-bind ((external-format-encoding-error #'(lambda (c) (declare (ignore c)) (unless ,value-stack - (error "Too many FLEXI-STREAM-ENCODING-ERRORs signalled, expected only ~A." + (error "Too many encoding errors signalled, expected only ~A." ,(length values))) (incf ,condition-counter) (use-value (pop ,value-stack))))) (prog1 (progn , at body) (when ,value-stack - (error "~A FLEXI-STREAM-ENCODING-ERRORs signalled, but ~A were expected." + (error "~A encoding errors signalled, but ~A were expected." ,condition-counter ,(length values)))))))) (defun read-flexi-line (sequence external-format) @@ -299,9 +331,9 @@ (setq in (make-flexi-stream in :external-format external-format)) (read-line in))) -(defun encoding-error-handling-test () - "Tests several possible encoding errors and how they are handled." - (with-test ("Handling of encoding errors.") +(defun error-handling-test () + "Tests several possible errors and how they are handled." + (with-test ("Handling of errors.") ;; handling of EOF in the middle of CRLF (check (string= #.(string #\Return) (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) @@ -382,7 +414,7 @@ (dolist (args string-test-args-list) (apply 'string-test args))) (incf no-tests) - (encoding-error-handling-test) + (error-handling-test) (incf no-tests) (unread-char-test) (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Mon May 19 04:01:35 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.16 2008/05/18 20:34:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.17 2008/05/19 07:57:08 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -108,8 +108,9 @@ (unless (find real-name +name-map+ :test #'eq :key #'cdr) - ;; TODO... - (error "~S is not known to be a name for an external format." name)) + (error 'external-format-error + :format-control "~S is not known to be a name for an external format." + :format-arguments (list name))) real-name)) (defun ascii-name-p (name) From eweitz at common-lisp.net Mon May 19 19:47:41 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Mon, 19 May 2008 15:47:41 -0400 (EDT) Subject: [flexi-streams-cvs] r32 - branches/edi/test Message-ID: <20080519194741.1983612073@common-lisp.net> Author: eweitz Date: Mon May 19 15:47:40 2008 New Revision: 32 Modified: branches/edi/test/test.lisp Log: More tests Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Mon May 19 15:47:40 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.25 2008/05/19 07:57:12 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.27 2008/05/19 19:47:17 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,6 +29,13 @@ (in-package :flexi-streams-test) +(defconstant +buffer-size+ 8192 + "Size of buffers for COPY-STREAM* below.") + +(defvar *copy-function* nil + "Which function to use when copying from one stream to the other - +see for example COPY-FILE below.") + (defvar *this-file* (load-time-value (or #.*compile-file-pathname* *load-pathname*)) "The pathname of the file \(`test.lisp') where this variable was @@ -125,6 +132,17 @@ while line do (write-line line out)))) +(defun copy-stream* (stream-in external-format-in stream-out external-format-out) + "Like COPY-STREAM, but uses READ-SEQUENCE and WRITE-SEQUENCE instead +of READ-LINE and WRITE-LINE." + (let ((in (make-flexi-stream stream-in :external-format external-format-in)) + (out (make-flexi-stream stream-out :external-format external-format-out)) + (buffer (make-array +buffer-size+ :element-type 'flex::char*))) + (loop + (let ((position (read-sequence buffer in))) + (when (zerop position) (return)) + (write-sequence buffer out :end position))))) + (defun copy-file (path-in external-format-in path-out external-format-out direction-out direction-in) "Copies the contents of the file denoted by the pathname PATH-IN to the file denoted by the pathname PATH-OUT using flexi @@ -143,7 +161,7 @@ :direction direction-out :if-does-not-exist :create :if-exists :supersede) - (copy-stream in external-format-in out external-format-out)))) + (funcall *copy-function* in external-format-in out external-format-out)))) #+:lispworks (defun copy-file-lw (path-in external-format-in path-out external-format-out direction-out direction-in) @@ -162,7 +180,7 @@ :direction :output :if-does-not-exist :create :if-exists :supersede) - (copy-stream in external-format-in out external-format-out)))) + (funcall *copy-function* in external-format-in out external-format-out)))) (defun compare-files (path-in external-format-in path-out external-format-out) "Copies the contents of the file (in the `test') denoted by the @@ -179,7 +197,8 @@ (full-path-orig (merge-pathnames path-out *this-file*))) (dolist (direction-out '(:output :io)) (dolist (direction-in '(:input :io)) - (format *error-output* "Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (format *error-output* "Test \(using ~A) ~S ~S [~A]~% --> ~S [~A].~%" + *copy-function* path-in (flex::normalize-external-format external-format-in) direction-in (flex::normalize-external-format external-format-out) direction-out) (copy-file full-path-in external-format-in @@ -190,7 +209,8 @@ (t (format *error-output* " Test failed!!!~%"))) (terpri *error-output*) #+:lispworks - (format *error-output* "LW-Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (format *error-output* "LW-Test \(using ~A) ~S ~S [~A]~% --> ~S [~A].~%" + *copy-function* path-in (flex::normalize-external-format external-format-in) direction-in (flex::normalize-external-format external-format-out) direction-out) #+:lispworks @@ -331,6 +351,10 @@ (setq in (make-flexi-stream in :external-format external-format)) (read-line in))) +(defun read-flexi-line* (sequence external-format) + "Like READ-FLEXI-LINE but uses OCTETS-TO-STRING internally." + (octets-to-string sequence :external-format external-format)) + (defun error-handling-test () "Tests several possible errors and how they are handled." (with-test ("Handling of errors.") @@ -340,45 +364,71 @@ (let ((*substitution-char* #\?)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) + (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 128 200) :ascii))) ;; :WINDOWS-1253 doesn't have a characters with codes 170 and 210 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253))) + (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))) ;; not a valid UTF-8 sequence - (check (string= "??" (read-flexi-line `(#xe4 #xf6 #xfc) :utf8))) + (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))) + (check (string= "??" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 - (check (string= "??" (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8))) + (check (string= "??" (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) (let ((*substitution-char* nil)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "abc" (using-values (#\b #\c) (read-flexi-line `(,(char-code #\a) 128 200) :ascii)))) + (check (string= "abc" (using-values (#\b #\c) + (read-flexi-line* `#(,(char-code #\a) 128 200) :ascii)))) ;; :WINDOWS-1253 encoding doesn't have a characters with codes 170 and 210 (check (string= "axy" (using-values (#\x #\y) (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))) + (check (string= "axy" (using-values (#\x #\y) + (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))) ;; not a valid UTF-8 sequence - (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#xe4 #xf6 #xfc) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 - (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) ;; only one byte - (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16le)))) + (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le)))) + (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16le)))) ;; two bytes, but value of resulting word suggests that another word follows - (check (string= "R" (using-values (#\R) (read-flexi-line `(#x01 #xd8) :utf-16le)))) + (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le)))) + (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le)))) ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff - (check (string= "T" (using-values (#\T) (read-flexi-line `(#x01 #xd8 #xff #xdb) :utf-16le)))) + (check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le)))) + (check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le)))) ;; the same as for little endian above, but using inverse order of bytes in words - (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16be)))) - (check (string= "R" (using-values (#\R) (read-flexi-line `(#xd8 #x01) :utf-16be)))) - (check (string= "T" (using-values (#\T) (read-flexi-line `(#xd8 #x01 #xdb #xff) :utf-16be)))) + (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be)))) + (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be)))) + (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be)))) + (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16be)))) + (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be)))) + (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be)))) ;; the only case when error is signalled for UTF-32 is at end of file ;; in the middle of 4-byte sequence, both for big and little endian - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le)))) (check (string= "aY" (using-values (#\Y) (read-flexi-line `(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be)))) (check (string= "aY" (using-values (#\Y) - (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) + (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) (defun unread-char-test () "Tests whether UNREAD-CHAR behaves as expected." @@ -398,16 +448,17 @@ (defun run-tests () "Applies COMPARE-FILES to all test scenarios created with -CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, -and shows simple statistics at the end." +CREATE-TEST-COMBINATIONS, runs other tests like handling of encoding +errors, shows simple statistics at the end." (let* ((*test-success-counter* 0) (compare-files-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols))) - (no-tests (* 4 (length compare-files-args-list)))) + (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - (dolist (args compare-files-args-list) - (apply 'compare-files args)) + (dolist (*copy-function* '(copy-stream copy-stream*)) + (dolist (args compare-files-args-list) + (apply 'compare-files args))) (let ((string-test-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length string-test-args-list)) From eweitz at common-lisp.net Mon May 19 22:59:14 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Mon, 19 May 2008 18:59:14 -0400 (EDT) Subject: [flexi-streams-cvs] r33 - branches/edi Message-ID: <20080519225914.4FB127E0AD@common-lisp.net> Author: eweitz Date: Mon May 19 18:59:07 2008 New Revision: 33 Modified: branches/edi/decode.lisp branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd branches/edi/input.lisp branches/edi/mapping.lisp branches/edi/output.lisp branches/edi/stream.lisp branches/edi/strings.lisp branches/edi/util.lisp Log: Better read-sequence implementation Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.9 2008/05/19 07:57:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.12 2008/05/19 22:32:56 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -52,24 +52,23 @@ (defgeneric octets-to-char-code (format reader) (declare #.*standard-optimize-settings*) (:documentation "Converts a sequence of octets to a character code -\(which is returned) using the external format FORMAT. The sequence -is obtained by calling the function \(which must be a functional -object) READER with no arguments which should return one octet per -call. +\(which is returned, or NIL in case of EOF) using the external format +FORMAT. The sequence is obtained by calling the function \(which must +be a functional object) READER with no arguments which should return +one octet per call. In the case of EOF, READER should return NIL. -The special variables *CURRENT-STREAM* and *CURRENT-UNREADER* must be -bound correctly whenever this function is called.")) +The special variable *CURRENT-UNREADER* must be bound correctly +whenever this function is called.")) (defmethod octets-to-char-code ((format flexi-latin-1-format) reader) (declare #.*standard-optimize-settings*) (declare (function reader)) - (or (funcall reader) :eof)) + (funcall reader)) (defmethod octets-to-char-code ((format flexi-ascii-format) reader) (declare #.*standard-optimize-settings*) (declare (function reader)) - (let ((octet (or (funcall reader) - (return-from octets-to-char-code :eof)))) + (when-let (octet (funcall reader)) (declare (type octet octet)) (if (> octet 127) (recover-from-encoding-error format @@ -81,15 +80,14 @@ (declare (function reader)) (with-accessors ((decoding-table external-format-decoding-table)) format - (let* ((octet (or (funcall reader) - (return-from octets-to-char-code :eof))) - (char-code (aref (the (simple-array char-code-integer *) decoding-table) octet))) + (when-let (octet (funcall reader)) (declare (type octet octet)) - (if (or (null char-code) - (= (the char-code-integer char-code) 65533)) - (recover-from-encoding-error format - "No character which corresponds to octet #x~X." octet) - char-code)))) + (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) octet))) + (if (or (null char-code) + (= (the char-code-integer char-code) 65533)) + (recover-from-encoding-error format + "No character which corresponds to octet #x~X." octet) + char-code))))) (defmethod octets-to-char-code ((format flexi-utf-8-format) reader) (declare #.*standard-optimize-settings*) @@ -103,7 +101,7 @@ (return-from octets-to-char-code (recover-from-encoding-error format "End of data while in UTF-8 sequence."))) - (t (return-from octets-to-char-code :eof)))) + (t (return-from octets-to-char-code nil)))) (setq first-octet-seen t)))) (let ((octet (read-next-byte))) (declare (type octet octet)) @@ -150,11 +148,12 @@ (return-from octets-to-char-code (recover-from-encoding-error format "End of data while in UTF-16 sequence."))) - (t (return-from octets-to-char-code :eof)))) + (t (return-from octets-to-char-code nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () (+ (the octet (read-next-byte)) (ash (the octet (read-next-byte)) 8)))) + (declare (inline read-next-word)) (let ((word (read-next-word))) (declare (type (unsigned-byte 16) word)) (cond ((<= #xd800 word #xdfff) @@ -182,11 +181,12 @@ (return-from octets-to-char-code (recover-from-encoding-error format "End of data while in UTF-16 sequence."))) - (t (return-from octets-to-char-code :eof)))) + (t (return-from octets-to-char-code nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () (+ (ash (the octet (read-next-byte)) 8) (the octet (read-next-byte))))) + (declare (inline read-next-word)) (let ((word (read-next-word))) (declare (type (unsigned-byte 16) word)) (cond ((<= #xd800 word #xdfff) @@ -214,7 +214,7 @@ (return-from octets-to-char-code (recover-from-encoding-error format "End of data while in UTF-32 sequence."))) - (t (return-from octets-to-char-code :eof)))) + (t (return-from octets-to-char-code nil)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 0 to 24 by 8 for octet of-type octet = (read-next-byte) @@ -232,7 +232,7 @@ (return-from octets-to-char-code (recover-from-encoding-error format "End of data while in UTF-32 sequence."))) - (t (return-from octets-to-char-code :eof)))) + (t (return-from octets-to-char-code nil)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 24 downto 0 by 8 for octet of-type octet = (read-next-byte) @@ -243,7 +243,6 @@ (let ((char-code (call-next-method))) (case char-code (#.(char-code #\Return) #.(char-code #\Newline)) - (:eof :eof) (otherwise char-code)))) (defmethod octets-to-char-code ((format flexi-crlf-mixin) reader) @@ -255,11 +254,12 @@ (let ((next-char-code (call-next-method))) (case next-char-code (#.(char-code #\Linefeed) #.(char-code #\Newline)) - (:eof char-code) + ;; we saw a CR but no LF afterwards, but then the data + ;; ended, so we just return #\Return + ((nil) #.(char-code #\Return)) ;; if the character we peeked at wasn't a ;; linefeed character we unread its constituents (otherwise (funcall *current-unreader* (code-char next-char-code)) char-code)))) - (:eof :eof) - (t char-code)))) + (otherwise char-code)))) Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.8 2008/05/19 07:57:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.10 2008/05/19 22:32:56 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -35,10 +35,7 @@ using the external format FORMAT. The conversion is performed by calling the unary function \(which must be a functional object) WRITER repeatedly each octet. The return value of this function is -unspecified. - -The special variable *CURRENT-STREAM* must be bound correctly whenever -this function is called.")) +unspecified.")) (defmethod char-to-octets ((format flexi-latin-1-format) char writer) (declare #.*standard-optimize-settings*) @@ -104,6 +101,7 @@ (flet ((write-word (word) (funcall writer (ldb (byte 8 0) word)) (funcall writer (ldb (byte 8 8) word)))) + (declare (inline read-next-word)) (let ((char-code (char-code char))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) @@ -118,6 +116,7 @@ (flet ((write-word (word) (funcall writer (ldb (byte 8 8) word)) (funcall writer (ldb (byte 8 0) word)))) + (declare (inline read-next-word)) (let ((char-code (char-code char))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.18 2008/05/18 15:54:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.19 2008/05/19 11:20:11 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -390,8 +390,8 @@ (:documentation "Given an external format FORMAT, returns a factor which denotes the octets to characters ratio to expect when encoding/decoding. If the returned value is an integer, the factor is -assumed to be exact. If it is a float, the factor is supposed to be -based on heuristics and usually not exact. +assumed to be exact. If it is a \(double) float, the factor is +supposed to be based on heuristics and usually not exact. This factor is used in string.lisp.") (declare #.*standard-optimize-settings*)) @@ -407,7 +407,7 @@ ;; UTF-8 characters can be anything from one to six octets, but we ;; assume that the "overhead" is only about 5 percent - this ;; estimate is obviously very much dependant on the content - 1.05) + 1.05d0) (defmethod encoding-factor ((format flexi-utf-16-format)) (declare #.*standard-optimize-settings*) @@ -415,7 +415,7 @@ ;; code points above #x10000 map to four octets - we assume that we ;; usually don't see these characters but of course have to return a ;; float - 2.0) + 2.0d0) (defmethod encoding-factor ((format flexi-utf-32-format)) (declare #.*standard-optimize-settings*) @@ -427,4 +427,4 @@ ;; if the sequence #\Return #\Linefeed is the line-end marker, this ;; obviously makes encodings potentially longer and definitely makes ;; the estimate unexact - (* 1.02 (call-next-method))) \ No newline at end of file + (* 1.02d0 (call-next-method))) Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.62 2008/05/18 20:34:52 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.63 2008/05/18 23:13:59 edi Exp $ ;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. @@ -39,8 +39,8 @@ :serial t :components ((:file "packages") (:file "mapping") - (:file "ascii") - (:file "koi8-r") + (:file "ascii") + (:file "koi8-r") (:file "iso-8859") (:file "code-pages") (:file "specials") Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.60 2008/05/19 07:57:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.65 2008/05/19 22:54:10 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -34,7 +34,7 @@ "Reads one byte \(octet) from the underlying stream of FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty)." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; we're using S instead of STREAM here because of an ;; issue with SBCL: ;; @@ -58,7 +58,7 @@ "Reads one byte \(octet) from the underlying stream of FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty)." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) (octet-stack flexi-stream-octet-stack) @@ -85,7 +85,7 @@ FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty). Optimized version \(only needed for LispWorks) in case the underlying stream is binary." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) (octet-stack flexi-stream-octet-stack) @@ -104,7 +104,7 @@ (defmethod stream-clear-input ((flexi-input-stream flexi-input-stream)) "Calls the corresponding method for the underlying input stream and also clears the value of the OCTET-STACK slot." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; note that we don't reset the POSITION slot (with-accessors ((octet-stack flexi-stream-octet-stack) (stream flexi-stream-stream)) @@ -116,12 +116,14 @@ "Calls the corresponding method for the underlying input stream but first checks if \(old) input is available in the OCTET-STACK slot." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) (octet-stack flexi-stream-octet-stack) (stream flexi-stream-stream)) flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) (when (and bound (>= position bound)) (return-from stream-listen nil)) @@ -129,7 +131,7 @@ (defmethod stream-read-byte ((stream flexi-input-stream)) "Reads one byte \(octet) from the underlying stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; set LAST-CHAR-CODE slot to NIL because we can't UNREAD-CHAR after ;; this operation (with-accessors ((last-char-code flexi-stream-last-char-code) @@ -144,6 +146,7 @@ "Used internally to put a character CHAR which was already read back on the stream. Uses the OCTET-STACK slot and decrements the POSITION slot accordingly." + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format)) @@ -151,16 +154,16 @@ (let ((counter 0) octets-reversed) (declare (integer position) (fixnum counter)) - (char-to-octets external-format - char - (lambda (octet) - (incf counter) - (push octet octets-reversed))) - (decf position counter) - (setq octet-stack (nreconc octets-reversed octet-stack))))) + (flet ((writer (octet) + (incf counter) + (push octet octets-reversed))) + (declare (dynamic-extent (function writer))) + (char-to-octets external-format char #'writer) + (decf position counter) + (setq octet-stack (nreconc octets-reversed octet-stack)))))) (defmethod stream-read-char ((stream flexi-input-stream)) - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; note that we do nothing for the :LF EOL style because we assume ;; that #\Newline is the same as #\Linefeed in all Lisps which will ;; use this library @@ -171,67 +174,148 @@ ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after ;; this operation (setq last-octet nil) - (let* ((*current-unreader* (lambda (char) - (unread-char% char stream))) - (char-code (octets-to-char-code external-format - (lambda () - (read-byte* stream))))) - ;; remember this character and its char code for UNREAD-CHAR - (setq last-char-code char-code) - (or (code-char char-code) char-code)))) + (flet ((reader () + (read-byte* stream)) + (unreader (char) + (unread-char% char stream))) + (declare (dynamic-extent (function reader) (function unreader))) + (let* ((*current-unreader* #'unreader) + (char-code (or (octets-to-char-code external-format #'reader) + (return-from stream-read-char :eof)))) + ;; remember this character and its char code for UNREAD-CHAR + (setq last-char-code char-code) + (or (code-char char-code) char-code))))) (defmethod stream-read-char-no-hang ((stream flexi-input-stream)) "Reads one character if the underlying stream has at least one octet available." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; note that this may block for non-8-bit encodings - I think ;; there's no easy way to handle this correctly (and (stream-listen stream) (stream-read-char stream))) (defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key) - "Reads enough input from STREAM to fill SEQUENCE from START to END. -If SEQUENCE is an array which can store octets we use READ-SEQUENCE to -fill it in one fell swoop, otherwise we iterate using -STREAM-READ-CHAR." - (declare (optimize speed) - (type (integer 0 *) start end)) - (with-accessors ((last-char-code flexi-stream-last-char-code) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (external-format flexi-stream-external-format) (last-octet flexi-stream-last-octet) - (stream flexi-stream-stream) - (position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) + (last-char-code flexi-stream-last-char-code) + (element-type flexi-stream-element-type) + (stream flexi-stream-stream)) flexi-input-stream - (declare (integer position)) - (cond ((and (arrayp sequence) - (subtypep 'octet (array-element-type sequence))) - (setf last-char-code nil) - (let ((cursor start)) - (loop with stack = octet-stack - for continuep = (< cursor end) - for octet = (and continuep (pop stack)) - while octet - do (setf (aref sequence cursor) (the octet octet)) - (incf cursor)) - (let ((index - (read-sequence sequence stream :start cursor :end end))) - (incf position (- index start)) - (when (> index start) - (setq last-octet (aref sequence (1- index)))) - index))) - (t - (loop for index from start below end - for element = (stream-read-char flexi-input-stream) - until (eq element :eof) - do (setf (elt sequence index) element) - finally (return index)))))) + (let ((buffer (make-octet-buffer)) + (buffer-pos 0) + (buffer-end 0) + (index start)) + (declare (fixnum buffer-pos buffer-end index) + (type (array octet *) buffer)) + (flet ((compute-minimum () + "Computes the minimum amount of octets we can savely +read into the buffer without violating the stream's bound \(if there +is one) and without potentially reading more than we need." + ;; this has to be done conservatively, unfortunately - + ;; it is possible that we only fill the buffer in very + ;; small chunks once we're near END (but this is only + ;; relevant for multi-byte encodings, of course) + (let ((minimum (min (the fixnum (- end index)) +buffer-size+))) + (cond (bound (min minimum (- bound position))) + (t minimum)))) + (fill-buffer (end) + "Tries to fill the buffer from BUFFER-POS to END and +returns NIL if the buffer doesn't contain any new data." + (setq buffer-end (read-sequence buffer stream + :start buffer-pos + :end end)) + ;; BUFFER-POS is only greater than zero if the buffer + ;; already contains unread data from the octet stack + ;; (see below), so we test for ZEROP here and do /not/ + ;; compare with BUFFER-POS + (unless (zerop buffer-end) + (incf position buffer-end)))) + (let ((minimum (compute-minimum))) + (declare (fixnum minimum)) + ;; put data from octet stack into buffer if there is any + (loop + (when (>= buffer-pos minimum) + (return)) + (let ((next-octet (pop octet-stack))) + (cond (next-octet + (setf (aref buffer buffer-pos) (the octet next-octet)) + (incf buffer-pos)) + (t (return))))) + ;; fill buffer for the first time or return immediately if + ;; we don't succeed + (unless (fill-buffer minimum) + (return-from stream-read-sequence start))) + (setq buffer-pos 0) + (flet ((next-octet () + "Returns the next octet from the buffer and fills it +if it is exhausted. Returns NIL if there's no more data on the +stream." + (when (>= buffer-pos buffer-end) + (setq buffer-pos 0) + (unless (fill-buffer (compute-minimum)) + (return-from next-octet))) + (prog1 + (aref buffer buffer-pos) + (incf buffer-pos))) + (unreader (char) + (unread-char% char flexi-input-stream))) + (declare (dynamic-extent (function next-octet) (function unreader))) + (let ((*current-unreader* #'unreader)) + (macrolet ((iterate (octetp set-place) + "A very unhygienic macro to implement the +actual iteration through the sequence including housekeeping for the +flexi stream. If OCTETP is true, we put octets into the stream, +otherwise characters. SET-PLACE is the place \(using the index INDEX) +used to access the sequence." + `(flet ((leave () + "This is the function used to abort +the LOOP iteration below." + (when (> index start) + ;; if something was read at all, + ;; update LAST-OCTET and + ;; LAST-CHAR-CODE accordingly + (setq ,(if octetp 'last-char-code 'last-octet) + nil + ,(if octetp 'last-octet 'last-char-code) + ,(sublis '((index . (1- index))) set-place))) + (return-from stream-read-sequence index))) + (loop + (when (>= index end) (leave)) + (let ((next-thing ,(if octetp + '(next-octet) + '(octets-to-char-code external-format #'next-octet)))) + (unless next-thing (leave)) + (setf ,set-place ,(if octetp 'next-thing '(code-char next-thing))) + (incf index)))))) + (etypecase sequence + (string (iterate nil (char sequence index))) + (array + (let ((array-element-type (array-element-type sequence))) + (cond ((type-equal array-element-type 'octet) + (iterate t (aref (the (array octet *) sequence) index))) + ((or (subtypep array-element-type 'integer) + (type-equal element-type 'octet)) + (iterate t (aref sequence index))) + (t + (iterate nil (aref sequence index)))))) + (list + (cond ((type-equal element-type 'octet) + (iterate t (nth index sequence))) + (t + (iterate nil (nth index sequence))))))))))))) (defmethod stream-unread-char ((stream flexi-input-stream) char) "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM. Makes sure CHAR will only be unread if it was the last character read and if it was read with the same encoding that's currently being used by the stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((last-char-code flexi-stream-last-char-code)) stream (unless last-char-code @@ -249,7 +333,7 @@ "Similar to UNREAD-CHAR in that it `unreads' the last octet from STREAM. Note that you can only call UNREAD-BYTE after a corresponding READ-BYTE." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((last-octet flexi-stream-last-octet) (octet-stack flexi-stream-octet-stack) (position flexi-stream-position)) @@ -274,7 +358,7 @@ not 0 is returned, if PEEK-TYPE is an octet, the next octet which equals PEEK-TYPE is returned. EOF-ERROR-P and EOF-VALUE are interpreted as usual." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (loop for octet = (read-byte flexi-input-stream eof-error-p eof-value) until (cond ((null peek-type)) ((eql octet eof-value)) Modified: branches/edi/mapping.lisp ============================================================================== --- branches/edi/mapping.lisp (original) +++ branches/edi/mapping.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.15 2008/05/18 15:54:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.1 2008/05/19 09:09:15 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.50 2008/05/19 07:57:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.52 2008/05/19 22:32:56 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -36,14 +36,14 @@ #-:lispworks (defmethod write-byte* (byte (sink flexi-output-stream)) - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) sink (write-byte byte stream))) #+:lispworks (defmethod write-byte* (byte (sink flexi-output-stream)) - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all ;; bivalent streams in LispWorks (4.4.6) (with-accessors ((stream flexi-stream-stream)) @@ -57,22 +57,22 @@ (defmethod write-byte* (byte (sink flexi-binary-output-stream)) "Optimized version \(only needed for LispWorks) in case the underlying stream is binary." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) sink (write-byte byte stream))) (defmethod stream-write-char ((stream flexi-output-stream) char) - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((external-format flexi-stream-external-format)) stream - (char-to-octets external-format - char - (lambda (octet) - (write-byte* octet stream))))) + (flet ((writer (octet) + (write-byte* octet stream))) + (declare (dynamic-extent (function writer))) + (char-to-octets external-format char #'writer)))) (defmethod stream-write-char :after ((stream flexi-output-stream) char) - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; update the column unless we're in the middle of the line and ;; the current value is NIL (with-accessors ((column flexi-stream-column)) @@ -83,7 +83,7 @@ (defmethod stream-clear-output ((flexi-output-stream flexi-output-stream)) "Simply calls the corresponding method for the underlying output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) flexi-output-stream (clear-output stream))) @@ -91,7 +91,7 @@ (defmethod stream-finish-output ((flexi-output-stream flexi-output-stream)) "Simply calls the corresponding method for the underlying output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) flexi-output-stream (finish-output stream))) @@ -99,7 +99,7 @@ (defmethod stream-force-output ((flexi-output-stream flexi-output-stream)) "Simply calls the corresponding method for the underlying output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) flexi-output-stream (force-output stream))) @@ -107,14 +107,14 @@ (defmethod stream-line-column ((flexi-output-stream flexi-output-stream)) "Returns the column stored in the COLUMN slot of the FLEXI-OUTPUT-STREAM object STREAM." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((column flexi-stream-column)) flexi-output-stream column)) (defmethod stream-write-byte ((flexi-output-stream flexi-output-stream) byte) "Writes a byte \(octet) to the underlying stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((column flexi-stream-column)) flexi-output-stream ;; set column to NIL because we don't know how to handle binary @@ -125,7 +125,7 @@ #+:allegro (defmethod stream-terpri ((stream flexi-output-stream)) "Writes a #\Newline character to the underlying stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline)) @@ -135,8 +135,8 @@ characters. Characters are output according to the current encoding \(external format) of the FLEXI-OUTPUT-STREAM object STREAM." - (declare (optimize speed) - (type (integer 0 *) start end)) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) (with-accessors ((stream flexi-stream-stream) (column flexi-stream-column)) flexi-output-stream @@ -158,8 +158,8 @@ "Optimized method for the cases where SEQUENCE is a string. Fills an internal buffer and uses repeated calls to WRITE-SEQUENCE to write to the underlying stream." - (declare (optimize speed) - (type (integer 0 *) start end)) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) ;; don't use this optimized method for bivalent character streams on ;; LispWorks, as it currently gets confused by the fill pointer #+:lispworks @@ -194,4 +194,5 @@ (defmethod stream-write-string ((stream flexi-output-stream) string &optional (start 0) (end (length string))) "Simply hands over to the optimized method for STREAM-WRITE-SEQUENCE." + (declare #.*standard-optimize-settings*) (stream-write-sequence stream string start (or end (length string)))) Modified: branches/edi/stream.lisp ============================================================================== --- branches/edi/stream.lisp (original) +++ branches/edi/stream.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.60 2008/05/18 23:14:00 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.61 2008/05/19 22:32:56 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -74,7 +74,7 @@ "Checks whether the new value makes sense before it is set." (declare #.*standard-optimize-settings*) (unless (or (subtypep new-value 'character) - (subtypep new-value 'octet)) + (type-equal new-value 'octet)) (error 'flexi-stream-element-type-error :element-type new-value :stream flexi-stream))) Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.14 2008/05/19 07:57:08 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.19 2008/05/19 22:32:56 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -39,32 +39,52 @@ (setq external-format (maybe-convert-external-format external-format)) (let ((factor (encoding-factor external-format)) (length (- end start))) + (declare (fixnum length)) (etypecase factor - (float - (let ((octets (make-array (round (* factor length)) - :element-type 'octet - :fill-pointer 0 - :adjustable t))) - (flet ((writer (octet) - ;; TODO: do this manually - (vector-push-extend octet octets))) - (loop for i of-type fixnum from start below end - do (char-to-octets external-format - (char string i) - #'writer))) - octets)) (integer (let ((octets (make-array (* factor length) :element-type 'octet)) (j 0)) (declare (fixnum j)) (flet ((writer (octet) + (declare (octet octet)) (setf (aref (the (array octet *) octets) j) octet) (incf j))) + (declare (dynamic-extent (function writer))) (loop for i of-type fixnum from start below end do (char-to-octets external-format (char string i) #'writer))) - octets))))) + octets)) + (double-float + ;; this is a bit clunky but hopefully a bit more efficient than + ;; using VECTOR-PUSH-EXTEND + (let* ((octets-length (ceiling (* factor length))) + (octets (make-array octets-length + :element-type 'octet + :fill-pointer t + :adjustable t)) + (i start) + (j 0)) + (declare (fixnum i j octets-length) + (double-float factor)) + (flet ((writer (octet) + (declare (octet octet)) + (when (>= j octets-length) + (setq factor (* factor 2.0d0)) + (incf octets-length (the fixnum (ceiling (* factor (- end i))))) + (adjust-array octets octets-length :fill-pointer t)) + (setf (aref (the (array octet *) octets) j) octet) + (incf j))) + (declare (dynamic-extent (function writer))) + (loop + (when (>= i end) + (return)) + (char-to-octets external-format + (char string i) + #'writer) + (incf i)) + (setf (fill-pointer octets) j) + octets)))))) (defun octets-to-string (sequence &key (external-format (make-external-format :latin1)) @@ -80,51 +100,61 @@ (reader (etypecase sequence ((array octet *) (lambda () - (when (>= i end) - ;; TODO... -> NIL? - (error "End of data.")) - (prog1 - (aref (the (array octet *) sequence) i) - (incf i)))) + (and (< i end) + (prog1 + (aref (the (array octet *) sequence) i) + (incf i))))) ((array * *) (lambda () - (when (>= i end) - ;; TODO... - (error "End of data.")) - (prog1 - (aref sequence i) - (incf i)))) + (and (< i end) + (prog1 + (aref sequence i) + (incf i))))) (list (lambda () - (when (>= i end) - ;; TODO... - (error "End of data.")) - (prog1 - (nth i sequence) - (incf i)))))) - (*current-unreader* (flet ((pseudo-writer (octet) - (declare (ignore octet)) - (decf i))) - (lambda (char) - (char-to-octets external-format char #'pseudo-writer))))) - (declare (fixnum i)) - (flet ((next-char () - (code-char (octets-to-char-code external-format reader)))) - (etypecase factor - (float - (let ((string (make-array (round (/ length factor)) - :element-type 'char* - :fill-pointer 0 - :adjustable t))) - (loop while (< i end) - ;; TODO: do this manually - do (vector-push-extend (next-char) string) - finally (return string)))) - (integer - (let* ((string-length (/ length factor)) - (string (make-array string-length - :element-type 'char*))) - (declare (fixnum string-length)) - (loop for j of-type fixnum from 0 below string-length - do (setf (schar string j) (next-char)) - finally (return string)))))))) + (and (< i end) + (prog1 + (nth i sequence) + (incf i)))))))) + (declare (fixnum i length) (dynamic-extent reader)) + (labels ((pseudo-writer (octet) + (declare (ignore octet)) + (decf i)) + (unreader (char) + (char-to-octets external-format char #'pseudo-writer))) + (declare (dynamic-extent (function pseudo-writer) (function unreader))) + (let ((*current-unreader* #'unreader)) + (flet ((next-char () + (code-char (octets-to-char-code external-format reader)))) + (declare (inline next-char)) + (etypecase factor + (integer + (let* ((string-length (/ length factor)) + (string (make-array string-length + :element-type 'char*))) + (declare (fixnum string-length)) + (loop for j of-type fixnum from 0 below string-length + do (setf (schar string j) (next-char)) + finally (return string)))) + (double-float + ;; this is a bit clunky but hopefully a bit more efficient than + ;; using VECTOR-PUSH-EXTEND + (let* ((string-length (ceiling length (the double-float factor))) + (string (make-array string-length + :element-type 'char* + :fill-pointer t + :adjustable t)) + (j 0)) + (declare (fixnum j string-length) + (double-float factor)) + (loop + (when (>= i end) + (return)) + (when (>= j string-length) + (setq factor (/ factor 2.0d0)) + (incf string-length (the fixnum (ceiling (- end i) factor))) + (adjust-array string string-length :fill-pointer t)) + (setf (char string j) (next-char)) + (incf j)) + (setf (fill-pointer string) j) + string)))))))) Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Mon May 19 18:59:07 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.17 2008/05/19 07:57:08 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.19 2008/05/19 22:32:57 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -31,7 +31,14 @@ #+:lispworks (eval-when (:compile-toplevel :load-toplevel :execute) - (import 'lw:with-unique-names)) + (import '(lw:with-unique-names lw:when-let))) + +#-:lispworks +(defmacro when-let ((var form) &body body) + "Evaluates FORM and binds VAR to the result, then executes BODY +if VAR has a true value." + `(let ((,var ,form)) + (when ,var , at body))) #-:lispworks (defmacro with-unique-names ((&rest bindings) &body body) @@ -167,4 +174,14 @@ ;; slots `(with-slots ,(mapcar #'car slot-entries) ,instance - , at body)) \ No newline at end of file + , at body)) + +(defun make-octet-buffer () + "Creates and returns a fresh buffer \(a specialized array) of size ++BUFFER-SIZE+ to hold octets." + (make-array +buffer-size+ :element-type 'octet)) + +(defun type-equal (type1 type2) + "Whether TYPE1 and TYPE2 denote the same type." + (and (subtypep type1 type2) + (subtypep type2 type1))) \ No newline at end of file From eweitz at common-lisp.net Mon May 19 23:55:13 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Mon, 19 May 2008 19:55:13 -0400 (EDT) Subject: [flexi-streams-cvs] r34 - branches/edi/test Message-ID: <20080519235513.982065007A@common-lisp.net> Author: eweitz Date: Mon May 19 19:55:12 2008 New Revision: 34 Modified: branches/edi/test/test.lisp Log: More tests Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Mon May 19 19:55:12 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.27 2008/05/19 19:47:17 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.28 2008/05/19 23:54:55 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -323,6 +323,64 @@ (check (string= (old-octets-to-string octets-list :external-format external-format) string)) (check (equalp (old-string-to-octets string :external-format external-format) octets-vector))))) +(defun sequence-equal (seq1 seq2) + "Whether the two sequences have the same elements." + (and (= (length seq1) (length seq2)) + (loop for i below (length seq1) + always (eql (elt seq1 i) (elt seq2 i))))) + +(defun read-sequence-test (pathspec external-format) + "Several tests to confirm that READ-SEQUENCE behaves as expected." + (with-test ((format nil "READ-SEQUENCE tests with format ~S." + (flex::normalize-external-format external-format))) + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (file-string (file-as-string full-path external-format)) + (string-length (length file-string)) + (octets (file-as-octet-vector full-path)) + (octet-length (length octets))) + (when (external-format-equal external-format (make-external-format :utf8)) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (list (make-list octet-length))) + (setf (flexi-stream-element-type in) 'octet) + (read-sequence list in) + (check (sequence-equal list octets)))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (third (floor octet-length 3)) + (half (floor octet-length 2)) + (vector (make-array half :element-type 'octet))) + (check (sequence-equal (loop repeat third + collect (read-byte in)) + (subseq octets 0 third))) + (read-sequence vector in) + (check (sequence-equal vector (subseq octets third (+ third half))))))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (string (make-string (- string-length 10) :element-type 'flex::char*))) + (setf (flexi-stream-element-type in) 'octet) + (check (sequence-equal (loop repeat 10 + collect (read-char in)) + (subseq file-string 0 10))) + (read-sequence string in) + (check (sequence-equal string (subseq file-string 10))))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (list (make-list (- string-length 100)))) + (check (sequence-equal (loop repeat 100 + collect (read-char in)) + (subseq file-string 0 100))) + (read-sequence list in) + (check (sequence-equal list (subseq file-string 100))))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (array (make-array (- string-length 50)))) + (check (sequence-equal (loop repeat 50 + collect (read-char in)) + (subseq file-string 0 50))) + (read-sequence array in) + (check (sequence-equal array (subseq file-string 50)))))))) + (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE restart each time a EXTERNAL-FORMAT-ENCODING-ERROR is signalled. @@ -456,17 +514,26 @@ (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) + #+(or) (dolist (*copy-function* '(copy-stream copy-stream*)) (dolist (args compare-files-args-list) (apply 'compare-files args))) + #+(or) (let ((string-test-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length string-test-args-list)) (dolist (args string-test-args-list) (apply 'string-test args))) + (let ((read-sequence-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length read-sequence-test-args-list)) + (dolist (args read-sequence-test-args-list) + (apply 'read-sequence-test args))) (incf no-tests) + #+(or) (error-handling-test) (incf no-tests) + #+(or) (unread-char-test) (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" (= no-tests *test-success-counter*) (- no-tests *test-success-counter*) no-tests))) From eweitz at common-lisp.net Tue May 20 08:03:33 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 04:03:33 -0400 (EDT) Subject: [flexi-streams-cvs] r35 - in branches/edi: . doc test Message-ID: <20080520080333.9D264662D8@common-lisp.net> Author: eweitz Date: Tue May 20 04:03:28 2008 New Revision: 35 Modified: branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/input.lisp branches/edi/output.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Checkpoint Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.12 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.14 2008/05/20 07:51:09 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -69,8 +69,7 @@ (declare #.*standard-optimize-settings*) (declare (function reader)) (when-let (octet (funcall reader)) - (declare (type octet octet)) - (if (> octet 127) + (if (> (the octet octet) 127) (recover-from-encoding-error format "No character which corresponds to octet #x~X." octet) octet))) @@ -81,8 +80,8 @@ (with-accessors ((decoding-table external-format-decoding-table)) format (when-let (octet (funcall reader)) - (declare (type octet octet)) - (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) octet))) + (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) + (the octet octet)))) (if (or (null char-code) (= (the char-code-integer char-code) 65533)) (recover-from-encoding-error format Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Tue May 20 04:03:28 2008 @@ -196,7 +196,9 @@

For more examples see the source code -of CL-RFC2047, +Drakma, Chunga, or CL-WBXML. @@ -970,29 +972,25 @@


Converts the Lisp string string from start to end to an array of -octets corresponding to the external format external-format. The defaults for +octets corresponding to the external format designated by external format external-format. The defaults for start and end -are 0 and the length of the -string. The default for external-format is the -value of -evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) +are 0 and the length of the string. The default +for external-format is :LATIN1.


[Function]
octets-to-string sequence &key external-format start end => string -


Converts the Lisp sequence sequence -of octets from start -to end to string using -the external +

Converts the Lisp +sequence sequence of
octets +from start to end to string +using the external format designated +by external format external-format. The defaults for start and end are 0 and the length of the sequence. The default -for external-format is the value of -evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) +for external-format is :LATIN1.

 

File positions

@@ -1032,7 +1030,7 @@ numerous patches and additions.

-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.102 2008/05/19 07:57:10 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.104 2008/05/20 06:55:21 edi Exp $

BACK TO MY HOMEPAGE Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.10 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.11 2008/05/20 08:02:49 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -101,7 +101,7 @@ (flet ((write-word (word) (funcall writer (ldb (byte 8 0) word)) (funcall writer (ldb (byte 8 8) word)))) - (declare (inline read-next-word)) + (declare (inline write-word)) (let ((char-code (char-code char))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) @@ -116,7 +116,7 @@ (flet ((write-word (word) (funcall writer (ldb (byte 8 8) word)) (funcall writer (ldb (byte 8 0) word)))) - (declare (inline read-next-word)) + (declare (inline write-word)) (let ((char-code (char-code char))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.19 2008/05/19 11:20:11 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.20 2008/05/20 08:02:50 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -219,7 +219,7 @@ (declare #.*standard-optimize-settings*) (declare (ignore initargs)) (with-accessors ((encoding-hash external-format-encoding-hash) - (decoding-table flexi-stream-decoding-table) + (decoding-table external-format-decoding-table) (name external-format-name) (id external-format-id)) external-format Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.65 2008/05/19 22:54:10 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.66 2008/05/20 00:37:27 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -196,6 +196,10 @@ (stream-read-char stream))) (defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key) + "An optimized version which uses a buffer underneath. The function +can deliver characters as well as octets and it decides what to do +based on the element type of the sequence \(which takes precedence) +and the element type of the stream." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (with-accessors ((position flexi-stream-position) @@ -207,21 +211,31 @@ (element-type flexi-stream-element-type) (stream flexi-stream-stream)) flexi-input-stream - (let ((buffer (make-octet-buffer)) - (buffer-pos 0) - (buffer-end 0) - (index start)) - (declare (fixnum buffer-pos buffer-end index) + (let* ((buffer (make-octet-buffer)) + (buffer-pos 0) + (buffer-end 0) + (index start) + (want-chars-p (or (stringp sequence) + (and (vectorp sequence) + (not (subtypep (array-element-type sequence) 'integer))) + (type-equal element-type 'octet))) + (factor (if want-chars-p (encoding-factor external-format) 1)) + (integer-factor (floor factor)) + ;; it's an interesting question whether it makes sense + ;; performance-wise to make RESERVE significantly bigger + ;; (and thus put potentially a lot more octets into + ;; OCTET-STACK), especially for UTF-8 + (reserve (if (floatp factor) (* 2 integer-factor) 0))) + (declare (fixnum buffer-pos buffer-end index integer-factor reserve) (type (array octet *) buffer)) (flet ((compute-minimum () "Computes the minimum amount of octets we can savely read into the buffer without violating the stream's bound \(if there -is one) and without potentially reading more than we need." - ;; this has to be done conservatively, unfortunately - - ;; it is possible that we only fill the buffer in very - ;; small chunks once we're near END (but this is only - ;; relevant for multi-byte encodings, of course) - (let ((minimum (min (the fixnum (- end index)) +buffer-size+))) +is one) and without potentially reading much more than we need." + (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor + (the fixnum (- end index)))) + reserve)) + +buffer-size+))) (cond (bound (min minimum (- bound position))) (t minimum)))) (fill-buffer (end) @@ -286,7 +300,16 @@ ,(sublis '((index . (1- index))) set-place))) (return-from stream-read-sequence index))) (loop - (when (>= index end) (leave)) + (when (>= index end) + ;; check if there are octets in the + ;; buffer we didn't use - see + ;; COMPUTE-MINIMUM above + (loop + (when (>= buffer-pos buffer-end) + (return)) + (decf buffer-end) + (push (aref buffer buffer-end) octet-stack)) + (leave)) (let ((next-thing ,(if octetp '(next-octet) '(octets-to-char-code external-format #'next-octet)))) Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.52 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.54 2008/05/20 06:15:44 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -129,6 +129,19 @@ ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline)) +;; TODO: file-position -> octet-stack (and others?) + +;; other way around: function "resync" trying to use File-position? + +;; "resync" independent function to empty octet-stack? +;; (decrement-file-position) => success +;; (resync ... &optional how-much (length octet-stack)) => success + +;; in stream-read-sequence: if file stream, read more into buffer, +;; then resync with file-position? + +;; TODO: interaction between read and write + (defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) "Writes all elements of the sequence SEQUENCE from START to END to the underlying stream. The elements can be either octets or Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.19 2008/05/19 22:32:56 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.20 2008/05/20 06:15:38 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -30,10 +30,11 @@ (in-package :flexi-streams) (defun string-to-octets (string &key - (external-format (make-external-format :latin1)) + (external-format :latin1) (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of -octets corresponding to the external format EXTERNAL-FORMAT." +octets corresponding to the external format designated by +EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) @@ -87,10 +88,10 @@ octets)))))) (defun octets-to-string (sequence &key - (external-format (make-external-format :latin1)) + (external-format :latin1) (start 0) (end (length sequence))) "Converts the Lisp sequence SEQUENCE of octets from START to END to -string using the external format EXTERNAL-FORMAT." +string using the external format designated by EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Tue May 20 04:03:28 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.28 2008/05/19 23:54:55 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.29 2008/05/20 00:37:30 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -367,19 +367,25 @@ (with-open-file (in full-path :element-type 'octet) (let* ((in (make-flexi-stream in :external-format external-format)) (list (make-list (- string-length 100)))) - (check (sequence-equal (loop repeat 100 + (check (sequence-equal (loop repeat 50 collect (read-char in)) - (subseq file-string 0 100))) + (subseq file-string 0 50))) (read-sequence list in) - (check (sequence-equal list (subseq file-string 100))))) + (check (sequence-equal list (subseq file-string 50 (- string-length 50)))) + (check (sequence-equal (loop repeat 50 + collect (read-char in)) + (subseq file-string (- string-length 50)))))) (with-open-file (in full-path :element-type 'octet) (let* ((in (make-flexi-stream in :external-format external-format)) (array (make-array (- string-length 50)))) - (check (sequence-equal (loop repeat 50 + (check (sequence-equal (loop repeat 25 collect (read-char in)) - (subseq file-string 0 50))) + (subseq file-string 0 25))) (read-sequence array in) - (check (sequence-equal array (subseq file-string 50)))))))) + (check (sequence-equal array (subseq file-string 25 (- string-length 25)))) + (check (sequence-equal (loop repeat 25 + collect (read-char in)) + (subseq file-string (- string-length 25))))))))) (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE @@ -514,11 +520,9 @@ (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - #+(or) (dolist (*copy-function* '(copy-stream copy-stream*)) (dolist (args compare-files-args-list) (apply 'compare-files args))) - #+(or) (let ((string-test-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length string-test-args-list)) @@ -530,10 +534,8 @@ (dolist (args read-sequence-test-args-list) (apply 'read-sequence-test args))) (incf no-tests) - #+(or) (error-handling-test) (incf no-tests) - #+(or) (unread-char-test) (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" (= no-tests *test-success-counter*) (- no-tests *test-success-counter*) no-tests))) From eweitz at common-lisp.net Tue May 20 12:55:02 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 08:55:02 -0400 (EDT) Subject: [flexi-streams-cvs] r36 - in branches/edi: . test Message-ID: <20080520125502.CDD8A1A0A2@common-lisp.net> Author: eweitz Date: Tue May 20 08:55:00 2008 New Revision: 36 Modified: branches/edi/decode.lisp branches/edi/input.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Checkpoint Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Tue May 20 08:55:00 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.14 2008/05/20 07:51:09 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.15 2008/05/20 09:37:43 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -239,6 +239,7 @@ (defmethod octets-to-char-code ((format flexi-cr-mixin) reader) (declare #.*standard-optimize-settings*) + (declare (ignore reader)) (let ((char-code (call-next-method))) (case char-code (#.(char-code #\Return) #.(char-code #\Newline)) @@ -247,6 +248,7 @@ (defmethod octets-to-char-code ((format flexi-crlf-mixin) reader) (declare #.*standard-optimize-settings*) (declare (function *current-unreader*)) + (declare (ignore reader)) (let ((char-code (call-next-method))) (case char-code (#.(char-code #\Return) Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Tue May 20 08:55:00 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.66 2008/05/20 00:37:27 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.67 2008/05/20 09:38:07 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -199,7 +199,9 @@ "An optimized version which uses a buffer underneath. The function can deliver characters as well as octets and it decides what to do based on the element type of the sequence \(which takes precedence) -and the element type of the stream." +and the element type of the stream. What you'll really get might also +depend on your Lisp. Some of the implementations are more picky than +others - see for example FLEXI-STREAMS-TEST:READ-SEQUENCE-TEST." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (with-accessors ((position flexi-stream-position) Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Tue May 20 08:55:00 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.20 2008/05/20 06:15:38 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.21 2008/05/20 09:04:23 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -130,7 +130,7 @@ (declare (inline next-char)) (etypecase factor (integer - (let* ((string-length (/ length factor)) + (let* ((string-length (ceiling length factor)) (string (make-array string-length :element-type 'char*))) (declare (fixnum string-length)) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Tue May 20 08:55:00 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.29 2008/05/20 00:37:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.30 2008/05/20 09:37:30 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -339,11 +339,17 @@ (octets (file-as-octet-vector full-path)) (octet-length (length octets))) (when (external-format-equal external-format (make-external-format :utf8)) + #-:openmcl + ;; FLEXI-STREAMS puts integers into the list, but OpenMCL + ;; thinks they are characters... (with-open-file (in full-path :element-type 'octet) (let* ((in (make-flexi-stream in :external-format external-format)) (list (make-list octet-length))) (setf (flexi-stream-element-type in) 'octet) + #-:clisp (read-sequence list in) + #+:clisp + (ext:read-byte-sequence list in) (check (sequence-equal list octets)))) (with-open-file (in full-path :element-type 'octet) (let* ((in (make-flexi-stream in :external-format external-format)) @@ -370,7 +376,10 @@ (check (sequence-equal (loop repeat 50 collect (read-char in)) (subseq file-string 0 50))) + #-:clisp (read-sequence list in) + #+:clisp + (ext:read-char-sequence list in) (check (sequence-equal list (subseq file-string 50 (- string-length 50)))) (check (sequence-equal (loop repeat 50 collect (read-char in)) @@ -381,7 +390,10 @@ (check (sequence-equal (loop repeat 25 collect (read-char in)) (subseq file-string 0 25))) + #-:clisp (read-sequence array in) + #+:clisp + (ext:read-char-sequence array in) (check (sequence-equal array (subseq file-string 25 (- string-length 25)))) (check (sequence-equal (loop repeat 25 collect (read-char in)) @@ -500,11 +512,11 @@ (flet ((test-one-file (file-name external-format) (with-open-file (in (merge-pathnames file-name *this-file*) :element-type 'flex:octet) - (setq in (make-flexi-stream in :external-format external-format)) - (loop repeat 300 - for char = (read-char in) - do (unread-char char in) - (check (char= (read-char in) char)))))) + (let ((in (make-flexi-stream in :external-format external-format))) + (loop repeat 300 + for char = (read-char in) + do (unread-char char in) + (check (char= (read-char in) char))))))) (loop for (file-name symbols) in *test-files* do (loop for symbol in symbols do (loop for (file-name . external-format) in (create-file-variants file-name symbol) @@ -520,9 +532,11 @@ (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) + #+(or) (dolist (*copy-function* '(copy-stream copy-stream*)) (dolist (args compare-files-args-list) (apply 'compare-files args))) + #+(or) (let ((string-test-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length string-test-args-list)) From eweitz at common-lisp.net Tue May 20 12:56:10 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 08:56:10 -0400 (EDT) Subject: [flexi-streams-cvs] r37 - branches/edi/test Message-ID: <20080520125610.E1C571B000@common-lisp.net> Author: eweitz Date: Tue May 20 08:56:10 2008 New Revision: 37 Modified: branches/edi/test/test.lisp Log: Remove debugging stuff Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Tue May 20 08:56:10 2008 @@ -532,11 +532,9 @@ (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - #+(or) (dolist (*copy-function* '(copy-stream copy-stream*)) (dolist (args compare-files-args-list) (apply 'compare-files args))) - #+(or) (let ((string-test-args-list (loop for (file-name symbols) in *test-files* nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length string-test-args-list)) From eweitz at common-lisp.net Tue May 20 23:45:26 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 19:45:26 -0400 (EDT) Subject: [flexi-streams-cvs] r38 - in branches/edi: . doc test Message-ID: <20080520234526.3162E6D07D@common-lisp.net> Author: eweitz Date: Tue May 20 19:45:25 2008 New Revision: 38 Modified: branches/edi/conditions.lisp branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd branches/edi/input.lisp branches/edi/mapping.lisp branches/edi/output.lisp branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/test/test.lisp branches/edi/util.lisp Log: IO stream cleanup Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.5 2008/05/19 07:57:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.6 2008/05/20 23:44:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -48,6 +48,15 @@ (:documentation "Errors of this type are signalled if the flexi stream has a wrong element type.")) +(define-condition flexi-stream-out-of-sync-error (flexi-stream-error) + () + (:report (lambda (condition stream) + (format stream "Stream out of sync from previous +lookahead, couldn't rewind."))) + (:documentation "This can happen if you're trying to write to an IO +stream which had prior to that `looked ahead' while reading and now +can't `rewind' to the octet where you /should/ be.")) + (define-condition in-memory-stream-error (stream-error) () (:documentation "Superclass for all errors related to Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.15 2008/05/20 09:37:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.16 2008/05/20 23:01:50 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -61,12 +61,12 @@ whenever this function is called.")) (defmethod octets-to-char-code ((format flexi-latin-1-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (funcall reader)) (defmethod octets-to-char-code ((format flexi-ascii-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (when-let (octet (funcall reader)) (if (> (the octet octet) 127) @@ -75,7 +75,7 @@ octet))) (defmethod octets-to-char-code ((format flexi-8-bit-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (with-accessors ((decoding-table external-format-decoding-table)) format @@ -89,7 +89,7 @@ char-code))))) (defmethod octets-to-char-code ((format flexi-utf-8-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (let (first-octet-seen) (declare (boolean first-octet-seen)) @@ -105,7 +105,7 @@ (let ((octet (read-next-byte))) (declare (type octet octet)) (multiple-value-bind (start count) - (cond ((zerop (logand octet #b10000000)) + (cond ((not (logbitp 7 octet)) (values octet 0)) ((= #b11000000 (logand octet #b11100000)) (values (logand octet #b00011111) 1)) @@ -124,8 +124,8 @@ (declare (fixnum count)) ;; note that we currently don't check for "overlong" ;; sequences or other illegal values - (loop for result of-type (unsigned-byte 32) - = start then (+ (ash (the (unsigned-byte 26) result) 6) + (loop for result of-type code-point + = start then (+ (ash result 6) (logand octet #b111111)) repeat count for octet of-type octet = (read-next-byte) @@ -136,7 +136,7 @@ finally (return result))))))) (defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (let (first-octet-seen) (declare (boolean first-octet-seen)) @@ -169,7 +169,7 @@ (t word))))))) (defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (let (first-octet-seen) (declare (boolean first-octet-seen)) @@ -202,7 +202,7 @@ (t word))))))) (defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (let (first-octet-seen) (declare (boolean first-octet-seen)) @@ -220,7 +220,7 @@ sum (ash octet count))))) (defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function reader)) (let (first-octet-seen) (declare (boolean first-octet-seen)) @@ -238,7 +238,7 @@ sum (ash octet count))))) (defmethod octets-to-char-code ((format flexi-cr-mixin) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (ignore reader)) (let ((char-code (call-next-method))) (case char-code @@ -246,7 +246,7 @@ (otherwise char-code)))) (defmethod octets-to-char-code ((format flexi-crlf-mixin) reader) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (function *current-unreader*)) (declare (ignore reader)) (let ((char-code (call-next-method))) Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Tue May 20 19:45:25 2008 @@ -89,6 +89,7 @@

  • *substitution-char*
  • octet
  • flexi-stream-error +
  • flexi-stream-out-of-sync-error
  • flexi-stream-element-type-error
  • flexi-stream-element-type-error-element-type @@ -804,6 +805,15 @@

  • [Condition] +
    flexi-stream-out-of-sync-error + +


    This can happen if you're trying to write to +an IO stream which had prior to that +"looked ahead" while reading and now can't "rewind" to the octet where +you should be. +
    + +


    [Condition]
    flexi-stream-element-type-error


    @@ -1030,7 +1040,7 @@ numerous patches and additions.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.104 2008/05/20 06:55:21 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.105 2008/05/20 23:44:47 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.11 2008/05/20 08:02:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -38,7 +38,7 @@ unspecified.")) (defmethod char-to-octets ((format flexi-latin-1-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (let ((octet (char-code char))) (when (> octet 255) @@ -46,7 +46,7 @@ (funcall writer octet))) (defmethod char-to-octets ((format flexi-ascii-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (let ((octet (char-code char))) (when (> octet 127) @@ -54,7 +54,7 @@ (funcall writer octet))) (defmethod char-to-octets ((format flexi-8-bit-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (with-accessors ((encoding-hash external-format-encoding-hash)) format @@ -64,7 +64,7 @@ (funcall writer octet)))) (defmethod char-to-octets ((format flexi-utf-8-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (let ((char-code (char-code char))) (tagbody @@ -96,7 +96,7 @@ zero))) (defmethod char-to-octets ((format flexi-utf-16-le-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (flet ((write-word (word) (funcall writer (ldb (byte 8 0) word)) @@ -111,7 +111,7 @@ (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) (defmethod char-to-octets ((format flexi-utf-16-be-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (flet ((write-word (word) (funcall writer (ldb (byte 8 8) word)) @@ -126,7 +126,7 @@ (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) (defmethod char-to-octets ((format flexi-utf-32-le-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (let ((char-code (char-code char))) (funcall writer (ldb (byte 8 0) char-code)) @@ -135,7 +135,7 @@ (funcall writer (ldb (byte 8 24) char-code)))) (defmethod char-to-octets ((format flexi-utf-32-be-format) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char) (function writer)) (let ((char-code (char-code char))) (funcall writer (ldb (byte 8 24) char-code)) @@ -144,14 +144,14 @@ (funcall writer (ldb (byte 8 0) char-code)))) (defmethod char-to-octets ((format flexi-cr-mixin) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char)) (if (char= char #\Newline) (call-next-method format #\Return writer) (call-next-method))) (defmethod char-to-octets ((format flexi-crlf-mixin) char writer) - (declare #.*standard-optimize-settings*) + (declare #.*fixnum-optimize-settings*) (declare (character char)) (cond ((char= char #\Newline) (call-next-method format #\Return writer) Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.20 2008/05/20 08:02:50 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -284,7 +284,7 @@ 'flexi-crlf-utf-32-be-format)))))))) (defun make-external-format% (name &key (little-endian *default-little-endian*) - id eol-style) + id eol-style) "Used internally by MAKE-EXTERNAL-FORMAT to default some of the keywords arguments and to determine the right subclass of EXTERNAL-FORMAT." @@ -297,7 +297,9 @@ (list :eol-style (or eol-style *default-eol-style*))) ((code-page-name-p real-name) (list :id (or (known-code-page-id-p id) - (error "Unknown code page ID ~S" id)) + (error 'external-format-error + :format-control "Unknown code page ID ~S" + :format-arguments (list id))) ;; default EOL style for Windows code pages is :CRLF :eol-style (or eol-style :crlf))) (t (list :eol-style (or eol-style *default-eol-style*) Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.63 2008/05/18 23:13:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.64 2008/05/20 23:01:51 edi Exp $ ;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. @@ -54,6 +54,7 @@ #+:lispworks (:file "lw-binary-stream") (:file "output") (:file "input") + (:file "io") (:file "strings")) :depends-on (:trivial-gray-streams)) Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.67 2008/05/20 09:38:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.68 2008/05/20 23:01:51 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -213,7 +213,7 @@ (element-type flexi-stream-element-type) (stream flexi-stream-stream)) flexi-input-stream - (let* ((buffer (make-octet-buffer)) + (let* (buffer (buffer-pos 0) (buffer-end 0) (index start) @@ -229,7 +229,7 @@ ;; OCTET-STACK), especially for UTF-8 (reserve (if (floatp factor) (* 2 integer-factor) 0))) (declare (fixnum buffer-pos buffer-end index integer-factor reserve) - (type (array octet *) buffer)) + (boolean want-chars-p)) (flet ((compute-minimum () "Computes the minimum amount of octets we can savely read into the buffer without violating the stream's bound \(if there @@ -243,6 +243,15 @@ (fill-buffer (end) "Tries to fill the buffer from BUFFER-POS to END and returns NIL if the buffer doesn't contain any new data." + ;; put data from octet stack into buffer if there is any + (loop + (when (>= buffer-pos end) + (return)) + (let ((next-octet (pop octet-stack))) + (cond (next-octet + (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet)) + (incf buffer-pos)) + (t (return))))) (setq buffer-end (read-sequence buffer stream :start buffer-pos :end end)) @@ -254,15 +263,7 @@ (incf position buffer-end)))) (let ((minimum (compute-minimum))) (declare (fixnum minimum)) - ;; put data from octet stack into buffer if there is any - (loop - (when (>= buffer-pos minimum) - (return)) - (let ((next-octet (pop octet-stack))) - (cond (next-octet - (setf (aref buffer buffer-pos) (the octet next-octet)) - (incf buffer-pos)) - (t (return))))) + (setq buffer (make-octet-buffer minimum)) ;; fill buffer for the first time or return immediately if ;; we don't succeed (unless (fill-buffer minimum) @@ -277,7 +278,7 @@ (unless (fill-buffer (compute-minimum)) (return-from next-octet))) (prog1 - (aref buffer buffer-pos) + (aref (the (array octet *) buffer) buffer-pos) (incf buffer-pos))) (unreader (char) (unread-char% char flexi-input-stream))) @@ -310,7 +311,7 @@ (when (>= buffer-pos buffer-end) (return)) (decf buffer-end) - (push (aref buffer buffer-end) octet-stack)) + (push (aref (the (array octet *) buffer) buffer-end) octet-stack)) (leave)) (let ((next-thing ,(if octetp '(next-octet) Modified: branches/edi/mapping.lisp ============================================================================== --- branches/edi/mapping.lisp (original) +++ branches/edi/mapping.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.1 2008/05/19 09:09:15 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -40,9 +40,16 @@ #-:lispworks 'character) (deftype char-code-integer () - "The type of integers which can be returned by the function CHAR-CODE." + "The subtype of integers which can be returned by the function CHAR-CODE." '(integer 0 #.(1- char-code-limit))) +(deftype code-point () + "The subtype of integers that's just big enough to hold all Unicode +codepoints. + +See for example ." + '(mod #x110000)) + (defmacro defconstant (name value &optional doc) "Make sure VALUE is evaluated only once \(to appease SBCL)." `(cl:defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value) Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.54 2008/05/20 06:15:44 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.56 2008/05/20 23:44:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,37 +29,37 @@ (in-package :flexi-streams) -(defgeneric write-byte* (byte sink) +(defgeneric write-byte* (byte stream) + (declare #.*standard-optimize-settings*) (:documentation "Writes one byte \(octet) to the underlying stream -of SINK \(if SINK is a flexi stream) or adds the byte to the end of -SINK \(if SINK is an array with a fill pointer).")) +STREAM.")) #-:lispworks -(defmethod write-byte* (byte (sink flexi-output-stream)) +(defmethod write-byte* (byte (flexi-output-stream flexi-output-stream)) (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) - sink + flexi-output-stream (write-byte byte stream))) #+:lispworks -(defmethod write-byte* (byte (sink flexi-output-stream)) +(defmethod write-byte* (byte (flexi-output-stream flexi-output-stream)) (declare #.*standard-optimize-settings*) ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all ;; bivalent streams in LispWorks (4.4.6) (with-accessors ((stream flexi-stream-stream)) - sink + flexi-output-stream (write-sequence (make-array 1 :element-type 'octet :initial-element byte) stream) byte)) #+:lispworks -(defmethod write-byte* (byte (sink flexi-binary-output-stream)) +(defmethod write-byte* (byte (flexi-output-stream flexi-binary-output-stream)) "Optimized version \(only needed for LispWorks) in case the underlying stream is binary." (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) - sink + flexi-output-stream (write-byte byte stream))) (defmethod stream-write-char ((stream flexi-output-stream) char) @@ -180,7 +180,7 @@ (return-from stream-write-sequence (call-next-method))) (let ((buffer (make-array (+ +buffer-size+ 20) - :element-type '(unsigned-byte 8) + :element-type 'octet :fill-pointer 0)) (last-newline-pos (position #\Newline sequence :test #'char= Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.33 2008/05/19 07:57:08 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.34 2008/05/20 23:44:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -53,12 +53,13 @@ :flexi-io-stream :flexi-stream :flexi-stream-bound + :flexi-stream-column :flexi-stream-external-format :flexi-stream-element-type :flexi-stream-element-type-error :flexi-stream-element-type-error-element-type :flexi-stream-error - :flexi-stream-column + :flexi-stream-out-of-sync-error :flexi-stream-position :flexi-stream-stream :get-output-stream-sequence Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.31 2008/05/19 07:57:08 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.32 2008/05/20 23:01:51 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -38,6 +38,17 @@ (compilation-speed 0)) "The standard optimize settings used by most declaration expressions.") +(defvar *fixnum-optimize-settings* + '(optimize + speed + (safety 0) + (space 0) + (debug 1) + (compilation-speed 0) + #+:lispworks (hcl:fixnum-safety 0)) + "Like *STANDARD-OPTIMIZE-SETTINGS*, but \(on LispWorks) with all +arithmetic being fixnum arithmetic.") + (defvar *current-unreader* nil "A unary function which might be called to `unread' a character \(i.e. the sequence of octets it represents). @@ -162,7 +173,7 @@ corresponding octets.") (defconstant +buffer-size+ 8192 - "Size of buffers used for internal purposes.") + "Default size for buffers used for internal purposes.") (pushnew :flexi-streams *features*) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.30 2008/05/20 09:37:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Tue May 20 19:45:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.19 2008/05/19 22:32:57 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -176,12 +176,20 @@ ,instance , at body)) -(defun make-octet-buffer () +(defun make-octet-buffer (&optional (size +buffer-size+)) "Creates and returns a fresh buffer \(a specialized array) of size +BUFFER-SIZE+ to hold octets." - (make-array +buffer-size+ :element-type 'octet)) + (declare #.*standard-optimize-settings*) + (make-array size :element-type 'octet)) (defun type-equal (type1 type2) "Whether TYPE1 and TYPE2 denote the same type." + (declare #.*standard-optimize-settings*) (and (subtypep type1 type2) - (subtypep type2 type1))) \ No newline at end of file + (subtypep type2 type1))) + +(defun maybe-rewind (stream octets) + "Tries to `rewind' the \(binary) stream STREAM by OCTETS octets. +Returns a true value if it succeeds." + (when-let (position (file-position stream)) + (file-position stream (- position octets)))) \ No newline at end of file From eweitz at common-lisp.net Tue May 20 23:47:01 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 19:47:01 -0400 (EDT) Subject: [flexi-streams-cvs] r39 - branches/edi Message-ID: <20080520234701.8E5626D07D@common-lisp.net> Author: eweitz Date: Tue May 20 19:46:57 2008 New Revision: 39 Added: branches/edi/io.lisp (contents, props changed) Log: Forgot one... Added: branches/edi/io.lisp ============================================================================== --- (empty file) +++ branches/edi/io.lisp Tue May 20 19:46:57 2008 @@ -0,0 +1,110 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/io.lisp,v 1.2 2008/05/20 23:44:45 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defmethod reset-input-state ((flexi-io-stream flexi-io-stream)) + "This method is used to clear any state associated with previous +input before output is attempted on the stream. It can fail if the +octet stack is not empty and the stream can't be `rewound'." + (declare #.*standard-optimize-settings*) + (with-accessors ((last-char-code flexi-stream-last-char-code) + (last-octet flexi-stream-last-octet) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-io-stream + (when octet-stack + (unless (maybe-rewind stream (length octet-stack)) + (error 'flexi-stream-out-of-sync-error + :stream flexi-io-stream)) + (setq octet-stack nil)) + (setq last-octet nil + last-char-code nil))) + +(defmethod stream-write-byte :before ((stream flexi-io-stream) byte) + (declare #.*standard-optimize-settings*) + (declare (ignore byte)) + (reset-input-state stream)) + +(defmethod stream-write-char :before ((stream flexi-io-stream) char) + (declare #.*standard-optimize-settings*) + (declare (ignore char)) + (reset-input-state stream)) + +(defmethod stream-write-sequence :before ((stream flexi-io-stream) sequence start end &key) + (declare #.*standard-optimize-settings*) + (declare (ignore sequence start end)) + (reset-input-state stream)) + +(defmethod stream-clear-output :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-input-state stream)) + +(defmethod reset-output-state ((flexi-io-stream flexi-io-stream)) + "This method is used to clear any state associated with previous +output before the stream is used for input." + (declare #.*standard-optimize-settings*) + (with-accessors ((column flexi-stream-column)) + flexi-io-stream + (setq column nil))) + +(defmethod stream-read-byte :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-output-state stream)) + +(defmethod stream-read-char :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-output-state stream)) + +(defmethod stream-read-sequence :before ((stream flexi-io-stream) sequence start end &key) + (declare #.*standard-optimize-settings*) + (declare (ignore sequence start end)) + (reset-output-state stream)) + +(defmethod stream-unread-char :before ((stream flexi-io-stream) char) + (declare #.*standard-optimize-settings*) + (declare (ignore char)) + (reset-output-state stream)) + +(defmethod unread-byte :before (byte (stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (declare (ignore byte)) + (reset-output-state stream)) + +(defmethod stream-clear-input :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-output-state stream)) + +(defmethod write-byte* :after (byte (stream flexi-io-stream)) + "Keep POSITION slot up to date even when performing output." + (declare #.*standard-optimize-settings*) + (declare (ignore byte)) + (with-accessors ((position flexi-stream-position)) + stream + (incf position))) \ No newline at end of file From eweitz at common-lisp.net Wed May 21 00:19:12 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 20:19:12 -0400 (EDT) Subject: [flexi-streams-cvs] r40 - branches/edi Message-ID: <20080521001912.DCB3E6F23F@common-lisp.net> Author: eweitz Date: Tue May 20 20:19:12 2008 New Revision: 40 Modified: branches/edi/conditions.lisp branches/edi/input.lisp branches/edi/output.lisp Log: read-sequence slightly improved for file streams Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Tue May 20 20:19:12 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.6 2008/05/20 23:44:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -51,6 +51,7 @@ (define-condition flexi-stream-out-of-sync-error (flexi-stream-error) () (:report (lambda (condition stream) + (declare (ignore condition)) (format stream "Stream out of sync from previous lookahead, couldn't rewind."))) (:documentation "This can happen if you're trying to write to an IO Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Tue May 20 20:19:12 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.68 2008/05/20 23:01:51 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.70 2008/05/21 00:18:35 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -217,23 +217,32 @@ (buffer-pos 0) (buffer-end 0) (index start) + ;; whether we will deliver characters and thus the number + ;; of octets to read might not be equal to the number of + ;; sequence elements to fill (want-chars-p (or (stringp sequence) (and (vectorp sequence) (not (subtypep (array-element-type sequence) 'integer))) - (type-equal element-type 'octet))) + (not (type-equal element-type 'octet)))) + ;; whether we will later be able to rewind the stream if + ;; needed (to get rid of unused octets in the buffer) + (can-rewind-p (and want-chars-p (maybe-rewind stream 0))) (factor (if want-chars-p (encoding-factor external-format) 1)) (integer-factor (floor factor)) ;; it's an interesting question whether it makes sense ;; performance-wise to make RESERVE significantly bigger ;; (and thus put potentially a lot more octets into ;; OCTET-STACK), especially for UTF-8 - (reserve (if (floatp factor) (* 2 integer-factor) 0))) + (reserve (cond ((not (floatp factor)) 0) + ((not can-rewind-p) (* 2 integer-factor)) + (t (ceiling (* (- factor integer-factor) (- end start))))))) (declare (fixnum buffer-pos buffer-end index integer-factor reserve) - (boolean want-chars-p)) - (flet ((compute-minimum () - "Computes the minimum amount of octets we can savely -read into the buffer without violating the stream's bound \(if there -is one) and without potentially reading much more than we need." + (boolean want-chars-p can-rewind-p)) + (flet ((compute-fill-amount () + "Computes the amount of octets we can savely read into +the buffer without violating the stream's bound \(if there is one) and +without potentially reading much more than we need \(unless we can +rewind afterwards)." (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor (the fixnum (- end index)))) reserve)) @@ -261,7 +270,7 @@ ;; compare with BUFFER-POS (unless (zerop buffer-end) (incf position buffer-end)))) - (let ((minimum (compute-minimum))) + (let ((minimum (compute-fill-amount))) (declare (fixnum minimum)) (setq buffer (make-octet-buffer minimum)) ;; fill buffer for the first time or return immediately if @@ -275,7 +284,7 @@ stream." (when (>= buffer-pos buffer-end) (setq buffer-pos 0) - (unless (fill-buffer (compute-minimum)) + (unless (fill-buffer (compute-fill-amount)) (return-from next-octet))) (prog1 (aref (the (array octet *) buffer) buffer-pos) @@ -306,12 +315,17 @@ (when (>= index end) ;; check if there are octets in the ;; buffer we didn't use - see - ;; COMPUTE-MINIMUM above - (loop - (when (>= buffer-pos buffer-end) - (return)) - (decf buffer-end) - (push (aref (the (array octet *) buffer) buffer-end) octet-stack)) + ;; COMPUTE-FILL-AMOUNT above + (let ((rest (- buffer-end buffer-pos))) + (when (plusp rest) + (or (and can-rewind-p + (maybe-rewind stream rest)) + (loop + (when (>= buffer-pos buffer-end) + (return)) + (decf buffer-end) + (push (aref (the (array octet *) buffer) buffer-end) + octet-stack))))) (leave)) (let ((next-thing ,(if octetp '(next-octet) Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Tue May 20 20:19:12 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.56 2008/05/20 23:44:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.57 2008/05/21 00:04:58 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -129,19 +129,6 @@ ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline)) -;; TODO: file-position -> octet-stack (and others?) - -;; other way around: function "resync" trying to use File-position? - -;; "resync" independent function to empty octet-stack? -;; (decrement-file-position) => success -;; (resync ... &optional how-much (length octet-stack)) => success - -;; in stream-read-sequence: if file stream, read more into buffer, -;; then resync with file-position? - -;; TODO: interaction between read and write - (defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) "Writes all elements of the sequence SEQUENCE from START to END to the underlying stream. The elements can be either octets or From eweitz at common-lisp.net Wed May 21 01:18:59 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 21:18:59 -0400 (EDT) Subject: [flexi-streams-cvs] r41 - in branches/edi: . doc Message-ID: <20080521011859.35B03601AD@common-lisp.net> Author: eweitz Date: Tue May 20 21:18:58 2008 New Revision: 41 Modified: branches/edi/CHANGELOG branches/edi/doc/index.html branches/edi/output.lisp Log: write-sequence Modified: branches/edi/CHANGELOG ============================================================================== --- branches/edi/CHANGELOG (original) +++ branches/edi/CHANGELOG Tue May 20 21:18:58 2008 @@ -1,3 +1,5 @@ +Complete redesign, various additions, bugfixes, performance improvements (with the help of Hans H?bner) + Version 0.14.0 2007-12-30 Some fixes for LispWorks (when the underlying stream is a character stream) Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Tue May 20 21:18:58 2008 @@ -1037,10 +1037,12 @@ Thanks to David Lichteblau for numerous portability patches. Thanks to Igor Plekhov for the KOI8-R code. Thanks to Anton Vodonosov for -numerous patches and additions. +numerous patches and additions. Thanks +to Hans Hübner for +his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.105 2008/05/20 23:44:47 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.106 2008/05/21 01:06:45 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Tue May 20 21:18:58 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.57 2008/05/21 00:04:58 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.59 2008/05/21 01:17:45 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -129,66 +129,71 @@ ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline)) -(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) - "Writes all elements of the sequence SEQUENCE from START to END -to the underlying stream. The elements can be either octets or -characters. Characters are output according to the current -encoding \(external format) of the FLEXI-OUTPUT-STREAM object -STREAM." - (declare #.*standard-optimize-settings*) +(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key) +; (declare #.*standard-optimize-settings*) (declare (fixnum start end)) - (with-accessors ((stream flexi-stream-stream) - (column flexi-stream-column)) - flexi-output-stream - (cond ((and (arrayp sequence) - (subtypep (array-element-type sequence) 'octet)) - ;; set column to NIL because we don't know how to handle binary - ;; output mixed with character output - (setq column nil) - (write-sequence sequence stream :start start :end end)) - (t (loop for index from start below end - for element = (elt sequence index) - when (characterp element) do - (stream-write-char flexi-output-stream element) - else do - (stream-write-byte flexi-output-stream element)) - sequence)))) - -(defmethod stream-write-sequence ((stream flexi-output-stream) (sequence string) start end &key) - "Optimized method for the cases where SEQUENCE is a string. Fills -an internal buffer and uses repeated calls to WRITE-SEQUENCE to write -to the underlying stream." - (declare #.*standard-optimize-settings*) - (declare (fixnum start end)) - ;; don't use this optimized method for bivalent character streams on - ;; LispWorks, as it currently gets confused by the fill pointer - #+:lispworks - (unless (typep stream 'flexi-binary-output-stream) - (return-from stream-write-sequence - (call-next-method))) - (let ((buffer (make-array (+ +buffer-size+ 20) - :element-type 'octet - :fill-pointer 0)) - (last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t))) - (loop with format = (flexi-stream-external-format stream) - for index from start below end - do (char-to-octets format - (aref sequence index) - (lambda (octet) - (vector-push octet buffer))) - when (>= (fill-pointer buffer) +buffer-size+) do - (write-sequence buffer (flexi-stream-stream stream)) - (setf (fill-pointer buffer) 0) - finally (when (>= (fill-pointer buffer) 0) - (write-sequence buffer (flexi-stream-stream stream)))) - (setf (flexi-stream-column stream) - (cond (last-newline-pos (- end last-newline-pos 1)) - ((flexi-stream-column stream) - (+ (flexi-stream-column stream) (- end start)))))) + (with-accessors ((column flexi-stream-column) + (external-format flexi-stream-external-format) + (stream flexi-stream-stream)) + stream + (let* ((octet-seen-p nil) + (buffer-pos 0) + ;; whether we might receive characters and thus the number + ;; of octets to output might not be equal to the number of + ;; sequence elements to write + (chars-p (or (listp sequence) + (and (vectorp sequence) + (not (subtypep (array-element-type sequence) 'integer))))) + (factor (if chars-p (encoding-factor external-format) 1)) + (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) + (buffer (make-octet-buffer buffer-size))) + (declare (fixnum buffer-pos buffer-size) + (boolean octet-seen-p) + (type (array octet *) buffer)) + (labels ((flush-buffer () + (write-sequence buffer stream :end buffer-pos) + (setq buffer-pos 0)) + (write-octet (octet) + (declare (octet octet)) + (when (>= buffer-pos buffer-size) + (flush-buffer)) + (setf (aref buffer buffer-pos) octet) + (incf buffer-pos)) + (write-character (char) + (char-to-octets external-format char #'write-octet)) + (write-object (object) + (etypecase object + (octet (setq octet-seen-p t) + (write-octet object)) + (character (write-character object))))) + (declare (dynamic-extent (function write-octet))) + (macrolet ((iterate (octets-p output-form) + `(progn + ,@(if octets-p '((setq octet-seen-p t))) + (loop for index of-type fixnum from start below end + do ,output-form + finally (when (plusp buffer-pos) + (flush-buffer)))))) + (etypecase sequence + (string (iterate nil (write-character (char sequence index)))) + (array + (let ((array-element-type (array-element-type sequence))) + (cond ((type-equal array-element-type 'octet) + (iterate t (write-octet (aref (the (array octet *) sequence) index)))) + ((subtypep array-element-type 'integer) + (iterate t (write-octet (aref sequence index)))) + (t (iterate nil (write-object (aref sequence index))))))) + (list (iterate nil (write-object (nth index sequence))))) + (setq column + (cond (octet-seen-p nil) + (t (let ((last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (cond (last-newline-pos (- end last-newline-pos 1)) + (column (+ column (- end start)))))))))))) + sequence) (defmethod stream-write-string ((stream flexi-output-stream) string From eweitz at common-lisp.net Wed May 21 01:28:34 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 21:28:34 -0400 (EDT) Subject: [flexi-streams-cvs] r42 - branches/edi Message-ID: <20080521012834.71A6B79185@common-lisp.net> Author: eweitz Date: Tue May 20 21:28:34 2008 New Revision: 42 Modified: branches/edi/output.lisp Log: More documentation Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Tue May 20 21:28:34 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.59 2008/05/21 01:17:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.60 2008/05/21 01:26:43 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -130,7 +130,13 @@ (stream-write-char stream #\Newline)) (defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key) -; (declare #.*standard-optimize-settings*) + "An optimized version which uses a buffer underneath. The function +can accepts characters as well as octets and it decides what to do +based on the element type of the sequence \(if possible) or on the +individual elements, i.e. you can mix characters and octets in +SEQUENCE if you want. Whether that really works might also depend on +your Lisp, some of the implementations are more picky than others." + (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (with-accessors ((column flexi-stream-column) (external-format flexi-stream-external-format) @@ -151,23 +157,32 @@ (boolean octet-seen-p) (type (array octet *) buffer)) (labels ((flush-buffer () + "Sends all octets in BUFFER to the underlying stream." (write-sequence buffer stream :end buffer-pos) (setq buffer-pos 0)) (write-octet (octet) + "Adds one octet to the buffer and flush it if necessary." (declare (octet octet)) (when (>= buffer-pos buffer-size) (flush-buffer)) (setf (aref buffer buffer-pos) octet) (incf buffer-pos)) (write-character (char) + "Adds the octets representing the character CHAR to the buffer." (char-to-octets external-format char #'write-octet)) (write-object (object) + "Dispatches to WRITE-OCTET or WRITE-CHARACTER +depending on the type of OBJECT." (etypecase object (octet (setq octet-seen-p t) (write-octet object)) (character (write-character object))))) (declare (dynamic-extent (function write-octet))) (macrolet ((iterate (octets-p output-form) + "An unhygienic macro to implement the actual +iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one +sequence element and put its octet representation into the buffer. +OCTETS-P is true if we know in advance that we will send octets." `(progn ,@(if octets-p '((setq octet-seen-p t))) (loop for index of-type fixnum from start below end @@ -184,6 +199,7 @@ (iterate t (write-octet (aref sequence index)))) (t (iterate nil (write-object (aref sequence index))))))) (list (iterate nil (write-object (nth index sequence))))) + ;; update the column slot, setting if to NIL if we sent octets (setq column (cond (octet-seen-p nil) (t (let ((last-newline-pos (position #\Newline sequence From eweitz at common-lisp.net Wed May 21 01:49:43 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Tue, 20 May 2008 21:49:43 -0400 (EDT) Subject: [flexi-streams-cvs] r43 - in branches/edi: . doc Message-ID: <20080521014943.76DCD33102@common-lisp.net> Author: eweitz Date: Tue May 20 21:49:41 2008 New Revision: 43 Modified: branches/edi/doc/index.html branches/edi/packages.lisp branches/edi/strings.lisp Log: New function octet-length Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Tue May 20 21:49:41 2008 @@ -115,6 +115,7 @@

    1. string-to-octets
    2. octets-to-string +
    3. octet-length
  • File positions @@ -470,8 +471,8 @@ CL-USER 5 > (make-external-format :ucs-2be) #<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CRLF :LITTLE-ENDIAN NIL) 2067DBE4> -CL-USER 6 > (make-external-format :ucs-2be :eol-style :br) -#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :BR :LITTLE-ENDIAN NIL) 206B54AC> +CL-USER 6 > (make-external-format :ucs-2be :eol-style :cr) +#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CR :LITTLE-ENDIAN NIL) 206B54AC>
  • @@ -982,7 +983,8 @@

    Converts the Lisp string string from start to end to an array of -octets corresponding to the external format designated by external format external-format. The defaults for +octets corresponding to the external +format designated by external-format. The defaults for start and end are 0 and the length of the string. The default for external-format is :LATIN1. @@ -995,9 +997,25 @@

    Converts the Lisp sequence sequence of octets from start to end to string -using the external format designated -by external -format external-format. The defaults for +using the external format designated +by external-format. The defaults for +start and end +are 0 and the length of the sequence. The default +for external-format is :LATIN1. +
    + +


    [Function] +
    octet-length string &key external-format start end => length-or-nil + +


    + +Returns the length of the substring of string from start to end in +octets if encoded using +the external format designated +by external-format. Might return NIL +if there's no efficient way to compute the length without iterating +through the whole string. +The defaults for start and end are 0 and the length of the sequence. The default for external-format is :LATIN1. @@ -1042,7 +1060,7 @@ his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.106 2008/05/21 01:06:45 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.107 2008/05/21 01:43:45 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Tue May 20 21:49:41 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.34 2008/05/20 23:44:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.35 2008/05/21 01:43:43 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -76,6 +76,7 @@ :make-in-memory-output-stream :make-flexi-stream :octet + :octet-length :octets-to-string :output-stream-sequence-length :peek-byte Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Tue May 20 21:49:41 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.21 2008/05/20 09:04:23 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -159,3 +159,15 @@ (incf j)) (setf (fill-pointer string) j) string)))))))) + +(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string))) + "Returns the length of the substring of STRING from START to END in +octets if encoded using the external format EXTERNAL-FORMAT. Might +return NIL if there's no efficient way to compute the length without +iterating through the whole string." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (setq external-format (maybe-convert-external-format external-format)) + (let ((factor (encoding-factor external-format))) + (typecase factor + (fixnum (* factor (- end start)))))) From eweitz at common-lisp.net Wed May 21 11:55:22 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Wed, 21 May 2008 07:55:22 -0400 (EDT) Subject: [flexi-streams-cvs] r44 - in branches/edi: . doc Message-ID: <20080521115522.BB41C2E2D0@common-lisp.net> Author: eweitz Date: Wed May 21 07:55:21 2008 New Revision: 44 Modified: branches/edi/CHANGELOG branches/edi/doc/index.html branches/edi/flexi-streams.asd Log: Make it 0.15.0 Modified: branches/edi/CHANGELOG ============================================================================== --- branches/edi/CHANGELOG (original) +++ branches/edi/CHANGELOG Wed May 21 07:55:21 2008 @@ -1,3 +1,5 @@ +Version 0.15.0 +2008-05-21 Complete redesign, various additions, bugfixes, performance improvements (with the help of Hans H?bner) Version 0.14.0 Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Wed May 21 07:55:21 2008 @@ -224,7 +224,7 @@

    FLEXI-STREAMS together with this documentation can be downloaded from http://weitz.de/files/flexi-streams.tar.gz. The -current version is 0.14.0. +current version is 0.15.0.

    Before you install FLEXI-STREAMS you first need to install the -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.107 2008/05/21 01:43:45 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.108 2008/05/21 11:53:08 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Wed May 21 07:55:21 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.64 2008/05/20 23:01:51 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.65 2008/05/21 11:53:07 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -35,7 +35,7 @@ (in-package :flexi-streams-system) (defsystem :flexi-streams - :version "0.14.0" + :version "0.15.0" :serial t :components ((:file "packages") (:file "mapping") From eweitz at common-lisp.net Wed May 21 12:01:22 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Wed, 21 May 2008 08:01:22 -0400 (EDT) Subject: [flexi-streams-cvs] r45 - in trunk: . doc test Message-ID: <20080521120122.50E2F12061@common-lisp.net> Author: eweitz Date: Wed May 21 08:00:42 2008 New Revision: 45 Added: trunk/decode.lisp (contents, props changed) trunk/encode.lisp (contents, props changed) trunk/io.lisp (contents, props changed) trunk/mapping.lisp (contents, props changed) Modified: trunk/CHANGELOG trunk/ascii.lisp trunk/code-pages.lisp trunk/doc/index.html trunk/external-format.lisp trunk/flexi-streams.asd trunk/in-memory.lisp trunk/input.lisp trunk/iso-8859.lisp trunk/koi8-r.lisp trunk/lw-binary-stream.lisp trunk/output.lisp trunk/packages.lisp trunk/specials.lisp trunk/stream.lisp trunk/strings.lisp trunk/test/packages.lisp trunk/test/test.lisp trunk/util.lisp Log: Version 0.15.0, update from branch Modified: trunk/CHANGELOG ============================================================================== --- trunk/CHANGELOG (original) +++ trunk/CHANGELOG Wed May 21 08:00:42 2008 @@ -1,3 +1,7 @@ +Version 0.15.0 +2008-05-21 +Complete redesign, various additions, bugfixes, performance improvements (with the help of Hans H?bner) + Version 0.14.0 2007-12-30 Some fixes for LispWorks (when the underlying stream is a character stream) Modified: trunk/ascii.lisp ============================================================================== --- trunk/ascii.lisp (original) +++ trunk/ascii.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.7 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/ascii.lisp,v 1.9 2008/05/18 21:32:15 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -29,7 +29,8 @@ (in-package :flexi-streams) -(defvar +ascii-table+ - #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533) +(defconstant +ascii-table+ + ;; currently not used, but we leave it in here just in case... + (make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) "An array enumerating the character codes for the US-ASCII encoding.") Modified: trunk/code-pages.lisp ============================================================================== --- trunk/code-pages.lisp (original) +++ trunk/code-pages.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/code-pages.lisp,v 1.7 2008/05/18 21:32:15 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -31,32 +31,32 @@ ;;; the following code was auto-generated with LWW -(defvar +code-page-tables+ - '((437 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (720 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 233 226 132 224 134 231 234 235 232 239 238 141 142 143 144 1617 1618 244 164 1600 251 249 1569 1570 1571 1572 163 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1590 1591 1592 1593 1594 1601 181 1602 1603 1604 1605 1606 1607 1608 1609 1610 8801 1611 1612 1613 1614 1615 1616 8776 176 8729 183 8730 8319 178 9632 160)) - (737 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 931 932 933 934 935 936 937 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 965 966 967 968 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 969 940 941 942 970 943 972 973 971 974 902 904 905 906 908 910 911 177 8805 8804 938 939 247 8776 176 8729 183 8730 8319 178 9632 160)) - (775 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 262 252 233 257 228 291 229 263 322 275 342 343 299 377 196 197 201 230 198 333 246 290 162 346 347 214 220 248 163 216 215 164 256 298 243 379 380 378 8221 166 169 174 172 189 188 321 171 187 9617 9618 9619 9474 9508 260 268 280 278 9571 9553 9559 9565 302 352 9488 9492 9524 9516 9500 9472 9532 370 362 9562 9556 9577 9574 9568 9552 9580 381 261 269 281 279 303 353 371 363 382 9496 9484 9608 9604 9612 9616 9600 211 223 332 323 245 213 181 324 310 311 315 316 326 274 325 8217 173 177 8220 190 182 167 247 8222 176 8729 183 185 179 178 9632 160)) - (850 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 215 402 225 237 243 250 241 209 170 186 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 240 208 202 203 200 305 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 254 222 218 219 217 253 221 175 180 173 177 8215 190 182 167 247 184 176 168 183 185 179 178 9632 160)) - (852 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 367 263 231 322 235 336 337 238 377 196 262 201 313 314 244 246 317 318 346 347 214 220 356 357 321 215 269 225 237 243 250 260 261 381 382 280 281 172 378 268 351 171 187 9617 9618 9619 9474 9508 193 194 282 350 9571 9553 9559 9565 379 380 9488 9492 9524 9516 9500 9472 9532 258 259 9562 9556 9577 9574 9568 9552 9580 164 273 272 270 203 271 327 205 206 283 9496 9484 9608 9604 354 366 9600 211 223 212 323 324 328 352 353 340 218 341 368 253 221 355 180 173 733 731 711 728 167 247 184 176 168 729 369 344 345 9632 160)) - (855 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1106 1026 1107 1027 1105 1025 1108 1028 1109 1029 1110 1030 1111 1031 1112 1032 1113 1033 1114 1034 1115 1035 1116 1036 1118 1038 1119 1039 1102 1070 1098 1066 1072 1040 1073 1041 1094 1062 1076 1044 1077 1045 1092 1060 1075 1043 171 187 9617 9618 9619 9474 9508 1093 1061 1080 1048 9571 9553 9559 9565 1081 1049 9488 9492 9524 9516 9500 9472 9532 1082 1050 9562 9556 9577 9574 9568 9552 9580 164 1083 1051 1084 1052 1085 1053 1086 1054 1087 9496 9484 9608 9604 1055 1103 9600 1071 1088 1056 1089 1057 1090 1058 1091 1059 1078 1046 1074 1042 1100 1068 8470 173 1099 1067 1079 1047 1096 1064 1101 1069 1097 1065 1095 1063 167 9632 160)) - (857 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 305 196 197 201 230 198 244 246 242 251 249 304 214 220 248 163 216 350 351 225 237 243 250 241 209 286 287 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 186 170 202 203 200 65533 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 65533 215 218 219 217 236 255 175 180 173 177 65533 190 182 167 247 184 176 168 183 185 179 178 9632 160)) - (860 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 227 224 193 231 234 202 232 205 212 236 195 194 201 192 200 244 245 242 218 249 204 213 220 162 163 217 8359 211 225 237 243 250 241 209 170 186 191 210 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (861 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 208 240 222 196 197 201 230 198 244 246 254 251 221 253 214 220 248 163 216 8359 402 225 237 243 250 193 205 211 218 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (862 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (863 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 194 224 182 231 234 235 232 239 238 8215 192 167 201 200 202 244 203 207 251 249 164 212 220 162 163 217 219 402 166 180 243 250 168 184 179 175 206 8976 172 189 188 190 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (864 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 176 183 8729 8730 9618 9472 9474 9532 9508 9516 9500 9524 9488 9484 9492 9496 946 8734 966 177 189 188 8776 171 187 65271 65272 155 156 65275 65276 159 160 173 65154 163 164 65156 65533 65533 65166 65167 65173 65177 1548 65181 65185 65189 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 65233 1563 65201 65205 65209 1567 162 65152 65153 65155 65157 65226 65163 65165 65169 65171 65175 65179 65183 65187 65191 65193 65195 65197 65199 65203 65207 65211 65215 65217 65221 65227 65231 166 172 247 215 65225 1600 65235 65239 65243 65247 65251 65255 65259 65261 65263 65267 65213 65228 65230 65229 65249 65149 1617 65253 65257 65260 65264 65266 65232 65237 65269 65270 65245 65241 65265 9632 65533)) - (865 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 164 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160)) - (866 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1025 1105 1028 1108 1031 1111 1038 1118 176 8729 183 8730 8470 164 9632 160)) - (869 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 902 135 183 172 166 8216 8217 904 8213 905 906 938 908 147 148 910 939 169 911 178 179 940 163 941 942 943 970 912 972 973 913 914 915 916 917 918 919 189 920 921 171 187 9617 9618 9619 9474 9508 922 923 924 925 9571 9553 9559 9565 926 927 9488 9492 9524 9516 9500 9472 9532 928 929 9562 9556 9577 9574 9568 9552 9580 931 932 933 934 935 936 937 945 946 947 9496 9484 9608 9604 948 949 9600 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 900 173 177 965 966 967 167 968 901 176 168 969 971 944 974 9632 160)) - (1250 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 352 8249 346 356 381 377 144 8216 8217 8220 8221 8226 8211 8212 152 8482 353 8250 347 357 382 378 160 711 728 321 164 260 166 167 168 169 350 171 172 173 174 379 176 177 731 322 180 181 182 183 184 261 351 187 317 733 318 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) - (1251 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1026 1027 8218 1107 8222 8230 8224 8225 8364 8240 1033 8249 1034 1036 1035 1039 1106 8216 8217 8220 8221 8226 8211 8212 152 8482 1113 8250 1114 1116 1115 1119 160 1038 1118 1032 164 1168 166 167 1025 169 1028 171 172 173 174 1031 176 177 1030 1110 1169 181 182 183 1105 8470 1108 187 1112 1029 1109 1111 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103)) - (1252 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 381 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 382 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) - (1253 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 136 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 157 158 159 160 901 902 163 164 165 166 167 168 169 65533 171 172 173 174 8213 176 177 178 179 900 181 182 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) - (1254 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) - (1255 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 156 157 158 159 160 161 162 163 8362 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 191 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1520 1521 1522 1523 1524 65533 65533 65533 65533 65533 65533 65533 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) - (1256 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 1662 8218 402 8222 8230 8224 8225 710 8240 1657 8249 338 1670 1688 1672 1711 8216 8217 8220 8221 8226 8211 8212 1705 8482 1681 8250 339 8204 8205 1722 160 1548 162 163 164 165 166 167 168 169 1726 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 1563 187 188 189 190 1567 1729 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 215 1591 1592 1593 1594 1600 1601 1602 1603 224 1604 226 1605 1606 1607 1608 231 232 233 234 235 1609 1610 238 239 1611 1612 1613 1614 244 1615 1616 247 1617 249 1618 251 252 8206 8207 1746)) - (1257 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 138 8249 140 168 711 184 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 175 731 159 160 65533 162 163 164 65533 166 167 216 169 342 171 172 173 174 198 176 177 178 179 180 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 729)) - (1258 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 258 196 197 198 199 200 201 202 203 768 205 206 207 272 209 777 211 212 416 214 215 216 217 218 219 220 431 771 223 224 225 226 259 228 229 230 231 232 233 234 235 769 237 238 239 273 241 803 243 244 417 246 247 248 249 250 251 252 432 8363 255))) +(defconstant +code-page-tables+ + `((437 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (720 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 233 226 132 224 134 231 234 235 232 239 238 141 142 143 144 1617 1618 244 164 1600 251 249 1569 1570 1571 1572 163 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1590 1591 1592 1593 1594 1601 181 1602 1603 1604 1605 1606 1607 1608 1609 1610 8801 1611 1612 1613 1614 1615 1616 8776 176 8729 183 8730 8319 178 9632 160))) + (737 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 931 932 933 934 935 936 937 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 965 966 967 968 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 969 940 941 942 970 943 972 973 971 974 902 904 905 906 908 910 911 177 8805 8804 938 939 247 8776 176 8729 183 8730 8319 178 9632 160))) + (775 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 262 252 233 257 228 291 229 263 322 275 342 343 299 377 196 197 201 230 198 333 246 290 162 346 347 214 220 248 163 216 215 164 256 298 243 379 380 378 8221 166 169 174 172 189 188 321 171 187 9617 9618 9619 9474 9508 260 268 280 278 9571 9553 9559 9565 302 352 9488 9492 9524 9516 9500 9472 9532 370 362 9562 9556 9577 9574 9568 9552 9580 381 261 269 281 279 303 353 371 363 382 9496 9484 9608 9604 9612 9616 9600 211 223 332 323 245 213 181 324 310 311 315 316 326 274 325 8217 173 177 8220 190 182 167 247 8222 176 8729 183 185 179 178 9632 160))) + (850 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 215 402 225 237 243 250 241 209 170 186 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 240 208 202 203 200 305 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 254 222 218 219 217 253 221 175 180 173 177 8215 190 182 167 247 184 176 168 183 185 179 178 9632 160))) + (852 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 367 263 231 322 235 336 337 238 377 196 262 201 313 314 244 246 317 318 346 347 214 220 356 357 321 215 269 225 237 243 250 260 261 381 382 280 281 172 378 268 351 171 187 9617 9618 9619 9474 9508 193 194 282 350 9571 9553 9559 9565 379 380 9488 9492 9524 9516 9500 9472 9532 258 259 9562 9556 9577 9574 9568 9552 9580 164 273 272 270 203 271 327 205 206 283 9496 9484 9608 9604 354 366 9600 211 223 212 323 324 328 352 353 340 218 341 368 253 221 355 180 173 733 731 711 728 167 247 184 176 168 729 369 344 345 9632 160))) + (855 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1106 1026 1107 1027 1105 1025 1108 1028 1109 1029 1110 1030 1111 1031 1112 1032 1113 1033 1114 1034 1115 1035 1116 1036 1118 1038 1119 1039 1102 1070 1098 1066 1072 1040 1073 1041 1094 1062 1076 1044 1077 1045 1092 1060 1075 1043 171 187 9617 9618 9619 9474 9508 1093 1061 1080 1048 9571 9553 9559 9565 1081 1049 9488 9492 9524 9516 9500 9472 9532 1082 1050 9562 9556 9577 9574 9568 9552 9580 164 1083 1051 1084 1052 1085 1053 1086 1054 1087 9496 9484 9608 9604 1055 1103 9600 1071 1088 1056 1089 1057 1090 1058 1091 1059 1078 1046 1074 1042 1100 1068 8470 173 1099 1067 1079 1047 1096 1064 1101 1069 1097 1065 1095 1063 167 9632 160))) + (857 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 305 196 197 201 230 198 244 246 242 251 249 304 214 220 248 163 216 350 351 225 237 243 250 241 209 286 287 191 174 172 189 188 161 171 187 9617 9618 9619 9474 9508 193 194 192 169 9571 9553 9559 9565 162 165 9488 9492 9524 9516 9500 9472 9532 227 195 9562 9556 9577 9574 9568 9552 9580 164 186 170 202 203 200 65533 205 206 207 9496 9484 9608 9604 166 204 9600 211 223 212 210 245 213 181 65533 215 218 219 217 236 255 175 180 173 177 65533 190 182 167 247 184 176 168 183 185 179 178 9632 160))) + (860 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 227 224 193 231 234 202 232 205 212 236 195 194 201 192 200 244 245 242 218 249 204 213 220 162 163 217 8359 211 225 237 243 250 241 209 170 186 191 210 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (861 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 208 240 222 196 197 201 230 198 244 246 254 251 221 253 214 220 248 163 216 8359 402 225 237 243 250 193 205 211 218 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (862 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 162 163 165 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (863 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 194 224 182 231 234 235 232 239 238 8215 192 167 201 200 202 244 203 207 251 249 164 212 220 162 163 217 219 402 166 180 243 250 168 184 179 175 206 8976 172 189 188 190 171 187 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (864 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 176 183 8729 8730 9618 9472 9474 9532 9508 9516 9500 9524 9488 9484 9492 9496 946 8734 966 177 189 188 8776 171 187 65271 65272 155 156 65275 65276 159 160 173 65154 163 164 65156 65533 65533 65166 65167 65173 65177 1548 65181 65185 65189 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 65233 1563 65201 65205 65209 1567 162 65152 65153 65155 65157 65226 65163 65165 65169 65171 65175 65179 65183 65187 65191 65193 65195 65197 65199 65203 65207 65211 65215 65217 65221 65227 65231 166 172 247 215 65225 1600 65235 65239 65243 65247 65251 65255 65259 65261 65263 65267 65213 65228 65230 65229 65249 65149 1617 65253 65257 65260 65264 65266 65232 65237 65269 65270 65245 65241 65265 9632 65533))) + (865 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 199 252 233 226 228 224 229 231 234 235 232 239 238 236 196 197 201 230 198 244 246 242 251 249 255 214 220 248 163 216 8359 402 225 237 243 250 241 209 170 186 191 8976 172 189 188 161 171 164 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 945 223 915 960 931 963 181 964 934 920 937 948 8734 966 949 8745 8801 177 8805 8804 8992 8993 247 8776 176 8729 183 8730 8319 178 9632 160))) + (866 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 9617 9618 9619 9474 9508 9569 9570 9558 9557 9571 9553 9559 9565 9564 9563 9488 9492 9524 9516 9500 9472 9532 9566 9567 9562 9556 9577 9574 9568 9552 9580 9575 9576 9572 9573 9561 9560 9554 9555 9579 9578 9496 9484 9608 9604 9612 9616 9600 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1025 1105 1028 1108 1031 1111 1038 1118 176 8729 183 8730 8470 164 9632 160))) + (869 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 902 135 183 172 166 8216 8217 904 8213 905 906 938 908 147 148 910 939 169 911 178 179 940 163 941 942 943 970 912 972 973 913 914 915 916 917 918 919 189 920 921 171 187 9617 9618 9619 9474 9508 922 923 924 925 9571 9553 9559 9565 926 927 9488 9492 9524 9516 9500 9472 9532 928 929 9562 9556 9577 9574 9568 9552 9580 931 932 933 934 935 936 937 945 946 947 9496 9484 9608 9604 948 949 9600 950 951 952 953 954 955 956 957 958 959 960 961 963 962 964 900 173 177 965 966 967 167 968 901 176 168 969 971 944 974 9632 160))) + (1250 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 352 8249 346 356 381 377 144 8216 8217 8220 8221 8226 8211 8212 152 8482 353 8250 347 357 382 378 160 711 728 321 164 260 166 167 168 169 350 171 172 173 174 379 176 177 731 322 180 181 182 183 184 261 351 187 317 733 318 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729))) + (1251 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 1026 1027 8218 1107 8222 8230 8224 8225 8364 8240 1033 8249 1034 1036 1035 1039 1106 8216 8217 8220 8221 8226 8211 8212 152 8482 1113 8250 1114 1116 1115 1119 160 1038 1118 1032 164 1168 166 167 1025 169 1028 171 172 173 174 1031 176 177 1030 1110 1169 181 182 183 1105 8470 1108 187 1112 1029 1109 1111 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103))) + (1252 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 381 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 382 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255))) + (1253 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 136 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 157 158 159 160 901 902 163 164 165 166 167 168 169 65533 171 172 173 174 8213 176 177 178 179 900 181 182 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533))) + (1254 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 352 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 353 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255))) + (1255 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 140 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 156 157 158 159 160 161 162 163 8362 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 191 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1520 1521 1522 1523 1524 65533 65533 65533 65533 65533 65533 65533 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533))) + (1256 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 1662 8218 402 8222 8230 8224 8225 710 8240 1657 8249 338 1670 1688 1672 1711 8216 8217 8220 8221 8226 8211 8212 1705 8482 1681 8250 339 8204 8205 1722 160 1548 162 163 164 165 166 167 168 169 1726 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 1563 187 188 189 190 1567 1729 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 215 1591 1592 1593 1594 1600 1601 1602 1603 224 1604 226 1605 1606 1607 1608 231 232 233 234 235 1609 1610 238 239 1611 1612 1613 1614 244 1615 1616 247 1617 249 1618 251 252 8206 8207 1746))) + (1257 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 131 8222 8230 8224 8225 136 8240 138 8249 140 168 711 184 144 8216 8217 8220 8221 8226 8211 8212 152 8482 154 8250 156 175 731 159 160 65533 162 163 164 65533 166 167 216 169 342 171 172 173 174 198 176 177 178 179 180 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 729))) + (1258 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 8364 129 8218 402 8222 8230 8224 8225 710 8240 138 8249 338 141 142 143 144 8216 8217 8220 8221 8226 8211 8212 732 8482 154 8250 339 157 158 376 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 258 196 197 198 199 200 201 202 203 768 205 206 207 272 209 777 211 212 416 214 215 216 217 218 219 220 431 771 223 224 225 226 259 228 229 230 231 232 233 234 235 769 237 238 239 273 241 803 243 244 417 246 247 248 249 250 251 252 432 8363 255)))) "A list of 8-bit Windows code pages where each element is a cons with the car being the ID of the code page and the cdr being a vector enumerating the corresponding character codes.") Added: trunk/decode.lisp ============================================================================== --- (empty file) +++ trunk/decode.lisp Wed May 21 08:00:42 2008 @@ -0,0 +1,266 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.16 2008/05/20 23:01:50 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defun recover-from-encoding-error (external-format format-control &rest format-args) + "Helper function used by OCTETS-TO-CHAR-CODE below to deal with +encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns +its character code in this case. Otherwise signals an +EXTERNAL-FORMAT-ENCODING-ERROR as determined by the arguments to this +function and provides a corresponding USE-VALUE restart." + (when *substitution-char* + (return-from recover-from-encoding-error (char-code *substitution-char*))) + (restart-case + (apply #'signal-encoding-error external-format format-control format-args) + (use-value (char) + :report "Specify a character to be used instead." + :interactive (lambda () + (loop + (format *query-io* "Type a character: ") + (let ((line (read-line *query-io*))) + (when (= 1 (length line)) + (return (list (char line 0))))))) + (char-code char)))) + +(defgeneric octets-to-char-code (format reader) + (declare #.*standard-optimize-settings*) + (:documentation "Converts a sequence of octets to a character code +\(which is returned, or NIL in case of EOF) using the external format +FORMAT. The sequence is obtained by calling the function \(which must +be a functional object) READER with no arguments which should return +one octet per call. In the case of EOF, READER should return NIL. + +The special variable *CURRENT-UNREADER* must be bound correctly +whenever this function is called.")) + +(defmethod octets-to-char-code ((format flexi-latin-1-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (funcall reader)) + +(defmethod octets-to-char-code ((format flexi-ascii-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (when-let (octet (funcall reader)) + (if (> (the octet octet) 127) + (recover-from-encoding-error format + "No character which corresponds to octet #x~X." octet) + octet))) + +(defmethod octets-to-char-code ((format flexi-8-bit-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (with-accessors ((decoding-table external-format-decoding-table)) + format + (when-let (octet (funcall reader)) + (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) + (the octet octet)))) + (if (or (null char-code) + (= (the char-code-integer char-code) 65533)) + (recover-from-encoding-error format + "No character which corresponds to octet #x~X." octet) + char-code))))) + +(defmethod octets-to-char-code ((format flexi-utf-8-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (let (first-octet-seen) + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error format + "End of data while in UTF-8 sequence."))) + (t (return-from octets-to-char-code nil)))) + (setq first-octet-seen t)))) + (let ((octet (read-next-byte))) + (declare (type octet octet)) + (multiple-value-bind (start count) + (cond ((not (logbitp 7 octet)) + (values octet 0)) + ((= #b11000000 (logand octet #b11100000)) + (values (logand octet #b00011111) 1)) + ((= #b11100000 (logand octet #b11110000)) + (values (logand octet #b00001111) 2)) + ((= #b11110000 (logand octet #b11111000)) + (values (logand octet #b00000111) 3)) + ((= #b11111000 (logand octet #b11111100)) + (values (logand octet #b00000011) 4)) + ((= #b11111100 (logand octet #b11111110)) + (values (logand octet #b00000001) 5)) + (t (return-from octets-to-char-code + (recover-from-encoding-error format + "Unexpected value #x~X at start of UTF-8 sequence." + octet)))) + (declare (fixnum count)) + ;; note that we currently don't check for "overlong" + ;; sequences or other illegal values + (loop for result of-type code-point + = start then (+ (ash result 6) + (logand octet #b111111)) + repeat count + for octet of-type octet = (read-next-byte) + unless (= #b10000000 (logand octet #b11000000)) + do (return-from octets-to-char-code + (recover-from-encoding-error format + "Unexpected value #x~X in UTF-8 sequence." octet)) + finally (return result))))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (let (first-octet-seen) + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error format + "End of data while in UTF-16 sequence."))) + (t (return-from octets-to-char-code nil)))) + (setq first-octet-seen t)))) + (flet ((read-next-word () + (+ (the octet (read-next-byte)) + (ash (the octet (read-next-byte)) 8)))) + (declare (inline read-next-word)) + (let ((word (read-next-word))) + (declare (type (unsigned-byte 16) word)) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (declare (type (unsigned-byte 16) next-word)) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error format + "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word))))))) + +(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (let (first-octet-seen) + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error format + "End of data while in UTF-16 sequence."))) + (t (return-from octets-to-char-code nil)))) + (setq first-octet-seen t)))) + (flet ((read-next-word () + (+ (ash (the octet (read-next-byte)) 8) + (the octet (read-next-byte))))) + (declare (inline read-next-word)) + (let ((word (read-next-word))) + (declare (type (unsigned-byte 16) word)) + (cond ((<= #xd800 word #xdfff) + (let ((next-word (read-next-word))) + (declare (type (unsigned-byte 16) next-word)) + (unless (<= #xdc00 next-word #xdfff) + (return-from octets-to-char-code + (recover-from-encoding-error format + "Unexpected UTF-16 word #x~X following #x~X." + next-word word))) + (+ (ash (logand #b1111111111 word) 10) + (logand #b1111111111 next-word) + #x10000))) + (t word))))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (let (first-octet-seen) + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error format + "End of data while in UTF-32 sequence."))) + (t (return-from octets-to-char-code nil)))) + (setq first-octet-seen t)))) + (loop for count of-type fixnum from 0 to 24 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (let (first-octet-seen) + (declare (boolean first-octet-seen)) + (macrolet ((read-next-byte () + '(prog1 + (or (funcall reader) + (cond (first-octet-seen + (return-from octets-to-char-code + (recover-from-encoding-error format + "End of data while in UTF-32 sequence."))) + (t (return-from octets-to-char-code nil)))) + (setq first-octet-seen t)))) + (loop for count of-type fixnum from 24 downto 0 by 8 + for octet of-type octet = (read-next-byte) + sum (ash octet count))))) + +(defmethod octets-to-char-code ((format flexi-cr-mixin) reader) + (declare #.*fixnum-optimize-settings*) + (declare (ignore reader)) + (let ((char-code (call-next-method))) + (case char-code + (#.(char-code #\Return) #.(char-code #\Newline)) + (otherwise char-code)))) + +(defmethod octets-to-char-code ((format flexi-crlf-mixin) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function *current-unreader*)) + (declare (ignore reader)) + (let ((char-code (call-next-method))) + (case char-code + (#.(char-code #\Return) + (let ((next-char-code (call-next-method))) + (case next-char-code + (#.(char-code #\Linefeed) #.(char-code #\Newline)) + ;; we saw a CR but no LF afterwards, but then the data + ;; ended, so we just return #\Return + ((nil) #.(char-code #\Return)) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise (funcall *current-unreader* (code-char next-char-code)) + char-code)))) + (otherwise char-code)))) + Modified: trunk/doc/index.html ============================================================================== --- trunk/doc/index.html (original) +++ trunk/doc/index.html Wed May 21 08:00:42 2008 @@ -56,7 +56,6 @@

    1. Example usage
    2. Download and installation -
    3. Backward compatibility with version 0.10.3 and before
    4. Support and mailing lists
    5. The FLEXI-STREAMS dictionary
        @@ -70,6 +69,7 @@
      1. external-format-equal
      2. *default-eol-style*
      3. *default-little-endian* +
      4. external-format-encoding-error
    6. Flexi streams
        @@ -89,11 +89,9 @@
      1. *substitution-char*
      2. octet
      3. flexi-stream-error -
      4. flexi-stream-encoding-error +
      5. flexi-stream-out-of-sync-error
      6. flexi-stream-element-type-error
      7. flexi-stream-element-type-error-element-type -
      8. flexi-stream-position-spec-error -
      9. flexi-stream-position-spec-error-position-spec
    7. In-memory streams
        @@ -110,11 +108,14 @@
      1. with-output-to-sequence
      2. in-memory-stream-error
      3. in-memory-stream-closed-error +
      4. in-memory-stream-position-spec-error +
      5. in-memory-stream-position-spec-error-position-spec
    8. Strings
      1. string-to-octets
      2. octets-to-string +
      3. octet-length
  • File positions @@ -197,7 +198,9 @@

    For more examples see the source code -of CL-RFC2047, +Drakma, Chunga, or CL-WBXML. @@ -221,7 +224,7 @@

    FLEXI-STREAMS together with this documentation can be downloaded from http://weitz.de/files/flexi-streams.tar.gz. The -current version is 0.14.0. +current version is 0.15.0.

    Before you install FLEXI-STREAMS you first need to install the http://arcanes.fr.eu.org/~pierre/2007/02/weitz/ thanks to Pierre Thierry. - -
     
    -

    -Backward compatibility with version 0.10.3 and before

    - -Two special variables used in flexi-streams 0.10.3 and before were removed - -*PROVIDE-USE-VALUE-RESTART* and *USE-REPLACEMENT-CHAR*. - -

    -The code now behaves as if -*PROVIDE-USE-VALUE-RESTART* is always T. -Instead of *USE-REPLACEMENT-CHAR*, you can use -*SUBSTITUTION-CHAR* or -invoke -a USE-VALUE -restart -when a FLEXI-STREAM-ENCODING-ERROR -is signalled. -
     

    Support and mailing lists

    For questions, bug reports, feature requests, improvements, or patches @@ -489,8 +471,8 @@ CL-USER 5 > (make-external-format :ucs-2be) #<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CRLF :LITTLE-ENDIAN NIL) 2067DBE4> -CL-USER 6 > (make-external-format :ucs-2be :eol-style :br) -#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :BR :LITTLE-ENDIAN NIL) 206B54AC> +CL-USER 6 > (make-external-format :ucs-2be :eol-style :cr) +#<FLEXI-STREAMS::EXTERNAL-FORMAT (:UTF-16 :EOL-STYLE :CR :LITTLE-ENDIAN NIL) 206B54AC>
  • @@ -542,6 +524,32 @@ The default value for the little-endian keyword argument of MAKE-EXTERNAL-FORMAT. Its initial value corresponds to the endianess of the platform FLEXI-STREAMS is used on as revealed by the :LITTLE-ENDIAN feature.
    +


    [Condition] +
    external-format-error + +


    +All errors related to external formats are of this type. +There's a slot for the external format which can be accessed with EXTERNAL-FORMAT-ERROR-EXTERNAL-FORMAT. +
    + +


    [Reader] +
    external-format-error-external-format condition => external-format + +


    If condition is of +type EXTERNAL-FORMAT-ERROR, +this function will return the associated external format. Note that +there are errors which happen during the creation of external formats +where this method returns NIL. +
    + +


    [Condition] +
    external-format-encoding-error + +


    +All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE +restart is provided. See also *SUBSTITUTION-CHAR* and example for it. EXTERNAL-FORMAT-ENCODING-ERROR is a subtype of EXTERNAL-FORMAT-ERROR. +
    +

    Flexi streams

    Flexi streams are the core of the FLEXI-STREAMS library. You @@ -736,7 +744,7 @@

    If this value is not NIL, it should be a character which is used (as if by a USE-VALUE restart) whenever during reading an error of -type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise. +type EXTERNAL-FORMAT-ENCODING-ERROR would have been signalled otherwise.
     CL-USER 1 > (defun foo ()
    @@ -770,7 +778,7 @@
     "xy"
     T
     
    -CL-USER 5 > (handler-bind ((flexi-stream-encoding-error (lambda (condition)
    +CL-USER 5 > (handler-bind ((external-format-encoding-error (lambda (condition)
                                                               (use-value #\-))))
                   (foo))
     "--"
    @@ -798,11 +806,12 @@
     


    [Condition] -
    flexi-stream-encoding-error +
    flexi-stream-out-of-sync-error -


    -All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE -restart is provided. See also *SUBSTITUTION-CHAR* and example for it. FLEXI-STREAM-ENCODING-ERROR is a subtype of FLEXI-STREAM-ERROR. +

    This can happen if you're trying to write to +an IO stream which had prior to that +"looked ahead" while reading and now can't "rewind" to the octet where +you should be.


    [Condition] @@ -819,26 +828,6 @@ If condition is of type FLEXI-STREAM-ELEMENT-TYPE-ERROR, this function will return the offending element type.

    -


    [Condition] -
    flexi-stream-position-spec-error - -


    Errors of this type are signalled if an erroneous -position spec is used in conjunction -with FILE-POSITION. This is a -subtype -of FLEXI-STREAM-ERROR -and has an additional slot for the position spec which can be accessed -with FLEXI-STREAM-POSITION-SPEC-ERROR-POSITION-SPEC. -
    - -


    [Reader] -
    flexi-stream-position-spec-error-position-spec condition => position-spec - -


    -If condition is of type FLEXI-STREAM-POSITION-SPEC-ERROR, this function will return the offending position spec. -
    -

    In-memory streams

    The library also provides in-memory binary streams which are modeled after string streams and behave very similar only that they deal with octets instead of characters and the underlying data structure is not a string but either a list or a vector. These streams can obviously be used as the underlying streams for flexi streams. @@ -965,6 +954,25 @@ An error of this type is signalled if one tries to read from or write to an in-memory stream which had already been closed. This is a subtype of IN-MEMORY-STREAM-ERROR. +


    [Condition] +
    in-memory-stream-position-spec-error + +


    Errors of this type are signalled if an erroneous +position spec is used in conjunction +with FILE-POSITION. This is a +subtype +of IN-MEMORY-STREAM-ERROR +and has an additional slot for the position spec which can be accessed +with IN-MEMORY-STREAM-POSITION-SPEC-ERROR-POSITION-SPEC. +
    + +


    [Reader] +
    in-memory-stream-position-spec-error-position-spec condition => position-spec + +


    +If condition is of type IN-MEMORY-STREAM-POSITION-SPEC-ERROR, this function will return the offending position spec. +
    +

    Strings

    This section collects a few convenience functions for strings conversions: @@ -975,29 +983,42 @@

    Converts the Lisp string string from start to end to an array of -octets corresponding to the external format external-format. The defaults for +octets corresponding to the external +format designated by external-format. The defaults for start and end -are 0 and NIL (meaning the length of the -vector). The default for external-format is the -value of -evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) +are 0 and the length of the string. The default +for external-format is :LATIN1.


    [Function] -
    octets-to-string vector &key external-format start end => string +
    octets-to-string sequence &key external-format start end => string + +


    Converts the Lisp +sequence sequence of octets +from start to end to string +using the external format designated +by external-format. The defaults for +start and end +are 0 and the length of the sequence. The default +for external-format is :LATIN1. +
    + +


    [Function] +
    octet-length string &key external-format start end => length-or-nil + +


    -

    Converts the Lisp vector vector -of octets from start -to end to string using -the external -format external-format. The defaults for +Returns the length of the substring of string from start to end in +octets if encoded using +the external format designated +by external-format. Might return NIL +if there's no efficient way to compute the length without iterating +through the whole string. +The defaults for start and end -are 0 and the length of the vector. The default -for external-format is the value of -evaluating (MAKE-EXTERNAL-FORMAT :LATIN1) +are 0 and the length of the sequence. The default +for external-format is :LATIN1.

     

    File positions

    @@ -1034,10 +1055,12 @@ Thanks to David Lichteblau for numerous portability patches. Thanks to Igor Plekhov for the KOI8-R code. Thanks to Anton Vodonosov for -numerous patches and additions. +numerous patches and additions. Thanks +to Hans Hübner for +his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.98 2007/12/29 23:15:27 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.108 2008/05/21 11:53:08 edi Exp $

    BACK TO MY HOMEPAGE Added: trunk/encode.lisp ============================================================================== --- (empty file) +++ trunk/encode.lisp Wed May 21 08:00:42 2008 @@ -0,0 +1,159 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric char-to-octets (format char writer) + (declare #.*standard-optimize-settings*) + (:documentation "Converts the character CHAR to a sequence of octets +using the external format FORMAT. The conversion is performed by +calling the unary function \(which must be a functional object) WRITER +repeatedly each octet. The return value of this function is +unspecified.")) + +(defmethod char-to-octets ((format flexi-latin-1-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (let ((octet (char-code char))) + (when (> octet 255) + (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet)) + (funcall writer octet))) + +(defmethod char-to-octets ((format flexi-ascii-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (let ((octet (char-code char))) + (when (> octet 127) + (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet)) + (funcall writer octet))) + +(defmethod char-to-octets ((format flexi-8-bit-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (with-accessors ((encoding-hash external-format-encoding-hash)) + format + (let ((octet (gethash (char-code char) encoding-hash))) + (unless octet + (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet)) + (funcall writer octet)))) + +(defmethod char-to-octets ((format flexi-utf-8-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (let ((char-code (char-code char))) + (tagbody + (cond ((< char-code #x80) + (funcall writer char-code) + (go zero)) + ((< char-code #x800) + (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (go one)) + ((< char-code #x10000) + (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (go two)) + ((< char-code #x200000) + (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code))) + (go three)) + ((< char-code #x4000000) + (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) + (go four)) + (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100)))) + (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) + four + (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) + three + (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code))) + two + (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) + one + (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) + zero))) + +(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 0) word)) + (funcall writer (ldb (byte 8 8) word)))) + (declare (inline write-word)) + (let ((char-code (char-code char))) + (declare (type char-code-integer char-code)) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) + +(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (flet ((write-word (word) + (funcall writer (ldb (byte 8 8) word)) + (funcall writer (ldb (byte 8 0) word)))) + (declare (inline write-word)) + (let ((char-code (char-code char))) + (declare (type char-code-integer char-code)) + (cond ((< char-code #x10000) + (write-word char-code)) + (t (decf char-code #x10000) + (write-word (logior #xd800 (ldb (byte 10 10) char-code))) + (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) + +(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (let ((char-code (char-code char))) + (funcall writer (ldb (byte 8 0) char-code)) + (funcall writer (ldb (byte 8 8) char-code)) + (funcall writer (ldb (byte 8 16) char-code)) + (funcall writer (ldb (byte 8 24) char-code)))) + +(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (let ((char-code (char-code char))) + (funcall writer (ldb (byte 8 24) char-code)) + (funcall writer (ldb (byte 8 16) char-code)) + (funcall writer (ldb (byte 8 8) char-code)) + (funcall writer (ldb (byte 8 0) char-code)))) + +(defmethod char-to-octets ((format flexi-cr-mixin) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char)) + (if (char= char #\Newline) + (call-next-method format #\Return writer) + (call-next-method))) + +(defmethod char-to-octets ((format flexi-crlf-mixin) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char)) + (cond ((char= char #\Newline) + (call-next-method format #\Return writer) + (call-next-method format #\Linefeed writer)) + (t (call-next-method)))) Modified: trunk/external-format.lisp ============================================================================== --- trunk/external-format.lisp (original) +++ trunk/external-format.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.11 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -52,15 +52,243 @@ a #\Newline will be translated - one of the keywords :CR, :LF, or :CRLF.")) (:documentation "EXTERNAL-FORMAT objects are used to denote -encodings for flexi streams.")) +encodings for flexi streams or for the string functions defined in +strings.lisp.")) (defmethod make-load-form ((thing external-format) &optional environment) "Defines a way to reconstruct external formats. Needed for OpenMCL." (make-load-form-saving-slots thing :environment environment)) +(defclass flexi-cr-mixin () + () + (:documentation "A mixin for external-formats where the end-of-line +designator is #\Return.")) + +(defclass flexi-crlf-mixin () + () + (:documentation "A mixin for external-formats where the end-of-line +designator is the sequence #\Return #\Linefeed.")) + +(defclass flexi-8-bit-format (external-format) + ((encoding-hash :accessor external-format-encoding-hash) + (decoding-table :accessor external-format-decoding-table)) + (:documentation "The class for all flexi streams which use an 8-bit +encoding and thus need additional slots for the encoding/decoding +tables.")) + +(defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format) + () + (:documentation "Special class for external formats which use an +8-bit encoding /and/ have #\Return as the line-end character.")) + +(defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format) + () + (:documentation "Special class for external formats which use an +8-bit encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) + +(defclass flexi-ascii-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +US-ASCII encoding.")) + +(defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format) + () + (:documentation "Special class for external formats which use the +US-ASCII encoding /and/ have #\Return as the line-end character.")) + +(defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format) + () + (:documentation "Special class for external formats which use the +US-ASCII encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) + +(defclass flexi-latin-1-format (flexi-8-bit-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding.")) + +(defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ have #\Return as the line-end character.")) + +(defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-format) + () + (:documentation "Special class for external formats which use the +ISO-8859-1 encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) + +(defclass flexi-utf-32-format (external-format) + () + (:documentation "Abstract class for external formats which use the +UTF-32 encoding.")) + +(defclass flexi-utf-32-le-format (flexi-utf-32-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ have #\Return +as the line-end character.")) + +(defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with little-endian byte ordering /and/ have the +sequence #\Return #\Linefeed as the line-end character.")) + +(defclass flexi-utf-32-be-format (flexi-utf-32-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format) + () + (:documentation "Special class for external formats which use the +UTF-32 encoding with big-endian byte ordering /and/ have #\Return as +the line-end character.")) + +(defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-format) + () + (:documentation "Special class for external formats which use the +the UTF-32 encoding with big-endian byte ordering /and/ have the +sequence #\Return #\Linefeed as the line-end character.")) + +(defclass flexi-utf-16-format (external-format) + () + (:documentation "Abstract class for external formats which use the +UTF-16 encoding.")) + +(defclass flexi-utf-16-le-format (flexi-utf-16-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering.")) + +(defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ have #\Return +as the line-end character.")) + +(defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with little-endian byte ordering /and/ have the +sequence #\Return #\Linefeed as the line-end character.")) + +(defclass flexi-utf-16-be-format (flexi-utf-16-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering.")) + +(defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ have #\Return as +the line-end character.")) + +(defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-format) + () + (:documentation "Special class for external formats which use the +UTF-16 encoding with big-endian byte ordering /and/ have the sequence +#\Return #\Linefeed as the line-end character.")) + +(defclass flexi-utf-8-format (external-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding.")) + +(defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ have #\Return as the line-end character.")) + +(defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format) + () + (:documentation "Special class for external formats which use the +UTF-8 encoding /and/ have the sequence #\Return #\Linefeed as the +line-end character.")) + +(defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs) + "Sets the fixed encoding/decoding tables for this particular +external format." + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) + (with-accessors ((encoding-hash external-format-encoding-hash) + (decoding-table external-format-decoding-table) + (name external-format-name) + (id external-format-id)) + external-format + (multiple-value-setq (encoding-hash decoding-table) + (cond ((ascii-name-p name) + (values +ascii-hash+ +ascii-table+)) + ((koi8-r-name-p name) + (values +koi8-r-hash+ +koi8-r-table+)) + ((iso-8859-name-p name) + (values (cdr (assoc name +iso-8859-hashes+ :test #'eq)) + (cdr (assoc name +iso-8859-tables+ :test #'eq)))) + ((code-page-name-p name) + (values (cdr (assoc id +code-page-hashes+)) + (cdr (assoc id +code-page-tables+)))))))) + +(defun external-format-class-name (real-name &key eol-style little-endian id) + "Given the initargs for a general external format returns the name +\(a symbol) of the most specific subclass matching these arguments." + (declare #.*standard-optimize-settings*) + (declare (ignore id)) + (cond ((ascii-name-p real-name) + (ecase eol-style + (:lf 'flexi-ascii-format) + (:cr 'flexi-cr-ascii-format) + (:crlf 'flexi-crlf-ascii-format))) + ((eq real-name :iso-8859-1) + (ecase eol-style + (:lf 'flexi-latin-1-format) + (:cr 'flexi-cr-latin-1-format) + (:crlf 'flexi-crlf-latin-1-format))) + ((or (koi8-r-name-p real-name) + (iso-8859-name-p real-name) + (code-page-name-p real-name)) + (ecase eol-style + (:lf 'flexi-8-bit-format) + (:cr 'flexi-cr-8-bit-format) + (:crlf 'flexi-crlf-8-bit-format))) + (t (ecase real-name + (:utf-8 (ecase eol-style + (:lf 'flexi-utf-8-format) + (:cr 'flexi-cr-utf-8-format) + (:crlf 'flexi-crlf-utf-8-format))) + (:utf-16 (ecase eol-style + (:lf (if little-endian + 'flexi-utf-16-le-format + 'flexi-utf-16-be-format)) + (:cr (if little-endian + 'flexi-cr-utf-16-le-format + 'flexi-cr-utf-16-be-format)) + (:crlf (if little-endian + 'flexi-crlf-utf-16-le-format + 'flexi-crlf-utf-16-be-format)))) + (:utf-32 (ecase eol-style + (:lf (if little-endian + 'flexi-utf-32-le-format + 'flexi-utf-32-be-format)) + (:cr (if little-endian + 'flexi-cr-utf-32-le-format + 'flexi-cr-utf-32-be-format)) + (:crlf (if little-endian + 'flexi-crlf-utf-32-le-format + 'flexi-crlf-utf-32-be-format)))))))) + (defun make-external-format% (name &key (little-endian *default-little-endian*) - id eol-style) - "Used internally by MAKE-EXTERNAL-FORMAT." + id eol-style) + "Used internally by MAKE-EXTERNAL-FORMAT to default some of the +keywords arguments and to determine the right subclass of +EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) (let* ((real-name (normalize-external-format-name name)) (initargs (cond ((or (iso-8859-name-p real-name) @@ -69,12 +297,14 @@ (list :eol-style (or eol-style *default-eol-style*))) ((code-page-name-p real-name) (list :id (or (known-code-page-id-p id) - (error "Unknown code page ID ~S" id)) + (error 'external-format-error + :format-control "Unknown code page ID ~S" + :format-arguments (list id))) ;; default EOL style for Windows code pages is :CRLF :eol-style (or eol-style :crlf))) (t (list :eol-style (or eol-style *default-eol-style*) :little-endian little-endian))))) - (apply #'make-instance 'external-format + (apply #'make-instance (apply #'external-format-class-name real-name initargs) :name real-name initargs))) @@ -87,6 +317,8 @@ encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF which denote the end-of-line character \(sequence), ID is the ID of a Windows code page \(and ignored for other encodings)." + (declare #.*standard-optimize-settings*) + ;; the keyword arguments are only there for arglist display in the IDE (declare (ignore id little-endian)) (let ((shortcut-args (cdr (assoc name +shortcut-map+)))) (cond (shortcut-args @@ -94,10 +326,20 @@ (append shortcut-args `(:eol-style ,eol-style)))) (t (apply #'make-external-format% name args))))) + +(defun maybe-convert-external-format (external-format) + "Given an external format designator \(a keyword, a list, or an +EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT +object." + (declare #.*standard-optimize-settings*) + (typecase external-format + (symbol (make-external-format external-format)) + (list (apply #'make-external-format external-format)) + (otherwise external-format))) (defun external-format-equal (ef1 ef2) - "Checks whether two EXTERNAL-FORMAT objects denote the same -encoding." + "Checks whether two EXTERNAL-FORMAT objects denote the same encoding." + (declare #.*standard-optimize-settings*) (let* ((name1 (external-format-name ef1)) (code-page-name-p (code-page-name-p name1))) ;; they must habe the same canonical name @@ -121,10 +363,10 @@ (defun normalize-external-format (external-format) "Returns a list which is a `normalized' representation of the -external format EXTERNAL-FORMAT. Used internally by -PRINT-OBJECT, for example. Basically, the result is argument -list that can be fed back to MAKE-EXTERNAL-FORMAT to create an -equivalent object." +external format EXTERNAL-FORMAT. Used internally by PRINT-OBJECT, for +example. Basically, the result is an argument list that can be fed +back to MAKE-EXTERNAL-FORMAT to create an equivalent object." + (declare #.*standard-optimize-settings*) (let ((name (external-format-name external-format)) (eol-style (external-format-eol-style external-format))) (cond ((or (ascii-name-p name) @@ -144,4 +386,47 @@ "How an EXTERNAL-FORMAT object is rendered. Uses NORMALIZE-EXTERNAL-FORMAT." (print-unreadable-object (object stream :type t :identity t) - (prin1 (normalize-external-format object) stream))) \ No newline at end of file + (prin1 (normalize-external-format object) stream))) + +(defgeneric encoding-factor (format) + (:documentation "Given an external format FORMAT, returns a factor +which denotes the octets to characters ratio to expect when +encoding/decoding. If the returned value is an integer, the factor is +assumed to be exact. If it is a \(double) float, the factor is +supposed to be based on heuristics and usually not exact. + +This factor is used in string.lisp.") + (declare #.*standard-optimize-settings*)) + +(defmethod encoding-factor ((format flexi-8-bit-format)) + (declare #.*standard-optimize-settings*) + ;; 8-bit encodings map octets to characters in an exact one-to-one + ;; fashion + 1) + +(defmethod encoding-factor ((format flexi-utf-8-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-8 characters can be anything from one to six octets, but we + ;; assume that the "overhead" is only about 5 percent - this + ;; estimate is obviously very much dependant on the content + 1.05d0) + +(defmethod encoding-factor ((format flexi-utf-16-format)) + (declare #.*standard-optimize-settings*) + ;; usually one character maps to two octets, but characters with + ;; code points above #x10000 map to four octets - we assume that we + ;; usually don't see these characters but of course have to return a + ;; float + 2.0d0) + +(defmethod encoding-factor ((format flexi-utf-32-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-32 always matches every character to four octets + 4) + +(defmethod encoding-factor ((format flexi-crlf-mixin)) + (declare #.*standard-optimize-settings*) + ;; if the sequence #\Return #\Linefeed is the line-end marker, this + ;; obviously makes encodings potentially longer and definitely makes + ;; the estimate unexact + (* 1.02d0 (call-next-method))) Modified: trunk/flexi-streams.asd ============================================================================== --- trunk/flexi-streams.asd (original) +++ trunk/flexi-streams.asd Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.58 2007/12/29 23:15:26 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.65 2008/05/21 11:53:07 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -35,21 +35,26 @@ (in-package :flexi-streams-system) (defsystem :flexi-streams - :version "0.14.0" + :version "0.15.0" :serial t :components ((:file "packages") - (:file "ascii") - (:file "koi8-r") + (:file "mapping") + (:file "ascii") + (:file "koi8-r") (:file "iso-8859") (:file "code-pages") (:file "specials") (:file "util") + (:file "conditions") (:file "external-format") + (:file "encode") + (:file "decode") (:file "in-memory") (:file "stream") #+:lispworks (:file "lw-binary-stream") (:file "output") (:file "input") + (:file "io") (:file "strings")) :depends-on (:trivial-gray-streams)) Modified: trunk/in-memory.lisp ============================================================================== --- trunk/in-memory.lisp (original) +++ trunk/in-memory.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.26 2007/12/29 21:17:05 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/in-memory.lisp,v 1.31 2008/05/19 07:57:07 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -104,179 +104,197 @@ (:documentation "A binary output stream that writes its data to an associated vector.")) -(define-condition in-memory-stream-error (stream-error) - () - (:documentation "Superclass for all errors related to -IN-MEMORY streams.")) - -(define-condition in-memory-stream-closed-error (in-memory-stream-error) - () - (:report (lambda (condition stream) - (format stream "~S is closed." - (stream-error-stream condition)))) - (:documentation "An error that is signalled when someone is trying -to read from or write to a closed IN-MEMORY stream.")) - #+:cmu (defmethod open-stream-p ((stream in-memory-stream)) "Returns a true value if STREAM is open. See ANSI standard." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (in-memory-stream-open-p stream)) #+:cmu (defmethod close ((stream in-memory-stream) &key abort) "Closes the stream STREAM. See ANSI standard." - (declare (ignore abort) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore abort)) (prog1 (in-memory-stream-open-p stream) (setf (in-memory-stream-open-p stream) nil))) (defmethod check-if-open ((stream in-memory-stream)) "Checks if STREAM is open and signals an error otherwise." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (unless (open-stream-p stream) (error 'in-memory-stream-closed-error :stream stream))) (defmethod stream-element-type ((stream in-memory-stream)) "The element type is always OCTET by definition." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) 'octet) (defmethod transform-octet ((stream in-memory-stream) octet) "Applies the transformer of STREAM to octet and returns the result." + (declare #.*standard-optimize-settings*) (funcall (or (in-memory-stream-transformer stream) #'identity) octet)) (defmethod stream-read-byte ((stream list-input-stream)) "Reads one byte by simply popping it off of the top of the list." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (transform-octet stream (or (pop (list-stream-list stream)) - (return-from stream-read-byte :eof)))) + (with-accessors ((list list-stream-list)) + stream + (transform-octet stream (or (pop list) (return-from stream-read-byte :eof))))) (defmethod stream-listen ((stream list-input-stream)) "Checks whether list is not empty." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (list-stream-list stream)) + (with-accessors ((list list-stream-list)) + stream + list)) (defmethod stream-read-sequence ((stream list-input-stream) sequence start end &key) "Repeatedly pops elements from the list until it's empty." - (declare (optimize speed) (type (integer 0 *) start end)) - (loop for index from start below end - while (list-stream-list stream) - do (setf (elt sequence index) - (pop (list-stream-list stream))) - finally (return index))) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((list list-stream-list)) + stream + (loop for index of-type fixnum from start below end + while list + do (setf (elt sequence index) (pop list)) + finally (return index)))) (defmethod stream-read-byte ((stream vector-input-stream)) "Reads one byte and increments INDEX pointer unless we're beyond END pointer." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (let ((index (vector-stream-index stream))) - (cond ((< index (vector-stream-end stream)) - (incf (vector-stream-index stream)) - (transform-octet stream (aref (vector-stream-vector stream) index))) - (t :eof)))) + (with-accessors ((index vector-stream-index) + (end vector-stream-end) + (vector vector-stream-vector)) + stream + (let ((current-index index)) + (declare (fixnum current-index)) + (cond ((< current-index (the fixnum end)) + (incf (the fixnum index)) + (transform-octet stream (aref vector current-index))) + (t :eof))))) (defmethod stream-listen ((stream vector-input-stream)) "Checking whether INDEX is beyond END." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (< (vector-stream-index stream) (vector-stream-end stream))) + (with-accessors ((index vector-stream-index) + (end vector-stream-end)) + stream + (< (the fixnum index) (the fixnum end)))) (defmethod stream-read-sequence ((stream vector-input-stream) sequence start end &key) "Traverses both sequences in parallel until the end of one of them is reached." - (declare (optimize speed) (type (integer 0 *) start end)) - (loop with vector-end of-type (integer 0 #.array-dimension-limit) = (vector-stream-end stream) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (loop with vector-end of-type fixnum = (vector-stream-end stream) with vector = (vector-stream-vector stream) - for index from start below end - for vector-index of-type (integer 0 #.array-dimension-limit) = (vector-stream-index stream) + for index of-type fixnum from start below end + for vector-index of-type fixnum = (vector-stream-index stream) while (< vector-index vector-end) do (setf (elt sequence index) (aref vector vector-index)) - (incf (vector-stream-index stream)) + (incf (the fixnum (vector-stream-index stream))) finally (return index))) (defmethod stream-write-byte ((stream vector-output-stream) byte) "Writes a byte \(octet) by extending the underlying vector." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (check-if-open stream) - (vector-push-extend (transform-octet stream byte) - (vector-stream-vector stream))) + (with-accessors ((vector vector-stream-vector)) + stream + (vector-push-extend (transform-octet stream byte) vector))) (defmethod stream-write-sequence ((stream vector-output-stream) sequence start end &key) "Just calls VECTOR-PUSH-EXTEND repeatedly." - (declare (optimize speed) (type (integer 0 *) start end)) - (loop with vector = (vector-stream-vector stream) - for index from start below end - do (vector-push-extend (elt sequence index) vector)) - sequence) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((vector vector-stream-vector)) + stream + (loop for index of-type fixnum from start below end + do (vector-push-extend (elt sequence index) vector)) + sequence)) (defmethod stream-file-position ((stream vector-input-stream)) "Simply returns the index into the underlying vector." - (declare (optimize speed)) - (vector-stream-index stream)) + (declare #.*standard-optimize-settings*) + (with-accessors ((index vector-stream-index)) + stream + index)) (defmethod (setf stream-file-position) (position-spec (stream vector-input-stream)) "Sets the index into the underlying vector if POSITION-SPEC is acceptable." - (declare (optimize speed)) - (setf (vector-stream-index stream) - (case position-spec - (:start 0) - (:end (vector-stream-end stream)) - (otherwise - (unless (integerp position-spec) - (error 'flexi-stream-position-spec-error - :format-control "Unknown file position designator: ~S." - :format-arguments (list position-spec) - :position-spec position-spec)) - (unless (<= 0 position-spec (vector-stream-end stream)) - (error 'flexi-stream-position-spec-error - :format-control "File position designator ~S is out of bounds." - :format-arguments (list position-spec) - :position-spec position-spec)) - position-spec))) - position-spec) - -(defmethod stream-file-position ((stream vector-output-stream)) - "Simply returns the fill pointer of the underlying vector." - (declare (optimize speed)) - (fill-pointer (vector-stream-vector stream))) - -(defmethod (setf stream-file-position) (position-spec (stream vector-output-stream)) - "Sets the fill pointer underlying vector if POSITION-SPEC is -acceptable. Adjusts the vector if necessary." - (declare (optimize speed)) - (let* ((vector (vector-stream-vector stream)) - (total-size (array-total-size vector)) - (new-fill-pointer + (declare #.*standard-optimize-settings*) + (with-accessors ((index vector-stream-index) + (end vector-stream-end)) + stream + (setq index (case position-spec (:start 0) - (:end - (warn "File position designator :END doesn't really make sense for an output stream.") - total-size) + (:end end) (otherwise (unless (integerp position-spec) - (error 'flexi-stream-position-spec-error + (error 'in-memory-stream-position-spec-error :format-control "Unknown file position designator: ~S." :format-arguments (list position-spec) + :stream stream :position-spec position-spec)) - (unless (<= 0 position-spec array-total-size-limit) - (error 'flexi-stream-position-spec-error + (unless (<= 0 position-spec end) + (error 'in-memory-stream-position-spec-error :format-control "File position designator ~S is out of bounds." :format-arguments (list position-spec) + :stream stream :position-spec position-spec)) - position-spec)))) - (when (> new-fill-pointer total-size) - (adjust-array vector new-fill-pointer)) - (setf (fill-pointer vector) new-fill-pointer) + position-spec))) position-spec)) +(defmethod stream-file-position ((stream vector-output-stream)) + "Simply returns the fill pointer of the underlying vector." + (declare #.*standard-optimize-settings*) + (with-accessors ((vector vector-stream-vector)) + stream + (fill-pointer vector))) + +(defmethod (setf stream-file-position) (position-spec (stream vector-output-stream)) + "Sets the fill pointer underlying vector if POSITION-SPEC is +acceptable. Adjusts the vector if necessary." + (declare #.*standard-optimize-settings*) + (with-accessors ((vector vector-stream-vector)) + stream + (let* ((total-size (array-total-size vector)) + (new-fill-pointer + (case position-spec + (:start 0) + (:end + (warn "File position designator :END doesn't really make sense for an output stream.") + total-size) + (otherwise + (unless (integerp position-spec) + (error 'in-memory-stream-position-spec-error + :format-control "Unknown file position designator: ~S." + :format-arguments (list position-spec) + :stream stream + :position-spec position-spec)) + (unless (<= 0 position-spec array-total-size-limit) + (error 'in-memory-stream-position-spec-error + :format-control "File position designator ~S is out of bounds." + :format-arguments (list position-spec) + :stream stream + :position-spec position-spec)) + position-spec)))) + (declare (fixnum total-size new-fill-pointer)) + (when (> new-fill-pointer total-size) + (adjust-array vector new-fill-pointer)) + (setf (fill-pointer vector) new-fill-pointer) + position-spec))) + (defmethod make-in-memory-input-stream ((vector vector) &key (start 0) (end (length vector)) transformer) @@ -284,7 +302,7 @@ octets in the subsequence of VECTOR bounded by START and END. Each octet returned will be transformed in turn by the optional TRANSFORMER function." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-instance 'vector-input-stream :vector vector :index start @@ -298,7 +316,7 @@ octets in the subsequence of LIST bounded by START and END. Each octet returned will be transformed in turn by the optional TRANSFORMER function." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-instance 'list-input-stream :list (subseq list start end) :transformer transformer)) @@ -306,7 +324,7 @@ (defun make-output-vector (&key (element-type 'octet)) "Creates and returns an array which can be used as the underlying vector for a VECTOR-OUTPUT-STREAM." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-array 0 :adjustable t :fill-pointer 0 :element-type element-type)) @@ -317,7 +335,7 @@ that contains the octes that were actually output. The octets stored will each be transformed by the optional TRANSFORMER function." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (make-instance 'vector-output-stream :vector (make-output-vector :element-type element-type) :transformer transformer)) @@ -329,19 +347,23 @@ been output since the last call to GET-OUTPUT-STREAM-SEQUENCE or since the creation of the stream, whichever occurred most recently. If AS-LIST is true the return value is coerced to a list." - (declare (optimize speed)) - (prog1 - (if as-list - (coerce (vector-stream-vector stream) 'list) - (vector-stream-vector stream)) - (setf (vector-stream-vector stream) - (make-output-vector)))) + (declare #.*standard-optimize-settings*) + (with-accessors ((vector vector-stream-vector)) + stream + (prog1 + (if as-list + (coerce vector 'list) + vector) + (setq vector + (make-output-vector))))) (defmethod output-stream-sequence-length ((stream in-memory-output-stream)) "Returns the current length of the underlying vector of the IN-MEMORY output stream STREAM." (declare (optimize speed)) - (length (the (simple-array * (*)) (vector-stream-vector stream)))) + (with-accessors ((vector vector-stream-vector)) + stream + (length (the (simple-array * (*)) vector)))) (defmacro with-input-from-sequence ((var sequence &key start end transformer) &body body) @@ -382,14 +404,3 @@ , at body (get-output-stream-sequence ,var :as-list ,as-list)) (when ,var (close ,var))))) - -(declaim (inline translate-char)) -(defun translate-char (char-code external-format) - "Returns a list of octets which correspond to the -representation of the character with character code CHAR-CODE -when sent to a flexi stream with external format EXTERNAL-FORMAT. -Used internally by UNREAD-CHAR%. See also STRING-TO-OCTETS." - (declare (optimize speed)) - (with-output-to-sequence (list :as-list t) - (let ((stream (make-flexi-stream list :external-format external-format))) - (write-char (code-char char-code) stream)))) \ No newline at end of file Modified: trunk/input.lisp ============================================================================== --- trunk/input.lisp (original) +++ trunk/input.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.51 2007/12/29 22:58:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.70 2008/05/21 00:18:35 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -34,7 +34,7 @@ "Reads one byte \(octet) from the underlying stream of FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty)." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; we're using S instead of STREAM here because of an ;; issue with SBCL: ;; @@ -58,7 +58,7 @@ "Reads one byte \(octet) from the underlying stream of FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty)." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) (octet-stack flexi-stream-octet-stack) @@ -85,7 +85,7 @@ FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty). Optimized version \(only needed for LispWorks) in case the underlying stream is binary." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) (octet-stack flexi-stream-octet-stack) @@ -104,7 +104,7 @@ (defmethod stream-clear-input ((flexi-input-stream flexi-input-stream)) "Calls the corresponding method for the underlying input stream and also clears the value of the OCTET-STACK slot." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; note that we don't reset the POSITION slot (with-accessors ((octet-stack flexi-stream-octet-stack) (stream flexi-stream-stream)) @@ -116,12 +116,14 @@ "Calls the corresponding method for the underlying input stream but first checks if \(old) input is available in the OCTET-STACK slot." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) (octet-stack flexi-stream-octet-stack) (stream flexi-stream-stream)) flexi-input-stream + (declare (integer position) + (type (or null integer) bound)) (when (and bound (>= position bound)) (return-from stream-listen nil)) @@ -129,7 +131,7 @@ (defmethod stream-read-byte ((stream flexi-input-stream)) "Reads one byte \(octet) from the underlying stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; set LAST-CHAR-CODE slot to NIL because we can't UNREAD-CHAR after ;; this operation (with-accessors ((last-char-code flexi-stream-last-char-code) @@ -140,397 +142,220 @@ (setq last-octet octet) (or octet :eof)))) -(defgeneric unread-char% (char-code flexi-input-stream) - (:documentation "Used internally to put a character denoted by the -character code CHAR-CODE which was already read back on the stream. -Uses the OCTET-STACK slot and decrements the POSITION slot -accordingly.")) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-input-stream)) - "The default method which is un-optimized and uses TRANSLATE-CHAR to -figure out which octets to put on the octet stack." - (declare (optimize speed) (inline translate-char)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (external-format flexi-stream-external-format)) - flexi-input-stream - (declare (integer position)) - (let ((octets-read (translate-char char-code external-format))) - (decf position (length octets-read)) - (setq octet-stack (append octets-read octet-stack))))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-latin-1-input-stream)) - "For ISO-8859-1 we can simply put the character code itself on the -octet stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push char-code octet-stack))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-ascii-input-stream)) - "For ASCII we can simply put the character code itself on the octet -stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push char-code octet-stack))) - -(defmethod unread-char% (char-code (flexi-input-stream flexi-8-bit-input-stream)) - "For 8-bit encodings we just have to put one octet on the octet -stack which we can look up in the encoding hash." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (encoding-hash flexi-stream-encoding-hash)) - flexi-input-stream - (declare (integer position)) - (decf position) - (push (gethash char-code encoding-hash) octet-stack))) - -(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) - (flexi-input-stream flexi-cr-8-bit-input-stream)) - "A kind of `safety net' for the optimized 8-bit versions of -UNREAD-CHAR% which checks for the single case where more than one -octet has to be put on the octet stack." - (declare (optimize speed)) - (with-accessors ((position flexi-stream-position) - (octet-stack flexi-stream-octet-stack) - (external-format flexi-stream-external-format)) - flexi-input-stream - (declare (integer position)) - ;; note that below we use the knowledge that in all 8-bit encodings - ;; #\Return and #\Linefeed are mapped to the same octets - (case (external-format-eol-style external-format) - (:crlf - (decf position 2) - (push #.(char-code #\Linefeed) octet-stack) - (push #.(char-code #\Return) octet-stack)) - (otherwise - (decf position) - (push #.(char-code #\Return) octet-stack))))) - -#+:lispworks -(defmethod unread-char% ((char-code (eql #.(char-code #\Newline))) - (flexi-input-stream flexi-binary-cr-8-bit-input-stream)) - "A kind of `safety net' for the optimized 8-bit versions of -UNREAD-CHAR% which checks for the single case where more than one -octet has to be put on the octet stack. - -This method \(identical to the one defined directly above) exists only -for LispWorks' \"binary\" streams and must be there due to the -slightly clunky class hierarchy." - (declare (optimize speed)) +(defun unread-char% (char flexi-input-stream) + "Used internally to put a character CHAR which was already read back +on the stream. Uses the OCTET-STACK slot and decrements the POSITION +slot accordingly." + (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format)) flexi-input-stream - (declare (integer position)) - ;; note that below we use the knowledge that in all 8-bit encodings - ;; #\Return and #\Linefeed are mapped to the same octets - (case (external-format-eol-style external-format) - (:crlf - (decf position 2) - (push #.(char-code #\Linefeed) octet-stack) - (push #.(char-code #\Return) octet-stack)) - (otherwise - (decf position) - (push #.(char-code #\Return) octet-stack))))) - -(defmacro define-char-reader ((stream-var stream-class) &body body) - "Helper macro to define methods for STREAM-READ-CHAR. Defines a -method for the class STREAM-CLASS using the variable STREAM-VAR and -the code body BODY wrapped with some standard code common to all -methods defined here. The return value of BODY is a character code. -In case of encoding problems, BODY must return the value returned by -\(RECOVER-FROM-ENCODING-ERROR ...)." - (with-unique-names (char-code body-fn) - `(defmethod stream-read-char ((,stream-var ,stream-class)) - "This method was generated with the DEFINE-CHAR-READER macro." - (declare (optimize speed)) - ;; note that we do nothing for the :LF EOL style because we - ;; assume that #\Newline is the same as #\Linefeed in all - ;; Lisps which will use this library - (with-accessors ((last-octet flexi-stream-last-octet) - (last-char-code flexi-stream-last-char-code)) - ,stream-var - ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after - ;; this operation - (setq last-octet nil) - (let ((,char-code - (flet ((,body-fn () , at body)) - (declare (inline ,body-fn) (dynamic-extent (function ,body-fn))) - (,body-fn)))) - ;; remember this character and the current external format - ;; for UNREAD-CHAR - (setq last-char-code ,char-code) - (or (code-char ,char-code) ,char-code)))))) - -(defun recover-from-encoding-error (flexi-stream format-control &rest format-args) - "Helper function used by the STREAM-READ-CHAR methods below to deal -with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and -returns its character code in this case. Otherwise signals a -FLEXI-STREAM-ENCODING-ERROR as determined by the arguments to this -function and provides a corresponding USE-VALUE restart." - (when *substitution-char* - (return-from recover-from-encoding-error (char-code *substitution-char*))) - (restart-case - (apply #'signal-encoding-error flexi-stream format-control format-args) - (use-value (char) - :report "Specify a character to be used instead." - :interactive (lambda () - (loop - (format *query-io* "Type a character: ") - (let ((line (read-line *query-io*))) - (when (= 1 (length line)) - (return (list (char line 0))))))) - (char-code char)))) - -(define-char-reader (stream flexi-latin-1-input-stream) - (or (read-byte* stream) - (return-from stream-read-char :eof))) - -(define-char-reader (stream flexi-ascii-input-stream) - (let ((octet (or (read-byte* stream) - (return-from stream-read-char :eof)))) - (declare (type octet octet)) - (if (> octet 127) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) - octet))) - -(define-char-reader (stream flexi-8-bit-input-stream) - (with-accessors ((encoding-table flexi-stream-encoding-table)) + (let ((counter 0) octets-reversed) + (declare (integer position) + (fixnum counter)) + (flet ((writer (octet) + (incf counter) + (push octet octets-reversed))) + (declare (dynamic-extent (function writer))) + (char-to-octets external-format char #'writer) + (decf position counter) + (setq octet-stack (nreconc octets-reversed octet-stack)))))) + +(defmethod stream-read-char ((stream flexi-input-stream)) + (declare #.*standard-optimize-settings*) + ;; note that we do nothing for the :LF EOL style because we assume + ;; that #\Newline is the same as #\Linefeed in all Lisps which will + ;; use this library + (with-accessors ((external-format flexi-stream-external-format) + (last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code)) stream - (let* ((octet (or (read-byte* stream) - (return-from stream-read-char :eof))) - (char-code (aref (the (simple-array * *) encoding-table) octet))) - (declare (type octet octet)) - (if (or (null char-code) - (= char-code 65533)) - (recover-from-encoding-error stream "No character which corresponds to octet #x~X." octet) - char-code)))) - -(define-char-reader (stream flexi-utf-8-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-8 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (let ((octet (read-next-byte))) - (declare (type octet octet)) - (multiple-value-bind (start count) - (cond ((zerop (logand octet #b10000000)) - (values octet 0)) - ((= #b11000000 (logand octet #b11100000)) - (values (logand octet #b00011111) 1)) - ((= #b11100000 (logand octet #b11110000)) - (values (logand octet #b00001111) 2)) - ((= #b11110000 (logand octet #b11111000)) - (values (logand octet #b00000111) 3)) - ((= #b11111000 (logand octet #b11111100)) - (values (logand octet #b00000011) 4)) - ((= #b11111100 (logand octet #b11111110)) - (values (logand octet #b00000001) 5)) - (t (return-from body - (recover-from-encoding-error stream - "Unexpected value #x~X at start of UTF-8 sequence." - octet)))) - ;; note that we currently don't check for "overlong" - ;; sequences or other illegal values - (loop for result of-type (unsigned-byte 32) - = start then (+ (ash result 6) - (logand octet #b111111)) - repeat count - for octet of-type octet = (read-next-byte) - unless (= #b10000000 (logand octet #b11000000)) - do (return-from body - (recover-from-encoding-error stream - "Unexpected value #x~X in UTF-8 sequence." octet)) - finally (return result)))))))) - -(define-char-reader (stream flexi-utf-16-le-input-stream) - (block body - (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t))) - (read-next-word () - (+ (the octet (read-next-byte)) - (ash (the octet (read-next-byte)) 8)))) - (declare (inline read-next-byte read-next-word) - (dynamic-extent (function read-next-byte) (function read-next-word))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from body - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word))))))) - -(define-char-reader (stream flexi-utf-16-be-input-stream) - (block body - (let (first-octet-seen) - (labels ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-16 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t))) - (read-next-word () - (+ (ash (the octet (read-next-byte)) 8) - (the octet (read-next-byte))))) - (let ((word (read-next-word))) - (cond ((<= #xd800 word #xdfff) - (let ((next-word (read-next-word))) - (unless (<= #xdc00 next-word #xdfff) - (return-from body - (recover-from-encoding-error stream "Unexpected UTF-16 word #x~X following #x~X." - next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) - #x10000))) - (t word))))))) - -(define-char-reader (stream flexi-utf-32-le-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 0 to 24 by 8 - for octet of-type octet = (read-next-byte) - sum (ash octet count)))))) - -(define-char-reader (stream flexi-utf-32-be-input-stream) - (block body - (let (first-octet-seen) - (flet ((read-next-byte () - (prog1 - (or (read-byte* stream) - (cond (first-octet-seen - (return-from body - (recover-from-encoding-error stream - "End of file while in UTF-32 sequence."))) - (t (return-from stream-read-char :eof)))) - (setq first-octet-seen t)))) - (declare (inline read-next-byte) (dynamic-extent (function read-next-byte))) - (loop for count from 24 downto 0 by 8 - for octet of-type octet = (read-next-byte) - sum (ash octet count)))))) - -(defmethod stream-read-char ((stream flexi-cr-mixin)) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of -reading one or more characters from the stream." - (declare (optimize speed)) - (let ((char (call-next-method))) - (when (eq char :eof) - (return-from stream-read-char :eof)) - (with-accessors ((external-format flexi-stream-external-format) - (last-char-code flexi-stream-last-char-code)) - stream - (when (eql char #\Return) - (case (external-format-eol-style external-format) - (:cr (setq char #\Newline - last-char-code #.(char-code #\Newline))) - ;; in the case :CRLF we have to look ahead one character - (:crlf (let ((next-char (call-next-method))) - (case next-char - (#\Linefeed - (setq char #\Newline - last-char-code #.(char-code #\Newline))) - (:eof) - ;; if the character we peeked at wasn't a - ;; linefeed character we push its - ;; constituents back onto our internal - ;; octet stack - (otherwise (unread-char% (char-code next-char) stream))))))) - char))) + ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after + ;; this operation + (setq last-octet nil) + (flet ((reader () + (read-byte* stream)) + (unreader (char) + (unread-char% char stream))) + (declare (dynamic-extent (function reader) (function unreader))) + (let* ((*current-unreader* #'unreader) + (char-code (or (octets-to-char-code external-format #'reader) + (return-from stream-read-char :eof)))) + ;; remember this character and its char code for UNREAD-CHAR + (setq last-char-code char-code) + (or (code-char char-code) char-code))))) (defmethod stream-read-char-no-hang ((stream flexi-input-stream)) "Reads one character if the underlying stream has at least one octet available." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; note that this may block for non-8-bit encodings - I think ;; there's no easy way to handle this correctly (and (stream-listen stream) (stream-read-char stream))) (defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key) - "Reads enough input from STREAM to fill SEQUENCE from START to END. -If SEQUENCE is an array which can store octets we use READ-SEQUENCE to -fill it in one fell swoop, otherwise we iterate using -STREAM-READ-CHAR." - (declare (optimize speed) - (type (integer 0 *) start end)) - (with-accessors ((last-char-code flexi-stream-last-char-code) + "An optimized version which uses a buffer underneath. The function +can deliver characters as well as octets and it decides what to do +based on the element type of the sequence \(which takes precedence) +and the element type of the stream. What you'll really get might also +depend on your Lisp. Some of the implementations are more picky than +others - see for example FLEXI-STREAMS-TEST:READ-SEQUENCE-TEST." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (external-format flexi-stream-external-format) (last-octet flexi-stream-last-octet) - (stream flexi-stream-stream) - (position flexi-stream-position) - (octet-stack flexi-stream-octet-stack)) + (last-char-code flexi-stream-last-char-code) + (element-type flexi-stream-element-type) + (stream flexi-stream-stream)) flexi-input-stream - (declare (integer position)) - (cond ((and (arrayp sequence) - (subtypep 'octet (array-element-type sequence))) - (setf last-char-code nil) - (let ((cursor start)) - (loop with stack = octet-stack - for continuep = (< cursor end) - for octet = (and continuep (pop stack)) - while octet - do (setf (aref sequence cursor) (the octet octet)) - (incf cursor)) - (let ((index - (read-sequence sequence stream :start cursor :end end))) - (incf position (- index start)) - (when (> index start) - (setq last-octet (aref sequence (1- index)))) - index))) - (t - (loop for index from start below end - for element = (stream-read-char flexi-input-stream) - until (eq element :eof) - do (setf (elt sequence index) element) - finally (return index)))))) + (let* (buffer + (buffer-pos 0) + (buffer-end 0) + (index start) + ;; whether we will deliver characters and thus the number + ;; of octets to read might not be equal to the number of + ;; sequence elements to fill + (want-chars-p (or (stringp sequence) + (and (vectorp sequence) + (not (subtypep (array-element-type sequence) 'integer))) + (not (type-equal element-type 'octet)))) + ;; whether we will later be able to rewind the stream if + ;; needed (to get rid of unused octets in the buffer) + (can-rewind-p (and want-chars-p (maybe-rewind stream 0))) + (factor (if want-chars-p (encoding-factor external-format) 1)) + (integer-factor (floor factor)) + ;; it's an interesting question whether it makes sense + ;; performance-wise to make RESERVE significantly bigger + ;; (and thus put potentially a lot more octets into + ;; OCTET-STACK), especially for UTF-8 + (reserve (cond ((not (floatp factor)) 0) + ((not can-rewind-p) (* 2 integer-factor)) + (t (ceiling (* (- factor integer-factor) (- end start))))))) + (declare (fixnum buffer-pos buffer-end index integer-factor reserve) + (boolean want-chars-p can-rewind-p)) + (flet ((compute-fill-amount () + "Computes the amount of octets we can savely read into +the buffer without violating the stream's bound \(if there is one) and +without potentially reading much more than we need \(unless we can +rewind afterwards)." + (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor + (the fixnum (- end index)))) + reserve)) + +buffer-size+))) + (cond (bound (min minimum (- bound position))) + (t minimum)))) + (fill-buffer (end) + "Tries to fill the buffer from BUFFER-POS to END and +returns NIL if the buffer doesn't contain any new data." + ;; put data from octet stack into buffer if there is any + (loop + (when (>= buffer-pos end) + (return)) + (let ((next-octet (pop octet-stack))) + (cond (next-octet + (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet)) + (incf buffer-pos)) + (t (return))))) + (setq buffer-end (read-sequence buffer stream + :start buffer-pos + :end end)) + ;; BUFFER-POS is only greater than zero if the buffer + ;; already contains unread data from the octet stack + ;; (see below), so we test for ZEROP here and do /not/ + ;; compare with BUFFER-POS + (unless (zerop buffer-end) + (incf position buffer-end)))) + (let ((minimum (compute-fill-amount))) + (declare (fixnum minimum)) + (setq buffer (make-octet-buffer minimum)) + ;; fill buffer for the first time or return immediately if + ;; we don't succeed + (unless (fill-buffer minimum) + (return-from stream-read-sequence start))) + (setq buffer-pos 0) + (flet ((next-octet () + "Returns the next octet from the buffer and fills it +if it is exhausted. Returns NIL if there's no more data on the +stream." + (when (>= buffer-pos buffer-end) + (setq buffer-pos 0) + (unless (fill-buffer (compute-fill-amount)) + (return-from next-octet))) + (prog1 + (aref (the (array octet *) buffer) buffer-pos) + (incf buffer-pos))) + (unreader (char) + (unread-char% char flexi-input-stream))) + (declare (dynamic-extent (function next-octet) (function unreader))) + (let ((*current-unreader* #'unreader)) + (macrolet ((iterate (octetp set-place) + "A very unhygienic macro to implement the +actual iteration through the sequence including housekeeping for the +flexi stream. If OCTETP is true, we put octets into the stream, +otherwise characters. SET-PLACE is the place \(using the index INDEX) +used to access the sequence." + `(flet ((leave () + "This is the function used to abort +the LOOP iteration below." + (when (> index start) + ;; if something was read at all, + ;; update LAST-OCTET and + ;; LAST-CHAR-CODE accordingly + (setq ,(if octetp 'last-char-code 'last-octet) + nil + ,(if octetp 'last-octet 'last-char-code) + ,(sublis '((index . (1- index))) set-place))) + (return-from stream-read-sequence index))) + (loop + (when (>= index end) + ;; check if there are octets in the + ;; buffer we didn't use - see + ;; COMPUTE-FILL-AMOUNT above + (let ((rest (- buffer-end buffer-pos))) + (when (plusp rest) + (or (and can-rewind-p + (maybe-rewind stream rest)) + (loop + (when (>= buffer-pos buffer-end) + (return)) + (decf buffer-end) + (push (aref (the (array octet *) buffer) buffer-end) + octet-stack))))) + (leave)) + (let ((next-thing ,(if octetp + '(next-octet) + '(octets-to-char-code external-format #'next-octet)))) + (unless next-thing (leave)) + (setf ,set-place ,(if octetp 'next-thing '(code-char next-thing))) + (incf index)))))) + (etypecase sequence + (string (iterate nil (char sequence index))) + (array + (let ((array-element-type (array-element-type sequence))) + (cond ((type-equal array-element-type 'octet) + (iterate t (aref (the (array octet *) sequence) index))) + ((or (subtypep array-element-type 'integer) + (type-equal element-type 'octet)) + (iterate t (aref sequence index))) + (t + (iterate nil (aref sequence index)))))) + (list + (cond ((type-equal element-type 'octet) + (iterate t (nth index sequence))) + (t + (iterate nil (nth index sequence))))))))))))) (defmethod stream-unread-char ((stream flexi-input-stream) char) "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM. Makes sure CHAR will only be unread if it was the last character read and if it was read with the same encoding that's currently being used by the stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((last-char-code flexi-stream-last-char-code)) stream (unless last-char-code @@ -540,7 +365,7 @@ (error 'flexi-stream-simple-error :format-control "Last character read (~S) was different from ~S." :format-arguments (list (code-char last-char-code) char))) - (unread-char% last-char-code stream) + (unread-char% char stream) (setq last-char-code nil) nil)) @@ -548,7 +373,7 @@ "Similar to UNREAD-CHAR in that it `unreads' the last octet from STREAM. Note that you can only call UNREAD-BYTE after a corresponding READ-BYTE." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((last-octet flexi-stream-last-octet) (octet-stack flexi-stream-octet-stack) (position flexi-stream-position)) @@ -573,7 +398,7 @@ not 0 is returned, if PEEK-TYPE is an octet, the next octet which equals PEEK-TYPE is returned. EOF-ERROR-P and EOF-VALUE are interpreted as usual." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (loop for octet = (read-byte flexi-input-stream eof-error-p eof-value) until (cond ((null peek-type)) ((eql octet eof-value)) Added: trunk/io.lisp ============================================================================== --- (empty file) +++ trunk/io.lisp Wed May 21 08:00:42 2008 @@ -0,0 +1,110 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/io.lisp,v 1.2 2008/05/20 23:44:45 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defmethod reset-input-state ((flexi-io-stream flexi-io-stream)) + "This method is used to clear any state associated with previous +input before output is attempted on the stream. It can fail if the +octet stack is not empty and the stream can't be `rewound'." + (declare #.*standard-optimize-settings*) + (with-accessors ((last-char-code flexi-stream-last-char-code) + (last-octet flexi-stream-last-octet) + (octet-stack flexi-stream-octet-stack) + (stream flexi-stream-stream)) + flexi-io-stream + (when octet-stack + (unless (maybe-rewind stream (length octet-stack)) + (error 'flexi-stream-out-of-sync-error + :stream flexi-io-stream)) + (setq octet-stack nil)) + (setq last-octet nil + last-char-code nil))) + +(defmethod stream-write-byte :before ((stream flexi-io-stream) byte) + (declare #.*standard-optimize-settings*) + (declare (ignore byte)) + (reset-input-state stream)) + +(defmethod stream-write-char :before ((stream flexi-io-stream) char) + (declare #.*standard-optimize-settings*) + (declare (ignore char)) + (reset-input-state stream)) + +(defmethod stream-write-sequence :before ((stream flexi-io-stream) sequence start end &key) + (declare #.*standard-optimize-settings*) + (declare (ignore sequence start end)) + (reset-input-state stream)) + +(defmethod stream-clear-output :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-input-state stream)) + +(defmethod reset-output-state ((flexi-io-stream flexi-io-stream)) + "This method is used to clear any state associated with previous +output before the stream is used for input." + (declare #.*standard-optimize-settings*) + (with-accessors ((column flexi-stream-column)) + flexi-io-stream + (setq column nil))) + +(defmethod stream-read-byte :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-output-state stream)) + +(defmethod stream-read-char :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-output-state stream)) + +(defmethod stream-read-sequence :before ((stream flexi-io-stream) sequence start end &key) + (declare #.*standard-optimize-settings*) + (declare (ignore sequence start end)) + (reset-output-state stream)) + +(defmethod stream-unread-char :before ((stream flexi-io-stream) char) + (declare #.*standard-optimize-settings*) + (declare (ignore char)) + (reset-output-state stream)) + +(defmethod unread-byte :before (byte (stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (declare (ignore byte)) + (reset-output-state stream)) + +(defmethod stream-clear-input :before ((stream flexi-io-stream)) + (declare #.*standard-optimize-settings*) + (reset-output-state stream)) + +(defmethod write-byte* :after (byte (stream flexi-io-stream)) + "Keep POSITION slot up to date even when performing output." + (declare #.*standard-optimize-settings*) + (declare (ignore byte)) + (with-accessors ((position flexi-stream-position)) + stream + (incf position))) \ No newline at end of file Modified: trunk/iso-8859.lisp ============================================================================== --- trunk/iso-8859.lisp (original) +++ trunk/iso-8859.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.5 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/iso-8859.lisp,v 1.7 2008/05/18 21:32:15 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -32,22 +32,22 @@ ;;; the following code was auto-generated from files which can be ;;; found at -(defvar +iso-8859-tables+ - '((:iso-8859-1 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) - (:iso-8859-2 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 352 350 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 357 378 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729)) - (:iso-8859-3 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168 304 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305 351 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 202 203 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219 220 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236 237 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365 349 729)) - (:iso-8859-4 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 352 274 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 291 359 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 326 333 311 244 245 246 247 248 371 250 251 252 361 363 729)) - (:iso-8859-5 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 167 1118 1119)) - (:iso-8859-6 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533 65533 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 1567 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 65533 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533)) - (:iso-8859-7 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 168 169 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533)) - (:iso-8859-8 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533)) - (:iso-8859-9 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255)) - (:iso-8859-10 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 272 352 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 359 382 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 326 333 243 244 245 246 361 248 371 250 251 252 253 254 312)) - (:iso-8859-11 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 65533 65533 65533 65533)) - (:iso-8859-13 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216 169 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 8217)) - (:iso-8859-14 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 7808 169 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7809 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 217 218 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 253 375 255)) - (:iso-8859-15 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 169 170 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 187 338 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255)) - (:iso-8859-16 . #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353 169 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 537 187 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 204 205 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 538 223 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 273 324 242 243 244 337 246 347 369 249 250 251 252 281 539 255))) +(defconstant +iso-8859-tables+ + `((:iso-8859-1 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255))) + (:iso-8859-2 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 728 321 164 317 346 167 168 352 350 356 377 173 381 379 176 261 731 322 180 318 347 711 184 353 351 357 378 733 382 380 340 193 194 258 196 313 262 199 268 201 280 203 282 205 206 270 272 323 327 211 212 336 214 215 344 366 218 368 220 221 354 223 341 225 226 259 228 314 263 231 269 233 281 235 283 237 238 271 273 324 328 243 244 337 246 247 345 367 250 369 252 253 355 729))) + (:iso-8859-3 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 294 728 163 164 65533 292 167 168 304 350 286 308 173 65533 379 176 295 178 179 180 181 293 183 184 305 351 287 309 189 65533 380 192 193 194 65533 196 266 264 199 200 201 202 203 204 205 206 207 65533 209 210 211 212 288 214 215 284 217 218 219 220 364 348 223 224 225 226 65533 228 267 265 231 232 233 234 235 236 237 238 239 65533 241 242 243 244 289 246 247 285 249 250 251 252 365 349 729))) + (:iso-8859-4 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 312 342 164 296 315 167 168 352 274 290 358 173 381 175 176 261 731 343 180 297 316 711 184 353 275 291 359 330 382 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 298 272 325 332 310 212 213 214 215 216 370 218 219 220 360 362 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 299 273 326 333 311 244 245 246 247 248 371 250 251 252 361 363 729))) + (:iso-8859-5 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 173 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 8470 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 167 1118 1119))) + (:iso-8859-6 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 65533 65533 164 65533 65533 65533 65533 65533 65533 65533 1548 173 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 1563 65533 65533 65533 1567 65533 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 65533 65533 65533 65533 65533 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533))) + (:iso-8859-7 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8216 8217 163 8364 8367 166 167 168 169 890 171 172 173 65533 8213 176 177 178 179 900 901 902 183 904 905 906 187 908 189 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 65533 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 65533))) + (:iso-8859-8 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 65533 162 163 164 165 166 167 168 169 215 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 247 187 188 189 190 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 65533 8215 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 65533 65533 8206 8207 65533))) + (:iso-8859-9 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 286 209 210 211 212 213 214 215 216 217 218 219 220 304 350 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 287 241 242 243 244 245 246 247 248 249 250 251 252 305 351 255))) + (:iso-8859-10 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 274 290 298 296 310 167 315 272 352 358 381 173 362 330 176 261 275 291 299 297 311 183 316 273 353 359 382 8213 363 331 256 193 194 195 196 197 198 302 268 201 280 203 278 205 206 207 208 325 332 211 212 213 214 360 216 370 218 219 220 221 222 223 257 225 226 227 228 229 230 303 269 233 281 235 279 237 238 239 240 326 333 243 244 245 246 361 248 371 250 251 252 253 254 312))) + (:iso-8859-11 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 65533 65533 65533 65533 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 65533 65533 65533 65533))) + (:iso-8859-13 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 8221 162 163 164 8222 166 167 216 169 342 171 172 173 174 198 176 177 178 179 8220 181 182 183 248 185 343 187 188 189 190 230 260 302 256 262 196 197 280 274 268 201 377 278 290 310 298 315 352 323 325 211 332 213 214 215 370 321 346 362 220 379 381 223 261 303 257 263 228 229 281 275 269 233 378 279 291 311 299 316 353 324 326 243 333 245 246 247 371 322 347 363 252 380 382 8217))) + (:iso-8859-14 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 7682 7683 163 266 267 7690 167 7808 169 7810 7691 7922 173 174 376 7710 7711 288 289 7744 7745 182 7766 7809 7767 7811 7776 7923 7812 7813 7777 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 372 209 210 211 212 213 214 7786 216 217 218 219 220 221 374 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 373 241 242 243 244 245 246 7787 248 249 250 251 252 253 375 255))) + (:iso-8859-15 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 8364 165 352 167 353 169 170 171 172 173 174 175 176 177 178 179 381 181 182 183 382 185 186 187 338 339 376 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255))) + (:iso-8859-16 . ,(make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 260 261 321 8364 8222 352 167 353 169 536 171 377 173 378 379 176 177 268 322 381 8221 182 183 382 269 537 187 338 339 376 380 192 193 194 258 196 262 198 199 200 201 202 203 204 205 206 207 272 323 210 211 212 336 214 346 368 217 218 219 220 280 538 223 224 225 226 259 228 263 230 231 232 233 234 235 236 237 238 239 273 324 242 243 244 337 246 347 369 249 250 251 252 281 539 255)))) "A list of the ISO-8859 encodings where each element is a cons with the car being a keyword denoting the encoding and the cdr being a vector enumerating the corresponding character codes.") Modified: trunk/koi8-r.lisp ============================================================================== --- trunk/koi8-r.lisp (original) +++ trunk/koi8-r.lisp Wed May 21 08:00:42 2008 @@ -1,6 +1,36 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/koi8-r.lisp,v 1.2 2008/05/18 21:32:15 edi Exp $ + +;;; Copyright (c) 2006, Igor Plekhov. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + (in-package :flexi-streams) ;; http://unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT -(defvar +koi8-r-table+ - #(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 9472 9474 9484 9488 9492 9496 9500 9508 9516 9524 9532 9600 9604 9608 9612 9616 9617 9618 9619 8992 9632 8729 8730 8776 8804 8805 160 8993 176 178 183 247 9552 9553 9554 1105 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 1025 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 169 1102 1072 1073 1094 1076 1077 1092 1075 1093 1080 1081 1082 1083 1084 1085 1086 1087 1103 1088 1089 1090 1091 1078 1074 1100 1099 1079 1096 1101 1097 1095 1098 1070 1040 1041 1062 1044 1045 1060 1043 1061 1048 1049 1050 1051 1052 1053 1054 1055 1071 1056 1057 1058 1059 1046 1042 1068 1067 1047 1064 1069 1065 1063 1066) +(defconstant +koi8-r-table+ + (make-decoding-table '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 9472 9474 9484 9488 9492 9496 9500 9508 9516 9524 9532 9600 9604 9608 9612 9616 9617 9618 9619 8992 9632 8729 8730 8776 8804 8805 160 8993 176 178 183 247 9552 9553 9554 1105 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 1025 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 169 1102 1072 1073 1094 1076 1077 1092 1075 1093 1080 1081 1082 1083 1084 1085 1086 1087 1103 1088 1089 1090 1091 1078 1074 1100 1099 1079 1096 1101 1097 1095 1098 1070 1040 1041 1062 1044 1045 1060 1043 1061 1048 1049 1050 1051 1052 1053 1054 1055 1071 1056 1057 1058 1059 1046 1042 1068 1067 1047 1064 1069 1065 1063 1066)) "An array enumerating the character codes for the KOI8-R encoding.") Modified: trunk/lw-binary-stream.lisp ============================================================================== --- trunk/lw-binary-stream.lisp (original) +++ trunk/lw-binary-stream.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.10 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-binary-stream.lisp,v 1.14 2008/05/18 23:13:59 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -48,394 +48,28 @@ optimizing input and output on LispWorks. See READ-BYTE* and WRITE-BYTE*.")) -(defclass flexi-binary-8-bit-input-stream (flexi-8-bit-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-8-BIT-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-8-bit-input-stream (flexi-cr-mixin flexi-binary-8-bit-input-stream) - () - (:documentation "Like FLEXI-CR-8-BIT-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-ascii-input-stream (flexi-ascii-input-stream flexi-binary-8-bit-input-stream) - () - (:documentation "Like FLEXI-ASCII-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-ascii-input-stream (flexi-cr-mixin flexi-binary-ascii-input-stream) - () - (:documentation "Like FLEXI-CR-ASCII-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-latin-1-input-stream (flexi-latin-1-input-stream flexi-binary-8-bit-input-stream) - () - (:documentation "Like FLEXI-LATIN-1-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-latin-1-input-stream (flexi-cr-mixin flexi-binary-latin-1-input-stream) - () - (:documentation "Like FLEXI-CR-LATIN-1-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-le-input-stream (flexi-utf-32-le-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-32-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-binary-utf-32-le-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-be-input-stream (flexi-utf-32-be-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-32-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-binary-utf-32-be-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-le-input-stream (flexi-utf-16-le-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-16-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-binary-utf-16-le-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-LE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-be-input-stream (flexi-utf-16-be-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-16-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-binary-utf-16-be-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-BE-INPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-8-input-stream (flexi-utf-8-input-stream flexi-binary-input-stream) - () - (:documentation "Like FLEXI-UTF-8-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-8-input-stream (flexi-cr-mixin flexi-binary-utf-8-input-stream) - () - (:documentation "Like FLEXI-CR-UTF-8-INPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-8-bit-output-stream (flexi-8-bit-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-8-BIT-OUTPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-8-bit-output-stream (flexi-cr-mixin flexi-binary-8-bit-output-stream) - () - (:documentation "Like FLEXI-CR-8-BIT-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-ascii-output-stream (flexi-ascii-output-stream flexi-binary-8-bit-output-stream) - () - (:documentation "Like FLEXI-ASCII-OUTPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-ascii-output-stream (flexi-cr-mixin flexi-binary-ascii-output-stream) - () - (:documentation "Like FLEXI-CR-ASCII-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-latin-1-output-stream (flexi-latin-1-output-stream flexi-binary-8-bit-output-stream) - () - (:documentation "Like FLEXI-LATIN-1-OUTPUT-STREAM but optimized -for LispWorks binary streams.")) - -(defclass flexi-binary-cr-latin-1-output-stream (flexi-cr-mixin flexi-binary-latin-1-output-stream) - () - (:documentation "Like FLEXI-CR-LATIN-1-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-le-output-stream (flexi-utf-32-le-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-32-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-binary-utf-32-le-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-be-output-stream (flexi-utf-32-be-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-32-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-binary-utf-32-be-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-le-output-stream (flexi-utf-16-le-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-16-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-binary-utf-16-le-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-LE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-be-output-stream (flexi-utf-16-be-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-16-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-binary-utf-16-be-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-BE-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-8-output-stream (flexi-utf-8-output-stream flexi-binary-output-stream) - () - (:documentation "Like FLEXI-UTF-8-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-8-output-stream (flexi-cr-mixin flexi-binary-utf-8-output-stream) - () - (:documentation "Like FLEXI-CR-UTF-8-OUTPUT-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-8-bit-io-stream (flexi-binary-io-stream flexi-8-bit-io-stream) - () - (:documentation "Like FLEXI-8-BIT-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-8-bit-io-stream (flexi-cr-mixin flexi-binary-8-bit-io-stream) - () - (:documentation "Like FLEXI-CR-8-BIT-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-ascii-io-stream (flexi-ascii-io-stream flexi-binary-8-bit-io-stream) - () - (:documentation "Like FLEXI-ASCII-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-ascii-io-stream (flexi-cr-mixin flexi-binary-ascii-io-stream) - () - (:documentation "Like FLEXI-CR-ASCII-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-latin-1-io-stream (flexi-latin-1-io-stream flexi-binary-8-bit-io-stream) - () - (:documentation "Like FLEXI-LATIN-1-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-latin-1-io-stream (flexi-cr-mixin flexi-binary-latin-1-io-stream) - () - (:documentation "Like FLEXI-CR-LATIN-1-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-le-io-stream (flexi-utf-32-le-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-32-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-binary-utf-32-le-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-32-be-io-stream (flexi-utf-32-be-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-32-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-binary-utf-32-be-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-32-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-le-io-stream (flexi-utf-16-le-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-16-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-binary-utf-16-le-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-LE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-16-be-io-stream (flexi-utf-16-be-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-16-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-binary-utf-16-be-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-16-BE-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-utf-8-io-stream (flexi-utf-8-io-stream flexi-binary-io-stream) - () - (:documentation "Like FLEXI-UTF-8-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defclass flexi-binary-cr-utf-8-io-stream (flexi-cr-mixin flexi-binary-utf-8-io-stream) - () - (:documentation "Like FLEXI-CR-UTF-8-IO-STREAM but -optimized for LispWorks binary streams.")) - -(defmethod set-class ((stream flexi-binary-input-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-input-stream - 'flexi-binary-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-input-stream - 'flexi-binary-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-input-stream - 'flexi-binary-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-input-stream - 'flexi-binary-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-input-stream - 'flexi-binary-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-input-stream - 'flexi-binary-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-input-stream - 'flexi-binary-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-input-stream - 'flexi-binary-utf-32-be-input-stream)))))))))) - -(defmethod set-class ((stream flexi-binary-output-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-output-stream - 'flexi-binary-ascii-output-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-output-stream - 'flexi-binary-latin-1-output-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-output-stream - 'flexi-binary-8-bit-output-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-output-stream - 'flexi-binary-utf-8-output-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-output-stream - 'flexi-binary-cr-utf-16-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-output-stream - 'flexi-binary-utf-16-be-output-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-output-stream - 'flexi-binary-cr-utf-32-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-output-stream - 'flexi-binary-utf-32-be-output-stream)))))))))) - -(defmethod set-class ((stream flexi-binary-io-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-binary-cr-ascii-io-stream - 'flexi-binary-ascii-io-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-binary-cr-latin-1-io-stream - 'flexi-binary-latin-1-io-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-binary-cr-8-bit-io-stream - 'flexi-binary-8-bit-io-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-binary-cr-utf-8-io-stream - 'flexi-binary-utf-8-io-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-16-le-io-stream - 'flexi-binary-cr-utf-16-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-16-le-io-stream - 'flexi-binary-utf-16-be-io-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-binary-cr-utf-32-le-io-stream - 'flexi-binary-cr-utf-32-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-binary-utf-32-le-io-stream - 'flexi-binary-utf-32-be-io-stream)))))))))) - - (defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. Only needed for LispWorks." - (declare (ignore initargs) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) (with-accessors ((stream flexi-stream-stream)) flexi-stream (when (subtypep (stream-element-type stream) 'octet) (change-class flexi-stream (typecase flexi-stream (flexi-io-stream 'flexi-binary-io-stream) - (otherwise 'flexi-binary-output-stream))) - (set-class flexi-stream)))) + (otherwise 'flexi-binary-output-stream)))))) (defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) "Might change the class of FLEXI-STREAM for optimization purposes. Only needed for LispWorks." - (declare (ignore initargs) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) (with-accessors ((stream flexi-stream-stream)) flexi-stream (when (subtypep (stream-element-type stream) 'octet) (change-class flexi-stream (typecase flexi-stream (flexi-io-stream 'flexi-binary-io-stream) - (otherwise 'flexi-binary-input-stream))) - (set-class flexi-stream)))) + (otherwise 'flexi-binary-input-stream)))))) Added: trunk/mapping.lisp ============================================================================== --- (empty file) +++ trunk/mapping.lisp Wed May 21 08:00:42 2008 @@ -0,0 +1,74 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(deftype octet () + "A shortcut for \(UNSIGNED-BYTE 8)." + '(unsigned-byte 8)) + +(deftype char* () + "Convenience shortcut to paper over the difference between LispWorks +and the other Lisps." + #+:lispworks 'lw:simple-char + #-:lispworks 'character) + +(deftype char-code-integer () + "The subtype of integers which can be returned by the function CHAR-CODE." + '(integer 0 #.(1- char-code-limit))) + +(deftype code-point () + "The subtype of integers that's just big enough to hold all Unicode +codepoints. + +See for example ." + '(mod #x110000)) + +(defmacro defconstant (name value &optional doc) + "Make sure VALUE is evaluated only once \(to appease SBCL)." + `(cl:defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value) + ,@(when doc (list doc)))) + +(defun invert-table (table) + "`Inverts' an array which maps octets to character codes to a hash +table which maps character codes to octets." + (let ((hash (make-hash-table))) + (loop for octet from 0 + for char-code across table + unless (= char-code 65533) + do (setf (gethash char-code hash) octet)) + hash)) + +(defun make-decoding-table (list) + "Creates and returns an array which contains the elements in the +list LIST and has an element type that's suitable for character +codes." + (make-array (length list) + :element-type 'char-code-integer + :initial-contents list)) \ No newline at end of file Modified: trunk/output.lisp ============================================================================== --- trunk/output.lisp (original) +++ trunk/output.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.44 2007/12/29 22:23:23 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.60 2008/05/21 01:26:43 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -29,173 +29,50 @@ (in-package :flexi-streams) -(defgeneric write-byte* (byte sink) +(defgeneric write-byte* (byte stream) + (declare #.*standard-optimize-settings*) (:documentation "Writes one byte \(octet) to the underlying stream -of SINK \(if SINK is a flexi stream) or adds the byte to the end of -SINK \(if SINK is an array with a fill pointer).")) +STREAM.")) #-:lispworks -(defmethod write-byte* (byte (sink flexi-output-stream)) - (declare (optimize speed)) +(defmethod write-byte* (byte (flexi-output-stream flexi-output-stream)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) - sink + flexi-output-stream (write-byte byte stream))) #+:lispworks -(defmethod write-byte* (byte (sink flexi-output-stream)) - (declare (optimize speed)) +(defmethod write-byte* (byte (flexi-output-stream flexi-output-stream)) + (declare #.*standard-optimize-settings*) ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all ;; bivalent streams in LispWorks (4.4.6) (with-accessors ((stream flexi-stream-stream)) - sink + flexi-output-stream (write-sequence (make-array 1 :element-type 'octet :initial-element byte) stream) byte)) #+:lispworks -(defmethod write-byte* (byte (sink flexi-binary-output-stream)) +(defmethod write-byte* (byte (flexi-output-stream flexi-binary-output-stream)) "Optimized version \(only needed for LispWorks) in case the underlying stream is binary." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) - sink + flexi-output-stream (write-byte byte stream))) -(defmethod write-byte* (byte (sink array)) - (declare (optimize speed)) - (vector-push byte sink)) - -(defgeneric char-to-octets (stream char sink) - (:documentation "Converts the character CHAR to sequence of octets -and sends this sequence to SINK. STREAM will always be a flexi stream -which is used to determine how the character should be converted. -This function does all the work for STREAM-WRITE-CHAR in which case -SINK is the same as STREAM. It is also used in the implementation of -STREAM-WRITE-SEQUENCE below.")) - (defmethod stream-write-char ((stream flexi-output-stream) char) - (declare (optimize speed)) - (char-to-octets stream char stream)) - -(defmethod char-to-octets ((stream flexi-latin-1-output-stream) char sink) - (declare (optimize speed)) - (let ((octet (char-code char))) - (when (> octet 255) - (signal-encoding-error stream "~S is not a LATIN-1 character." char)) - (write-byte* octet sink)) - char) - -(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink) - (declare (optimize speed)) - (let ((octet (char-code char))) - (when (> octet 127) - (signal-encoding-error stream "~S is not an ASCII character." char)) - (write-byte* octet sink)) - char) - -(defmethod char-to-octets ((stream flexi-8-bit-output-stream) char sink) - (declare (optimize speed)) - (with-accessors ((encoding-hash flexi-stream-encoding-hash)) - stream - (let ((octet (gethash (char-code char) encoding-hash))) - (unless octet - (signal-encoding-error stream "~S is not in this encoding." char)) - (write-byte* octet sink)) - char)) - -(defmethod char-to-octets ((stream flexi-utf-8-output-stream) char sink) - (declare (optimize speed)) - (let ((char-code (char-code char))) - (tagbody - (cond ((< char-code #x80) - (write-byte* char-code sink) - (go zero)) - ((< char-code #x800) - (write-byte* (logior #b11000000 (ldb (byte 5 6) char-code)) sink) - (go one)) - ((< char-code #x10000) - (write-byte* (logior #b11100000 (ldb (byte 4 12) char-code)) sink) - (go two)) - ((< char-code #x200000) - (write-byte* (logior #b11110000 (ldb (byte 3 18) char-code)) sink) - (go three)) - ((< char-code #x4000000) - (write-byte* (logior #b11111000 (ldb (byte 2 24) char-code)) sink) - (go four)) - (t (write-byte* (logior #b11111100 (ldb (byte 1 30) char-code)) sink))) - (write-byte* (logior #b10000000 (ldb (byte 6 24) char-code)) sink) - four - (write-byte* (logior #b10000000 (ldb (byte 6 18) char-code)) sink) - three - (write-byte* (logior #b10000000 (ldb (byte 6 12) char-code)) sink) - two - (write-byte* (logior #b10000000 (ldb (byte 6 6) char-code)) sink) - one - (write-byte* (logior #b10000000 (ldb (byte 6 0) char-code)) sink) - zero)) - char) - -(defmethod char-to-octets ((stream flexi-utf-16-le-output-stream) char sink) - (declare (optimize speed)) - (flet ((write-word (word) - (write-byte* (ldb (byte 8 0) word) sink) - (write-byte* (ldb (byte 8 8) word) sink))) - (declare (inline write-word) (dynamic-extent (function write-word))) - (let ((char-code (char-code char))) - (cond ((< char-code #x10000) - (write-word char-code)) - (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) - -(defmethod char-to-octets ((stream flexi-utf-16-be-output-stream) char sink) - (declare (optimize speed)) - (flet ((write-word (word) - (write-byte* (ldb (byte 8 8) word) sink) - (write-byte* (ldb (byte 8 0) word) sink))) - (declare (inline write-word) (dynamic-extent (function write-word))) - (let ((char-code (char-code char))) - (cond ((< char-code #x10000) - (write-word char-code)) - (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))) - char) - -(defmethod char-to-octets ((stream flexi-utf-32-le-output-stream) char sink) - (declare (optimize speed)) - (loop with char-code = (char-code char) - for position in '(0 8 16 24) do - (write-byte* (ldb (byte 8 position) char-code) sink)) - char) - -(defmethod char-to-octets ((stream flexi-utf-32-be-output-stream) char sink) - (declare (optimize speed)) - (loop with char-code = (char-code char) - for position in '(24 16 8 0) do - (write-byte* (ldb (byte 8 position) char-code) sink)) - char) - -(defmethod char-to-octets ((stream flexi-cr-mixin) char sink) - "The `base' method for all streams which need end-of-line -conversion. Uses CALL-NEXT-METHOD to do the actual work of sending -one or more characters to SINK." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((external-format flexi-stream-external-format)) stream - (case char - (#\Newline - (case (external-format-eol-style external-format) - (:cr (call-next-method stream #\Return sink)) - (:crlf (call-next-method stream #\Return sink) - (call-next-method stream #\Linefeed sink)))) - (otherwise (call-next-method))) - char)) + (flet ((writer (octet) + (write-byte* octet stream))) + (declare (dynamic-extent (function writer))) + (char-to-octets external-format char #'writer)))) (defmethod stream-write-char :after ((stream flexi-output-stream) char) - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; update the column unless we're in the middle of the line and ;; the current value is NIL (with-accessors ((column flexi-stream-column)) @@ -206,7 +83,7 @@ (defmethod stream-clear-output ((flexi-output-stream flexi-output-stream)) "Simply calls the corresponding method for the underlying output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) flexi-output-stream (clear-output stream))) @@ -214,7 +91,7 @@ (defmethod stream-finish-output ((flexi-output-stream flexi-output-stream)) "Simply calls the corresponding method for the underlying output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) flexi-output-stream (finish-output stream))) @@ -222,7 +99,7 @@ (defmethod stream-force-output ((flexi-output-stream flexi-output-stream)) "Simply calls the corresponding method for the underlying output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) flexi-output-stream (force-output stream))) @@ -230,14 +107,14 @@ (defmethod stream-line-column ((flexi-output-stream flexi-output-stream)) "Returns the column stored in the COLUMN slot of the FLEXI-OUTPUT-STREAM object STREAM." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((column flexi-stream-column)) flexi-output-stream column)) (defmethod stream-write-byte ((flexi-output-stream flexi-output-stream) byte) "Writes a byte \(octet) to the underlying stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((column flexi-stream-column)) flexi-output-stream ;; set column to NIL because we don't know how to handle binary @@ -248,69 +125,95 @@ #+:allegro (defmethod stream-terpri ((stream flexi-output-stream)) "Writes a #\Newline character to the underlying stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline)) -(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) - "Writes all elements of the sequence SEQUENCE from START to END -to the underlying stream. The elements can be either octets or -characters. Characters are output according to the current -encoding \(external format) of the FLEXI-OUTPUT-STREAM object -STREAM." - (declare (optimize speed) - (type (integer 0 *) start end)) - (with-accessors ((stream flexi-stream-stream) - (column flexi-stream-column)) - flexi-output-stream - (cond ((and (arrayp sequence) - (subtypep (array-element-type sequence) 'octet)) - ;; set column to NIL because we don't know how to handle binary - ;; output mixed with character output - (setq column nil) - (write-sequence sequence stream :start start :end end)) - (t (loop for index from start below end - for element = (elt sequence index) - when (characterp element) do - (stream-write-char flexi-output-stream element) - else do - (stream-write-byte flexi-output-stream element)) - sequence)))) - -(defmethod stream-write-sequence ((stream flexi-output-stream) (sequence string) start end &key) - "Optimized method for the cases where SEQUENCE is a string. Fills -an internal buffer and uses repeated calls to WRITE-SEQUENCE to write -to the underlying stream." - (declare (optimize speed) - (type (integer 0 *) start end)) - ;; don't use this optimized method for bivalent character streams on - ;; LispWorks, as it currently gets confused by the fill pointer - #+:lispworks - (unless (typep stream 'flexi-binary-output-stream) - (return-from stream-write-sequence - (call-next-method))) - (let* ((buffer (make-array (+ +buffer-size+ 20) - :element-type '(unsigned-byte 8) - :fill-pointer 0)) - (last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t))) - (loop for index from start below end - do (char-to-octets stream (aref sequence index) buffer) - when (>= (fill-pointer buffer) +buffer-size+) do - (write-sequence buffer (flexi-stream-stream stream)) - (setf (fill-pointer buffer) 0) - finally (when (>= (fill-pointer buffer) 0) - (write-sequence buffer (flexi-stream-stream stream)))) - (setf (flexi-stream-column stream) - (cond (last-newline-pos (- end last-newline-pos 1)) - ((flexi-stream-column stream) - (+ (flexi-stream-column stream) (- end start)))))) +(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key) + "An optimized version which uses a buffer underneath. The function +can accepts characters as well as octets and it decides what to do +based on the element type of the sequence \(if possible) or on the +individual elements, i.e. you can mix characters and octets in +SEQUENCE if you want. Whether that really works might also depend on +your Lisp, some of the implementations are more picky than others." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((column flexi-stream-column) + (external-format flexi-stream-external-format) + (stream flexi-stream-stream)) + stream + (let* ((octet-seen-p nil) + (buffer-pos 0) + ;; whether we might receive characters and thus the number + ;; of octets to output might not be equal to the number of + ;; sequence elements to write + (chars-p (or (listp sequence) + (and (vectorp sequence) + (not (subtypep (array-element-type sequence) 'integer))))) + (factor (if chars-p (encoding-factor external-format) 1)) + (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) + (buffer (make-octet-buffer buffer-size))) + (declare (fixnum buffer-pos buffer-size) + (boolean octet-seen-p) + (type (array octet *) buffer)) + (labels ((flush-buffer () + "Sends all octets in BUFFER to the underlying stream." + (write-sequence buffer stream :end buffer-pos) + (setq buffer-pos 0)) + (write-octet (octet) + "Adds one octet to the buffer and flush it if necessary." + (declare (octet octet)) + (when (>= buffer-pos buffer-size) + (flush-buffer)) + (setf (aref buffer buffer-pos) octet) + (incf buffer-pos)) + (write-character (char) + "Adds the octets representing the character CHAR to the buffer." + (char-to-octets external-format char #'write-octet)) + (write-object (object) + "Dispatches to WRITE-OCTET or WRITE-CHARACTER +depending on the type of OBJECT." + (etypecase object + (octet (setq octet-seen-p t) + (write-octet object)) + (character (write-character object))))) + (declare (dynamic-extent (function write-octet))) + (macrolet ((iterate (octets-p output-form) + "An unhygienic macro to implement the actual +iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one +sequence element and put its octet representation into the buffer. +OCTETS-P is true if we know in advance that we will send octets." + `(progn + ,@(if octets-p '((setq octet-seen-p t))) + (loop for index of-type fixnum from start below end + do ,output-form + finally (when (plusp buffer-pos) + (flush-buffer)))))) + (etypecase sequence + (string (iterate nil (write-character (char sequence index)))) + (array + (let ((array-element-type (array-element-type sequence))) + (cond ((type-equal array-element-type 'octet) + (iterate t (write-octet (aref (the (array octet *) sequence) index)))) + ((subtypep array-element-type 'integer) + (iterate t (write-octet (aref sequence index)))) + (t (iterate nil (write-object (aref sequence index))))))) + (list (iterate nil (write-object (nth index sequence))))) + ;; update the column slot, setting if to NIL if we sent octets + (setq column + (cond (octet-seen-p nil) + (t (let ((last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (cond (last-newline-pos (- end last-newline-pos 1)) + (column (+ column (- end start)))))))))))) + sequence) (defmethod stream-write-string ((stream flexi-output-stream) string &optional (start 0) (end (length string))) "Simply hands over to the optimized method for STREAM-WRITE-SEQUENCE." + (declare #.*standard-optimize-settings*) (stream-write-sequence stream string start (or end (length string)))) Modified: trunk/packages.lisp ============================================================================== --- trunk/packages.lisp (original) +++ trunk/packages.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.30 2007/10/11 20:23:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.35 2008/05/21 01:43:43 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -34,13 +34,16 @@ (defpackage :flexi-streams (:use :cl :trivial-gray-streams) - (:nicknames :flex) - #+:lispworks - (:shadow :with-accessors) + (:nicknames :flex) + (:shadow #+:lispworks :with-accessors + :defconstant) (:export :*default-eol-style* :*default-little-endian* :*substitution-char* :external-format-eol-style + :external-format-error + :external-format-error-external-format + :external-format-encoding-error :external-format-equal :external-format-id :external-format-little-endian @@ -50,21 +53,21 @@ :flexi-io-stream :flexi-stream :flexi-stream-bound + :flexi-stream-column :flexi-stream-external-format - :flexi-stream-encoding-error :flexi-stream-element-type :flexi-stream-element-type-error :flexi-stream-element-type-error-element-type :flexi-stream-error - :flexi-stream-column + :flexi-stream-out-of-sync-error :flexi-stream-position - :flexi-stream-position-spec-error - :flexi-stream-position-spec-error-position-spec :flexi-stream-stream :get-output-stream-sequence :in-memory-stream :in-memory-stream-closed-error :in-memory-stream-error + :in-memory-stream-position-spec-error + :in-memory-stream-position-spec-error-position-spec :in-memory-input-stream :in-memory-output-stream :list-stream @@ -73,6 +76,7 @@ :make-in-memory-output-stream :make-flexi-stream :octet + :octet-length :octets-to-string :output-stream-sequence-length :peek-byte Modified: trunk/specials.lisp ============================================================================== --- trunk/specials.lisp (original) +++ trunk/specials.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.25 2007/12/29 21:17:06 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.32 2008/05/20 23:01:51 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -29,9 +29,32 @@ (in-package :flexi-streams) -(deftype octet () - "A shortcut for \(UNSIGNED-BYTE 8)." - '(unsigned-byte 8)) +(defvar *standard-optimize-settings* + '(optimize + speed + (safety 0) + (space 0) + (debug 1) + (compilation-speed 0)) + "The standard optimize settings used by most declaration expressions.") + +(defvar *fixnum-optimize-settings* + '(optimize + speed + (safety 0) + (space 0) + (debug 1) + (compilation-speed 0) + #+:lispworks (hcl:fixnum-safety 0)) + "Like *STANDARD-OPTIMIZE-SETTINGS*, but \(on LispWorks) with all +arithmetic being fixnum arithmetic.") + +(defvar *current-unreader* nil + "A unary function which might be called to `unread' a character +\(i.e. the sequence of octets it represents). + +Used by the function OCTETS-TO-CHAR-CODE and must always be bound to a +suitable functional object when this function is called.") (defvar +name-map+ '((:utf8 . :utf-8) @@ -129,38 +152,28 @@ \(as if by a USE-VALUE restart) whenever during reading an error of type FLEXI-STREAM-ENCODING-ERROR would have been signalled otherwise.") -(defun invert-table (table) - "`Inverts' an array which maps octets to character codes to a -hash tables which maps character codes to octets." - (let ((hash (make-hash-table))) - (loop for octet from 0 - for char-code across table - unless (= char-code 65533) - do (setf (gethash char-code hash) octet)) - hash)) - -(defvar +iso-8859-hashes+ +(defconstant +iso-8859-hashes+ (loop for (name . table) in +iso-8859-tables+ collect (cons name (invert-table table))) "An alist which maps names for ISO-8859 encodings to hash tables which map character codes to the corresponding octets.") -(defvar +code-page-hashes+ +(defconstant +code-page-hashes+ (loop for (id . table) in +code-page-tables+ collect (cons id (invert-table table))) "An alist which maps IDs of Windows code pages to hash tables which map character codes to the corresponding octets.") -(defvar +ascii-hash+ (invert-table +ascii-table+) +(defconstant +ascii-hash+ (invert-table +ascii-table+) "A hash table which maps US-ASCII character codes to the corresponding octets.") -(defvar +koi8-r-hash+ (invert-table +koi8-r-table+) +(defconstant +koi8-r-hash+ (invert-table +koi8-r-table+) "A hash table which maps KOI8-R character codes to the corresponding octets.") (defconstant +buffer-size+ 8192 - "Size of buffers used for internal purposes.") + "Default size for buffers used for internal purposes.") (pushnew :flexi-streams *features*) Modified: trunk/stream.lisp ============================================================================== --- trunk/stream.lisp (original) +++ trunk/stream.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.53 2007/12/29 22:26:04 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/stream.lisp,v 1.61 2008/05/19 22:32:56 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -40,7 +40,7 @@ :accessor flexi-stream-external-format :documentation "The encoding currently used by this stream. Can be changed on the fly.") - (element-type :initform #+:lispworks 'lw:simple-char #-:lispworks 'character + (element-type :initform 'char* :initarg :element-type :accessor flexi-stream-element-type :documentation "The element type of this stream.")) @@ -49,59 +49,11 @@ allow for multi-octet external formats. FLEXI-STREAM itself is a mixin and should not be instantiated.")) -(define-condition flexi-stream-error (stream-error) - () - (:documentation "Superclass for all errors related to -flexi streams.")) - -(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) - () - (:documentation "Like FLEXI-STREAM-ERROR but with formatting -capabilities.")) - -(define-condition flexi-stream-element-type-error (flexi-stream-error) - ((element-type :initarg :element-type - :reader flexi-stream-element-type-error-element-type)) - (:report (lambda (condition stream) - (format stream "Element type ~S not allowed." - (flexi-stream-element-type-error-element-type condition)))) - (:documentation "Errors of this type are signalled if the flexi -stream has a wrong element type.")) - -(define-condition flexi-stream-encoding-error (flexi-stream-simple-error) - () - (:documentation "Errors of this type are signalled if there is an -encoding problem.")) - -(define-condition flexi-stream-position-spec-error (flexi-stream-simple-error) - ((position-spec :initarg :position-spec - :reader flexi-stream-position-spec-error-position-spec)) - (:documentation "Errors of this type are signalled if an -erroneous position spec is used in conjunction with -FILE-POSITION.")) - -(defun signal-encoding-error (flexi-stream format-control &rest format-args) - "Convenience function similar to ERROR to signal conditions of type -FLEXI-STREAM-ENCODING-ERROR." - (error 'flexi-stream-encoding-error - :format-control format-control - :format-arguments format-args - :stream flexi-stream)) - -(defun maybe-convert-external-format (external-format) - "Given an external format designator \(a keyword, a list, or an -EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT -object." - (typecase external-format - (symbol (make-external-format external-format)) - (list (apply #'make-external-format external-format)) - (otherwise external-format))) - (defmethod initialize-instance :after ((flexi-stream flexi-stream) &rest initargs) "Makes sure the EXTERNAL-FORMAT and ELEMENT-TYPE slots contain reasonable values." - (declare (ignore initargs) - (optimize speed)) + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) (with-accessors ((external-format flexi-stream-external-format) (element-type flexi-stream-element-type)) flexi-stream @@ -110,19 +62,19 @@ (error 'flexi-stream-element-type-error :element-type element-type :stream flexi-stream)) - (setq external-format (maybe-convert-external-format external-format))) - ;; set actual class and maybe contents of 8-bit encoding slots - (set-class flexi-stream)) + (setq external-format (maybe-convert-external-format external-format)))) (defmethod (setf flexi-stream-external-format) :around (new-value (flexi-stream flexi-stream)) "Converts the new value to an EXTERNAL-FORMAT object if necessary." + (declare #.*standard-optimize-settings*) (call-next-method (maybe-convert-external-format new-value) flexi-stream)) (defmethod (setf flexi-stream-element-type) :before (new-value (flexi-stream flexi-stream)) "Checks whether the new value makes sense before it is set." + (declare #.*standard-optimize-settings*) (unless (or (subtypep new-value 'character) - (subtypep new-value 'octet)) + (type-equal new-value 'octet)) (error 'flexi-stream-element-type-error :element-type new-value :stream flexi-stream))) @@ -130,13 +82,15 @@ (defmethod stream-element-type ((stream flexi-stream)) "Returns the element type that was provided by the creator of the stream." - (declare (optimize speed)) - (flexi-stream-element-type stream)) + (declare #.*standard-optimize-settings*) + (with-accessors ((element-type flexi-stream-element-type)) + stream + element-type)) (defmethod close ((stream flexi-stream) &key abort) "Closes the flexi stream by closing the underlying `real' stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) (with-accessors ((stream flexi-stream-stream)) stream (cond ((open-stream-p stream) @@ -145,19 +99,24 @@ (defmethod open-stream-p ((stream flexi-stream)) "A flexi stream is open if its underlying stream is open." - (declare (optimize speed)) - (open-stream-p (flexi-stream-stream stream))) + (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + stream + (open-stream-p stream))) (defmethod stream-file-position ((stream flexi-stream)) "Dispatch to method for underlying stream." - (declare (optimize speed)) - (stream-file-position (flexi-stream-stream stream))) + (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + stream + (stream-file-position stream))) (defmethod (setf stream-file-position) (position-spec (stream flexi-stream)) "Dispatch to method for underlying stream." - (declare (optimize speed)) - (setf (stream-file-position (flexi-stream-stream stream)) - position-spec)) + (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + stream + (setf (stream-file-position stream) position-spec))) (defclass flexi-output-stream (flexi-stream fundamental-binary-output-stream fundamental-character-output-stream) @@ -173,7 +132,7 @@ #+:cmu (defmethod input-stream-p ((stream flexi-output-stream)) "Explicitly states whether this is an input stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) nil) (defclass flexi-input-stream (flexi-stream fundamental-binary-input-stream @@ -216,7 +175,7 @@ #+:cmu (defmethod output-stream-p ((stream flexi-input-stream)) "Explicitly states whether this is an output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) nil) (defclass flexi-io-stream (flexi-input-stream flexi-output-stream) @@ -226,471 +185,16 @@ MAKE-INSTANCE to create a new FLEXI-IO-STREAM but use MAKE-FLEXI-STREAM instead.")) -(defclass flexi-cr-mixin () - () - (:documentation "A mixin for flexi streams which need -end-of-line conversion, i.e. for those where the end-of-line -designator is /not/ the single character #\Linefeed.")) - -(defclass flexi-8-bit-stream (flexi-stream) - ((encoding-hash :accessor flexi-stream-encoding-hash)) - (:documentation "The class for all flexi streams which use an 8-bit -encoding and thus need an additional slot for the encoding hash.")) - -(defclass flexi-8-bit-input-stream (flexi-input-stream flexi-8-bit-stream) - ((encoding-table :accessor flexi-stream-encoding-table)) - (:documentation "The class for all flexi input streams which use an -8-bit encoding and thus need an additional slot for the encoding -table.")) - -(defclass flexi-cr-8-bit-input-stream (flexi-cr-mixin flexi-8-bit-input-stream) - () - (:documentation "The class for all flexi input streams which -use an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-input-stream (flexi-8-bit-input-stream) - () - (:documentation "Special class for flexi input streams which -use the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-input-stream (flexi-cr-mixin flexi-ascii-input-stream) - () - (:documentation "Special class for flexi input streams which -use the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-input-stream (flexi-8-bit-input-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-input-stream (flexi-cr-mixin flexi-latin-1-input-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-input-stream (flexi-cr-mixin flexi-utf-32-le-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-32-be-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-input-stream (flexi-cr-mixin flexi-utf-32-be-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-input-stream (flexi-cr-mixin flexi-utf-16-le-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-16-be-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-input-stream (flexi-cr-mixin flexi-utf-16-be-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-input-stream (flexi-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-input-stream (flexi-cr-mixin flexi-utf-8-input-stream) - () - (:documentation "Special class for flexi input streams which -use the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-8-bit-output-stream (flexi-output-stream flexi-8-bit-stream) - () - (:documentation "The class for all flexi output streams which use an -8-bit encoding.")) - -(defclass flexi-cr-8-bit-output-stream (flexi-cr-mixin flexi-8-bit-output-stream) - () - (:documentation "The class for all flexi output streams which -use an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-output-stream (flexi-8-bit-output-stream) - () - (:documentation "Special class for flexi output streams which -use the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-output-stream (flexi-cr-mixin flexi-ascii-output-stream) - () - (:documentation "Special class for flexi output streams which -use the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-output-stream (flexi-8-bit-output-stream) - () - (:documentation "Special class for flexi output streams which -use the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-output-stream (flexi-cr-mixin flexi-latin-1-output-stream) - () - (:documentation "Special class for flexi output streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-output-stream (flexi-cr-mixin flexi-utf-32-le-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-32-be-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-output-stream (flexi-cr-mixin flexi-utf-32-be-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-output-stream (flexi-cr-mixin flexi-utf-16-le-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with little-endian byte ordering /and/ -need end-of-line conversion.")) - -(defclass flexi-utf-16-be-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-output-stream (flexi-cr-mixin flexi-utf-16-be-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-output-stream (flexi-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-output-stream (flexi-cr-mixin flexi-utf-8-output-stream) - () - (:documentation "Special class for flexi output streams which -use the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-8-bit-io-stream (flexi-8-bit-input-stream flexi-8-bit-output-stream flexi-io-stream) - () - (:documentation "The class for all flexi I/O streams which use an -8-bit encoding.")) - -(defclass flexi-cr-8-bit-io-stream (flexi-cr-mixin flexi-8-bit-io-stream) - () - (:documentation "The class for all flexi I/O streams which use -an 8-bit encoding /and/ need end-of-line conversion.")) - -(defclass flexi-ascii-io-stream (flexi-ascii-input-stream flexi-ascii-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the US-ASCCI encoding.")) - -(defclass flexi-cr-ascii-io-stream (flexi-cr-mixin flexi-ascii-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the US-ASCCI encoding /and/ need end-of-line conversion.")) - -(defclass flexi-latin-1-io-stream (flexi-latin-1-input-stream flexi-latin-1-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the ISO-8859-1 encoding.")) - -(defclass flexi-cr-latin-1-io-stream (flexi-cr-mixin flexi-latin-1-io-stream) - () - (:documentation "Special class for flexi input streams which -use the ISO-8859-1 encoding /and/ need end-of-line conversion.")) - -(defclass flexi-utf-32-le-io-stream (flexi-utf-32-le-input-stream - flexi-utf-32-le-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-32-le-io-stream (flexi-cr-mixin flexi-utf-32-le-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-32-be-io-stream (flexi-utf-32-be-input-stream - flexi-utf-32-be-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-32-be-io-stream (flexi-cr-mixin flexi-utf-32-be-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-32 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-le-io-stream (flexi-utf-16-le-input-stream - flexi-utf-16-le-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with little-endian byte ordering.")) - -(defclass flexi-cr-utf-16-le-io-stream (flexi-cr-mixin flexi-utf-16-le-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with little-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-16-be-io-stream (flexi-utf-16-be-input-stream - flexi-utf-16-be-output-stream - flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with big-endian byte ordering.")) - -(defclass flexi-cr-utf-16-be-io-stream (flexi-cr-mixin flexi-utf-16-be-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-16 encoding with big-endian byte ordering /and/ need -end-of-line conversion.")) - -(defclass flexi-utf-8-io-stream (flexi-utf-8-input-stream flexi-utf-8-output-stream flexi-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-8 encoding.")) - -(defclass flexi-cr-utf-8-io-stream (flexi-cr-mixin flexi-utf-8-io-stream) - () - (:documentation "Special class for flexi I/O streams which use -the UTF-8 encoding /and/ need end-of-line conversion.")) - -(defmethod (setf flexi-stream-external-format) :after (new-value (stream flexi-stream)) - "After we've changed the external format of a flexi stream, we -might have to change its actual class and maybe also the contents -of its 8-bit encoding slots." - (declare (ignore new-value) - (optimize speed)) - ;; note that it's potentially dangerous to call SET-CLASS from - ;; within a method, see for example this thread: - ;; - (set-class stream)) - -(defmethod set-class ((stream flexi-input-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-input-stream - 'flexi-ascii-input-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-input-stream - 'flexi-latin-1-input-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-input-stream - 'flexi-8-bit-input-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-input-stream - 'flexi-utf-8-input-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-input-stream - 'flexi-cr-utf-16-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-input-stream - 'flexi-utf-16-be-input-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-input-stream - 'flexi-cr-utf-32-be-input-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-input-stream - 'flexi-utf-32-be-input-stream)))))))))) - -(defmethod set-class ((stream flexi-output-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-output-stream - 'flexi-ascii-output-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-output-stream - 'flexi-latin-1-output-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-output-stream - 'flexi-8-bit-output-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-output-stream - 'flexi-utf-8-output-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-output-stream - 'flexi-cr-utf-16-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-output-stream - 'flexi-utf-16-be-output-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-output-stream - 'flexi-cr-utf-32-be-output-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-output-stream - 'flexi-utf-32-be-output-stream)))))))))) - -(defmethod set-class ((stream flexi-io-stream)) - "Changes the actual class of STREAM depending on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format)) - stream - (let ((external-format-name (external-format-name external-format)) - (external-format-cr (not (eq (external-format-eol-style external-format) :lf)))) - (change-class stream - (cond ((ascii-name-p external-format-name) - (if external-format-cr - 'flexi-cr-ascii-io-stream - 'flexi-ascii-io-stream)) - ((eq external-format-name :iso-8859-1) - (if external-format-cr - 'flexi-cr-latin-1-io-stream - 'flexi-latin-1-io-stream)) - ((or (koi8-r-name-p external-format-name) - (iso-8859-name-p external-format-name) - (code-page-name-p external-format-name)) - (if external-format-cr - 'flexi-cr-8-bit-io-stream - 'flexi-8-bit-io-stream)) - (t (case external-format-name - (:utf-8 (if external-format-cr - 'flexi-cr-utf-8-io-stream - 'flexi-utf-8-io-stream)) - (:utf-16 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-16-le-io-stream - 'flexi-cr-utf-16-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-16-le-io-stream - 'flexi-utf-16-be-io-stream))) - (:utf-32 (if external-format-cr - (if (external-format-little-endian external-format) - 'flexi-cr-utf-32-le-io-stream - 'flexi-cr-utf-32-be-io-stream) - (if (external-format-little-endian external-format) - 'flexi-utf-32-le-io-stream - 'flexi-utf-32-be-io-stream)))))))))) - -(defmethod set-class :after ((stream flexi-stream)) - "After we've changed the actual class of a flexi stream we may -have to set the contents of the 8-bit enconding slots as well." - (declare (optimize speed)) - (set-encoding-hash stream) - (set-encoding-table stream)) - -(defgeneric set-encoding-hash (stream) - (:method (stream)) - (:documentation "Sets the value of the ENCODING-HASH slot of -STREAM if necessary. The default method does nothing.")) - -(defgeneric set-encoding-table (stream) - (:method (stream)) - (:documentation "Sets the value of the ENCODING-TABLE slot of -STREAM if necessary. The default method does nothing.")) - -(defmethod set-encoding-hash ((stream flexi-8-bit-stream)) - "Sets the value of the ENCODING-HASH slot of STREAM depending -on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format) - (encoding-hash flexi-stream-encoding-hash)) - stream - (let ((external-format-name (external-format-name external-format))) - (setq encoding-hash - (cond ((ascii-name-p external-format-name) +ascii-hash+) - ((koi8-r-name-p external-format-name) +koi8-r-hash+) - ((iso-8859-name-p external-format-name) - (cdr (assoc external-format-name +iso-8859-hashes+ :test #'eq))) - ((code-page-name-p external-format-name) - (cdr (assoc (external-format-id external-format) +code-page-hashes+)))))))) - -(defmethod set-encoding-table ((stream flexi-8-bit-input-stream)) - "Sets the value of the ENCODING-TABLE slot of STREAM depending -on its external format." - (declare (optimize speed)) - (with-accessors ((external-format flexi-stream-external-format) - (encoding-table flexi-stream-encoding-table)) - stream - (let ((external-format-name (external-format-name external-format))) - (setq encoding-table - (cond ((ascii-name-p external-format-name) +ascii-table+) - ((koi8-r-name-p external-format-name) +koi8-r-table+) - ((iso-8859-name-p external-format-name) - (cdr (assoc external-format-name +iso-8859-tables+ :test #'eq))) - ((code-page-name-p external-format-name) - (cdr (assoc (external-format-id external-format) +code-page-tables+)))))))) - #+:cmu (defmethod input-stream-p ((stream flexi-io-stream)) "Explicitly states whether this is an input stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) t) #+:cmu (defmethod output-stream-p ((stream flexi-io-stream)) "Explicitly states whether this is an output stream." - (declare (optimize speed)) + (declare #.*standard-optimize-settings*) t) (defun make-flexi-stream (stream &rest args @@ -712,6 +216,7 @@ streams) should be NIL or an integer. If BOUND is not NIL and POSITION has gone beyond BOUND, then the stream will behave as if no more input is available." + (declare #.*standard-optimize-settings*) ;; these arguments are ignored - they are only there to provide a ;; meaningful parameter list for IDEs (declare (ignore element-type column position bound)) Modified: trunk/strings.lisp ============================================================================== --- trunk/strings.lisp (original) +++ trunk/strings.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.4 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -29,28 +29,145 @@ (in-package :flexi-streams) -(defun string-to-octets (string &key (external-format (make-external-format :latin1)) - (start 0) end) +(defun string-to-octets (string &key + (external-format :latin1) + (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of -octets corresponding to the external format EXTERNAL-FORMAT." - (declare (optimize speed)) - (with-output-to-sequence (out) - (let ((flexi (make-flexi-stream out :external-format external-format))) - (write-string string flexi :start start :end end)))) - -(defun octets-to-string (vector &key (external-format (make-external-format :latin1)) - (start 0) (end (length vector))) - "Converts the Lisp vector VECTOR of octets from START to END to -string using the external format EXTERNAL-FORMAT." - (declare (optimize speed)) - (with-input-from-sequence (in vector :start start :end end) - (let ((flexi (make-flexi-stream in :external-format external-format)) - (result (make-array (- end start) - :element-type #+:lispworks 'lw:simple-char - #-:lispworks 'character - :fill-pointer t))) - (setf (fill-pointer result) - (read-sequence result flexi)) - result))) - - +octets corresponding to the external format designated by +EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (setq external-format (maybe-convert-external-format external-format)) + (let ((factor (encoding-factor external-format)) + (length (- end start))) + (declare (fixnum length)) + (etypecase factor + (integer + (let ((octets (make-array (* factor length) :element-type 'octet)) + (j 0)) + (declare (fixnum j)) + (flet ((writer (octet) + (declare (octet octet)) + (setf (aref (the (array octet *) octets) j) octet) + (incf j))) + (declare (dynamic-extent (function writer))) + (loop for i of-type fixnum from start below end do + (char-to-octets external-format + (char string i) + #'writer))) + octets)) + (double-float + ;; this is a bit clunky but hopefully a bit more efficient than + ;; using VECTOR-PUSH-EXTEND + (let* ((octets-length (ceiling (* factor length))) + (octets (make-array octets-length + :element-type 'octet + :fill-pointer t + :adjustable t)) + (i start) + (j 0)) + (declare (fixnum i j octets-length) + (double-float factor)) + (flet ((writer (octet) + (declare (octet octet)) + (when (>= j octets-length) + (setq factor (* factor 2.0d0)) + (incf octets-length (the fixnum (ceiling (* factor (- end i))))) + (adjust-array octets octets-length :fill-pointer t)) + (setf (aref (the (array octet *) octets) j) octet) + (incf j))) + (declare (dynamic-extent (function writer))) + (loop + (when (>= i end) + (return)) + (char-to-octets external-format + (char string i) + #'writer) + (incf i)) + (setf (fill-pointer octets) j) + octets)))))) + +(defun octets-to-string (sequence &key + (external-format :latin1) + (start 0) (end (length sequence))) + "Converts the Lisp sequence SEQUENCE of octets from START to END to +string using the external format designated by EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (setq external-format (maybe-convert-external-format external-format)) + (let* ((factor (encoding-factor external-format)) + (length (- end start)) + (i start) + (reader (etypecase sequence + ((array octet *) + (lambda () + (and (< i end) + (prog1 + (aref (the (array octet *) sequence) i) + (incf i))))) + ((array * *) + (lambda () + (and (< i end) + (prog1 + (aref sequence i) + (incf i))))) + (list + (lambda () + (and (< i end) + (prog1 + (nth i sequence) + (incf i)))))))) + (declare (fixnum i length) (dynamic-extent reader)) + (labels ((pseudo-writer (octet) + (declare (ignore octet)) + (decf i)) + (unreader (char) + (char-to-octets external-format char #'pseudo-writer))) + (declare (dynamic-extent (function pseudo-writer) (function unreader))) + (let ((*current-unreader* #'unreader)) + (flet ((next-char () + (code-char (octets-to-char-code external-format reader)))) + (declare (inline next-char)) + (etypecase factor + (integer + (let* ((string-length (ceiling length factor)) + (string (make-array string-length + :element-type 'char*))) + (declare (fixnum string-length)) + (loop for j of-type fixnum from 0 below string-length + do (setf (schar string j) (next-char)) + finally (return string)))) + (double-float + ;; this is a bit clunky but hopefully a bit more efficient than + ;; using VECTOR-PUSH-EXTEND + (let* ((string-length (ceiling length (the double-float factor))) + (string (make-array string-length + :element-type 'char* + :fill-pointer t + :adjustable t)) + (j 0)) + (declare (fixnum j string-length) + (double-float factor)) + (loop + (when (>= i end) + (return)) + (when (>= j string-length) + (setq factor (/ factor 2.0d0)) + (incf string-length (the fixnum (ceiling (- end i) factor))) + (adjust-array string string-length :fill-pointer t)) + (setf (char string j) (next-char)) + (incf j)) + (setf (fill-pointer string) j) + string)))))))) + +(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string))) + "Returns the length of the substring of STRING from START to END in +octets if encoded using the external format EXTERNAL-FORMAT. Might +return NIL if there's no efficient way to compute the length without +iterating through the whole string." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (setq external-format (maybe-convert-external-format external-format)) + (let ((factor (encoding-factor external-format))) + (typecase factor + (fixnum (* factor (- end start)))))) Modified: trunk/test/packages.lisp ============================================================================== --- trunk/test/packages.lisp (original) +++ trunk/test/packages.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.4 2007/01/01 23:47:16 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/packages.lisp,v 1.6 2008/05/17 16:38:26 edi Exp $ -;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -30,4 +30,5 @@ (in-package :cl-user) (defpackage :flexi-streams-test - (:use :cl :flexi-streams)) + (:use :cl :flexi-streams) + (:export :run-tests)) Modified: trunk/test/test.lisp ============================================================================== --- trunk/test/test.lisp (original) +++ trunk/test/test.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.17 2007/12/29 22:58:44 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $ -;;; Copyright (c) 2006-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -29,6 +29,13 @@ (in-package :flexi-streams-test) +(defconstant +buffer-size+ 8192 + "Size of buffers for COPY-STREAM* below.") + +(defvar *copy-function* nil + "Which function to use when copying from one stream to the other - +see for example COPY-FILE below.") + (defvar *this-file* (load-time-value (or #.*compile-file-pathname* *load-pathname*)) "The pathname of the file \(`test.lisp') where this variable was @@ -89,13 +96,17 @@ (append args `(:eol-style ,eol-style :little-endian ,little-endian)))))))) -(defun create-test-combinations (file-name symbols) - "For a name suffix FILE-NAME and a list of symbols SYMBOLS -denoting different encodings of the corresponding file returns a -list of lists which can be used as arglists for COMPARE-FILES." +(defun create-test-combinations (file-name symbols &optional simplep) + "For a name suffix FILE-NAME and a list of symbols SYMBOLS denoting +different encodings of the corresponding file returns a list of lists +which can be used as arglists for COMPARE-FILES. If SIMPLEP is true, +a list which can be used for the string tests below is returned." (let ((file-variants (loop for symbol in symbols nconc (create-file-variants file-name symbol)))) (loop for (name-in . external-format-in) in file-variants + when simplep + collect (list name-in external-format-in) + else nconc (loop for (name-out . external-format-out) in file-variants collect (list name-in external-format-in name-out external-format-out))))) @@ -121,6 +132,17 @@ while line do (write-line line out)))) +(defun copy-stream* (stream-in external-format-in stream-out external-format-out) + "Like COPY-STREAM, but uses READ-SEQUENCE and WRITE-SEQUENCE instead +of READ-LINE and WRITE-LINE." + (let ((in (make-flexi-stream stream-in :external-format external-format-in)) + (out (make-flexi-stream stream-out :external-format external-format-out)) + (buffer (make-array +buffer-size+ :element-type 'flex::char*))) + (loop + (let ((position (read-sequence buffer in))) + (when (zerop position) (return)) + (write-sequence buffer out :end position))))) + (defun copy-file (path-in external-format-in path-out external-format-out direction-out direction-in) "Copies the contents of the file denoted by the pathname PATH-IN to the file denoted by the pathname PATH-OUT using flexi @@ -139,7 +161,7 @@ :direction direction-out :if-does-not-exist :create :if-exists :supersede) - (copy-stream in external-format-in out external-format-out)))) + (funcall *copy-function* in external-format-in out external-format-out)))) #+:lispworks (defun copy-file-lw (path-in external-format-in path-out external-format-out direction-out direction-in) @@ -158,7 +180,7 @@ :direction :output :if-does-not-exist :create :if-exists :supersede) - (copy-stream in external-format-in out external-format-out)))) + (funcall *copy-function* in external-format-in out external-format-out)))) (defun compare-files (path-in external-format-in path-out external-format-out) "Copies the contents of the file (in the `test') denoted by the @@ -175,7 +197,8 @@ (full-path-orig (merge-pathnames path-out *this-file*))) (dolist (direction-out '(:output :io)) (dolist (direction-in '(:input :io)) - (format *error-output* "Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (format *error-output* "Test \(using ~A) ~S ~S [~A]~% --> ~S [~A].~%" + *copy-function* path-in (flex::normalize-external-format external-format-in) direction-in (flex::normalize-external-format external-format-out) direction-out) (copy-file full-path-in external-format-in @@ -186,7 +209,8 @@ (t (format *error-output* " Test failed!!!~%"))) (terpri *error-output*) #+:lispworks - (format *error-output* "LW-Test ~S ~S [~A]~% --> ~S [~A].~%" path-in + (format *error-output* "LW-Test \(using ~A) ~S ~S [~A]~% --> ~S [~A].~%" + *copy-function* path-in (flex::normalize-external-format external-format-in) direction-in (flex::normalize-external-format external-format-out) direction-out) #+:lispworks @@ -200,6 +224,27 @@ #+:lispworks (terpri *error-output*))))) +(defun file-as-octet-vector (pathspec) + "Returns the contents of the file denoted by PATHSPEC as a vector of +octets." + (with-open-file (in pathspec :element-type 'octet) + (let ((vector (make-array (file-length in) :element-type 'octet))) + (read-sequence vector in) + vector))) + +(defun file-as-string (pathspec external-format) + "Reads the contents of the file denoted by PATHSPEC using the +external format EXTERNAL-FORMAT and returns the result as a string." + (with-open-file (in pathspec :element-type 'octet) + (let* ((number-of-octets (file-length in)) + (in (make-flexi-stream in :external-format external-format)) + (string (make-array number-of-octets + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer string) (read-sequence string in)) + string))) + (defmacro with-test ((test-description) &body body) "Defines a test. Two utilities are available inside of the body of the maco: The function FAIL, and the macro CHECK. FAIL, the lowest @@ -231,25 +276,148 @@ (terpri *error-output*)) ,successp)))) +(defun old-string-to-octets (string &key + (external-format (make-external-format :latin1)) + (start 0) end) + "The old version of STRING-TO-OCTETS. We can use it to test +in-memory streams." + (declare (optimize speed)) + (with-output-to-sequence (out) + (let ((flexi (make-flexi-stream out :external-format external-format))) + (write-string string flexi :start start :end end)))) + +(defun old-octets-to-string (vector &key + (external-format (make-external-format :latin1)) + (start 0) (end (length vector))) + "The old version of OCTETS-TO-STRING. We can use it to test +in-memory streams." + (declare (optimize speed)) + (with-input-from-sequence (in vector :start start :end end) + (let ((flexi (make-flexi-stream in :external-format external-format)) + (result (make-array (- end start) + :element-type #+:lispworks 'lw:simple-char + #-:lispworks 'character + :fill-pointer t))) + (setf (fill-pointer result) + (read-sequence result flexi)) + result))) + +(defun string-test (pathspec external-format) + "Tests whether conversion from strings to octets and vice versa +using the external format EXTERNAL-FORMAT works as expected, using the +contents of the file denoted by PATHSPEC as test data and assuming +that the stream conversion functions work. + +Also tests with the old versions of the conversion functions in order +to test in-memory streams." + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (octets-vector (file-as-octet-vector full-path)) + (octets-list (coerce octets-vector 'list)) + (string (file-as-string full-path external-format))) + (with-test ((format nil "String tests with format ~S." + (flex::normalize-external-format external-format))) + (check (string= (octets-to-string octets-vector :external-format external-format) string)) + (check (string= (octets-to-string octets-list :external-format external-format) string)) + (check (equalp (string-to-octets string :external-format external-format) octets-vector)) + (check (string= (old-octets-to-string octets-vector :external-format external-format) string)) + (check (string= (old-octets-to-string octets-list :external-format external-format) string)) + (check (equalp (old-string-to-octets string :external-format external-format) octets-vector))))) + +(defun sequence-equal (seq1 seq2) + "Whether the two sequences have the same elements." + (and (= (length seq1) (length seq2)) + (loop for i below (length seq1) + always (eql (elt seq1 i) (elt seq2 i))))) + +(defun read-sequence-test (pathspec external-format) + "Several tests to confirm that READ-SEQUENCE behaves as expected." + (with-test ((format nil "READ-SEQUENCE tests with format ~S." + (flex::normalize-external-format external-format))) + (let* ((full-path (merge-pathnames pathspec *this-file*)) + (file-string (file-as-string full-path external-format)) + (string-length (length file-string)) + (octets (file-as-octet-vector full-path)) + (octet-length (length octets))) + (when (external-format-equal external-format (make-external-format :utf8)) + #-:openmcl + ;; FLEXI-STREAMS puts integers into the list, but OpenMCL + ;; thinks they are characters... + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (list (make-list octet-length))) + (setf (flexi-stream-element-type in) 'octet) + #-:clisp + (read-sequence list in) + #+:clisp + (ext:read-byte-sequence list in) + (check (sequence-equal list octets)))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (third (floor octet-length 3)) + (half (floor octet-length 2)) + (vector (make-array half :element-type 'octet))) + (check (sequence-equal (loop repeat third + collect (read-byte in)) + (subseq octets 0 third))) + (read-sequence vector in) + (check (sequence-equal vector (subseq octets third (+ third half))))))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (string (make-string (- string-length 10) :element-type 'flex::char*))) + (setf (flexi-stream-element-type in) 'octet) + (check (sequence-equal (loop repeat 10 + collect (read-char in)) + (subseq file-string 0 10))) + (read-sequence string in) + (check (sequence-equal string (subseq file-string 10))))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (list (make-list (- string-length 100)))) + (check (sequence-equal (loop repeat 50 + collect (read-char in)) + (subseq file-string 0 50))) + #-:clisp + (read-sequence list in) + #+:clisp + (ext:read-char-sequence list in) + (check (sequence-equal list (subseq file-string 50 (- string-length 50)))) + (check (sequence-equal (loop repeat 50 + collect (read-char in)) + (subseq file-string (- string-length 50)))))) + (with-open-file (in full-path :element-type 'octet) + (let* ((in (make-flexi-stream in :external-format external-format)) + (array (make-array (- string-length 50)))) + (check (sequence-equal (loop repeat 25 + collect (read-char in)) + (subseq file-string 0 25))) + #-:clisp + (read-sequence array in) + #+:clisp + (ext:read-char-sequence array in) + (check (sequence-equal array (subseq file-string 25 (- string-length 25)))) + (check (sequence-equal (loop repeat 25 + collect (read-char in)) + (subseq file-string (- string-length 25))))))))) + (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE -restart each time a FLEXI-STREAM-ENCODING-ERROR is signalled. Signals -an error when there are more or less FLEXI-STREAM-ENCODING-ERRORs than -there are elements in VALUES." +restart each time a EXTERNAL-FORMAT-ENCODING-ERROR is signalled. +Signals an error when there are more or less +EXTERNAL-FORMAT-ENCODING-ERRORs than there are elements in VALUES." (flex::with-unique-names (value-stack condition-counter) `(let ((,value-stack ',values) (,condition-counter 0)) - (handler-bind ((flexi-stream-encoding-error + (handler-bind ((external-format-encoding-error #'(lambda (c) (declare (ignore c)) (unless ,value-stack - (error "Too many FLEXI-STREAM-ENCODING-ERRORs signalled, expected only ~A." + (error "Too many encoding errors signalled, expected only ~A." ,(length values))) (incf ,condition-counter) (use-value (pop ,value-stack))))) (prog1 (progn , at body) (when ,value-stack - (error "~A FLEXI-STREAM-ENCODING-ERRORs signalled, but ~A were expected." + (error "~A encoding errors signalled, but ~A were expected." ,condition-counter ,(length values)))))))) (defun read-flexi-line (sequence external-format) @@ -259,51 +427,84 @@ (setq in (make-flexi-stream in :external-format external-format)) (read-line in))) -(defun encoding-error-handling-test () - "Tests several possible encoding errors and how they are handled." - (with-test ("Handling of encoding errors.") +(defun read-flexi-line* (sequence external-format) + "Like READ-FLEXI-LINE but uses OCTETS-TO-STRING internally." + (octets-to-string sequence :external-format external-format)) + +(defun error-handling-test () + "Tests several possible errors and how they are handled." + (with-test ("Handling of errors.") + ;; handling of EOF in the middle of CRLF + (check (string= #.(string #\Return) + (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) (let ((*substitution-char* #\?)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 128 200) :ascii))) + (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 128 200) :ascii))) ;; :WINDOWS-1253 doesn't have a characters with codes 170 and 210 (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253))) + (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))) ;; not a valid UTF-8 sequence - (check (string= "??" (read-flexi-line `(#xe4 #xf6 #xfc) :utf8))) + (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))) + (check (string= "??" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 - (check (string= "??" (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8))) + (check (string= "??" (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) (let ((*substitution-char* nil)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "abc" (using-values (#\b #\c) (read-flexi-line `(,(char-code #\a) 128 200) :ascii)))) + (check (string= "abc" (using-values (#\b #\c) + (read-flexi-line* `#(,(char-code #\a) 128 200) :ascii)))) ;; :WINDOWS-1253 encoding doesn't have a characters with codes 170 and 210 (check (string= "axy" (using-values (#\x #\y) (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))) + (check (string= "axy" (using-values (#\x #\y) + (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))) ;; not a valid UTF-8 sequence - (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#xe4 #xf6 #xfc) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 - (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line `(#b11111110 #b11111111) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8)))) + (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) ;; only one byte - (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16le)))) + (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le)))) + (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16le)))) ;; two bytes, but value of resulting word suggests that another word follows - (check (string= "R" (using-values (#\R) (read-flexi-line `(#x01 #xd8) :utf-16le)))) + (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le)))) + (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le)))) ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff - (check (string= "T" (using-values (#\T) (read-flexi-line `(#x01 #xd8 #xff #xdb) :utf-16le)))) + (check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le)))) + (check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le)))) ;; the same as for little endian above, but using inverse order of bytes in words - (check (string= "E" (using-values (#\E) (read-flexi-line `(#x01) :utf-16be)))) - (check (string= "R" (using-values (#\R) (read-flexi-line `(#xd8 #x01) :utf-16be)))) - (check (string= "T" (using-values (#\T) (read-flexi-line `(#xd8 #x01 #xdb #xff) :utf-16be)))) + (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be)))) + (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be)))) + (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be)))) + (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16be)))) + (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be)))) + (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be)))) ;; the only case when error is signalled for UTF-32 is at end of file ;; in the middle of 4-byte sequence, both for big and little endian - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le)))) (check (string= "aY" (using-values (#\Y) (read-flexi-line `(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line `(#x01 #x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le)))) + (check (string= "aY" (using-values (#\Y) + (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be)))) + (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be)))) (check (string= "aY" (using-values (#\Y) - (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) + (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) (defun unread-char-test () "Tests whether UNREAD-CHAR behaves as expected." @@ -311,11 +512,11 @@ (flet ((test-one-file (file-name external-format) (with-open-file (in (merge-pathnames file-name *this-file*) :element-type 'flex:octet) - (setq in (make-flexi-stream in :external-format external-format)) - (loop repeat 300 - for char = (read-char in) - do (unread-char char in) - (check (char= (read-char in) char)))))) + (let ((in (make-flexi-stream in :external-format external-format))) + (loop repeat 300 + for char = (read-char in) + do (unread-char char in) + (check (char= (read-char in) char))))))) (loop for (file-name symbols) in *test-files* do (loop for symbol in symbols do (loop for (file-name . external-format) in (create-file-variants file-name symbol) @@ -323,18 +524,29 @@ (defun run-tests () "Applies COMPARE-FILES to all test scenarios created with -CREATE-TEST-COMBINATIONS, runs test for handling of encoding errors, -and shows simple statistics at the end." +CREATE-TEST-COMBINATIONS, runs other tests like handling of encoding +errors, shows simple statistics at the end." (let* ((*test-success-counter* 0) - (args-list (loop for (file-name symbols) in *test-files* - nconc (create-test-combinations file-name symbols))) - (no-tests (* 4 (length args-list)))) + (compare-files-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols))) + (no-tests (* 8 (length compare-files-args-list)))) #+:lispworks (setq no-tests (* 2 no-tests)) - (dolist (args args-list) - (apply #'compare-files args)) + (dolist (*copy-function* '(copy-stream copy-stream*)) + (dolist (args compare-files-args-list) + (apply 'compare-files args))) + (let ((string-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length string-test-args-list)) + (dolist (args string-test-args-list) + (apply 'string-test args))) + (let ((read-sequence-test-args-list (loop for (file-name symbols) in *test-files* + nconc (create-test-combinations file-name symbols t)))) + (incf no-tests (length read-sequence-test-args-list)) + (dolist (args read-sequence-test-args-list) + (apply 'read-sequence-test args))) (incf no-tests) - (encoding-error-handling-test) + (error-handling-test) (incf no-tests) (unread-char-test) (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%" Modified: trunk/util.lisp ============================================================================== --- trunk/util.lisp (original) +++ trunk/util.lisp Wed May 21 08:00:42 2008 @@ -1,7 +1,7 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.13 2007/01/01 23:46:49 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ -;;; Copyright (c) 2005-2007, Dr. Edmund Weitz. All rights reserved. +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions @@ -31,7 +31,14 @@ #+:lispworks (eval-when (:compile-toplevel :load-toplevel :execute) - (import 'lw:with-unique-names)) + (import '(lw:with-unique-names lw:when-let))) + +#-:lispworks +(defmacro when-let ((var form) &body body) + "Evaluates FORM and binds VAR to the result, then executes BODY +if VAR has a true value." + `(let ((,var ,form)) + (when ,var , at body))) #-:lispworks (defmacro with-unique-names ((&rest bindings) &body body) @@ -108,7 +115,9 @@ (unless (find real-name +name-map+ :test #'eq :key #'cdr) - (error "~S is not known to be a name for an external format." name)) + (error 'external-format-error + :format-control "~S is not known to be a name for an external format." + :format-arguments (list name))) real-name)) (defun ascii-name-p (name) @@ -161,6 +170,26 @@ (defmacro with-accessors (slot-entries instance &body body) "For LispWorks, we prefer SLOT-VALUE over accessors for better performance." + ;; note that we assume that the variables have the same names as the + ;; slots `(with-slots ,(mapcar #'car slot-entries) ,instance - , at body)) \ No newline at end of file + , at body)) + +(defun make-octet-buffer (&optional (size +buffer-size+)) + "Creates and returns a fresh buffer \(a specialized array) of size ++BUFFER-SIZE+ to hold octets." + (declare #.*standard-optimize-settings*) + (make-array size :element-type 'octet)) + +(defun type-equal (type1 type2) + "Whether TYPE1 and TYPE2 denote the same type." + (declare #.*standard-optimize-settings*) + (and (subtypep type1 type2) + (subtypep type2 type1))) + +(defun maybe-rewind (stream octets) + "Tries to `rewind' the \(binary) stream STREAM by OCTETS octets. +Returns a true value if it succeeds." + (when-let (position (file-position stream)) + (file-position stream (- position octets)))) \ No newline at end of file From eweitz at common-lisp.net Wed May 21 14:49:58 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Wed, 21 May 2008 10:49:58 -0400 (EDT) Subject: [flexi-streams-cvs] r46 - tags Message-ID: <20080521144958.E140A6D232@common-lisp.net> Author: eweitz Date: Wed May 21 10:49:58 2008 New Revision: 46 Added: tags/ Log: Tags dir From eweitz at common-lisp.net Wed May 21 14:50:03 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Wed, 21 May 2008 10:50:03 -0400 (EDT) Subject: [flexi-streams-cvs] r47 - tags/flexi-streams-0.15.0 Message-ID: <20080521145003.BBB5F6D232@common-lisp.net> Author: eweitz Date: Wed May 21 10:50:03 2008 New Revision: 47 Added: tags/flexi-streams-0.15.0/ - copied from r46, trunk/ Log: Tag for reference From eweitz at common-lisp.net Wed May 21 14:50:49 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Wed, 21 May 2008 10:50:49 -0400 (EDT) Subject: [flexi-streams-cvs] r48 - tags/flexi-streams-0.14.0 Message-ID: <20080521145049.2A2B86D250@common-lisp.net> Author: eweitz Date: Wed May 21 10:50:48 2008 New Revision: 48 Added: tags/flexi-streams-0.14.0/ - copied from r1, trunk/ Log: Tag for reference From eweitz at common-lisp.net Wed May 21 15:12:49 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Wed, 21 May 2008 11:12:49 -0400 (EDT) Subject: [flexi-streams-cvs] r49 - branches/edi Message-ID: <20080521151249.D40F550078@common-lisp.net> Author: eweitz Date: Wed May 21 11:12:49 2008 New Revision: 49 Removed: branches/edi/ Log: Cleanup From eweitz at common-lisp.net Fri May 23 15:00:05 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Fri, 23 May 2008 11:00:05 -0400 (EDT) Subject: [flexi-streams-cvs] r50 - in trunk: . doc Message-ID: <20080523150005.A523C1207B@common-lisp.net> Author: eweitz Date: Fri May 23 11:00:03 2008 New Revision: 50 Added: trunk/lw-char-stream.lisp Removed: trunk/lw-binary-stream.lisp Modified: trunk/CHANGELOG trunk/doc/index.html trunk/flexi-streams.asd trunk/input.lisp trunk/output.lisp Log: Update to 0.15.3 Modified: trunk/CHANGELOG ============================================================================== --- trunk/CHANGELOG (original) +++ trunk/CHANGELOG Fri May 23 11:00:03 2008 @@ -1,3 +1,16 @@ +Version 0.15.3 +2008-05-23 +Avoid CHANGE-CLASS on LispWorks if possible + +Version 0.15.2 +2008-05-22 +Remove debugging remnants (d'ooh!) + +Version 0.15.1 +2008-05-21 +Direct access to underlying stream in case of binary sequence operations +More tests + Version 0.15.0 2008-05-21 Complete redesign, various additions, bugfixes, performance improvements (with the help of Hans H?bner) Modified: trunk/doc/index.html ============================================================================== --- trunk/doc/index.html (original) +++ trunk/doc/index.html Fri May 23 11:00:03 2008 @@ -224,7 +224,7 @@

    FLEXI-STREAMS together with this documentation can be downloaded from http://weitz.de/files/flexi-streams.tar.gz. The -current version is 0.15.0. +current version is 0.15.3.

    Before you install FLEXI-STREAMS you first need to install the -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.108 2008/05/21 11:53:08 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.111 2008/05/23 14:56:47 edi Exp $

    BACK TO MY HOMEPAGE Modified: trunk/flexi-streams.asd ============================================================================== --- trunk/flexi-streams.asd (original) +++ trunk/flexi-streams.asd Fri May 23 11:00:03 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.65 2008/05/21 11:53:07 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -35,7 +35,7 @@ (in-package :flexi-streams-system) (defsystem :flexi-streams - :version "0.15.0" + :version "0.15.3" :serial t :components ((:file "packages") (:file "mapping") @@ -51,7 +51,7 @@ (:file "decode") (:file "in-memory") (:file "stream") - #+:lispworks (:file "lw-binary-stream") + #+:lispworks (:file "lw-char-stream") (:file "output") (:file "input") (:file "io") Modified: trunk/input.lisp ============================================================================== --- trunk/input.lisp (original) +++ trunk/input.lisp Fri May 23 11:00:03 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.70 2008/05/21 00:18:35 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.75 2008/05/23 14:43:09 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -55,9 +55,8 @@ #+:lispworks (defmethod read-byte* ((flexi-input-stream flexi-input-stream)) - "Reads one byte \(octet) from the underlying stream of -FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not -empty)." + "Reads one byte \(octet) from the underlying \(binary) stream of +FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty)." (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) @@ -71,20 +70,14 @@ (return-from read-byte* nil)) (incf position) (or (pop octet-stack) - ;; we use READ-SEQUENCE because READ-BYTE doesn't work with all - ;; bivalent streams in LispWorks - (let* ((buffer (make-array 1 :element-type 'octet)) - (new-position (read-sequence buffer stream))) - (cond ((zerop new-position) - (decf position) nil) - (t (aref buffer 0))))))) + (read-byte stream nil nil) + (progn (decf position) nil)))) #+:lispworks -(defmethod read-byte* ((flexi-input-stream flexi-binary-input-stream)) +(defmethod read-byte* ((flexi-input-stream flexi-char-input-stream)) "Reads one byte \(octet) from the underlying stream of FLEXI-OUTPUT-STREAM \(or from the internal stack if it's not empty). -Optimized version \(only needed for LispWorks) in case the underlying -stream is binary." +Only used for LispWorks bivalent streams which aren't binary." (declare #.*standard-optimize-settings*) (with-accessors ((position flexi-stream-position) (bound flexi-stream-bound) @@ -98,8 +91,13 @@ (return-from read-byte* nil)) (incf position) (or (pop octet-stack) - (read-byte stream nil nil) - (progn (decf position) nil)))) + ;; we use READ-SEQUENCE because READ-BYTE doesn't work with all + ;; bivalent streams in LispWorks + (let* ((buffer (make-array 1 :element-type 'octet)) + (new-position (read-sequence buffer stream))) + (cond ((zerop new-position) + (decf position) nil) + (t (aref buffer 0))))))) (defmethod stream-clear-input ((flexi-input-stream flexi-input-stream)) "Calls the corresponding method for the underlying input stream @@ -201,7 +199,7 @@ based on the element type of the sequence \(which takes precedence) and the element type of the stream. What you'll really get might also depend on your Lisp. Some of the implementations are more picky than -others - see for example FLEXI-STREAMS-TEST:READ-SEQUENCE-TEST." +others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (with-accessors ((position flexi-stream-position) @@ -213,21 +211,37 @@ (element-type flexi-stream-element-type) (stream flexi-stream-stream)) flexi-input-stream + (when (>= start end) + (return-from stream-read-sequence start)) + (when (or (subtypep (etypecase sequence + (vector (array-element-type sequence)) + (list t)) + 'integer) + (and (not (stringp sequence)) + (type-equal element-type 'octet))) + ;; if binary data is requested, just read from the underlying + ;; stream directly and skip the rest (but flush octet stack + ;; first) + (let ((index start)) + (declare (fixnum index)) + (when octet-stack + (replace sequence octet-stack :start1 start :end1 end) + (let ((octets-flushed (min (length octet-stack) (- end start)))) + (incf index octets-flushed) + (setq octet-stack (nthcdr octets-flushed octet-stack)))) + (setq index (read-sequence sequence stream :start index :end end)) + (when (> index start) + (setq last-char-code nil + last-octet (elt sequence (1- index)))) + (return-from stream-read-sequence index))) (let* (buffer (buffer-pos 0) (buffer-end 0) (index start) - ;; whether we will deliver characters and thus the number - ;; of octets to read might not be equal to the number of - ;; sequence elements to fill - (want-chars-p (or (stringp sequence) - (and (vectorp sequence) - (not (subtypep (array-element-type sequence) 'integer))) - (not (type-equal element-type 'octet)))) ;; whether we will later be able to rewind the stream if ;; needed (to get rid of unused octets in the buffer) - (can-rewind-p (and want-chars-p (maybe-rewind stream 0))) - (factor (if want-chars-p (encoding-factor external-format) 1)) + (can-rewind-p (maybe-rewind stream 0)) + (factor (encoding-factor external-format)) (integer-factor (floor factor)) ;; it's an interesting question whether it makes sense ;; performance-wise to make RESERVE significantly bigger @@ -237,7 +251,7 @@ ((not can-rewind-p) (* 2 integer-factor)) (t (ceiling (* (- factor integer-factor) (- end start))))))) (declare (fixnum buffer-pos buffer-end index integer-factor reserve) - (boolean want-chars-p can-rewind-p)) + (boolean can-rewind-p)) (flet ((compute-fill-amount () "Computes the amount of octets we can savely read into the buffer without violating the stream's bound \(if there is one) and @@ -293,23 +307,17 @@ (unread-char% char flexi-input-stream))) (declare (dynamic-extent (function next-octet) (function unreader))) (let ((*current-unreader* #'unreader)) - (macrolet ((iterate (octetp set-place) + (macrolet ((iterate (set-place) "A very unhygienic macro to implement the actual iteration through the sequence including housekeeping for the -flexi stream. If OCTETP is true, we put octets into the stream, -otherwise characters. SET-PLACE is the place \(using the index INDEX) -used to access the sequence." +flexi stream. SET-PLACE is the place \(using the index INDEX) used to +access the sequence." `(flet ((leave () "This is the function used to abort the LOOP iteration below." (when (> index start) - ;; if something was read at all, - ;; update LAST-OCTET and - ;; LAST-CHAR-CODE accordingly - (setq ,(if octetp 'last-char-code 'last-octet) - nil - ,(if octetp 'last-octet 'last-char-code) - ,(sublis '((index . (1- index))) set-place))) + (setq last-octet nil + last-char-code ,(sublis '((index . (1- index))) set-place))) (return-from stream-read-sequence index))) (loop (when (>= index end) @@ -327,28 +335,15 @@ (push (aref (the (array octet *) buffer) buffer-end) octet-stack))))) (leave)) - (let ((next-thing ,(if octetp - '(next-octet) - '(octets-to-char-code external-format #'next-octet)))) - (unless next-thing (leave)) - (setf ,set-place ,(if octetp 'next-thing '(code-char next-thing))) + (let ((next-char-code (octets-to-char-code external-format #'next-octet))) + (unless next-char-code + (leave)) + (setf ,set-place (code-char next-char-code)) (incf index)))))) (etypecase sequence - (string (iterate nil (char sequence index))) - (array - (let ((array-element-type (array-element-type sequence))) - (cond ((type-equal array-element-type 'octet) - (iterate t (aref (the (array octet *) sequence) index))) - ((or (subtypep array-element-type 'integer) - (type-equal element-type 'octet)) - (iterate t (aref sequence index))) - (t - (iterate nil (aref sequence index)))))) - (list - (cond ((type-equal element-type 'octet) - (iterate t (nth index sequence))) - (t - (iterate nil (nth index sequence))))))))))))) + (string (iterate (char sequence index))) + (array (iterate (aref sequence index))) + (list (iterate (nth index sequence))))))))))) (defmethod stream-unread-char ((stream flexi-input-stream) char) "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM. Added: trunk/lw-char-stream.lisp ============================================================================== --- (empty file) +++ trunk/lw-char-stream.lisp Fri May 23 11:00:03 2008 @@ -0,0 +1,77 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/lw-char-stream.lisp,v 1.1 2008/05/23 14:43:09 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defclass flexi-char-output-stream (flexi-output-stream) + () + (:documentation "This class is for output streams where the +underlying stream is bivalent but not binary. It exists solely for +the purpose of optimizing output to binary streams on LispWorks. See +WRITE-BYTE*.")) + +(defclass flexi-char-input-stream (flexi-input-stream) + () + (:documentation "This class is for input streams where the +underlying stream is bivalent but not binary. It exists solely for +the purpose of optimizing input to binary streams on LispWorks. See +READ-BYTE*.")) + +(defclass flexi-char-io-stream (flexi-char-input-stream flexi-char-output-stream flexi-io-stream) + () + (:documentation "This class is for bidirectional streams where the +underlying stream is bivalent but not binary. It exists solely for +the purpose of optimizing input and output from/to binary streams on +LispWorks. See READ-BYTE* and WRITE-BYTE*.")) + +(defmethod initialize-instance :after ((flexi-stream flexi-output-stream) &rest initargs) + "Might change the class of FLEXI-STREAM for optimization purposes. +Only needed for LispWorks." + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) + (with-accessors ((stream flexi-stream-stream)) + flexi-stream + (unless (subtypep (stream-element-type stream) 'octet) + (change-class flexi-stream + (typecase flexi-stream + (flexi-io-stream 'flexi-char-io-stream) + (otherwise 'flexi-char-output-stream)))))) + +(defmethod initialize-instance :after ((flexi-stream flexi-input-stream) &rest initargs) + "Might change the class of FLEXI-STREAM for optimization purposes. +Only needed for LispWorks." + (declare #.*standard-optimize-settings*) + (declare (ignore initargs)) + (with-accessors ((stream flexi-stream-stream)) + flexi-stream + (unless (subtypep (stream-element-type stream) 'octet) + (change-class flexi-stream + (typecase flexi-stream + (flexi-io-stream 'flexi-char-io-stream) + (otherwise 'flexi-char-input-stream)))))) Modified: trunk/output.lisp ============================================================================== --- trunk/output.lisp (original) +++ trunk/output.lisp Fri May 23 11:00:03 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.60 2008/05/21 01:26:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -44,6 +44,15 @@ #+:lispworks (defmethod write-byte* (byte (flexi-output-stream flexi-output-stream)) (declare #.*standard-optimize-settings*) + (with-accessors ((stream flexi-stream-stream)) + flexi-output-stream + (write-byte byte stream))) + +#+:lispworks +(defmethod write-byte* (byte (flexi-output-stream flexi-char-output-stream)) + "This method is only used for LispWorks bivalent streams which +aren't binary." + (declare #.*standard-optimize-settings*) ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all ;; bivalent streams in LispWorks (4.4.6) (with-accessors ((stream flexi-stream-stream)) @@ -53,15 +62,6 @@ stream) byte)) -#+:lispworks -(defmethod write-byte* (byte (flexi-output-stream flexi-binary-output-stream)) - "Optimized version \(only needed for LispWorks) in case the -underlying stream is binary." - (declare #.*standard-optimize-settings*) - (with-accessors ((stream flexi-stream-stream)) - flexi-output-stream - (write-byte byte stream))) - (defmethod stream-write-char ((stream flexi-output-stream) char) (declare #.*standard-optimize-settings*) (with-accessors ((external-format flexi-stream-external-format)) @@ -142,15 +142,18 @@ (external-format flexi-stream-external-format) (stream flexi-stream-stream)) stream + (when (>= start end) + (return-from stream-write-sequence sequence)) + (when (and (vectorp sequence) + (subtypep (array-element-type sequence) 'integer)) + ;; if this is pure binary output, just send all the stuff to the + ;; underlying stream directly and skip the rest + (setq column nil) + (return-from stream-write-sequence + (write-sequence sequence stream :start start :end end))) (let* ((octet-seen-p nil) (buffer-pos 0) - ;; whether we might receive characters and thus the number - ;; of octets to output might not be equal to the number of - ;; sequence elements to write - (chars-p (or (listp sequence) - (and (vectorp sequence) - (not (subtypep (array-element-type sequence) 'integer))))) - (factor (if chars-p (encoding-factor external-format) 1)) + (factor (encoding-factor external-format)) (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) (buffer (make-octet-buffer buffer-size))) (declare (fixnum buffer-pos buffer-size) @@ -178,28 +181,20 @@ (write-octet object)) (character (write-character object))))) (declare (dynamic-extent (function write-octet))) - (macrolet ((iterate (octets-p output-form) + (macrolet ((iterate (output-form) "An unhygienic macro to implement the actual iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one -sequence element and put its octet representation into the buffer. -OCTETS-P is true if we know in advance that we will send octets." - `(progn - ,@(if octets-p '((setq octet-seen-p t))) - (loop for index of-type fixnum from start below end - do ,output-form - finally (when (plusp buffer-pos) - (flush-buffer)))))) +sequence element and put its octet representation into the buffer." + `(loop for index of-type fixnum from start below end + do ,output-form + finally (when (plusp buffer-pos) + (flush-buffer))))) (etypecase sequence - (string (iterate nil (write-character (char sequence index)))) - (array - (let ((array-element-type (array-element-type sequence))) - (cond ((type-equal array-element-type 'octet) - (iterate t (write-octet (aref (the (array octet *) sequence) index)))) - ((subtypep array-element-type 'integer) - (iterate t (write-octet (aref sequence index)))) - (t (iterate nil (write-object (aref sequence index))))))) - (list (iterate nil (write-object (nth index sequence))))) - ;; update the column slot, setting if to NIL if we sent octets + (string (iterate (write-character (char sequence index)))) + (array (iterate (write-object (aref sequence index)))) + (list (iterate (write-object (nth index sequence))))) + ;; update the column slot, setting it to NIL if we sent + ;; octets (setq column (cond (octet-seen-p nil) (t (let ((last-newline-pos (position #\Newline sequence @@ -208,8 +203,7 @@ :end end :from-end t))) (cond (last-newline-pos (- end last-newline-pos 1)) - (column (+ column (- end start)))))))))))) - + (column (+ column (- end start)))))))))))) sequence) (defmethod stream-write-string ((stream flexi-output-stream) string From eweitz at common-lisp.net Fri May 23 15:00:39 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Fri, 23 May 2008 11:00:39 -0400 (EDT) Subject: [flexi-streams-cvs] r51 - tags/flexi-streams-0.15.3 Message-ID: <20080523150039.9941016220@common-lisp.net> Author: eweitz Date: Fri May 23 11:00:39 2008 New Revision: 51 Added: tags/flexi-streams-0.15.3/ - copied from r50, trunk/ Log: Tag for reference From eweitz at common-lisp.net Sat May 24 23:29:57 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 24 May 2008 19:29:57 -0400 (EDT) Subject: [flexi-streams-cvs] r52 - branches/edi Message-ID: <20080524232957.C87F336173@common-lisp.net> Author: eweitz Date: Sat May 24 19:29:57 2008 New Revision: 52 Added: branches/edi/ - copied from r51, trunk/ Log: More needless optimization From eweitz at common-lisp.net Sat May 24 23:34:52 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 24 May 2008 19:34:52 -0400 (EDT) Subject: [flexi-streams-cvs] r53 - in branches/edi: . test Message-ID: <20080524233452.5343533079@common-lisp.net> Author: eweitz Date: Sat May 24 19:34:51 2008 New Revision: 53 Added: branches/edi/conditions.lisp Modified: branches/edi/encode.lisp branches/edi/output.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Faster encoding - passes all tests on LW Added: branches/edi/conditions.lisp ============================================================================== --- (empty file) +++ branches/edi/conditions.lisp Sat May 24 19:34:51 2008 @@ -0,0 +1,108 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(define-condition flexi-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to flexi +streams.")) + +(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition) + () + (:documentation "Like FLEXI-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition flexi-stream-element-type-error (flexi-stream-error) + ((element-type :initarg :element-type + :reader flexi-stream-element-type-error-element-type)) + (:report (lambda (condition stream) + (format stream "Element type ~S not allowed." + (flexi-stream-element-type-error-element-type condition)))) + (:documentation "Errors of this type are signalled if the flexi +stream has a wrong element type.")) + +(define-condition flexi-stream-out-of-sync-error (flexi-stream-error) + () + (:report (lambda (condition stream) + (declare (ignore condition)) + (format stream "Stream out of sync from previous +lookahead, couldn't rewind."))) + (:documentation "This can happen if you're trying to write to an IO +stream which had prior to that `looked ahead' while reading and now +can't `rewind' to the octet where you /should/ be.")) + +(define-condition in-memory-stream-error (stream-error) + () + (:documentation "Superclass for all errors related to +IN-MEMORY streams.")) + +(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition) + () + (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting +capabilities.")) + +(define-condition in-memory-stream-closed-error (in-memory-stream-error) + () + (:report (lambda (condition stream) + (format stream "~S is closed." + (stream-error-stream condition)))) + (:documentation "An error that is signalled when someone is trying +to read from or write to a closed IN-MEMORY stream.")) + +(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error) + ((position-spec :initarg :position-spec + :reader in-memory-stream-position-spec-error-position-spec)) + (:documentation "Errors of this type are signalled if an erroneous +position spec is used in conjunction with FILE-POSITION.")) + +(define-condition external-format-error () + ((external-format :initarg :external-format + :initform nil + :reader external-format-error-external-format)) + (:documentation "Superclass for all errors related to external +formats.")) + +(define-condition external-format-simple-error (external-format-error simple-condition) + () + (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting +capabilities.")) + +(define-condition external-format-encoding-error (external-format-simple-error) + () + (:documentation "Errors of this type are signalled if there is an +encoding problem.")) + +(defun signal-encoding-error (external-format format-control &rest format-args) + "Convenience function similar to ERROR to signal conditions of type +EXTERNAL-FORMAT-ENCODING-ERROR." + (error 'external-format-encoding-error + :format-control format-control + :format-arguments format-args + :external-format external-format)) Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,6 +29,125 @@ (in-package :flexi-streams) +(defgeneric compute-number-of-octets (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of octets required to +encode the sequence of characters in SEQUENCE from START to END using +the external format FORMAT.")) + +(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (- end start)) + +(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 2) + ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (* 4 (- end start))) + +(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (+ (call-next-method) + (* (case (external-format-name format) + (:utf-32 4) + (otherwise 1)) + (count #\Newline sequence :start start :end end :test #'char=)))) + (defgeneric char-to-octets (format char writer) (declare #.*standard-optimize-settings*) (:documentation "Converts the character CHAR to a sequence of octets @@ -37,72 +156,188 @@ repeatedly each octet. The return value of this function is unspecified.")) -(defmethod char-to-octets ((format flexi-latin-1-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((octet (char-code char))) +(defgeneric write-sequence* (format stream sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "A generic function which dispatches on the external +format and does the real work for STREAM-WRITE-SEQUENCE.")) + +(defgeneric string-to-octets* (format string start end) + (declare #.*standard-optimize-settings*) + (:documentation "A generic function which dispatches on the external +format and does the real work for STRING-TO-OCTETS.")) + +(defmacro define-sequence-writers ((format-class) &body body) + "Utility macro which defines methods for WRITE-SEQUENCE* and +STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the +docstring of DEFINE-CHAR-ENCODERS." + `(progn + (defmethod write-sequence* ((format ,format-class) stream sequence start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((column flexi-stream-column)) + stream + (let* ((octet-seen-p nil) + (buffer-pos 0) + ;; estimate should be good enough... + (factor (encoding-factor format)) + ;; we don't want arbitrarily large buffer, do we? + (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) + (buffer (make-octet-buffer buffer-size))) + (declare (fixnum buffer-pos buffer-size) + (boolean octet-seen-p) + (type (array octet *) buffer)) + (macrolet ((octet-writer (form) + `(write-octet ,form))) + (labels ((flush-buffer () + "Sends all octets in BUFFER to the underlying stream." + (write-sequence buffer stream :end buffer-pos) + (setq buffer-pos 0)) + (write-octet (octet) + "Adds one octet to the buffer and flushes it if necessary." + (declare (octet octet)) + (when (>= buffer-pos buffer-size) + (flush-buffer)) + (setf (aref buffer buffer-pos) octet) + (incf buffer-pos)) + (write-object (object) + "Dispatches to WRITE-OCTET or WRITE-CHARACTER +depending on the type of OBJECT." + (etypecase object + (octet (setq octet-seen-p t) + (write-octet object)) + (character (symbol-macrolet ((char-getter object)) + , at body))))) + (macrolet ((iterate (&body output-forms) + "An unhygienic macro to implement the actual +iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one +sequence element and put its octet representation into the buffer." + `(loop for index of-type fixnum from start below end + do (progn , at output-forms) + finally (when (plusp buffer-pos) + (flush-buffer))))) + (etypecase sequence + (string (iterate + (symbol-macrolet ((char-getter (char sequence index))) + , at body))) + (array (iterate + (symbol-macrolet ((char-getter (aref sequence index))) + , at body))) + (list (iterate (write-object (nth index sequence)))))) + ;; update the column slot, setting it to NIL if we sent + ;; octets + (setq column + (cond (octet-seen-p nil) + (t (let ((last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (cond (last-newline-pos (- end last-newline-pos 1)) + (column (+ column (- end start))))))))))))) + (defmethod string-to-octets* ((format ,format-class) string start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (let ((octets (make-array (compute-number-of-octets format string start end) + :element-type 'octet)) + (j 0)) + (declare (fixnum j)) + (loop for i of-type fixnum from start below end do + (macrolet ((octet-writer (form) + `(progn + (setf (aref (the (array octet *) octets) j) ,form) + (incf j)))) + (symbol-macrolet ((char-getter (char string i))) + (progn , at body)))) + octets)))) + +;; char-getter can be called more than once - no side effects +(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body) + "Utility macro which defines several encoding-related methods for +the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where +it is assumed that CR-FORMAT-CLASS is the same encoding as +FORMAT-CLASS but with CR line endings and similar for +CRLF-FORMAT-CLASS. BODY is a code template for the code to convert +one character to octets. BODY must contain a symbol CHAR-GETTER +representing the form which is used to obtain the character and a +forms like \(OCTET-WRITE ) to write the octet . The +CHAR-GETTER form might be called more than once." + (let ((body `((locally + (declare #.*fixnum-optimize-settings*) + , at body)))) + `(progn + (defmethod char-to-octets ((format ,format-class) char writer) + (declare (character char) (function writer)) + (symbol-macrolet ((char-getter char)) + (macrolet ((octet-writer (form) + `(funcall writer ,form))) + , at body))) + (define-sequence-writers (,format-class) , at body) + (define-sequence-writers (,cr-format-class) + ,@(sublis `((char-getter . ,(with-unique-names (char) + `(let ((,char char-getter)) + (declare (character ,char)) + (if (char= ,char #\Newline) + #\Return + ,char))))) + body)) + (define-sequence-writers (,crlf-format-class) + ,(with-unique-names (char write-char) + `(flet ((,write-char (,char) + ,@(sublis `((char-getter . ,char)) body))) + (let ((,char char-getter)) + (declare (character ,char)) + (cond ((char= ,char #\Newline) + (,write-char #\Return) + (,write-char #\Newline)) + (t (,write-char ,char)))))))))) + +(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) + (let ((octet (char-code char-getter))) (when (> octet 255) - (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet)) - (funcall writer octet))) + (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet)) + (octet-writer octet))) -(defmethod char-to-octets ((format flexi-ascii-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((octet (char-code char))) +(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format) + (let ((octet (char-code char-getter))) (when (> octet 127) - (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet)) - (funcall writer octet))) + (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet)) + (octet-writer octet))) -(defmethod char-to-octets ((format flexi-8-bit-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) +(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format) (with-accessors ((encoding-hash external-format-encoding-hash)) format - (let ((octet (gethash (char-code char) encoding-hash))) + (let ((octet (gethash (char-code char-getter) encoding-hash))) (unless octet - (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet)) - (funcall writer octet)))) + (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet)) + (octet-writer octet)))) -(defmethod char-to-octets ((format flexi-utf-8-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((char-code (char-code char))) +(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format) + (let ((char-code (char-code char-getter))) (tagbody (cond ((< char-code #x80) - (funcall writer char-code) + (octet-writer char-code) (go zero)) ((< char-code #x800) - (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code))) (go one)) ((< char-code #x10000) - (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code))) (go two)) - ((< char-code #x200000) - (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code))) - (go three)) - ((< char-code #x4000000) - (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code))) - (go four)) - (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100)))) - (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code))) - four - (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code))) - three - (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code))) + (t + (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code))))) + (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code))) two - (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code))) + (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code))) one - (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code))) + (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code))) zero))) -(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) +(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format) (flet ((write-word (word) - (funcall writer (ldb (byte 8 0) word)) - (funcall writer (ldb (byte 8 8) word)))) + (octet-writer (ldb (byte 8 0) word)) + (octet-writer (ldb (byte 8 8) word)))) (declare (inline write-word)) - (let ((char-code (char-code char))) + (let ((char-code (char-code char-getter))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) @@ -110,14 +345,12 @@ (write-word (logior #xd800 (ldb (byte 10 10) char-code))) (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) -(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) +(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format) (flet ((write-word (word) - (funcall writer (ldb (byte 8 8) word)) - (funcall writer (ldb (byte 8 0) word)))) + (octet-writer (ldb (byte 8 8) word)) + (octet-writer (ldb (byte 8 0) word)))) (declare (inline write-word)) - (let ((char-code (char-code char))) + (let ((char-code (char-code char-getter))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) @@ -125,23 +358,19 @@ (write-word (logior #xd800 (ldb (byte 10 10) char-code))) (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) -(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((char-code (char-code char))) - (funcall writer (ldb (byte 8 0) char-code)) - (funcall writer (ldb (byte 8 8) char-code)) - (funcall writer (ldb (byte 8 16) char-code)) - (funcall writer (ldb (byte 8 24) char-code)))) - -(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer) - (declare #.*fixnum-optimize-settings*) - (declare (character char) (function writer)) - (let ((char-code (char-code char))) - (funcall writer (ldb (byte 8 24) char-code)) - (funcall writer (ldb (byte 8 16) char-code)) - (funcall writer (ldb (byte 8 8) char-code)) - (funcall writer (ldb (byte 8 0) char-code)))) +(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format) + (let ((char-code (char-code char-getter))) + (octet-writer (ldb (byte 8 0) char-code)) + (octet-writer (ldb (byte 8 8) char-code)) + (octet-writer (ldb (byte 8 16) char-code)) + (octet-writer (ldb (byte 8 24) char-code)))) + +(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) + (let ((char-code (char-code char-getter))) + (octet-writer (ldb (byte 8 24) char-code)) + (octet-writer (ldb (byte 8 16) char-code)) + (octet-writer (ldb (byte 8 8) char-code)) + (octet-writer (ldb (byte 8 0) char-code)))) (defmethod char-to-octets ((format flexi-cr-mixin) char writer) (declare #.*fixnum-optimize-settings*) Modified: branches/edi/output.lisp ============================================================================== --- branches/edi/output.lisp (original) +++ branches/edi/output.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.65 2008/05/24 23:15:25 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -129,7 +129,7 @@ ;; needed for AllegroCL - grrr... (stream-write-char stream #\Newline)) -(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key) +(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key) "An optimized version which uses a buffer underneath. The function can accepts characters as well as octets and it decides what to do based on the element type of the sequence \(if possible) or on the @@ -141,7 +141,7 @@ (with-accessors ((column flexi-stream-column) (external-format flexi-stream-external-format) (stream flexi-stream-stream)) - stream + flexi-output-stream (when (>= start end) (return-from stream-write-sequence sequence)) (when (and (vectorp sequence) @@ -151,59 +151,8 @@ (setq column nil) (return-from stream-write-sequence (write-sequence sequence stream :start start :end end))) - (let* ((octet-seen-p nil) - (buffer-pos 0) - (factor (encoding-factor external-format)) - (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) - (buffer (make-octet-buffer buffer-size))) - (declare (fixnum buffer-pos buffer-size) - (boolean octet-seen-p) - (type (array octet *) buffer)) - (labels ((flush-buffer () - "Sends all octets in BUFFER to the underlying stream." - (write-sequence buffer stream :end buffer-pos) - (setq buffer-pos 0)) - (write-octet (octet) - "Adds one octet to the buffer and flush it if necessary." - (declare (octet octet)) - (when (>= buffer-pos buffer-size) - (flush-buffer)) - (setf (aref buffer buffer-pos) octet) - (incf buffer-pos)) - (write-character (char) - "Adds the octets representing the character CHAR to the buffer." - (char-to-octets external-format char #'write-octet)) - (write-object (object) - "Dispatches to WRITE-OCTET or WRITE-CHARACTER -depending on the type of OBJECT." - (etypecase object - (octet (setq octet-seen-p t) - (write-octet object)) - (character (write-character object))))) - (declare (dynamic-extent (function write-octet))) - (macrolet ((iterate (output-form) - "An unhygienic macro to implement the actual -iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one -sequence element and put its octet representation into the buffer." - `(loop for index of-type fixnum from start below end - do ,output-form - finally (when (plusp buffer-pos) - (flush-buffer))))) - (etypecase sequence - (string (iterate (write-character (char sequence index)))) - (array (iterate (write-object (aref sequence index)))) - (list (iterate (write-object (nth index sequence))))) - ;; update the column slot, setting it to NIL if we sent - ;; octets - (setq column - (cond (octet-seen-p nil) - (t (let ((last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t))) - (cond (last-newline-pos (- end last-newline-pos 1)) - (column (+ column (- end start)))))))))))) + ;; otherwise hand over to the external format to do the work + (write-sequence* external-format flexi-output-stream sequence start end)) sequence) (defmethod stream-write-string ((stream flexi-output-stream) string Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -36,56 +36,10 @@ octets corresponding to the external format designated by EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) - (declare (fixnum start end) (string string)) + (declare (string string)) (setq external-format (maybe-convert-external-format external-format)) - (let ((factor (encoding-factor external-format)) - (length (- end start))) - (declare (fixnum length)) - (etypecase factor - (integer - (let ((octets (make-array (* factor length) :element-type 'octet)) - (j 0)) - (declare (fixnum j)) - (flet ((writer (octet) - (declare (octet octet)) - (setf (aref (the (array octet *) octets) j) octet) - (incf j))) - (declare (dynamic-extent (function writer))) - (loop for i of-type fixnum from start below end do - (char-to-octets external-format - (char string i) - #'writer))) - octets)) - (double-float - ;; this is a bit clunky but hopefully a bit more efficient than - ;; using VECTOR-PUSH-EXTEND - (let* ((octets-length (ceiling (* factor length))) - (octets (make-array octets-length - :element-type 'octet - :fill-pointer t - :adjustable t)) - (i start) - (j 0)) - (declare (fixnum i j octets-length) - (double-float factor)) - (flet ((writer (octet) - (declare (octet octet)) - (when (>= j octets-length) - (setq factor (* factor 2.0d0)) - (incf octets-length (the fixnum (ceiling (* factor (- end i))))) - (adjust-array octets octets-length :fill-pointer t)) - (setf (aref (the (array octet *) octets) j) octet) - (incf j))) - (declare (dynamic-extent (function writer))) - (loop - (when (>= i end) - (return)) - (char-to-octets external-format - (char string i) - #'writer) - (incf i)) - (setf (fill-pointer octets) j) - octets)))))) + ;; the external format knows how to do it... + (string-to-octets* external-format string start end)) (defun octets-to-string (sequence &key (external-format :latin1) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 24 19:34:51 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -329,10 +329,11 @@ (loop for i below (length seq1) always (eql (elt seq1 i) (elt seq2 i))))) -(defun read-sequence-test (pathspec external-format) - "Several tests to confirm that READ-SEQUENCE behaves as expected." - (with-test ((format nil "READ-SEQUENCE tests with format ~S." - (flex::normalize-external-format external-format))) +(defun sequence-test (pathspec external-format) + "Several tests to confirm that READ-SEQUENCE and WRITE-SEQUENCE +behave as expected." + (with-test ((format nil "Sequence tests with format ~S and file ~A." + (flex::normalize-external-format external-format) pathspec)) (let* ((full-path (merge-pathnames pathspec *this-file*)) (file-string (file-as-string full-path external-format)) (string-length (length file-string)) @@ -397,7 +398,33 @@ (check (sequence-equal array (subseq file-string 25 (- string-length 25)))) (check (sequence-equal (loop repeat 25 collect (read-char in)) - (subseq file-string (- string-length 25))))))))) + (subseq file-string (- string-length 25)))))) + (let ((path-out (ensure-directories-exist (merge-pathnames pathspec *tmp-dir*)))) + (with-open-file (out path-out + :direction :output + :if-exists :supersede + :element-type 'octet) + (let ((out (make-flexi-stream out :external-format external-format))) + (write-sequence octets out))) + (check (file-equal full-path path-out)) + (with-open-file (out path-out + :direction :output + :if-exists :supersede + :element-type 'octet) + (let ((out (make-flexi-stream out :external-format external-format))) + (write-sequence file-string out))) + (check (file-equal full-path path-out)) + (with-open-file (out path-out + :direction :output + :if-exists :supersede + :element-type 'octet) + (let ((out (make-flexi-stream out :external-format external-format))) + (write-sequence file-string out :end 100) + (write-sequence octets out + :start (length (string-to-octets file-string + :external-format external-format + :end 100))))) + (check (file-equal full-path path-out)))))) (defmacro using-values ((&rest values) &body body) "Executes BODY and feeds an element from VALUES to the USE-VALUE @@ -544,7 +571,7 @@ nconc (create-test-combinations file-name symbols t)))) (incf no-tests (length read-sequence-test-args-list)) (dolist (args read-sequence-test-args-list) - (apply 'read-sequence-test args))) + (apply 'sequence-test args))) (incf no-tests) (error-handling-test) (incf no-tests) From eweitz at common-lisp.net Sun May 25 01:43:57 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 24 May 2008 21:43:57 -0400 (EDT) Subject: [flexi-streams-cvs] r54 - in branches/edi: . doc Message-ID: <20080525014357.557B93A049@common-lisp.net> Author: eweitz Date: Sat May 24 21:43:56 2008 New Revision: 54 Modified: branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/packages.lisp branches/edi/specials.lisp branches/edi/strings.lisp branches/edi/util.lisp Log: Compute decoding length Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sat May 24 21:43:56 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.16 2008/05/20 23:01:50 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.18 2008/05/25 01:42:50 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,6 +29,234 @@ (in-package :flexi-streams) +(defgeneric compute-number-of-chars (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of characters required to +decode the sequence of octets in SEQUENCE from START to END using the +external format FORMAT.")) + +(defmethod compute-number-of-chars :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'vector) start end)) + +(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (- end start)) + +(defmethod compute-number-of-chars ((format flexi-crlf-8-bit-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((i start) + (length (- end start))) + (declare (fixnum i length)) + (loop + (when (>= i end) + (return)) + (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=))) + (unless position + (return)) + (setq i (1+ position)) + (decf length))) + length)) + +(defun check-end (format start end i) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) + (unless (= i end) + (signal-encoding-error format "These ~A octet~:P can't be ~ +decoded using ~A as the sequence is too short. ~A octet~:P ~ +missing at then end." + (- end start) + (external-format-name format) + (- i end)))) + +(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((octet (aref sequence i)) + (length (cond ((not (logbitp 7 octet)) 1) + ((= #b11000000 (logand octet #b11100000)) 2) + ((= #b11100000 (logand octet #b11110000)) 3) + (t 4)))) + (declare (fixnum length) (octet octet)) + (incf sum) + (incf i length))) + (check-end format start end i) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (octet last-octet)) + (loop + (when (>= i end) + (return)) + (let* ((octet (aref sequence i)) + (length (cond ((not (logbitp 7 octet)) 1) + ((= #b11000000 (logand octet #b11100000)) 2) + ((= #b11100000 (logand octet #b11110000)) 3) + (t 4)))) + (declare (fixnum length) (octet octet)) + (unless (and (= octet +lf+) (= last-octet +cr+)) + (incf sum)) + (incf i length) + (setq last-octet octet))) + (check-end format start end i) + sum)) + +(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (unless (evenp (- end start)) + (signal-encoding-error format "~A octet~:P cannot be decoded using ~ +UTF-16 as ~:*~A is not even." + (- end start)))) + +(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((high-octet (aref sequence (1+ i))) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (octet high-octet)) + (incf sum) + (incf i length))) + (check-end format start end i) + sum)) + +(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((high-octet (aref sequence i)) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (octet high-octet)) + (incf sum) + (incf i length))) + (check-end format start end i) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (octet last-octet)) + (loop + (when (>= i end) + (return)) + (let* ((high-octet (aref sequence (1+ i))) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (octet high-octet)) + (unless (and (zerop high-octet) + (= (the octet (aref sequence i)) +lf+) + (= last-octet +cr+)) + (incf sum)) + (incf i length) + (setq last-octet (if (zerop high-octet) + (aref sequence i) + 0)))) + (check-end format start end i) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (octet last-octet)) + (loop + (when (>= i end) + (return)) + (let* ((high-octet (aref sequence i)) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (octet high-octet)) + (unless (and (zerop high-octet) + (= (the octet (aref sequence (1+ i))) +lf+) + (= last-octet +cr+)) + (incf sum)) + (incf i length) + (setq last-octet (if (zerop high-octet) + (aref sequence (1+ i)) + 0)))) + (check-end format start end i) + sum)) +(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((length (- end start))) + (unless (zerop (mod length 4)) + (signal-encoding-error format "~A octet~:P cannot be decoded using ~ +UTF-32 as ~:*~A is not a multiple-value of four." + length)))) + +(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (/ (- end start) 4)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((i start) + (length (/ (- end start) 4))) + (decf end 8) + (loop + (when (> i end) + (return)) + (cond ((loop for j of-type fixnum from i + for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) + always (= octet (aref sequence j))) + (decf length) + (incf i 8)) + (t (incf i 4)))) + length)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((i start) + (length (/ (- end start) 4))) + (decf end 8) + (loop + (when (> i end) + (return)) + (cond ((loop for j of-type fixnum from i + for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) + always (= octet (aref sequence j))) + (decf length) + (incf i 8)) + (t (incf i 4)))) + length)) + (defun recover-from-encoding-error (external-format format-control &rest format-args) "Helper function used by OCTETS-TO-CHAR-CODE below to deal with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns @@ -242,7 +470,7 @@ (declare (ignore reader)) (let ((char-code (call-next-method))) (case char-code - (#.(char-code #\Return) #.(char-code #\Newline)) + (#.+cr+ #.(char-code #\Newline)) (otherwise char-code)))) (defmethod octets-to-char-code ((format flexi-crlf-mixin) reader) @@ -251,13 +479,13 @@ (declare (ignore reader)) (let ((char-code (call-next-method))) (case char-code - (#.(char-code #\Return) + (#.+cr+ (let ((next-char-code (call-next-method))) (case next-char-code - (#.(char-code #\Linefeed) #.(char-code #\Newline)) + (#.+lf+ #.(char-code #\Newline)) ;; we saw a CR but no LF afterwards, but then the data ;; ended, so we just return #\Return - ((nil) #.(char-code #\Return)) + ((nil) +cr+) ;; if the character we peeked at wasn't a ;; linefeed character we unread its constituents (otherwise (funcall *current-unreader* (code-char next-char-code)) Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sat May 24 21:43:56 2008 @@ -116,6 +116,7 @@

  • string-to-octets
  • octets-to-string
  • octet-length +
  • char-length
  • File positions @@ -1005,16 +1006,30 @@

  • [Function] -
    octet-length string &key external-format start end => length-or-nil +
    octet-length string &key external-format start end => length


    Returns the length of the substring of string from start to end in octets if encoded using the external format designated -by external-format. Might return NIL -if there's no efficient way to compute the length without iterating -through the whole string. +by external-format. +The defaults for +start and end +are 0 and the length of the string. The default +for external-format is :LATIN1. +
    + +


    [Function] +
    char-length sequence &key external-format start end => length + +


    + +Kind of the inverse of OCTET-LENGTH. +Returns the length of the subsequence (of octets) of sequence from start to end in +characters if decoded using +the external format designated +by external-format. The defaults for start and end are 0 and the length of the sequence. The default @@ -1060,7 +1075,7 @@ his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.111 2008/05/23 14:56:47 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.112 2008/05/25 01:41:25 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sat May 24 21:43:56 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.35 2008/05/21 01:43:43 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.36 2008/05/25 01:40:54 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -40,6 +40,7 @@ (:export :*default-eol-style* :*default-little-endian* :*substitution-char* + :char-length :external-format-eol-style :external-format-error :external-format-error-external-format Modified: branches/edi/specials.lisp ============================================================================== --- branches/edi/specials.lisp (original) +++ branches/edi/specials.lisp Sat May 24 21:43:56 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.32 2008/05/20 23:01:51 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/specials.lisp,v 1.33 2008/05/25 01:40:54 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -49,6 +49,10 @@ "Like *STANDARD-OPTIMIZE-SETTINGS*, but \(on LispWorks) with all arithmetic being fixnum arithmetic.") +(defconstant +lf+ (char-code #\Linefeed)) + +(defconstant +cr+ (char-code #\Return)) + (defvar *current-unreader* nil "A unary function which might be called to `unread' a character \(i.e. the sequence of octets it represents). Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 24 21:43:56 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.26 2008/05/25 01:41:32 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -116,12 +116,17 @@ (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string))) "Returns the length of the substring of STRING from START to END in -octets if encoded using the external format EXTERNAL-FORMAT. Might -return NIL if there's no efficient way to compute the length without -iterating through the whole string." +octets if encoded using the external format EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) - (let ((factor (encoding-factor external-format))) - (typecase factor - (fixnum (* factor (- end start)))))) + (compute-number-of-octets external-format string start end)) + +(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence))) + "Kind of the inverse of OCTET-LENGTH. Returns the length of the +subsequence \(of octets) of SEQUENCE from START to END in characters +if decoded using the external format EXTERNAL-FORMAT." + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (setq external-format (maybe-convert-external-format external-format)) + (compute-number-of-chars external-format sequence start end)) Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Sat May 24 21:43:56 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.22 2008/05/25 01:40:54 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -115,7 +115,7 @@ (unless (find real-name +name-map+ :test #'eq :key #'cdr) - (error 'external-format-error + (error 'external-format-simple-error :format-control "~S is not known to be a name for an external format." :format-arguments (list name))) real-name)) From eweitz at common-lisp.net Sun May 25 03:14:27 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 24 May 2008 23:14:27 -0400 (EDT) Subject: [flexi-streams-cvs] r55 - in branches/edi: . doc test Message-ID: <20080525031427.E7E821F00E@common-lisp.net> Author: eweitz Date: Sat May 24 23:14:26 2008 New Revision: 55 Modified: branches/edi/conditions.lisp branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/input.lisp branches/edi/packages.lisp branches/edi/strings.lisp branches/edi/test/test.lisp branches/edi/util.lisp Log: Pre-compute string length Enhanced condition hierarchy Passes tests on LW Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -82,22 +82,32 @@ (:documentation "Errors of this type are signalled if an erroneous position spec is used in conjunction with FILE-POSITION.")) -(define-condition external-format-error () +(define-condition external-format-condition (simple-condition) ((external-format :initarg :external-format :initform nil - :reader external-format-error-external-format)) + :reader external-format-condition-external-format)) + (:documentation "Superclass for all conditions related to external +formats.")) + +(define-condition external-format-error (external-format-condition error) + () (:documentation "Superclass for all errors related to external formats.")) -(define-condition external-format-simple-error (external-format-error simple-condition) +(define-condition external-format-warning (external-format-condition warning) () - (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting -capabilities.")) + (:documentation "Superclass for all warnings related to external +formats.")) -(define-condition external-format-encoding-error (external-format-simple-error) +(define-condition external-format-encoding-error (external-format-error) () (:documentation "Errors of this type are signalled if there is an encoding problem.")) + +(define-condition external-format-encoding-warning (external-format-warning) + () + (:documentation "Warnings of this type are signalled if there is an +encoding problem.")) (defun signal-encoding-error (external-format format-control &rest format-args) "Convenience function similar to ERROR to signal conditions of type @@ -106,3 +116,11 @@ :format-control format-control :format-arguments format-args :external-format external-format)) + +(defun signal-encoding-warning (external-format format-control &rest format-args) + "Convenience function similar to WARN to signal conditions of type +EXTERNAL-FORMAT-ENCODING-WARNING." + (warn 'external-format-encoding-warning + :format-control format-control + :format-arguments format-args + :external-format external-format)) Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.18 2008/05/25 01:42:50 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.19 2008/05/25 03:07:59 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,23 +29,26 @@ (in-package :flexi-streams) -(defgeneric compute-number-of-chars (format sequence start end) +(defgeneric compute-number-of-chars (format sequence start end warnp) (declare #.*standard-optimize-settings*) (:documentation "Computes the exact number of characters required to decode the sequence of octets in SEQUENCE from START to END using the -external format FORMAT.")) +external format FORMAT. If WARNP is NIL, warnings will be muffled.")) -(defmethod compute-number-of-chars :around (format (list list) start end) +(defmethod compute-number-of-chars :around (format (list list) start end warnp) (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'vector) start end)) + (call-next-method format (coerce list 'vector) start end warnp)) -(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (declare (ignore sequence)) (- end start)) -(defmethod compute-number-of-chars ((format flexi-crlf-8-bit-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) + ;; this method only applies to the 8-bit formats as all other + ;; formats with CRLF line endings have their own specialized methods + ;; below (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((i start) @@ -61,18 +64,23 @@ (decf length))) length)) -(defun check-end (format start end i) +(defgeneric check-end (format start end i warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end i)) - (unless (= i end) - (signal-encoding-error format "These ~A octet~:P can't be ~ -decoded using ~A as the sequence is too short. ~A octet~:P ~ -missing at then end." - (- end start) - (external-format-name format) - (- i end)))) + (:method (format start end i warnp) + (when (and warnp (> i end)) + (signal-encoding-warning format "These ~A octet~:P can't be ~ +decoded using ~A as the sequence is too short. ~A octet~:P missing ~ +at then end." + (- end start) + (external-format-name format) + (- i end)))) + (:method ((format flexi-utf-16-format) start end i warnp) + ;; don't warn twice + (when (evenp (- end start)) + (call-next-method)))) -(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) @@ -89,10 +97,10 @@ (declare (fixnum length) (octet octet)) (incf sum) (incf i length))) - (check-end format start end i) + (check-end format start end i warnp) sum)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) @@ -112,25 +120,26 @@ (incf sum)) (incf i length) (setq last-octet octet))) - (check-end format start end i) + (check-end format start end i warnp) sum)) -(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end) +(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (unless (evenp (- end start)) - (signal-encoding-error format "~A octet~:P cannot be decoded using ~ -UTF-16 as ~:*~A is not even." - (- end start)))) + (when (and warnp (oddp (- end start))) + (signal-encoding-warning format "~A octet~:P cannot be decoded ~ +using UTF-16 as ~:*~A is not even." + (- end start)))) -(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) (i start)) (declare (fixnum i sum)) + (decf end 2) (loop - (when (>= i end) + (when (> i end) (return)) (let* ((high-octet (aref sequence (1+ i))) (length (cond ((<= #xd8 high-octet #xdf) 4) @@ -138,17 +147,18 @@ (declare (fixnum length) (octet high-octet)) (incf sum) (incf i length))) - (check-end format start end i) + (check-end format start (+ end 2) i warnp) sum)) -(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) (i start)) (declare (fixnum i sum)) + (decf end 2) (loop - (when (>= i end) + (when (> i end) (return)) (let* ((high-octet (aref sequence i)) (length (cond ((<= #xd8 high-octet #xdf) 4) @@ -156,18 +166,19 @@ (declare (fixnum length) (octet high-octet)) (incf sum) (incf i length))) - (check-end format start end i) + (check-end format start (+ end 2) i warnp) sum)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) (i start) (last-octet 0)) (declare (fixnum i sum) (octet last-octet)) + (decf end 2) (loop - (when (>= i end) + (when (> i end) (return)) (let* ((high-octet (aref sequence (1+ i))) (length (cond ((<= #xd8 high-octet #xdf) 4) @@ -175,24 +186,25 @@ (declare (fixnum length) (octet high-octet)) (unless (and (zerop high-octet) (= (the octet (aref sequence i)) +lf+) - (= last-octet +cr+)) + (= last-octet +cr+)) (incf sum)) - (incf i length) (setq last-octet (if (zerop high-octet) (aref sequence i) - 0)))) - (check-end format start end i) + 0)) + (incf i length))) + (check-end format start (+ end 2) i warnp) sum)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) (i start) (last-octet 0)) (declare (fixnum i sum) (octet last-octet)) + (decf end 2) (loop - (when (>= i end) + (when (> i end) (return)) (let* ((high-octet (aref sequence i)) (length (cond ((<= #xd8 high-octet #xdf) 4) @@ -202,32 +214,33 @@ (= (the octet (aref sequence (1+ i))) +lf+) (= last-octet +cr+)) (incf sum)) - (incf i length) (setq last-octet (if (zerop high-octet) (aref sequence (1+ i)) - 0)))) - (check-end format start end i) + 0)) + (incf i length))) + (check-end format start (+ end 2) i warnp) sum)) -(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end) + +(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((length (- end start))) - (unless (zerop (mod length 4)) - (signal-encoding-error format "~A octet~:P cannot be decoded using ~ -UTF-32 as ~:*~A is not a multiple-value of four." - length)))) + (when (and warnp (plusp (mod length 4))) + (signal-encoding-warning format "~A octet~:P cannot be decoded ~ +using UTF-32 as ~:*~A is not a multiple-value of four." + length)))) -(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (declare (ignore sequence)) - (/ (- end start) 4)) + (ceiling (- end start) 4)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((i start) - (length (/ (- end start) 4))) + (length (ceiling (- end start) 4))) (decf end 8) (loop (when (> i end) @@ -240,11 +253,11 @@ (t (incf i 4)))) length)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((i start) - (length (/ (- end start) 4))) + (length (ceiling (- end start) 4))) (decf end 8) (loop (when (> i end) Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sat May 24 23:14:26 2008 @@ -69,7 +69,12 @@

  • external-format-equal
  • *default-eol-style*
  • *default-little-endian* +
  • external-format-condition +
  • external-format-condition-external-format +
  • external-format-error +
  • external-format-warning
  • external-format-encoding-error +
  • *substitution-char*
  • Flexi streams
      @@ -86,7 +91,6 @@
    1. flexi-stream-stream
    2. unread-byte
    3. peek-byte -
    4. *substitution-char*
    5. octet
    6. flexi-stream-error
    7. flexi-stream-out-of-sync-error @@ -526,29 +530,98 @@

  • [Condition] -
    external-format-error +
    external-format-condition


    -All errors related to external formats are of this type. -There's a slot for the external format which can be accessed with EXTERNAL-FORMAT-ERROR-EXTERNAL-FORMAT. +All conditions related to external formats are of this type. +There's a slot for the external format which can be accessed with EXTERNAL-FORMAT-CONDITION-EXTERNAL-FORMAT.


    [Reader] -
    external-format-error-external-format condition => external-format +
    external-format-condition-external-format condition => external-format


    If condition is of -type EXTERNAL-FORMAT-ERROR, +type EXTERNAL-FORMAT-CONDITION, this function will return the associated external format. Note that -there are errors which happen during the creation of external formats -where this method returns NIL. +there are situation which happen during the creation of external +formats where this method returns NIL. +
    + +


    [Condition] +
    external-format-warning + +


    +All warnings related to external formats are of this type. +This is a subtype of EXTERNAL-FORMAT-CONDITION. +
    + +


    [Condition] +
    external-format-error + +


    +All errors related to external formats are of this type. +This is a subtype of EXTERNAL-FORMAT-CONDITION.


    [Condition]
    external-format-encoding-error


    -All errors related to encoding problems with flexi streams are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE -restart is provided. See also *SUBSTITUTION-CHAR* and example for it. EXTERNAL-FORMAT-ENCODING-ERROR is a subtype of EXTERNAL-FORMAT-ERROR. +All errors related to encoding problems with external formats are of this type. (This includes situation where an end of file is encountered in the middle of a multi-octet character.) When this condition is signalled during reading, USE-VALUE +restart is provided. See also *SUBSTITUTION-CHAR* and the example for it. EXTERNAL-FORMAT-ENCODING-ERROR is a subtype of EXTERNAL-FORMAT-ERROR. +
    + +


    [Special variable] +
    *substitution-char* + +


    +If this value is not NIL, it should be a character which is used +(as if by a USE-VALUE restart) whenever during reading an error of +type EXTERNAL-FORMAT-ENCODING-ERROR would have been signalled otherwise. + +
    +CL-USER 1 > (defun foo ()
    +              ;; not a valid UTF-8 sequence
    +              (with-input-from-sequence (in '(#xe4 #xf6 #xfc))
    +                (setq in (make-flexi-stream in :external-format :utf8))
    +                (read-line in)))
    +FOO
    +
    +CL-USER 2 > (foo)
    +
    +Error: Unexpected value #xF6 in UTF-8 sequence.
    +  1 (continue) Specify a character to be used instead.
    +  2 (abort) Return to level 0.
    +  3 Return to top loop level 0.
    +
    +Type :b for backtrace, :c <option number> to proceed,  or :? for other options
    +
    +CL-USER 3 : 1 > :c
    +Type a character: x
    +
    +Error: End of file while in UTF-8 sequence.
    +  1 (continue) Specify a character to be used instead.
    +  2 (abort) Return to level 0.
    +  3 Return to top loop level 0.
    +
    +Type :b for backtrace, :c <option number> to proceed,  or :? for other options
    +
    +CL-USER 4 : 1 > :c
    +Type a character: y
    +"xy"
    +T
    +
    +CL-USER 5 > (handler-bind ((external-format-encoding-error (lambda (condition)
    +                                                          (use-value #\-))))
    +              (foo))
    +"--"
    +T
    +
    +CL-USER 6 > (let ((*substitution-char* #\?))
    +              (foo))
    +"??"
    +T
    +

    Flexi streams

    @@ -739,59 +812,6 @@ Note that the parameters aren't in the same order as with PEEK-CHAR because it doesn't make much sense to make stream an optional argument. -


    [Special variable] -
    *substitution-char* - -


    -If this value is not NIL, it should be a character which is used -(as if by a USE-VALUE restart) whenever during reading an error of -type EXTERNAL-FORMAT-ENCODING-ERROR would have been signalled otherwise. - -
    -CL-USER 1 > (defun foo ()
    -              ;; not a valid UTF-8 sequence
    -              (with-input-from-sequence (in '(#xe4 #xf6 #xfc))
    -                (setq in (make-flexi-stream in :external-format :utf8))
    -                (read-line in)))
    -FOO
    -
    -CL-USER 2 > (foo)
    -
    -Error: Unexpected value #xF6 in UTF-8 sequence.
    -  1 (continue) Specify a character to be used instead.
    -  2 (abort) Return to level 0.
    -  3 Return to top loop level 0.
    -
    -Type :b for backtrace, :c <option number> to proceed,  or :? for other options
    -
    -CL-USER 3 : 1 > :c
    -Type a character: x
    -
    -Error: End of file while in UTF-8 sequence.
    -  1 (continue) Specify a character to be used instead.
    -  2 (abort) Return to level 0.
    -  3 Return to top loop level 0.
    -
    -Type :b for backtrace, :c <option number> to proceed,  or :? for other options
    -
    -CL-USER 4 : 1 > :c
    -Type a character: y
    -"xy"
    -T
    -
    -CL-USER 5 > (handler-bind ((external-format-encoding-error (lambda (condition)
    -                                                          (use-value #\-))))
    -              (foo))
    -"--"
    -T
    -
    -CL-USER 6 > (let ((*substitution-char* #\?))
    -              (foo))
    -"??"
    -T
    -
    -
    -


    [Type]
    octet @@ -997,7 +1017,7 @@


    Converts the Lisp sequence sequence of octets -from start to end to string +from start to end to a string using the external format designated by external-format. The defaults for start and end @@ -1075,7 +1095,7 @@ his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.112 2008/05/25 01:41:25 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.75 2008/05/23 14:43:09 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.76 2008/05/25 03:07:59 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -354,10 +354,10 @@ (with-accessors ((last-char-code flexi-stream-last-char-code)) stream (unless last-char-code - (error 'flexi-stream-simple-error + (error 'flexi-stream-error :format-control "No character to unread from this stream \(or external format has changed or last reading operation was binary).")) (unless (= (char-code char) last-char-code) - (error 'flexi-stream-simple-error + (error 'flexi-stream-error :format-control "Last character read (~S) was different from ~S." :format-arguments (list (code-char last-char-code) char))) (unread-char% char stream) @@ -374,10 +374,10 @@ (position flexi-stream-position)) flexi-input-stream (unless last-octet - (error 'flexi-stream-simple-error + (error 'flexi-stream-error :format-control "No byte to unread from this stream \(or last reading operation read a character).")) (unless (= byte last-octet) - (error 'flexi-stream-simple-error + (error 'flexi-stream-error :format-control "Last byte read was different from #x~X." :format-arguments (list byte))) (setq last-octet nil) Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.36 2008/05/25 01:40:54 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -41,14 +41,17 @@ :*default-little-endian* :*substitution-char* :char-length + :external-format-condition + :external-format-condition-external-format :external-format-eol-style :external-format-error - :external-format-error-external-format :external-format-encoding-error + :external-format-encoding-warning :external-format-equal :external-format-id :external-format-little-endian :external-format-name + :external-format-warning :flexi-input-stream :flexi-output-stream :flexi-io-stream Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.26 2008/05/25 01:41:32 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.28 2008/05/25 03:07:59 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -45,13 +45,11 @@ (external-format :latin1) (start 0) (end (length sequence))) "Converts the Lisp sequence SEQUENCE of octets from START to END to -string using the external format designated by EXTERNAL-FORMAT." +a string using the external format designated by EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) - (let* ((factor (encoding-factor external-format)) - (length (- end start)) - (i start) + (let* ((i start) (reader (etypecase sequence ((array octet *) (lambda () @@ -82,37 +80,12 @@ (flet ((next-char () (code-char (octets-to-char-code external-format reader)))) (declare (inline next-char)) - (etypecase factor - (integer - (let* ((string-length (ceiling length factor)) - (string (make-array string-length - :element-type 'char*))) - (declare (fixnum string-length)) - (loop for j of-type fixnum from 0 below string-length - do (setf (schar string j) (next-char)) - finally (return string)))) - (double-float - ;; this is a bit clunky but hopefully a bit more efficient than - ;; using VECTOR-PUSH-EXTEND - (let* ((string-length (ceiling length (the double-float factor))) - (string (make-array string-length - :element-type 'char* - :fill-pointer t - :adjustable t)) - (j 0)) - (declare (fixnum j string-length) - (double-float factor)) - (loop - (when (>= i end) - (return)) - (when (>= j string-length) - (setq factor (/ factor 2.0d0)) - (incf string-length (the fixnum (ceiling (- end i) factor))) - (adjust-array string string-length :fill-pointer t)) - (setf (char string j) (next-char)) - (incf j)) - (setf (fill-pointer string) j) - string)))))))) + (let* ((string-length (compute-number-of-chars external-format sequence start end nil)) + (string (make-array string-length :element-type 'char*))) + (declare (fixnum string-length)) + (loop for j of-type fixnum from 0 below string-length + do (setf (schar string j) (next-char)) + finally (return string)))))))) (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string))) "Returns the length of the substring of STRING from START to END in @@ -129,4 +102,4 @@ (declare #.*standard-optimize-settings*) (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) - (compute-number-of-chars external-format sequence start end)) + (compute-number-of-chars external-format sequence start end t)) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -264,8 +264,8 @@ `(handler-case (unless ,expression (fail "Expression ~S failed.~%" ',expression)) - (condition (c) - (fail "Expression ~S failed signaling condition of type ~A: ~A.~%" + (error (c) + (fail "Expression ~S failed signalling error of type ~A: ~A.~%" ',expression (type-of c) c))))) (format *error-output* "Test ~S~%" ,test-description) , at body @@ -473,10 +473,10 @@ (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))) ;; not a valid UTF-8 sequence (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))) - (check (string= "??" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))) + (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8))) - (check (string= "??" (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) + (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) (let ((*substitution-char* nil)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "abc" (using-values (#\b #\c) @@ -490,13 +490,13 @@ (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))) ;; not a valid UTF-8 sequence (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) - (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))) + (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8)))) - (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) + (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) ;; only one byte (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le)))) - (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16le)))) + (check (string= "" (read-flexi-line* #(#x01) :utf-16le))) ;; two bytes, but value of resulting word suggests that another word follows (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le)))) (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le)))) @@ -507,7 +507,7 @@ (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be)))) (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be)))) (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be)))) - (check (string= "E" (using-values (#\E) (read-flexi-line* #(#x01) :utf-16be)))) + (check (string= "" (read-flexi-line* #(#x01) :utf-16be))) (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be)))) (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be)))) ;; the only case when error is signalled for UTF-32 is at end of file Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Sat May 24 23:14:26 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.22 2008/05/25 01:40:54 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.23 2008/05/25 03:07:59 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -115,7 +115,7 @@ (unless (find real-name +name-map+ :test #'eq :key #'cdr) - (error 'external-format-simple-error + (error 'external-format-error :format-control "~S is not known to be a name for an external format." :format-arguments (list name))) real-name)) From eweitz at common-lisp.net Sun May 25 03:35:25 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sat, 24 May 2008 23:35:25 -0400 (EDT) Subject: [flexi-streams-cvs] r56 - branches/edi Message-ID: <20080525033525.23D1D33107@common-lisp.net> Author: eweitz Date: Sat May 24 23:35:21 2008 New Revision: 56 Modified: branches/edi/decode.lisp branches/edi/encode.lisp branches/edi/input.lisp branches/edi/strings.lisp Log: Some cosmetic fixes Passes tests on AllegroCL as well now Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sat May 24 23:35:21 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.19 2008/05/25 03:07:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -42,7 +42,7 @@ (defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence)) + (declare (ignore sequence warnp)) (- end start)) (defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) @@ -51,6 +51,7 @@ ;; below (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) + (declare (ignore warnp)) (let ((i start) (length (- end start))) (declare (fixnum i length)) @@ -66,8 +67,9 @@ (defgeneric check-end (format start end i warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end i)) (:method (format start end i warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) (when (and warnp (> i end)) (signal-encoding-warning format "These ~A octet~:P can't be ~ decoded using ~A as the sequence is too short. ~A octet~:P missing ~ @@ -76,6 +78,9 @@ (external-format-name format) (- i end)))) (:method ((format flexi-utf-16-format) start end i warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) + (declare (ignore i warnp)) ;; don't warn twice (when (evenp (- end start)) (call-next-method)))) @@ -94,7 +99,7 @@ ((= #b11000000 (logand octet #b11100000)) 2) ((= #b11100000 (logand octet #b11110000)) 3) (t 4)))) - (declare (fixnum length) (octet octet)) + (declare (fixnum length) (type octet octet)) (incf sum) (incf i length))) (check-end format start end i warnp) @@ -106,7 +111,7 @@ (let ((sum 0) (i start) (last-octet 0)) - (declare (fixnum i sum) (octet last-octet)) + (declare (fixnum i sum) (type octet last-octet)) (loop (when (>= i end) (return)) @@ -115,7 +120,7 @@ ((= #b11000000 (logand octet #b11100000)) 2) ((= #b11100000 (logand octet #b11110000)) 3) (t 4)))) - (declare (fixnum length) (octet octet)) + (declare (fixnum length) (type octet octet)) (unless (and (= octet +lf+) (= last-octet +cr+)) (incf sum)) (incf i length) @@ -126,6 +131,7 @@ (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) + (declare (ignore sequence)) (when (and warnp (oddp (- end start))) (signal-encoding-warning format "~A octet~:P cannot be decoded ~ using UTF-16 as ~:*~A is not even." @@ -144,7 +150,7 @@ (let* ((high-octet (aref sequence (1+ i))) (length (cond ((<= #xd8 high-octet #xdf) 4) (t 2)))) - (declare (fixnum length) (octet high-octet)) + (declare (fixnum length) (type octet high-octet)) (incf sum) (incf i length))) (check-end format start (+ end 2) i warnp) @@ -163,7 +169,7 @@ (let* ((high-octet (aref sequence i)) (length (cond ((<= #xd8 high-octet #xdf) 4) (t 2)))) - (declare (fixnum length) (octet high-octet)) + (declare (fixnum length) (type octet high-octet)) (incf sum) (incf i length))) (check-end format start (+ end 2) i warnp) @@ -175,7 +181,7 @@ (let ((sum 0) (i start) (last-octet 0)) - (declare (fixnum i sum) (octet last-octet)) + (declare (fixnum i sum) (type octet last-octet)) (decf end 2) (loop (when (> i end) @@ -183,7 +189,7 @@ (let* ((high-octet (aref sequence (1+ i))) (length (cond ((<= #xd8 high-octet #xdf) 4) (t 2)))) - (declare (fixnum length) (octet high-octet)) + (declare (fixnum length) (type octet high-octet)) (unless (and (zerop high-octet) (= (the octet (aref sequence i)) +lf+) (= last-octet +cr+)) @@ -201,7 +207,7 @@ (let ((sum 0) (i start) (last-octet 0)) - (declare (fixnum i sum) (octet last-octet)) + (declare (fixnum i sum) (type octet last-octet)) (decf end 2) (loop (when (> i end) @@ -209,7 +215,7 @@ (let* ((high-octet (aref sequence i)) (length (cond ((<= #xd8 high-octet #xdf) 4) (t 2)))) - (declare (fixnum length) (octet high-octet)) + (declare (fixnum length) (type octet high-octet)) (unless (and (zerop high-octet) (= (the octet (aref sequence (1+ i))) +lf+) (= last-octet +cr+)) @@ -224,6 +230,7 @@ (defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) + (declare (ignore sequence)) (let ((length (- end start))) (when (and warnp (plusp (mod length 4))) (signal-encoding-warning format "~A octet~:P cannot be decoded ~ @@ -233,12 +240,13 @@ (defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence)) + (declare (ignore sequence warnp)) (ceiling (- end start) 4)) (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) + (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) (decf end 8) @@ -256,6 +264,7 @@ (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) + (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) (decf end 8) Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sat May 24 23:35:21 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -55,7 +55,7 @@ ((< char-code #x800) 2) ((< char-code #x10000) 3) (t 4)))) - (declare (fixnum char-length) (char-code-integer char-code)) + (declare (fixnum char-length) (type char-code-integer char-code)) (incf sum char-length) (incf i))) sum)) @@ -75,7 +75,7 @@ ((< char-code #x800) 2) ((< char-code #x10000) 3) (t 4)))) - (declare (fixnum char-length) (char-code-integer char-code)) + (declare (fixnum char-length) (type char-code-integer char-code)) (incf sum char-length) (incf i))) sum)) @@ -92,7 +92,7 @@ (let* ((char-code (char-code (aref sequence i))) (char-length (cond ((< char-code #x10000) 2) (t 4)))) - (declare (fixnum char-length) (char-code-integer char-code)) + (declare (fixnum char-length) (type char-code-integer char-code)) (incf sum char-length) (incf i))) sum)) @@ -110,7 +110,7 @@ (char-length (cond ((= char-code #.(char-code #\Newline)) 4) ((< char-code #x10000) 2) (t 4)))) - (declare (fixnum char-length) (char-code-integer char-code)) + (declare (fixnum char-length) (type char-code-integer char-code)) (incf sum char-length) (incf i))) sum)) @@ -128,7 +128,7 @@ (char-length (cond ((= char-code #.(char-code #\Newline)) 4) ((< char-code #x10000) 2) (t 4)))) - (declare (fixnum char-length) (char-code-integer char-code)) + (declare (fixnum char-length) (type char-code-integer char-code)) (incf sum char-length) (incf i))) sum)) @@ -194,7 +194,7 @@ (setq buffer-pos 0)) (write-octet (octet) "Adds one octet to the buffer and flushes it if necessary." - (declare (octet octet)) + (declare (type octet octet)) (when (>= buffer-pos buffer-size) (flush-buffer)) (setf (aref buffer buffer-pos) octet) Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sat May 24 23:35:21 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.76 2008/05/25 03:07:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -150,8 +150,7 @@ (external-format flexi-stream-external-format)) flexi-input-stream (let ((counter 0) octets-reversed) - (declare (integer position) - (fixnum counter)) + (declare (fixnum counter)) (flet ((writer (octet) (incf counter) (push octet octets-reversed))) Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sat May 24 23:35:21 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.28 2008/05/25 03:07:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -69,7 +69,7 @@ (prog1 (nth i sequence) (incf i)))))))) - (declare (fixnum i length) (dynamic-extent reader)) + (declare (fixnum i) (dynamic-extent reader)) (labels ((pseudo-writer (octet) (declare (ignore octet)) (decf i)) @@ -100,6 +100,6 @@ subsequence \(of octets) of SEQUENCE from START to END in characters if decoded using the external format EXTERNAL-FORMAT." (declare #.*standard-optimize-settings*) - (declare (fixnum start end) (string string)) + (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) (compute-number-of-chars external-format sequence start end t)) From eweitz at common-lisp.net Sun May 25 12:26:48 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 25 May 2008 08:26:48 -0400 (EDT) Subject: [flexi-streams-cvs] r57 - branches/edi Message-ID: <20080525122648.B8687690E3@common-lisp.net> Author: eweitz Date: Sun May 25 08:26:47 2008 New Revision: 57 Added: branches/edi/length.lisp (contents, props changed) Modified: branches/edi/decode.lisp branches/edi/encode.lisp branches/edi/external-format.lisp branches/edi/flexi-streams.asd Log: Re-org Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,256 +29,6 @@ (in-package :flexi-streams) -(defgeneric compute-number-of-chars (format sequence start end warnp) - (declare #.*standard-optimize-settings*) - (:documentation "Computes the exact number of characters required to -decode the sequence of octets in SEQUENCE from START to END using the -external format FORMAT. If WARNP is NIL, warnings will be muffled.")) - -(defmethod compute-number-of-chars :around (format (list list) start end warnp) - (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'vector) start end warnp)) - -(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence warnp)) - (- end start)) - -(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) - ;; this method only applies to the 8-bit formats as all other - ;; formats with CRLF line endings have their own specialized methods - ;; below - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore warnp)) - (let ((i start) - (length (- end start))) - (declare (fixnum i length)) - (loop - (when (>= i end) - (return)) - (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=))) - (unless position - (return)) - (setq i (1+ position)) - (decf length))) - length)) - -(defgeneric check-end (format start end i warnp) - (declare #.*fixnum-optimize-settings*) - (:method (format start end i warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end i)) - (when (and warnp (> i end)) - (signal-encoding-warning format "These ~A octet~:P can't be ~ -decoded using ~A as the sequence is too short. ~A octet~:P missing ~ -at then end." - (- end start) - (external-format-name format) - (- i end)))) - (:method ((format flexi-utf-16-format) start end i warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end i)) - (declare (ignore i warnp)) - ;; don't warn twice - (when (evenp (- end start)) - (call-next-method)))) - -(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((octet (aref sequence i)) - (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) - (t 4)))) - (declare (fixnum length) (type octet octet)) - (incf sum) - (incf i length))) - (check-end format start end i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start) - (last-octet 0)) - (declare (fixnum i sum) (type octet last-octet)) - (loop - (when (>= i end) - (return)) - (let* ((octet (aref sequence i)) - (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) - (t 4)))) - (declare (fixnum length) (type octet octet)) - (unless (and (= octet +lf+) (= last-octet +cr+)) - (incf sum)) - (incf i length) - (setq last-octet octet))) - (check-end format start end i warnp) - sum)) - -(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (when (and warnp (oddp (- end start))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ -using UTF-16 as ~:*~A is not even." - (- end start)))) - -(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence (1+ i))) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (incf sum) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence i)) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (incf sum) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start) - (last-octet 0)) - (declare (fixnum i sum) (type octet last-octet)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence (1+ i))) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (unless (and (zerop high-octet) - (= (the octet (aref sequence i)) +lf+) - (= last-octet +cr+)) - (incf sum)) - (setq last-octet (if (zerop high-octet) - (aref sequence i) - 0)) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start) - (last-octet 0)) - (declare (fixnum i sum) (type octet last-octet)) - (decf end 2) - (loop - (when (> i end) - (return)) - (let* ((high-octet (aref sequence i)) - (length (cond ((<= #xd8 high-octet #xdf) 4) - (t 2)))) - (declare (fixnum length) (type octet high-octet)) - (unless (and (zerop high-octet) - (= (the octet (aref sequence (1+ i))) +lf+) - (= last-octet +cr+)) - (incf sum)) - (setq last-octet (if (zerop high-octet) - (aref sequence (1+ i)) - 0)) - (incf i length))) - (check-end format start (+ end 2) i warnp) - sum)) - -(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (let ((length (- end start))) - (when (and warnp (plusp (mod length 4))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ -using UTF-32 as ~:*~A is not a multiple-value of four." - length)))) - -(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence warnp)) - (ceiling (- end start) 4)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore warnp)) - (let ((i start) - (length (ceiling (- end start) 4))) - (decf end 8) - (loop - (when (> i end) - (return)) - (cond ((loop for j of-type fixnum from i - for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) - always (= octet (aref sequence j))) - (decf length) - (incf i 8)) - (t (incf i 4)))) - length)) - -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore warnp)) - (let ((i start) - (length (ceiling (- end start) 4))) - (decf end 8) - (loop - (when (> i end) - (return)) - (cond ((loop for j of-type fixnum from i - for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) - always (= octet (aref sequence j))) - (decf length) - (incf i 8)) - (t (incf i 4)))) - length)) - (defun recover-from-encoding-error (external-format format-control &rest format-args) "Helper function used by OCTETS-TO-CHAR-CODE below to deal with encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,125 +29,6 @@ (in-package :flexi-streams) -(defgeneric compute-number-of-octets (format sequence start end) - (declare #.*standard-optimize-settings*) - (:documentation "Computes the exact number of octets required to -encode the sequence of characters in SEQUENCE from START to END using -the external format FORMAT.")) - -(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (- end start)) - -(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((< char-code #x80) 1) - ((< char-code #x800) 2) - ((< char-code #x10000) 3) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((= char-code #.(char-code #\Newline)) 2) - ((< char-code #x80) 1) - ((< char-code #x800) 2) - ((< char-code #x10000) 3) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((< char-code #x10000) 2) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((= char-code #.(char-code #\Newline)) 4) - ((< char-code #x10000) 2) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (let ((sum 0) - (i start)) - (declare (fixnum i sum)) - (loop - (when (>= i end) - (return)) - (let* ((char-code (char-code (aref sequence i))) - (char-length (cond ((= char-code #.(char-code #\Newline)) 4) - ((< char-code #x10000) 2) - (t 4)))) - (declare (fixnum char-length) (type char-code-integer char-code)) - (incf sum char-length) - (incf i))) - sum)) - -(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (declare (ignore sequence)) - (* 4 (- end start))) - -(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) - (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) - (+ (call-next-method) - (* (case (external-format-name format) - (:utf-32 4) - (otherwise 1)) - (count #\Newline sequence :start start :end end :test #'char=)))) - (defgeneric char-to-octets (format char writer) (declare #.*standard-optimize-settings*) (:documentation "Converts the character CHAR to a sequence of octets Modified: branches/edi/external-format.lisp ============================================================================== --- branches/edi/external-format.lisp (original) +++ branches/edi/external-format.lisp Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.22 2008/05/25 12:26:02 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -387,46 +387,3 @@ NORMALIZE-EXTERNAL-FORMAT." (print-unreadable-object (object stream :type t :identity t) (prin1 (normalize-external-format object) stream))) - -(defgeneric encoding-factor (format) - (:documentation "Given an external format FORMAT, returns a factor -which denotes the octets to characters ratio to expect when -encoding/decoding. If the returned value is an integer, the factor is -assumed to be exact. If it is a \(double) float, the factor is -supposed to be based on heuristics and usually not exact. - -This factor is used in string.lisp.") - (declare #.*standard-optimize-settings*)) - -(defmethod encoding-factor ((format flexi-8-bit-format)) - (declare #.*standard-optimize-settings*) - ;; 8-bit encodings map octets to characters in an exact one-to-one - ;; fashion - 1) - -(defmethod encoding-factor ((format flexi-utf-8-format)) - (declare #.*standard-optimize-settings*) - ;; UTF-8 characters can be anything from one to six octets, but we - ;; assume that the "overhead" is only about 5 percent - this - ;; estimate is obviously very much dependant on the content - 1.05d0) - -(defmethod encoding-factor ((format flexi-utf-16-format)) - (declare #.*standard-optimize-settings*) - ;; usually one character maps to two octets, but characters with - ;; code points above #x10000 map to four octets - we assume that we - ;; usually don't see these characters but of course have to return a - ;; float - 2.0d0) - -(defmethod encoding-factor ((format flexi-utf-32-format)) - (declare #.*standard-optimize-settings*) - ;; UTF-32 always matches every character to four octets - 4) - -(defmethod encoding-factor ((format flexi-crlf-mixin)) - (declare #.*standard-optimize-settings*) - ;; if the sequence #\Return #\Linefeed is the line-end marker, this - ;; obviously makes encodings potentially longer and definitely makes - ;; the estimate unexact - (* 1.02d0 (call-next-method))) Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sun May 25 08:26:47 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -47,6 +47,7 @@ (:file "util") (:file "conditions") (:file "external-format") + (:file "length") (:file "encode") (:file "decode") (:file "in-memory") Added: branches/edi/length.lisp ============================================================================== --- (empty file) +++ branches/edi/length.lisp Sun May 25 08:26:47 2008 @@ -0,0 +1,444 @@ +;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- +;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $ + +;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: + +;;; * Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. + +;;; * Redistributions in binary form must reproduce the above +;;; copyright notice, this list of conditions and the following +;;; disclaimer in the documentation and/or other materials +;;; provided with the distribution. + +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED +;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +(in-package :flexi-streams) + +(defgeneric encoding-factor (format) + (:documentation "Given an external format FORMAT, returns a factor +which denotes the octets to characters ratio to expect when +encoding/decoding. If the returned value is an integer, the factor is +assumed to be exact. If it is a \(double) float, the factor is +supposed to be based on heuristics and usually not exact. + +This factor is used in string.lisp.") + (declare #.*standard-optimize-settings*)) + +(defmethod encoding-factor ((format flexi-8-bit-format)) + (declare #.*standard-optimize-settings*) + ;; 8-bit encodings map octets to characters in an exact one-to-one + ;; fashion + 1) + +(defmethod encoding-factor ((format flexi-utf-8-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-8 characters can be anything from one to six octets, but we + ;; assume that the "overhead" is only about 5 percent - this + ;; estimate is obviously very much dependant on the content + 1.05d0) + +(defmethod encoding-factor ((format flexi-utf-16-format)) + (declare #.*standard-optimize-settings*) + ;; usually one character maps to two octets, but characters with + ;; code points above #x10000 map to four octets - we assume that we + ;; usually don't see these characters but of course have to return a + ;; float + 2.0d0) + +(defmethod encoding-factor ((format flexi-utf-32-format)) + (declare #.*standard-optimize-settings*) + ;; UTF-32 always matches every character to four octets + 4) + +(defmethod encoding-factor ((format flexi-crlf-mixin)) + (declare #.*standard-optimize-settings*) + ;; if the sequence #\Return #\Linefeed is the line-end marker, this + ;; obviously makes encodings potentially longer and definitely makes + ;; the estimate unexact + (* 1.02d0 (call-next-method))) + +(defgeneric check-end (format start end i warnp) + (declare #.*fixnum-optimize-settings*) + (:documentation "Helper function used below to determine if we tried +to read past the end of the sequence.") + (:method (format start end i warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) + (when (and warnp (> i end)) + (signal-encoding-warning format "These ~A octet~:P can't be ~ +decoded using ~A as the sequence is too short. ~A octet~:P missing ~ +at then end." + (- end start) + (external-format-name format) + (- i end)))) + (:method ((format flexi-utf-16-format) start end i warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end i)) + (declare (ignore i warnp)) + ;; don't warn twice + (when (evenp (- end start)) + (call-next-method)))) + +(defgeneric compute-number-of-chars (format sequence start end warnp) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of characters required to +decode the sequence of octets in SEQUENCE from START to END using the +external format FORMAT. If WARNP is NIL, warnings will be muffled.")) + +(defmethod compute-number-of-chars :around (format (list list) start end warnp) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'vector) start end warnp)) + +(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence warnp)) + (- end start)) + +(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) + ;; this method only applies to the 8-bit formats as all other + ;; formats with CRLF line endings have their own specialized methods + ;; below + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore warnp)) + (let ((i start) + (length (- end start))) + (declare (fixnum i length)) + (loop + (when (>= i end) + (return)) + (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=))) + (unless position + (return)) + (setq i (1+ position)) + (decf length))) + length)) + +(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((octet (aref sequence i)) + (length (cond ((not (logbitp 7 octet)) 1) + ((= #b11000000 (logand octet #b11100000)) 2) + ((= #b11100000 (logand octet #b11110000)) 3) + (t 4)))) + (declare (fixnum length) (type octet octet)) + (incf sum) + (incf i length))) + (check-end format start end i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (type octet last-octet)) + (loop + (when (>= i end) + (return)) + (let* ((octet (aref sequence i)) + (length (cond ((not (logbitp 7 octet)) 1) + ((= #b11000000 (logand octet #b11100000)) 2) + ((= #b11100000 (logand octet #b11110000)) 3) + (t 4)))) + (declare (fixnum length) (type octet octet)) + (unless (and (= octet +lf+) (= last-octet +cr+)) + (incf sum)) + (incf i length) + (setq last-octet octet))) + (check-end format start end i warnp) + sum)) + +(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (when (and warnp (oddp (- end start))) + (signal-encoding-warning format "~A octet~:P cannot be decoded ~ +using UTF-16 as ~:*~A is not even." + (- end start)))) + +(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence (1+ i))) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (incf sum) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence i)) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (incf sum) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (type octet last-octet)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence (1+ i))) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (unless (and (zerop high-octet) + (= (the octet (aref sequence i)) +lf+) + (= last-octet +cr+)) + (incf sum)) + (setq last-octet (if (zerop high-octet) + (aref sequence i) + 0)) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start) + (last-octet 0)) + (declare (fixnum i sum) (type octet last-octet)) + (decf end 2) + (loop + (when (> i end) + (return)) + (let* ((high-octet (aref sequence i)) + (length (cond ((<= #xd8 high-octet #xdf) 4) + (t 2)))) + (declare (fixnum length) (type octet high-octet)) + (unless (and (zerop high-octet) + (= (the octet (aref sequence (1+ i))) +lf+) + (= last-octet +cr+)) + (incf sum)) + (setq last-octet (if (zerop high-octet) + (aref sequence (1+ i)) + 0)) + (incf i length))) + (check-end format start (+ end 2) i warnp) + sum)) + +(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (let ((length (- end start))) + (when (and warnp (plusp (mod length 4))) + (signal-encoding-warning format "~A octet~:P cannot be decoded ~ +using UTF-32 as ~:*~A is not a multiple-value of four." + length)))) + +(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence warnp)) + (ceiling (- end start) 4)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore warnp)) + (let ((i start) + (length (ceiling (- end start) 4))) + (decf end 8) + (loop + (when (> i end) + (return)) + (cond ((loop for j of-type fixnum from i + for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0) + always (= octet (aref sequence j))) + (decf length) + (incf i 8)) + (t (incf i 4)))) + length)) + +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore warnp)) + (let ((i start) + (length (ceiling (- end start) 4))) + (decf end 8) + (loop + (when (> i end) + (return)) + (cond ((loop for j of-type fixnum from i + for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+) + always (= octet (aref sequence j))) + (decf length) + (incf i 8)) + (t (incf i 4)))) + length)) + +(defgeneric compute-number-of-octets (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "Computes the exact number of octets required to +encode the sequence of characters in SEQUENCE from START to END using +the external format FORMAT.")) + +(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (- end start)) + +(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 2) + ((< char-code #x80) 1) + ((< char-code #x800) 2) + ((< char-code #x10000) 3) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (let ((sum 0) + (i start)) + (declare (fixnum i sum)) + (loop + (when (>= i end) + (return)) + (let* ((char-code (char-code (aref sequence i))) + (char-length (cond ((= char-code #.(char-code #\Newline)) 4) + ((< char-code #x10000) 2) + (t 4)))) + (declare (fixnum char-length) (type char-code-integer char-code)) + (incf sum char-length) + (incf i))) + sum)) + +(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (declare (ignore sequence)) + (* 4 (- end start))) + +(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) + (declare #.*fixnum-optimize-settings*) + (declare (fixnum start end)) + (+ (call-next-method) + (* (case (external-format-name format) + (:utf-32 4) + (otherwise 1)) + (count #\Newline sequence :start start :end end :test #'char=)))) \ No newline at end of file From eweitz at common-lisp.net Sun May 25 20:28:27 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 25 May 2008 16:28:27 -0400 (EDT) Subject: [flexi-streams-cvs] r58 - in branches/edi: . doc Message-ID: <20080525202827.0AC72702EF@common-lisp.net> Author: eweitz Date: Sun May 25 16:28:25 2008 New Revision: 58 Modified: branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/encode.lisp branches/edi/input.lisp branches/edi/length.lisp branches/edi/mapping.lisp branches/edi/strings.lisp Log: Optimized the other direction as well Passes tests on LispWorks Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -60,26 +60,217 @@ The special variable *CURRENT-UNREADER* must be bound correctly whenever this function is called.")) -(defmethod octets-to-char-code ((format flexi-latin-1-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) - (funcall reader)) +(defgeneric octets-to-string* (format sequence start end) + (declare #.*standard-optimize-settings*) + (:documentation "A generic function which dispatches on the external +format and does the real work for OCTETS-TO-STRING.")) -(defmethod octets-to-char-code ((format flexi-ascii-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) - (when-let (octet (funcall reader)) +(defmethod octets-to-string* :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'vector) start end)) + +(defmacro define-sequence-readers ((format-class) &body body) + "Non-hygienic utility macro which defines methods for READ-SEQUENCE* +and OCTETS-TO-STRING* for the class FORMAT-CLASS. BODY is described +in the docstring of DEFINE-CHAR-ENCODERS but can additionally contain +a form (UNGET

    ) which has to be replaced by the correct code to +`unread' the octets for the character designated by ." + (let* ((body `((block char-decoder + (locally + (declare #.*fixnum-optimize-settings*) + , at body))))) + `(progn + (defmethod read-sequence* ((format ,format-class) flexi-input-stream sequence start end) + (with-accessors ((position flexi-stream-position) + (bound flexi-stream-bound) + (octet-stack flexi-stream-octet-stack) + (last-octet flexi-stream-last-octet) + (last-char-code flexi-stream-last-char-code) + (stream flexi-stream-stream)) + flexi-input-stream + (let* (buffer + (buffer-pos 0) + (buffer-end 0) + (index start) + ;; whether we will later be able to rewind the stream if + ;; needed (to get rid of unused octets in the buffer) + (can-rewind-p (maybe-rewind stream 0)) + (factor (encoding-factor format)) + (integer-factor (floor factor)) + ;; it's an interesting question whether it makes sense + ;; performance-wise to make RESERVE significantly bigger + ;; (and thus put potentially a lot more octets into + ;; OCTET-STACK), especially for UTF-8 + (reserve (cond ((not (floatp factor)) 0) + ((not can-rewind-p) (* 2 integer-factor)) + (t (ceiling (* (- factor integer-factor) (- end start))))))) + (declare (fixnum buffer-pos buffer-end index integer-factor reserve) + (boolean can-rewind-p)) + (flet ((compute-fill-amount () + "Computes the amount of octets we can savely read into +the buffer without violating the stream's bound \(if there is one) and +without potentially reading much more than we need \(unless we can +rewind afterwards)." + (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor + (the fixnum (- end index)))) + reserve)) + +buffer-size+))) + (cond (bound (min minimum (- bound position))) + (t minimum)))) + (fill-buffer (end) + "Tries to fill the buffer from BUFFER-POS to END and +returns NIL if the buffer doesn't contain any new data." + ;; put data from octet stack into buffer if there is any + (loop + (when (>= buffer-pos end) + (return)) + (let ((next-octet (pop octet-stack))) + (cond (next-octet + (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet)) + (incf buffer-pos)) + (t (return))))) + (setq buffer-end (read-sequence buffer stream + :start buffer-pos + :end end)) + ;; BUFFER-POS is only greater than zero if the buffer + ;; already contains unread data from the octet stack + ;; (see below), so we test for ZEROP here and do /not/ + ;; compare with BUFFER-POS + (unless (zerop buffer-end) + (incf position buffer-end)))) + (let ((minimum (compute-fill-amount))) + (declare (fixnum minimum)) + (setq buffer (make-octet-buffer minimum)) + ;; fill buffer for the first time or return immediately if + ;; we don't succeed + (unless (fill-buffer minimum) + (return-from read-sequence* start))) + (setq buffer-pos 0) + (macrolet ((iterate (set-place) + "A very unhygienic macro to implement the +actual iteration through the sequence including housekeeping for the +flexi stream. SET-PLACE is the place \(using the index INDEX) used to +access the sequence." + `(flet ((leave () + "This is the function used to +abort the LOOP iteration below." + (when (> index start) + (setq last-octet nil + last-char-code ,(sublis '((index . (1- index))) set-place))) + (return-from read-sequence* index))) + (loop + (when (>= index end) + ;; check if there are octets in the + ;; buffer we didn't use - see + ;; COMPUTE-FILL-AMOUNT above + (let ((rest (- buffer-end buffer-pos))) + (when (plusp rest) + (or (and can-rewind-p + (maybe-rewind stream rest)) + (loop + (when (>= buffer-pos buffer-end) + (return)) + (decf buffer-end) + (push (aref (the (array octet *) buffer) buffer-end) + octet-stack))))) + (leave)) + (let ((next-char-code + (progn (symbol-macrolet + ((octet-getter + ;; this is the code to retrieve the next octet (or + ;; NIL) and to fill the buffer if needed + (block next-octet + (when (>= buffer-pos buffer-end) + (setq buffer-pos 0) + (unless (fill-buffer (compute-fill-amount)) + (return-from next-octet))) + (prog1 + (aref (the (array octet *) buffer) buffer-pos) + (incf buffer-pos))))) + (macrolet ((unget (form) + `(unread-char% ,form flexi-input-stream))) + ,', at body))))) + (unless next-char-code + (leave)) + (setf ,set-place (code-char next-char-code)) + (incf index)))))) + (etypecase sequence + (string (iterate (char sequence index))) + (array (iterate (aref sequence index))) + (list (iterate (nth index sequence))))))))) + (defmethod octets-to-string* ((format ,format-class) sequence start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (let* ((i start) + (string-length (compute-number-of-chars format sequence start end nil)) + (string (make-array string-length :element-type 'char*))) + (declare (fixnum i string-length)) + (loop for j of-type fixnum from 0 below string-length + do (setf (schar string j) + (code-char (macrolet ((unget (form) + `(decf i (character-length format ,form)))) + (symbol-macrolet ((octet-getter (and (< i end) + (prog1 + (aref sequence i) + (incf i))))) + , at body)))) + finally (return string))))))) + +(defmacro define-char-decoders ((lf-format-class cr-format-class crlf-format-class) &body body) + "Non-hygienic utility macro which defines several decoding-related +methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and +CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same +encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and +similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. +BODY is a code template for the code to read octets and return one +character. BODY must contain a symbol OCTET-GETTER representing the +form which is used to obtain the next octet." + `(progn + (defmethod octets-to-char-code ((format ,lf-format-class) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (symbol-macrolet ((octet-getter (funcall reader))) + ,@(sublis '((char-decoder . octets-to-char-code)) + body))) + (define-sequence-readers (,lf-format-class) , at body) + (define-sequence-readers (,cr-format-class) + ,(with-unique-names (char-code) + `(let ((,char-code (progn , at body))) + (case ,char-code + (#.+cr+ #.(char-code #\Newline)) + (otherwise ,char-code))))) + (define-sequence-readers (,crlf-format-class) + ,(with-unique-names (char-code next-char-code get-char-code) + `(flet ((,get-char-code () , at body)) + (let ((,char-code (,get-char-code))) + (case ,char-code + (#.+cr+ + (let ((,next-char-code (,get-char-code))) + (case ,next-char-code + (#.+lf+ #.(char-code #\Newline)) + ;; we saw a CR but no LF afterwards, but then the data + ;; ended, so we just return #\Return + ((nil) +cr+) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise (unget (code-char ,next-char-code)) + ,char-code)))) + (otherwise ,char-code)))))))) + +(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) + octet-getter) + +(define-char-decoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format) + (when-let (octet octet-getter) (if (> (the octet octet) 127) (recover-from-encoding-error format "No character which corresponds to octet #x~X." octet) octet))) -(defmethod octets-to-char-code ((format flexi-8-bit-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format) (with-accessors ((decoding-table external-format-decoding-table)) format - (when-let (octet (funcall reader)) + (when-let (octet octet-getter) (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) (the octet octet)))) (if (or (null char-code) @@ -88,19 +279,17 @@ "No character which corresponds to octet #x~X." octet) char-code))))) -(defmethod octets-to-char-code ((format flexi-utf-8-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-8 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (let ((octet (read-next-byte))) (declare (type octet octet)) @@ -113,11 +302,7 @@ (values (logand octet #b00001111) 2)) ((= #b11110000 (logand octet #b11111000)) (values (logand octet #b00000111) 3)) - ((= #b11111000 (logand octet #b11111100)) - (values (logand octet #b00000011) 4)) - ((= #b11111100 (logand octet #b11111110)) - (values (logand octet #b00000001) 5)) - (t (return-from octets-to-char-code + (t (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X at start of UTF-8 sequence." octet)))) @@ -130,24 +315,22 @@ repeat count for octet of-type octet = (read-next-byte) unless (= #b10000000 (logand octet #b11000000)) - do (return-from octets-to-char-code + do (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X in UTF-8 sequence." octet)) finally (return result))))))) -(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-16 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () (+ (the octet (read-next-byte)) @@ -159,7 +342,7 @@ (let ((next-word (read-next-word))) (declare (type (unsigned-byte 16) next-word)) (unless (<= #xdc00 next-word #xdfff) - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) @@ -168,19 +351,17 @@ #x10000))) (t word))))))) -(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-16 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () (+ (ash (the octet (read-next-byte)) 8) @@ -192,7 +373,7 @@ (let ((next-word (read-next-word))) (declare (type (unsigned-byte 16) next-word)) (unless (<= #xdc00 next-word #xdfff) - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) @@ -201,37 +382,33 @@ #x10000))) (t word))))))) -(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-32 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 0 to 24 by 8 for octet of-type octet = (read-next-byte) sum (ash octet count))))) -(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) +(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) (let (first-octet-seen) (declare (boolean first-octet-seen)) (macrolet ((read-next-byte () '(prog1 - (or (funcall reader) + (or octet-getter (cond (first-octet-seen - (return-from octets-to-char-code + (return-from char-decoder (recover-from-encoding-error format "End of data while in UTF-32 sequence."))) - (t (return-from octets-to-char-code nil)))) + (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (loop for count of-type fixnum from 24 downto 0 by 8 for octet of-type octet = (read-next-byte) Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sun May 25 16:28:25 2008 @@ -996,7 +996,7 @@

    Strings

    -This section collects a few convenience functions for strings conversions: +This section collects a few convenience functions for strings conversions.


    [Function]
    string-to-octets string &key external-format start end => vector @@ -1009,7 +1009,9 @@ start and end are 0 and the length of the string. The default for external-format is :LATIN1. - +

    +In spite of the name, string can be any sequence of characters, but +the function is optimized for strings.


    [Function] @@ -1023,6 +1025,11 @@ start and end are 0 and the length of the sequence. The default for external-format is :LATIN1. +

    +This function is optimized for the case +of sequence being +a vector. +Don't use lists if you are in hurry.


    [Function] @@ -1030,14 +1037,17 @@


    -Returns the length of the substring of string from start to end in +Returns the length of the subsequence of string from start to end in octets if encoded using the external format designated by external-format. The defaults for start and end -are 0 and the length of the string. The default +are 0 and the length of string. The default for external-format is :LATIN1. +

    +In spite of the name, string can be any sequence of characters, but +the function is optimized for strings.


    [Function] @@ -1054,6 +1064,11 @@ start and end are 0 and the length of the sequence. The default for external-format is :LATIN1. +

    +This function is optimized for the case +of sequence being +a vector. +Don't use lists if you are in hurry.
     

    File positions

    @@ -1095,7 +1110,7 @@ his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -47,130 +47,140 @@ (:documentation "A generic function which dispatches on the external format and does the real work for STRING-TO-OCTETS.")) +(defmethod string-to-octets* :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'string*) start end)) + (defmacro define-sequence-writers ((format-class) &body body) - "Utility macro which defines methods for WRITE-SEQUENCE* and -STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the -docstring of DEFINE-CHAR-ENCODERS." - `(progn - (defmethod write-sequence* ((format ,format-class) stream sequence start end) - (declare #.*standard-optimize-settings*) - (declare (fixnum start end)) - (with-accessors ((column flexi-stream-column)) - stream - (let* ((octet-seen-p nil) - (buffer-pos 0) - ;; estimate should be good enough... - (factor (encoding-factor format)) - ;; we don't want arbitrarily large buffer, do we? - (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) - (buffer (make-octet-buffer buffer-size))) - (declare (fixnum buffer-pos buffer-size) - (boolean octet-seen-p) - (type (array octet *) buffer)) - (macrolet ((octet-writer (form) - `(write-octet ,form))) - (labels ((flush-buffer () - "Sends all octets in BUFFER to the underlying stream." - (write-sequence buffer stream :end buffer-pos) - (setq buffer-pos 0)) - (write-octet (octet) - "Adds one octet to the buffer and flushes it if necessary." - (declare (type octet octet)) - (when (>= buffer-pos buffer-size) - (flush-buffer)) - (setf (aref buffer buffer-pos) octet) - (incf buffer-pos)) - (write-object (object) - "Dispatches to WRITE-OCTET or WRITE-CHARACTER -depending on the type of OBJECT." - (etypecase object - (octet (setq octet-seen-p t) - (write-octet object)) - (character (symbol-macrolet ((char-getter object)) - , at body))))) - (macrolet ((iterate (&body output-forms) - "An unhygienic macro to implement the actual -iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one -sequence element and put its octet representation into the buffer." - `(loop for index of-type fixnum from start below end - do (progn , at output-forms) - finally (when (plusp buffer-pos) - (flush-buffer))))) - (etypecase sequence - (string (iterate - (symbol-macrolet ((char-getter (char sequence index))) - , at body))) - (array (iterate - (symbol-macrolet ((char-getter (aref sequence index))) - , at body))) - (list (iterate (write-object (nth index sequence)))))) - ;; update the column slot, setting it to NIL if we sent - ;; octets - (setq column - (cond (octet-seen-p nil) - (t (let ((last-newline-pos (position #\Newline sequence - :test #'char= - :start start - :end end - :from-end t))) - (cond (last-newline-pos (- end last-newline-pos 1)) - (column (+ column (- end start))))))))))))) - (defmethod string-to-octets* ((format ,format-class) string start end) - (declare #.*standard-optimize-settings*) - (declare (fixnum start end) (string string)) - (let ((octets (make-array (compute-number-of-octets format string start end) - :element-type 'octet)) - (j 0)) - (declare (fixnum j)) - (loop for i of-type fixnum from start below end do - (macrolet ((octet-writer (form) - `(progn - (setf (aref (the (array octet *) octets) j) ,form) - (incf j)))) - (symbol-macrolet ((char-getter (char string i))) - (progn , at body)))) - octets)))) - -;; char-getter can be called more than once - no side effects -(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body) - "Utility macro which defines several encoding-related methods for -the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where -it is assumed that CR-FORMAT-CLASS is the same encoding as -FORMAT-CLASS but with CR line endings and similar for -CRLF-FORMAT-CLASS. BODY is a code template for the code to convert -one character to octets. BODY must contain a symbol CHAR-GETTER -representing the form which is used to obtain the character and a -forms like \(OCTET-WRITE ) to write the octet . The -CHAR-GETTER form might be called more than once." + "Non-hygienic utility macro which defines methods for +WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For +BODY see the docstring of DEFINE-CHAR-ENCODERS." (let ((body `((locally (declare #.*fixnum-optimize-settings*) , at body)))) `(progn - (defmethod char-to-octets ((format ,format-class) char writer) - (declare (character char) (function writer)) - (symbol-macrolet ((char-getter char)) - (macrolet ((octet-writer (form) - `(funcall writer ,form))) - , at body))) - (define-sequence-writers (,format-class) , at body) - (define-sequence-writers (,cr-format-class) - ,@(sublis `((char-getter . ,(with-unique-names (char) - `(let ((,char char-getter)) - (declare (character ,char)) - (if (char= ,char #\Newline) - #\Return - ,char))))) - body)) - (define-sequence-writers (,crlf-format-class) - ,(with-unique-names (char write-char) - `(flet ((,write-char (,char) - ,@(sublis `((char-getter . ,char)) body))) - (let ((,char char-getter)) - (declare (character ,char)) - (cond ((char= ,char #\Newline) - (,write-char #\Return) - (,write-char #\Newline)) - (t (,write-char ,char)))))))))) + (defmethod string-to-octets* ((format ,format-class) string start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end) (string string)) + (let ((octets (make-array (compute-number-of-octets format string start end) + :element-type 'octet)) + (j 0)) + (declare (fixnum j)) + (loop for i of-type fixnum from start below end do + (macrolet ((octet-writer (form) + `(progn + (setf (aref (the (array octet *) octets) j) ,form) + (incf j)))) + (symbol-macrolet ((char-getter (char string i))) + (progn , at body)))) + octets)) + (defmethod write-sequence* ((format ,format-class) stream sequence start end) + (declare #.*standard-optimize-settings*) + (declare (fixnum start end)) + (with-accessors ((column flexi-stream-column)) + stream + (let* ((octet-seen-p nil) + (buffer-pos 0) + ;; estimate should be good enough... + (factor (encoding-factor format)) + ;; we don't want arbitrarily large buffer, do we? + (buffer-size (min +buffer-size+ (ceiling (* factor (- end start))))) + (buffer (make-octet-buffer buffer-size))) + (declare (fixnum buffer-pos buffer-size) + (boolean octet-seen-p) + (type (array octet *) buffer)) + (macrolet ((octet-writer (form) + `(write-octet ,form))) + (labels ((flush-buffer () + "Sends all octets in BUFFER to the underlying stream." + (write-sequence buffer stream :end buffer-pos) + (setq buffer-pos 0)) + (write-octet (octet) + "Adds one octet to the buffer and flushes it if necessary." + (declare (type octet octet)) + (when (>= buffer-pos buffer-size) + (flush-buffer)) + (setf (aref buffer buffer-pos) octet) + (incf buffer-pos)) + (write-object (object) + "Dispatches to WRITE-OCTET or WRITE-CHARACTER +depending on the type of OBJECT." + (etypecase object + (octet (setq octet-seen-p t) + (write-octet object)) + (character (symbol-macrolet ((char-getter object)) + , at body))))) + (macrolet ((iterate (&body output-forms) + "An unhygienic macro to implement the actual +iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one +sequence element and put its octet representation into the buffer." + `(loop for index of-type fixnum from start below end + do (progn , at output-forms) + finally (when (plusp buffer-pos) + (flush-buffer))))) + (etypecase sequence + (string (iterate + (symbol-macrolet ((char-getter (char sequence index))) + , at body))) + (array (iterate + (symbol-macrolet ((char-getter (aref sequence index))) + , at body))) + (list (iterate (write-object (nth index sequence)))))) + ;; update the column slot, setting it to NIL if we sent + ;; octets + (setq column + (cond (octet-seen-p nil) + (t (let ((last-newline-pos (position #\Newline sequence + :test #'char= + :start start + :end end + :from-end t))) + (cond (last-newline-pos (- end last-newline-pos 1)) + (column (+ column (- end start)))))))))))))))) + +(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body) + "Non-hygienic utility macro which defines several encoding-related +methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and +CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same +encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and +similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. +BODY is a code template for the code to convert one character to +octets. BODY must contain a symbol CHAR-GETTER representing the form +which is used to obtain the character and a forms like \(OCTET-WRITE +) to write the octet . The CHAR-GETTER form might be +called more than once." + `(progn + (defmethod char-to-octets ((format ,lf-format-class) char writer) + (declare #.*fixnum-optimize-settings*) + (declare (character char) (function writer)) + (symbol-macrolet ((char-getter char)) + (macrolet ((octet-writer (form) + `(funcall writer ,form))) + , at body))) + (define-sequence-writers (,lf-format-class) , at body) + (define-sequence-writers (,cr-format-class) + ;; modify the body so that the getter replaces a #\Newline + ;; with a #\Return + ,@(sublis `((char-getter . ,(with-unique-names (char) + `(let ((,char char-getter)) + (declare (character ,char)) + (if (char= ,char #\Newline) + #\Return + ,char))))) + body)) + (define-sequence-writers (,crlf-format-class) + ;; modify the body so that we potentially write octets for + ;; two characters (#\Return and #\Linefeed) - the original + ;; body is wrapped with the WRITE-CHAR local function + ,(with-unique-names (char write-char) + `(flet ((,write-char (,char) + ,@(sublis `((char-getter . ,char)) body))) + (let ((,char char-getter)) + (declare (character ,char)) + (cond ((char= ,char #\Newline) + (,write-char #\Return) + (,write-char #\Linefeed)) + (t (,write-char ,char))))))))) (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) (let ((octet (char-code char-getter))) Modified: branches/edi/input.lisp ============================================================================== --- branches/edi/input.lisp (original) +++ branches/edi/input.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.78 2008/05/25 19:25:44 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -201,9 +201,7 @@ others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) - (with-accessors ((position flexi-stream-position) - (bound flexi-stream-bound) - (octet-stack flexi-stream-octet-stack) + (with-accessors ((octet-stack flexi-stream-octet-stack) (external-format flexi-stream-external-format) (last-octet flexi-stream-last-octet) (last-char-code flexi-stream-last-char-code) @@ -233,116 +231,8 @@ (setq last-char-code nil last-octet (elt sequence (1- index)))) (return-from stream-read-sequence index))) - (let* (buffer - (buffer-pos 0) - (buffer-end 0) - (index start) - ;; whether we will later be able to rewind the stream if - ;; needed (to get rid of unused octets in the buffer) - (can-rewind-p (maybe-rewind stream 0)) - (factor (encoding-factor external-format)) - (integer-factor (floor factor)) - ;; it's an interesting question whether it makes sense - ;; performance-wise to make RESERVE significantly bigger - ;; (and thus put potentially a lot more octets into - ;; OCTET-STACK), especially for UTF-8 - (reserve (cond ((not (floatp factor)) 0) - ((not can-rewind-p) (* 2 integer-factor)) - (t (ceiling (* (- factor integer-factor) (- end start))))))) - (declare (fixnum buffer-pos buffer-end index integer-factor reserve) - (boolean can-rewind-p)) - (flet ((compute-fill-amount () - "Computes the amount of octets we can savely read into -the buffer without violating the stream's bound \(if there is one) and -without potentially reading much more than we need \(unless we can -rewind afterwards)." - (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor - (the fixnum (- end index)))) - reserve)) - +buffer-size+))) - (cond (bound (min minimum (- bound position))) - (t minimum)))) - (fill-buffer (end) - "Tries to fill the buffer from BUFFER-POS to END and -returns NIL if the buffer doesn't contain any new data." - ;; put data from octet stack into buffer if there is any - (loop - (when (>= buffer-pos end) - (return)) - (let ((next-octet (pop octet-stack))) - (cond (next-octet - (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet)) - (incf buffer-pos)) - (t (return))))) - (setq buffer-end (read-sequence buffer stream - :start buffer-pos - :end end)) - ;; BUFFER-POS is only greater than zero if the buffer - ;; already contains unread data from the octet stack - ;; (see below), so we test for ZEROP here and do /not/ - ;; compare with BUFFER-POS - (unless (zerop buffer-end) - (incf position buffer-end)))) - (let ((minimum (compute-fill-amount))) - (declare (fixnum minimum)) - (setq buffer (make-octet-buffer minimum)) - ;; fill buffer for the first time or return immediately if - ;; we don't succeed - (unless (fill-buffer minimum) - (return-from stream-read-sequence start))) - (setq buffer-pos 0) - (flet ((next-octet () - "Returns the next octet from the buffer and fills it -if it is exhausted. Returns NIL if there's no more data on the -stream." - (when (>= buffer-pos buffer-end) - (setq buffer-pos 0) - (unless (fill-buffer (compute-fill-amount)) - (return-from next-octet))) - (prog1 - (aref (the (array octet *) buffer) buffer-pos) - (incf buffer-pos))) - (unreader (char) - (unread-char% char flexi-input-stream))) - (declare (dynamic-extent (function next-octet) (function unreader))) - (let ((*current-unreader* #'unreader)) - (macrolet ((iterate (set-place) - "A very unhygienic macro to implement the -actual iteration through the sequence including housekeeping for the -flexi stream. SET-PLACE is the place \(using the index INDEX) used to -access the sequence." - `(flet ((leave () - "This is the function used to abort -the LOOP iteration below." - (when (> index start) - (setq last-octet nil - last-char-code ,(sublis '((index . (1- index))) set-place))) - (return-from stream-read-sequence index))) - (loop - (when (>= index end) - ;; check if there are octets in the - ;; buffer we didn't use - see - ;; COMPUTE-FILL-AMOUNT above - (let ((rest (- buffer-end buffer-pos))) - (when (plusp rest) - (or (and can-rewind-p - (maybe-rewind stream rest)) - (loop - (when (>= buffer-pos buffer-end) - (return)) - (decf buffer-end) - (push (aref (the (array octet *) buffer) buffer-end) - octet-stack))))) - (leave)) - (let ((next-char-code (octets-to-char-code external-format #'next-octet))) - (unless next-char-code - (leave)) - (setf ,set-place (code-char next-char-code)) - (incf index)))))) - (etypecase sequence - (string (iterate (char sequence index))) - (array (iterate (aref sequence index))) - (list (iterate (nth index sequence))))))))))) + ;; otherwise hand over to the external format to do the work + (read-sequence* external-format flexi-input-stream sequence start end))) (defmethod stream-unread-char ((stream flexi-input-stream) char) "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM. Modified: branches/edi/length.lisp ============================================================================== --- branches/edi/length.lisp (original) +++ branches/edi/length.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -115,7 +115,7 @@ ;; formats with CRLF line endings have their own specialized methods ;; below (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore warnp)) (let ((i start) (length (- end start))) @@ -132,7 +132,7 @@ (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start)) (declare (fixnum i sum)) @@ -152,7 +152,7 @@ (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start) (last-octet 0)) @@ -175,7 +175,7 @@ (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore sequence)) (when (and warnp (oddp (- end start))) (signal-encoding-warning format "~A octet~:P cannot be decoded ~ @@ -203,7 +203,7 @@ (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start)) (declare (fixnum i sum)) @@ -222,7 +222,7 @@ (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start) (last-octet 0)) @@ -248,7 +248,7 @@ (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (let ((sum 0) (i start) (last-octet 0)) @@ -290,7 +290,7 @@ (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) @@ -308,7 +308,7 @@ (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (vector sequence)) (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) @@ -330,22 +330,26 @@ encode the sequence of characters in SEQUENCE from START to END using the external format FORMAT.")) -(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end) +(defmethod compute-number-of-octets :around (format (list list) start end) + (declare #.*standard-optimize-settings*) + (call-next-method format (coerce list 'string*) start end)) + +(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence)) + (declare (ignore string)) (- end start)) -(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((< char-code #x80) 1) ((< char-code #x800) 2) ((< char-code #x10000) 3) @@ -355,16 +359,16 @@ (incf i))) sum)) -(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((= char-code #.(char-code #\Newline)) 2) ((< char-code #x80) 1) ((< char-code #x800) 2) @@ -375,16 +379,16 @@ (incf i))) sum)) -(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((< char-code #x10000) 2) (t 4)))) (declare (fixnum char-length) (type char-code-integer char-code)) @@ -392,16 +396,16 @@ (incf i))) sum)) -(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((= char-code #.(char-code #\Newline)) 4) ((< char-code #x10000) 2) (t 4)))) @@ -410,16 +414,16 @@ (incf i))) sum)) -(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (let ((sum 0) (i start)) (declare (fixnum i sum)) (loop (when (>= i end) (return)) - (let* ((char-code (char-code (aref sequence i))) + (let* ((char-code (char-code (char string i))) (char-length (cond ((= char-code #.(char-code #\Newline)) 4) ((< char-code #x10000) 2) (t 4)))) @@ -428,17 +432,39 @@ (incf i))) sum)) -(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end) +(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence)) + (declare (ignore string)) (* 4 (- end start))) -(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end) +(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end) (declare #.*fixnum-optimize-settings*) - (declare (fixnum start end)) + (declare (fixnum start end) (string string)) (+ (call-next-method) (* (case (external-format-name format) (:utf-32 4) (otherwise 1)) - (count #\Newline sequence :start start :end end :test #'char=)))) \ No newline at end of file + (count #\Newline string :start start :end end :test #'char=)))) + +(defgeneric character-length (format char) + (declare #.*fixnum-optimize-settings*) + (:documentation "Returns the number of octets needed to encode the +single character CHAR.") + (:method (format char) + (compute-number-of-octets format (string char) 0 1))) + +(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline))) + (declare #.*fixnum-optimize-settings*) + (+ (call-next-method format +cr+) + (call-next-method format +lf+))) + +(defmethod character-length ((format flexi-8-bit-format) char) + (declare #.*fixnum-optimize-settings*) + (declare (ignore char)) + 1) + +(defmethod character-length ((format flexi-utf-32-format) char) + (declare #.*fixnum-optimize-settings*) + (declare (ignore char)) + 4) \ No newline at end of file Modified: branches/edi/mapping.lisp ============================================================================== --- branches/edi/mapping.lisp (original) +++ branches/edi/mapping.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.3 2008/05/25 19:07:53 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -39,6 +39,12 @@ #+:lispworks 'lw:simple-char #-:lispworks 'character) +(deftype string* () + "Convenience shortcut to paper over the difference between LispWorks +and the other Lisps." + #+:lispworks 'lw:text-string + #-:lispworks 'string) + (deftype char-code-integer () "The subtype of integers which can be returned by the function CHAR-CODE." '(integer 0 #.(1- char-code-limit))) Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 25 16:28:25 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -34,7 +34,10 @@ (start 0) (end (length string))) "Converts the Lisp string STRING from START to END to an array of octets corresponding to the external format designated by -EXTERNAL-FORMAT." +EXTERNAL-FORMAT. + +In spite of the name, STRING can be any sequence of characters, but +the function is optimized for strings." (declare #.*standard-optimize-settings*) (declare (string string)) (setq external-format (maybe-convert-external-format external-format)) @@ -45,51 +48,22 @@ (external-format :latin1) (start 0) (end (length sequence))) "Converts the Lisp sequence SEQUENCE of octets from START to END to -a string using the external format designated by EXTERNAL-FORMAT." +a string using the external format designated by EXTERNAL-FORMAT. + +This function is optimized for the case of SEQUENCE being a vector. +Don't use lists if you're in a hurry." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) - (let* ((i start) - (reader (etypecase sequence - ((array octet *) - (lambda () - (and (< i end) - (prog1 - (aref (the (array octet *) sequence) i) - (incf i))))) - ((array * *) - (lambda () - (and (< i end) - (prog1 - (aref sequence i) - (incf i))))) - (list - (lambda () - (and (< i end) - (prog1 - (nth i sequence) - (incf i)))))))) - (declare (fixnum i) (dynamic-extent reader)) - (labels ((pseudo-writer (octet) - (declare (ignore octet)) - (decf i)) - (unreader (char) - (char-to-octets external-format char #'pseudo-writer))) - (declare (dynamic-extent (function pseudo-writer) (function unreader))) - (let ((*current-unreader* #'unreader)) - (flet ((next-char () - (code-char (octets-to-char-code external-format reader)))) - (declare (inline next-char)) - (let* ((string-length (compute-number-of-chars external-format sequence start end nil)) - (string (make-array string-length :element-type 'char*))) - (declare (fixnum string-length)) - (loop for j of-type fixnum from 0 below string-length - do (setf (schar string j) (next-char)) - finally (return string)))))))) + ;; the external format knows how to do it... + (octets-to-string* external-format sequence start end)) (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string))) "Returns the length of the substring of STRING from START to END in -octets if encoded using the external format EXTERNAL-FORMAT." +octets if encoded using the external format EXTERNAL-FORMAT. + +In spite of the name, STRING can be any sequence of characters, but +the function is optimized for strings." (declare #.*standard-optimize-settings*) (declare (fixnum start end) (string string)) (setq external-format (maybe-convert-external-format external-format)) @@ -98,7 +72,10 @@ (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence))) "Kind of the inverse of OCTET-LENGTH. Returns the length of the subsequence \(of octets) of SEQUENCE from START to END in characters -if decoded using the external format EXTERNAL-FORMAT." +if decoded using the external format EXTERNAL-FORMAT. + +This function is optimized for the case of SEQUENCE being a vector. +Don't use lists if you're in a hurry." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) From eweitz at common-lisp.net Sun May 25 20:45:13 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 25 May 2008 16:45:13 -0400 (EDT) Subject: [flexi-streams-cvs] r59 - branches/edi Message-ID: <20080525204513.EB6883E05E@common-lisp.net> Author: eweitz Date: Sun May 25 16:45:09 2008 New Revision: 59 Modified: branches/edi/decode.lisp branches/edi/encode.lisp Log: ANSI compliance fix Tests pass on ClozureCL and AllegroCL now Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 16:45:09 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -67,7 +67,7 @@ (defmethod octets-to-string* :around (format (list list) start end) (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'vector) start end)) + (octets-to-string* format (coerce list 'vector) start end)) (defmacro define-sequence-readers ((format-class) &body body) "Non-hygienic utility macro which defines methods for READ-SEQUENCE* Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 25 16:45:09 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.22 2008/05/25 20:44:03 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -49,7 +49,7 @@ (defmethod string-to-octets* :around (format (list list) start end) (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'string*) start end)) + (string-to-octets* format (coerce list 'string*) start end)) (defmacro define-sequence-writers ((format-class) &body body) "Non-hygienic utility macro which defines methods for From eweitz at common-lisp.net Sun May 25 21:36:38 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 25 May 2008 17:36:38 -0400 (EDT) Subject: [flexi-streams-cvs] r60 - branches/edi Message-ID: <20080525213638.CE2D833079@common-lisp.net> Author: eweitz Date: Sun May 25 17:36:37 2008 New Revision: 60 Modified: branches/edi/encode.lisp branches/edi/util.lisp Log: Help some Lisps optimize the encoding functions Modified: branches/edi/encode.lisp ============================================================================== --- branches/edi/encode.lisp (original) +++ branches/edi/encode.lisp Sun May 25 17:36:37 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.22 2008/05/25 20:44:03 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.23 2008/05/25 21:26:12 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -203,65 +203,67 @@ (octet-writer octet)))) (define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format) + ;; the old version using LDB was more elegant, but some Lisps had + ;; trouble optimizing it (let ((char-code (char-code char-getter))) (tagbody (cond ((< char-code #x80) (octet-writer char-code) (go zero)) ((< char-code #x800) - (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code))) + (octet-writer (logior* #b11000000 (ash* char-code -6))) (go one)) ((< char-code #x10000) - (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code))) + (octet-writer (logior* #b11100000 (ash* char-code -12))) (go two)) (t - (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code))))) - (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code))) + (octet-writer (logior* #b11110000 (ash* char-code -18))))) + (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12)))) two - (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code))) + (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6)))) one - (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code))) + (octet-writer (logior* #b10000000 (logand* #b00111111 char-code))) zero))) (define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format) (flet ((write-word (word) - (octet-writer (ldb (byte 8 0) word)) - (octet-writer (ldb (byte 8 8) word)))) + (octet-writer (logand* #x00ff word)) + (octet-writer (ash* (logand* #xff00 word) -8)))) (declare (inline write-word)) (let ((char-code (char-code char-getter))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) + (write-word (logior* #xd800 (ash* char-code -10))) + (write-word (logior* #xdc00 (logand* #x03ff char-code)))))))) (define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format) (flet ((write-word (word) - (octet-writer (ldb (byte 8 8) word)) - (octet-writer (ldb (byte 8 0) word)))) + (octet-writer (ash* (logand* #xff00 word) -8)) + (octet-writer (logand* #x00ff word)))) (declare (inline write-word)) (let ((char-code (char-code char-getter))) (declare (type char-code-integer char-code)) (cond ((< char-code #x10000) (write-word char-code)) (t (decf char-code #x10000) - (write-word (logior #xd800 (ldb (byte 10 10) char-code))) - (write-word (logior #xdc00 (ldb (byte 10 0) char-code)))))))) + (write-word (logior* #xd800 (ash* char-code -10))) + (write-word (logior* #xdc00 (logand* #x03ff char-code)))))))) (define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format) (let ((char-code (char-code char-getter))) - (octet-writer (ldb (byte 8 0) char-code)) - (octet-writer (ldb (byte 8 8) char-code)) - (octet-writer (ldb (byte 8 16) char-code)) - (octet-writer (ldb (byte 8 24) char-code)))) + (octet-writer (logand* #x00ff char-code)) + (octet-writer (logand* #x00ff (ash* char-code -8))) + (octet-writer (logand* #x00ff (ash* char-code -16))) + (octet-writer (logand* #x00ff (ash* char-code -24))))) (define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) (let ((char-code (char-code char-getter))) - (octet-writer (ldb (byte 8 24) char-code)) - (octet-writer (ldb (byte 8 16) char-code)) - (octet-writer (ldb (byte 8 8) char-code)) - (octet-writer (ldb (byte 8 0) char-code)))) + (octet-writer (logand* #x00ff (ash* char-code -24))) + (octet-writer (logand* #x00ff (ash* char-code -16))) + (octet-writer (logand* #x00ff (ash* char-code -8))) + (octet-writer (logand* #x00ff char-code)))) (defmethod char-to-octets ((format flexi-cr-mixin) char writer) (declare #.*fixnum-optimize-settings*) Modified: branches/edi/util.lisp ============================================================================== --- branches/edi/util.lisp (original) +++ branches/edi/util.lisp Sun May 25 17:36:37 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.23 2008/05/25 03:07:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/util.lisp,v 1.24 2008/05/25 21:26:12 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -192,4 +192,16 @@ "Tries to `rewind' the \(binary) stream STREAM by OCTETS octets. Returns a true value if it succeeds." (when-let (position (file-position stream)) - (file-position stream (- position octets)))) \ No newline at end of file + (file-position stream (- position octets)))) + +(defmacro logand* (x y) + "Solely for optimization purposes. Some Lisps need it, some don't." + `(the fixnum (logand ,x ,y))) + +(defmacro logior* (x y) + "Solely for optimization purposes. Some Lisps need it, some don't." + `(the fixnum (logior ,x ,y))) + +(defmacro ash* (integer count) + "Solely for optimization purposes. Some Lisps need it, some don't." + `(the fixnum (ash ,integer ,count))) From eweitz at common-lisp.net Sun May 25 23:43:23 2008 From: eweitz at common-lisp.net (eweitz at common-lisp.net) Date: Sun, 25 May 2008 19:43:23 -0400 (EDT) Subject: [flexi-streams-cvs] r61 - in branches/edi: . doc test Message-ID: <20080525234323.82DB47903C@common-lisp.net> Author: eweitz Date: Sun May 25 19:43:22 2008 New Revision: 61 Modified: branches/edi/CHANGELOG branches/edi/conditions.lisp branches/edi/decode.lisp branches/edi/doc/index.html branches/edi/flexi-streams.asd branches/edi/length.lisp branches/edi/packages.lisp branches/edi/strings.lisp branches/edi/test/test.lisp Log: Ready for release Modified: branches/edi/CHANGELOG ============================================================================== --- branches/edi/CHANGELOG (original) +++ branches/edi/CHANGELOG Sun May 25 19:43:22 2008 @@ -1,3 +1,10 @@ +Version 1.0.0 +2008-05-26 +More redesign for the sake of performance +More checks for invalid data +More tests +Exported functions for length computation + Version 0.15.3 2008-05-23 Avoid CHANGE-CLASS on LispWorks if possible Modified: branches/edi/conditions.lisp ============================================================================== --- branches/edi/conditions.lisp (original) +++ branches/edi/conditions.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.9 2008/05/25 22:23:58 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -93,21 +93,11 @@ () (:documentation "Superclass for all errors related to external formats.")) - -(define-condition external-format-warning (external-format-condition warning) - () - (:documentation "Superclass for all warnings related to external -formats.")) (define-condition external-format-encoding-error (external-format-error) () (:documentation "Errors of this type are signalled if there is an encoding problem.")) - -(define-condition external-format-encoding-warning (external-format-warning) - () - (:documentation "Warnings of this type are signalled if there is an -encoding problem.")) (defun signal-encoding-error (external-format format-control &rest format-args) "Convenience function similar to ERROR to signal conditions of type @@ -116,11 +106,3 @@ :format-control format-control :format-arguments format-args :external-format external-format)) - -(defun signal-encoding-warning (external-format format-control &rest format-args) - "Convenience function similar to WARN to signal conditions of type -EXTERNAL-FORMAT-ENCODING-WARNING." - (warn 'external-format-encoding-warning - :format-control format-control - :format-arguments format-args - :external-format external-format)) Modified: branches/edi/decode.lisp ============================================================================== --- branches/edi/decode.lisp (original) +++ branches/edi/decode.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.29 2008/05/25 23:19:19 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -202,7 +202,7 @@ (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (let* ((i start) - (string-length (compute-number-of-chars format sequence start end nil)) + (string-length (compute-number-of-chars format sequence start end)) (string (make-array string-length :element-type 'char*))) (declare (fixnum i string-length)) (loop for j of-type fixnum from 0 below string-length @@ -223,39 +223,46 @@ encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class. BODY is a code template for the code to read octets and return one -character. BODY must contain a symbol OCTET-GETTER representing the -form which is used to obtain the next octet." - `(progn - (defmethod octets-to-char-code ((format ,lf-format-class) reader) - (declare #.*fixnum-optimize-settings*) - (declare (function reader)) - (symbol-macrolet ((octet-getter (funcall reader))) - ,@(sublis '((char-decoder . octets-to-char-code)) - body))) - (define-sequence-readers (,lf-format-class) , at body) - (define-sequence-readers (,cr-format-class) - ,(with-unique-names (char-code) - `(let ((,char-code (progn , at body))) - (case ,char-code - (#.+cr+ #.(char-code #\Newline)) - (otherwise ,char-code))))) - (define-sequence-readers (,crlf-format-class) - ,(with-unique-names (char-code next-char-code get-char-code) - `(flet ((,get-char-code () , at body)) - (let ((,char-code (,get-char-code))) +character code. BODY must contain a symbol OCTET-GETTER representing +the form which is used to obtain the next octet." + (let* ((body (with-unique-names (char-code) + `((let ((,char-code (progn , at body))) + (when (and ,char-code + (or (<= #xd8 (logand* #x00ff (ash* ,char-code -8)) #xdf) + (> ,char-code #x10ffff))) + (recover-from-encoding-error format "Illegal code point ~A \(#x~:*~X)." ,char-code)) + ,char-code))))) + `(progn + (defmethod octets-to-char-code ((format ,lf-format-class) reader) + (declare #.*fixnum-optimize-settings*) + (declare (function reader)) + (symbol-macrolet ((octet-getter (funcall reader))) + ,@(sublis '((char-decoder . octets-to-char-code)) + body))) + (define-sequence-readers (,lf-format-class) , at body) + (define-sequence-readers (,cr-format-class) + ,(with-unique-names (char-code) + `(let ((,char-code (progn , at body))) (case ,char-code - (#.+cr+ - (let ((,next-char-code (,get-char-code))) - (case ,next-char-code - (#.+lf+ #.(char-code #\Newline)) - ;; we saw a CR but no LF afterwards, but then the data - ;; ended, so we just return #\Return - ((nil) +cr+) - ;; if the character we peeked at wasn't a - ;; linefeed character we unread its constituents - (otherwise (unget (code-char ,next-char-code)) - ,char-code)))) - (otherwise ,char-code)))))))) + (#.+cr+ #.(char-code #\Newline)) + (otherwise ,char-code))))) + (define-sequence-readers (,crlf-format-class) + ,(with-unique-names (char-code next-char-code get-char-code) + `(flet ((,get-char-code () , at body)) + (let ((,char-code (,get-char-code))) + (case ,char-code + (#.+cr+ + (let ((,next-char-code (,get-char-code))) + (case ,next-char-code + (#.+lf+ #.(char-code #\Newline)) + ;; we saw a CR but no LF afterwards, but then the data + ;; ended, so we just return #\Return + ((nil) +cr+) + ;; if the character we peeked at wasn't a + ;; linefeed character we unread its constituents + (otherwise (unget (code-char ,next-char-code)) + ,char-code)))) + (otherwise ,char-code))))))))) (define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format) octet-getter) @@ -296,25 +303,28 @@ (multiple-value-bind (start count) (cond ((not (logbitp 7 octet)) (values octet 0)) - ((= #b11000000 (logand octet #b11100000)) - (values (logand octet #b00011111) 1)) - ((= #b11100000 (logand octet #b11110000)) - (values (logand octet #b00001111) 2)) - ((= #b11110000 (logand octet #b11111000)) - (values (logand octet #b00000111) 3)) + ((= #b11000000 (logand* octet #b11100000)) + (when (= #b11000000 (logand* octet #b11111110)) + (return-from char-decoder + (recover-from-encoding-error format + "Illegal value #x~X leads to `overlong' UTF-8 sequence." + octet))) + (values (logand* octet #b00011111) 1)) + ((= #b11100000 (logand* octet #b11110000)) + (values (logand* octet #b00001111) 2)) + ((= #b11110000 (logand* octet #b11111000)) + (values (logand* octet #b00000111) 3)) (t (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X at start of UTF-8 sequence." octet)))) (declare (fixnum count)) - ;; note that we currently don't check for "overlong" - ;; sequences or other illegal values (loop for result of-type code-point - = start then (+ (ash result 6) - (logand octet #b111111)) + = start then (+ (ash* result 6) + (logand* octet #b111111)) repeat count for octet of-type octet = (read-next-byte) - unless (= #b10000000 (logand octet #b11000000)) + unless (= #b10000000 (logand* octet #b11000000)) do (return-from char-decoder (recover-from-encoding-error format "Unexpected value #x~X in UTF-8 sequence." octet)) @@ -334,7 +344,7 @@ (setq first-octet-seen t)))) (flet ((read-next-word () (+ (the octet (read-next-byte)) - (ash (the octet (read-next-byte)) 8)))) + (ash* (the octet (read-next-byte)) 8)))) (declare (inline read-next-word)) (let ((word (read-next-word))) (declare (type (unsigned-byte 16) word)) @@ -346,8 +356,8 @@ (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) + (+ (ash* (logand* #b1111111111 word) 10) + (logand* #b1111111111 next-word) #x10000))) (t word))))))) @@ -364,7 +374,7 @@ (t (return-from char-decoder nil)))) (setq first-octet-seen t)))) (flet ((read-next-word () - (+ (ash (the octet (read-next-byte)) 8) + (+ (ash* (the octet (read-next-byte)) 8) (the octet (read-next-byte))))) (declare (inline read-next-word)) (let ((word (read-next-word))) @@ -377,8 +387,8 @@ (recover-from-encoding-error format "Unexpected UTF-16 word #x~X following #x~X." next-word word))) - (+ (ash (logand #b1111111111 word) 10) - (logand #b1111111111 next-word) + (+ (ash* (logand* #b1111111111 word) 10) + (logand* #b1111111111 next-word) #x10000))) (t word))))))) @@ -396,7 +406,7 @@ (setq first-octet-seen t)))) (loop for count of-type fixnum from 0 to 24 by 8 for octet of-type octet = (read-next-byte) - sum (ash octet count))))) + sum (ash* octet count))))) (define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format) (let (first-octet-seen) @@ -412,7 +422,7 @@ (setq first-octet-seen t)))) (loop for count of-type fixnum from 24 downto 0 by 8 for octet of-type octet = (read-next-byte) - sum (ash octet count))))) + sum (ash* octet count))))) (defmethod octets-to-char-code ((format flexi-cr-mixin) reader) (declare #.*fixnum-optimize-settings*) Modified: branches/edi/doc/index.html ============================================================================== --- branches/edi/doc/index.html (original) +++ branches/edi/doc/index.html Sun May 25 19:43:22 2008 @@ -72,7 +72,6 @@

  • external-format-condition
  • external-format-condition-external-format
  • external-format-error -
  • external-format-warning
  • external-format-encoding-error
  • *substitution-char* @@ -229,7 +228,7 @@

    FLEXI-STREAMS together with this documentation can be downloaded from http://weitz.de/files/flexi-streams.tar.gz. The -current version is 0.15.3. +current version is 1.0.0.

    Before you install FLEXI-STREAMS you first need to install the


    [Condition] -
    external-format-warning - -


    -All warnings related to external formats are of this type. -This is a subtype of EXTERNAL-FORMAT-CONDITION. -
    - -


    [Condition]
    external-format-error


    @@ -1063,7 +1054,7 @@ The defaults for start and end are 0 and the length of the sequence. The default -for external-format is :LATIN1. +for external-format is :LATIN1. Note that this function doesn't check for the validity of the data in sequence.

    This function is optimized for the case of sequence being @@ -1110,7 +1101,7 @@ his work on making FLEXI-STREAMS faster.

    -$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $ +$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.119 2008/05/25 23:42:30 edi Exp $

    BACK TO MY HOMEPAGE Modified: branches/edi/flexi-streams.asd ============================================================================== --- branches/edi/flexi-streams.asd (original) +++ branches/edi/flexi-streams.asd Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.71 2008/05/25 23:42:28 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -35,7 +35,7 @@ (in-package :flexi-streams-system) (defsystem :flexi-streams - :version "0.15.3" + :version "1.0.0" :serial t :components ((:file "packages") (:file "mapping") Modified: branches/edi/length.lisp ============================================================================== --- branches/edi/length.lisp (original) +++ branches/edi/length.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.4 2008/05/25 22:23:58 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -72,51 +72,50 @@ ;; the estimate unexact (* 1.02d0 (call-next-method))) -(defgeneric check-end (format start end i warnp) +(defgeneric check-end (format start end i) (declare #.*fixnum-optimize-settings*) (:documentation "Helper function used below to determine if we tried to read past the end of the sequence.") - (:method (format start end i warnp) + (:method (format start end i) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end i)) - (when (and warnp (> i end)) - (signal-encoding-warning format "These ~A octet~:P can't be ~ + (when (> i end) + (signal-encoding-error format "These ~A octet~:P can't be ~ decoded using ~A as the sequence is too short. ~A octet~:P missing ~ at then end." - (- end start) - (external-format-name format) - (- i end)))) - (:method ((format flexi-utf-16-format) start end i warnp) + (- end start) + (external-format-name format) + (- i end)))) + (:method ((format flexi-utf-16-format) start end i) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end i)) - (declare (ignore i warnp)) + (declare (ignore i)) ;; don't warn twice (when (evenp (- end start)) (call-next-method)))) -(defgeneric compute-number-of-chars (format sequence start end warnp) +(defgeneric compute-number-of-chars (format sequence start end) (declare #.*standard-optimize-settings*) (:documentation "Computes the exact number of characters required to decode the sequence of octets in SEQUENCE from START to END using the -external format FORMAT. If WARNP is NIL, warnings will be muffled.")) +external format FORMAT.")) -(defmethod compute-number-of-chars :around (format (list list) start end warnp) +(defmethod compute-number-of-chars :around (format (list list) start end) (declare #.*standard-optimize-settings*) - (call-next-method format (coerce list 'vector) start end warnp)) + (call-next-method format (coerce list 'vector) start end)) -(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence warnp)) + (declare (ignore sequence)) (- end start)) -(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end) ;; this method only applies to the 8-bit formats as all other ;; formats with CRLF line endings have their own specialized methods ;; below (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) - (declare (ignore warnp)) (let ((i start) (length (- end start))) (declare (fixnum i length)) @@ -130,7 +129,7 @@ (decf length))) length)) -(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -140,17 +139,18 @@ (when (>= i end) (return)) (let* ((octet (aref sequence i)) + ;; note that there are no validity checks here (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) + ((= #b11000000 (logand* octet #b11100000)) 2) + ((= #b11100000 (logand* octet #b11110000)) 3) (t 4)))) (declare (fixnum length) (type octet octet)) (incf sum) (incf i length))) - (check-end format start end i warnp) + (check-end format start end i) sum)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -161,28 +161,29 @@ (when (>= i end) (return)) (let* ((octet (aref sequence i)) + ;; note that there are no validity checks here (length (cond ((not (logbitp 7 octet)) 1) - ((= #b11000000 (logand octet #b11100000)) 2) - ((= #b11100000 (logand octet #b11110000)) 3) + ((= #b11000000 (logand* octet #b11100000)) 2) + ((= #b11100000 (logand* octet #b11110000)) 3) (t 4)))) (declare (fixnum length) (type octet octet)) (unless (and (= octet +lf+) (= last-octet +cr+)) (incf sum)) (incf i length) (setq last-octet octet))) - (check-end format start end i warnp) + (check-end format start end i) sum)) -(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp) +(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (declare (ignore sequence)) - (when (and warnp (oddp (- end start))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ + (when (oddp (- end start)) + (signal-encoding-error format "~A octet~:P cannot be decoded ~ using UTF-16 as ~:*~A is not even." - (- end start)))) + (- end start)))) -(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (let ((sum 0) @@ -198,10 +199,10 @@ (declare (fixnum length) (type octet high-octet)) (incf sum) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum)) -(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -217,10 +218,10 @@ (declare (fixnum length) (type octet high-octet)) (incf sum) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -243,10 +244,10 @@ (aref sequence i) 0)) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) (let ((sum 0) @@ -269,29 +270,28 @@ (aref sequence (1+ i)) 0)) (incf i length))) - (check-end format start (+ end 2) i warnp) + (check-end format start (+ end 2) i) sum)) -(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp) +(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) (declare (ignore sequence)) (let ((length (- end start))) - (when (and warnp (plusp (mod length 4))) - (signal-encoding-warning format "~A octet~:P cannot be decoded ~ + (when (plusp (mod length 4)) + (signal-encoding-error format "~A octet~:P cannot be decoded ~ using UTF-32 as ~:*~A is not a multiple-value of four." - length)))) + length)))) -(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end)) - (declare (ignore sequence warnp)) + (declare (ignore sequence)) (ceiling (- end start) 4)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) - (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) (decf end 8) @@ -306,10 +306,9 @@ (t (incf i 4)))) length)) -(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp) +(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end) (declare #.*fixnum-optimize-settings*) (declare (fixnum start end) (vector sequence)) - (declare (ignore warnp)) (let ((i start) (length (ceiling (- end start) 4))) (decf end 8) Modified: branches/edi/packages.lisp ============================================================================== --- branches/edi/packages.lisp (original) +++ branches/edi/packages.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.38 2008/05/25 22:23:58 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -46,12 +46,10 @@ :external-format-eol-style :external-format-error :external-format-encoding-error - :external-format-encoding-warning :external-format-equal :external-format-id :external-format-little-endian :external-format-name - :external-format-warning :flexi-input-stream :flexi-output-stream :flexi-io-stream Modified: branches/edi/strings.lisp ============================================================================== --- branches/edi/strings.lisp (original) +++ branches/edi/strings.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.32 2008/05/25 23:09:13 edi Exp $ ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved. @@ -72,11 +72,12 @@ (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence))) "Kind of the inverse of OCTET-LENGTH. Returns the length of the subsequence \(of octets) of SEQUENCE from START to END in characters -if decoded using the external format EXTERNAL-FORMAT. +if decoded using the external format EXTERNAL-FORMAT. Note that this +function doesn't check for the validity of the data in SEQUENCE. This function is optimized for the case of SEQUENCE being a vector. Don't use lists if you're in a hurry." (declare #.*standard-optimize-settings*) (declare (fixnum start end)) (setq external-format (maybe-convert-external-format external-format)) - (compute-number-of-chars external-format sequence start end t)) + (compute-number-of-chars external-format sequence start end)) Modified: branches/edi/test/test.lisp ============================================================================== --- branches/edi/test/test.lisp (original) +++ branches/edi/test/test.lisp Sun May 25 19:43:22 2008 @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $ +;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.35 2008/05/25 23:10:47 edi Exp $ ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved. @@ -29,6 +29,48 @@ (in-package :flexi-streams-test) +(defmacro with-test ((test-description) &body body) + "Defines a test. Two utilities are available inside of the body of +the maco: The function FAIL, and the macro CHECK. FAIL, the lowest +level utility, marks the test defined by WITH-TEST as failed. CHECK +checks whether its argument is true, otherwise it calls FAIL. If +during evaluation of the specified expression any condition is +signalled, this is also considered a failure. + +WITH-TEST prints reports while the tests run. It also increments +*TEST-SUCCESS-COUNT* if a test completes successfully." + (flex::with-unique-names (successp) + `(let ((,successp t)) + (flet ((fail (format-str &rest format-args) + (setf ,successp nil) + (apply #'format *error-output* format-str format-args))) + (macrolet ((check (expression) + `(handler-case + (unless ,expression + (fail "Expression ~S failed.~%" ',expression)) + (error (c) + (fail "Expression ~S failed signalling error of type ~A: ~A.~%" + ',expression (type-of c) c)))) + (with-expected-error ((condition-type) &body body) + `(handler-case (progn , at body) + (,condition-type () t) + (:no-error (&rest args) + (declare (ignore args)) + (fail "Expected condition ~S not signalled~%" + ',condition-type))))) + (format *error-output* "Test ~S~%" ,test-description) + , at body + (if ,successp + (incf *test-success-counter*) + (format *error-output* " Test failed!!!~%")) + (terpri *error-output*) + (terpri *error-output*)) + ,successp)))) + +;; LW can't indent this correctly because it's in a MACROLET +#+:lispworks +(editor:setup-indent "with-expected-error" 1 2 4) + (defconstant +buffer-size+ 8192 "Size of buffers for COPY-STREAM* below.") @@ -245,37 +287,6 @@ (setf (fill-pointer string) (read-sequence string in)) string))) -(defmacro with-test ((test-description) &body body) - "Defines a test. Two utilities are available inside of the body of -the maco: The function FAIL, and the macro CHECK. FAIL, the lowest -level utility, marks the test defined by WITH-TEST as failed. CHECK -checks whether its argument is true, otherwise it calls FAIL. If -during evaluation of the specified expression any condition is -signalled, this is also considered a failure. - -WITH-TEST prints reports while the tests run. It also increments -*TEST-SUCCESS-COUNT* if a test completes successfully." - (flex::with-unique-names (successp) - `(let ((,successp t)) - (flet ((fail (format-str &rest format-args) - (setf ,successp nil) - (apply #'format *error-output* format-str format-args))) - (macrolet ((check (expression) - `(handler-case - (unless ,expression - (fail "Expression ~S failed.~%" ',expression)) - (error (c) - (fail "Expression ~S failed signalling error of type ~A: ~A.~%" - ',expression (type-of c) c))))) - (format *error-output* "Test ~S~%" ,test-description) - , at body - (if ,successp - (incf *test-success-counter*) - (format *error-output* " Test failed!!!~%")) - (terpri *error-output*) - (terpri *error-output*)) - ,successp)))) - (defun old-string-to-octets (string &key (external-format (make-external-format :latin1)) (start 0) end) @@ -460,7 +471,51 @@ (defun error-handling-test () "Tests several possible errors and how they are handled." - (with-test ("Handling of errors.") + (with-test ("Illegal values.") + (macrolet ((want-encoding-error (input format) + `(with-expected-error (external-format-encoding-error) + (read-flexi-line* ,input ,format)))) + ;; "overlong" + (want-encoding-error #(#b11000000) :utf-8) + (want-encoding-error #(#b11000001) :utf-8) + ;; examples of invalid lead octets + (want-encoding-error #(#b11111000) :utf-8) + (want-encoding-error #(#b11111001) :utf-8) + (want-encoding-error #(#b11111100) :utf-8) + (want-encoding-error #(#b11111101) :utf-8) + (want-encoding-error #(#b11111110) :utf-8) + (want-encoding-error #(#b11111111) :utf-8) + ;; illegal code points + (want-encoding-error #(#x00 #x00 #x11 #x00) :utf-32le) + (want-encoding-error #(#x00 #xd8) :utf-16le) + (want-encoding-error #(#xff #xdf) :utf-16le))) + (with-test ("Illegal lengths.") + (macrolet ((want-encoding-error (input format) + `(with-expected-error (external-format-encoding-error) + (read-flexi-line* ,input ,format)))) + ;; UTF-8 sequences which are too short + (want-encoding-error #(#xe4 #xf6 #xfc) :utf8) + (want-encoding-error #(#xc0) :utf8) + (want-encoding-error #(#xe0 #xff) :utf8) + (want-encoding-error #(#xf0 #xff #xff) :utf8) + ;; UTF-16 wants an even number of octets + (want-encoding-error #(#x01) :utf-16le) + (want-encoding-error #(#x01 #x01 #x01) :utf-16le) + (want-encoding-error #(#x01) :utf-16be) + (want-encoding-error #(#x01 #x01 #x01) :utf-16be) + ;; another word should follow but it doesn't + (want-encoding-error #(#x01 #xd8) :utf-16le) + (want-encoding-error #(#xd8 #x01) :utf-16be) + ;; UTF-32 always wants four octets + (want-encoding-error #(#x01) :utf-32le) + (want-encoding-error #(#x01 #x01) :utf-32le) + (want-encoding-error #(#x01 #x01 #x01) :utf-32le) + (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32le) + (want-encoding-error #(#x01) :utf-32be) + (want-encoding-error #(#x01 #x01) :utf-32be) + (want-encoding-error #(#x01 #x01 #x01) :utf-32be) + (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32be))) + (with-test ("Errors while decoding and substitution of characters.") ;; handling of EOF in the middle of CRLF (check (string= #.(string #\Return) (read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf)))) @@ -472,11 +527,7 @@ (check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253))) (check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))) ;; not a valid UTF-8 sequence - (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))) - (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))) - ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 - (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8))) - (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) + (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) (let ((*substitution-char* nil)) ;; :ASCII doesn't have characters with char codes > 127 (check (string= "abc" (using-values (#\b #\c) @@ -490,16 +541,12 @@ (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))) ;; not a valid UTF-8 sequence (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))) - (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))) ;; UTF-8 can't start neither with #b11111110 nor with #b11111111 (check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8)))) - (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8)))) ;; only one byte (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le)))) - (check (string= "" (read-flexi-line* #(#x01) :utf-16le))) ;; two bytes, but value of resulting word suggests that another word follows (check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le)))) - (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le)))) ;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff (check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le)))) (check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le)))) @@ -507,11 +554,10 @@ (check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be)))) (check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be)))) (check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be)))) - (check (string= "" (read-flexi-line* #(#x01) :utf-16be))) - (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be)))) (check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be)))) - ;; the only case when error is signalled for UTF-32 is at end of file - ;; in the middle of 4-byte sequence, both for big and little endian + ;; the only case when errors are signalled for UTF-32 is at end + ;; of file in the middle of 4-byte sequence, both for big and + ;; little endian (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le)))) (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le)))) (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le)))) @@ -521,17 +567,7 @@ (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be)))) (check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be)))) (check (string= "aY" (using-values (#\Y) - (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le)))) - (check (string= "aY" (using-values (#\Y) - (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be)))) - (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be)))) - (check (string= "aY" (using-values (#\Y) - (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) + (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))))) (defun unread-char-test () "Tests whether UNREAD-CHAR behaves as expected." @@ -572,7 +608,7 @@ (incf no-tests (length read-sequence-test-args-list)) (dolist (args read-sequence-test-args-list) (apply 'sequence-test args))) - (incf no-tests) + (incf no-tests 3) (error-handling-test) (incf no-tests) (unread-char-test)