From johan at riise-data.no Fri Feb 23 01:42:04 2007 From: johan at riise-data.no (Johan Ur Riise) Date: Fri, 23 Feb 2007 02:42:04 +0100 Subject: [araneida-devel] utf-8 and unsigned-byte 8 Message-ID: <20070223014204.GA32725@riise-data.no> I have a patch that enables utf-8 request streams, and at the same time let me send binary files using the static file handler. The idea is to open a (unsigned-byte 8) stream in the accept, then immediately replace it with a flexi-stream of type character with external encoding utf-8. After that, the stream can be reconfigured to unsigned-byte, for instance for sending the content in a file download. A problem is that it introduces ainother dependancy, to the flexi-streams package by E Weitz. Anyway, it's attached -- Hilsen Johan Ur Riise -------------- next part -------------- Fri Feb 23 02:09:04 CET 2007 Johan Ur Riise * utfi-8 character streams and binary streams using flexi-streams To enable utf-8 request streams, and at the same time allow binary files for the static file handler. The streams are opened with ielement-type unsigned-byte length 8, then immediately re?placed with a flexi-stream of type character with external format utf-8. Then, when you are ready to send the content, you can change the stream element type back to binary. This patch also reintroduces static-file handler from the main araneida branch. This adds a dependancy to flexi-streams. diff -rN old-araneida-slim2/araneida.asd new-araneida-slim2/araneida.asd 18c18,19 < #+sbcl sb-bsd-sockets) --- > #+sbcl sb-bsd-sockets > flexi-streams) 75a77 > (:file "static-file-handler" :depends-on ("handler" )) diff -rN old-araneida-slim2/compat/compat-sbcl.lisp new-araneida-slim2/compat/compat-sbcl.lisp 3,5d2 < (defparameter *open-external-format-arguments* < '(:element-type (unsigned-byte 8) :external-format :iso-8859-1)) < 13,24c10,11 < (let ((fd (sb-sys:fd-stream-fd s))) < (multiple-value-bind (r e) (ignore-errors (close s) t) < (unless r < (format t "Unable to close fd ~A: ~A, trying harder ~%" fd e) < (multiple-value-bind (r e) (ignore-errors (close s :abort t) t) < (unless r < (format t "still unable to close ~A: ~A, try harder ~%" fd e) < (multiple-value-bind (r e) < (ignore-errors (sb-unix:unix-close fd) t) < (unless r < (format t "Even unix-close failed on ~A:~A, giving up~%" < fd e))))))))) --- > "Closes a flexi stream" > (close s)) 43,46c30,37 < (sb-bsd-sockets:socket-make-stream socket :element-type 'character < :external-format :iso-8859-1 < :name "socket" < :input t :output t :buffering :full))) --- > (let ((stream (sb-bsd-sockets:socket-make-stream socket > :element-type '(unsigned-byte 8) > :external-format :utf8 > :name "socket" > :input t :output t :buffering :full))) > (setq stream (make-flexi-stream stream :external-format :utf-8)) > stream) > )) diff -rN old-araneida-slim2/defpackage.lisp new-araneida-slim2/defpackage.lisp 22a23 > :static-file-handler 72a74 > :flexi-streams diff -rN old-araneida-slim2/static-file-handler.lisp new-araneida-slim2/static-file-handler.lisp 0a1,115 > (in-package :araneida) > > ;;; XXX Should abstract this somehow so that it can be done on things > ;;; other than filename suffix > > (defvar *content-types* > '(("html" "text/html") > ("gif" "image/gif") > ("jpg" "image/jpeg") > ("png" "image/png") > ("css" "text/css") > ("class" "application/octet-stream") > ("doc" "application/octet-stream") > ("zip" "application/octet-stream") > ("gz" "application/octet-stream") > ("ASF" "video/x-ms-asf") > ("tar" "application/octet-stream") > ("avi" "video/x-msvideo") > ("txt" "text/plain"))) > > (defun read-mime-types (filename) > "Read a standard-format mime.types file and return an alist suitable for > assigning to *content-types*" > (labels ((chop-comment (string) > (subseq string 0 (position #\# string))) > (collect-extns (type extns) > (loop for e in (split extns) > if (> (length e) 0) > collect (list e type)))) > (with-open-file (in filename :direction :input) > (let ((eof (gensym))) > (loop for line = (read-line in nil eof) > until (eq line eof) > for (type extns) = (araneida::remove-if-empty > (split (chop-comment line) 2)) > append (collect-extns type extns)))))) > > (defun copy-stream (from to) > "Copy into TO from FROM until end of the input file. Do not > translate or otherwise maul anything." > ; We used to catch sequence type mismatches, but given bivalent streams these days.... > (let ((buf (make-array 4096 :element-type (stream-element-type from)))) > (do ((pos (read-sequence buf from) (read-sequence buf from))) > ((= 0 pos) nil) > (write-sequence buf to :end pos)))) > > ;; a host lisp compatibility file can override this to set the > ;; appropriate external format for reading in files to send with > ;; send-file > > (defun send-file (r file-name &key content-type) > (let ((stream (request-stream r)) > (content-type > (or content-type > (cadr (assoc (or (pathname-type file-name) "txt") *content-types* > :test #'string=)))) > (in (apply #'open file-name > :direction :input > '(:element-type (unsigned-byte 8) > :external-format :latin1)))) > (unwind-protect > (progn > (request-send-headers r :content-type content-type > :conditional t > :content-length (file-length in) > :last-modified (file-write-date in)) > (setf (flexi-stream-external-format stream) '(:latin-1 :eol-style :lf)) > (copy-stream in stream)) > (close in)))) > > (defclass static-file-handler (handler) > ((pathname :initarg :pathname :accessor static-file-pathname > :documentation "Root pathname for URI components to merge against. Requests may not be made outside this hierarchy") > (default-name :initarg :default-name :accessor static-file-default-name > :initform "index.html"))) > > (defmethod handle-request-response > ((handler static-file-handler) method request) > ;; chop arg-string into /-delimited components. > ;; remove .. components along with the component preceding them > (let* ((path (cons :relative > (loop for p on > (nreverse (split > (request-unhandled-part request) > nil "/")) > if (string-equal (car p) "..") > do (setf p (cdr p)) > else collect (car p) into v > finally (return (nreverse v))))) > (name (let ((n (car (last path)))) > (if (> (length n) 0) n nil))) > (path (butlast path)) > (dot-pos (and name (position #\. name :from-end t))) > (extension (and dot-pos (subseq name (1+ dot-pos)))) > (name (urlstring-unescape (if dot-pos (subseq name 0 dot-pos) name))) > (file (make-pathname :name name :directory path :type extension)) > (fnam (merge-pathnames file (static-file-pathname handler)))) > (when (and (pathname-name fnam) > (probe-file fnam) > (not (pathname-name (truename fnam)))) > (request-redirect request > (concatenate 'string (araneida::request-urlstring request) "/")) > (return-from handle-request-response t)) > (when (not (pathname-name fnam)) > (setf fnam (merge-pathnames fnam (static-file-default-name handler)))) > > (with-file-error-handlers > (progn > (send-file request fnam) > t) > (format nil "Can't read ~S: ~A~%" fnam c) > ))) > > >