[bknr-cvs] edi changed trunk/thirdparty/cl-unicode/

BKNR Commits bknr at bknr.net
Thu Jul 24 14:58:40 UTC 2008


Revision: 3622
Author: edi
URL: http://bknr.net/trac/changeset/3622

Update to 0.1.1

U   trunk/thirdparty/cl-unicode/CHANGELOG.txt
U   trunk/thirdparty/cl-unicode/api.lisp
U   trunk/thirdparty/cl-unicode/cl-unicode.asd
U   trunk/thirdparty/cl-unicode/doc/index.html
U   trunk/thirdparty/cl-unicode/packages.lisp
U   trunk/thirdparty/cl-unicode/specials.lisp
U   trunk/thirdparty/cl-unicode/util.lisp

Modified: trunk/thirdparty/cl-unicode/CHANGELOG.txt
===================================================================
--- trunk/thirdparty/cl-unicode/CHANGELOG.txt	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/CHANGELOG.txt	2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,3 +1,7 @@
+Version 0.1.1
+2008-07-24
+Make ADD-HANGUL-NAMES faster for ClozureCL
+
 Version 0.1.0
 2008-07-24
 Initial release

Modified: trunk/thirdparty/cl-unicode/api.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/api.lisp	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/api.lisp	2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.30 2008/07/22 02:42:13 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.31 2008/07/24 14:46:20 edi Exp $
 
 ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
 
@@ -39,6 +39,7 @@
    (unicode-name (char-code char)))
   (:method ((code-point integer))
    (or (gethash code-point *code-points-to-names*)
+       (maybe-compute-hangul-syllable-name code-point)
        (maybe-compute-cjk-name code-point))))
 
 (defgeneric unicode1-name (c)
@@ -102,6 +103,7 @@
     (setq scripts-to-try (list scripts-to-try)))
   (let* ((canonicalized-name (canonicalize-name name))
          (code-point (or (gethash canonicalized-name *names-to-code-points*)
+                         (maybe-find-hangul-syllable-code-point canonicalized-name)
                          (maybe-find-cjk-code-point canonicalized-name)
                          (and try-unicode1-names-p
                               (gethash canonicalized-name *unicode1-names-to-code-points*))

Modified: trunk/thirdparty/cl-unicode/cl-unicode.asd
===================================================================
--- trunk/thirdparty/cl-unicode/cl-unicode.asd	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/cl-unicode.asd	2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/cl-unicode.asd,v 1.21 2008/07/21 22:09:22 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/cl-unicode.asd,v 1.22 2008/07/24 14:56:31 edi Exp $
 
 ;;; Copyright (c) 2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -63,7 +63,7 @@
   (call-next-method))
 
 (defsystem :cl-unicode
-  :version "0.1.0"
+  :version "0.1.1"
   :serial t
   :depends-on (:cl-ppcre)
   :components ((:file "packages")

Modified: trunk/thirdparty/cl-unicode/doc/index.html
===================================================================
--- trunk/thirdparty/cl-unicode/doc/index.html	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/doc/index.html	2008-07-24 14:58:40 UTC (rev 3622)
@@ -72,7 +72,7 @@
 
 CL-UNICODE together with this documentation can be downloaded from <a
 href="http://weitz.de/files/cl-unicode.tar.gz">http://weitz.de/files/cl-unicode.tar.gz</a>. The
-current version is 0.1.0.
+current version is 0.1.1.
 <p>
 The library comes with a system definition
 for <a href="http://www.cliki.net/asdf">ASDF</a> and you compile and
@@ -1270,7 +1270,7 @@
 This documentation was prepared with <a href="http://weitz.de/documentation-template/">DOCUMENTATION-TEMPLATE</a>.
 </p>
 <p>
-$Header: /usr/local/cvsrep/cl-unicode/doc/index.html,v 1.12 2008/07/23 14:55:26 edi Exp $
+$Header: /usr/local/cvsrep/cl-unicode/doc/index.html,v 1.13 2008/07/24 14:56:33 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>

Modified: trunk/thirdparty/cl-unicode/packages.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/packages.lisp	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/packages.lisp	2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/packages.lisp,v 1.23 2008/07/22 02:42:13 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/packages.lisp,v 1.24 2008/07/24 14:46:20 edi Exp $
 
 ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
 
@@ -31,7 +31,9 @@
 
 (defpackage :cl-unicode
   (:use :cl)
-  (:import-from :cl-ppcre :with-rebinding)
+  (:import-from :cl-ppcre
+                :*standard-optimize-settings*
+                :with-rebinding)
   (:export :+code-point-limit+
            :*scripts-to-try*
            :*try-abbreviations-p*

Modified: trunk/thirdparty/cl-unicode/specials.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/specials.lisp	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/specials.lisp	2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/specials.lisp,v 1.14 2008/07/23 01:08:24 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/specials.lisp,v 1.16 2008/07/24 14:50:37 edi Exp $
 
 ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
 
@@ -36,7 +36,7 @@
   "A hash tables which maps property symbols \(see PROPERTY-SYMBOL) to
 their \"canonical names\", i.e. to strings.")
 
-(defvar *names-to-code-points* (make-hash-table :test 'equalp :size 31000)
+(defvar *names-to-code-points* (make-hash-table :test 'equalp :size 20000)
   "A hash table which \(case-insensitively) maps \"canonicalized\"
 character names to their code points.")
 
@@ -44,7 +44,7 @@
   "A hash table which \(case-insensitively) maps \"canonicalized\"
 Unicode 1.0 character names to their code points.")
 
-(defvar *code-points-to-names* (make-hash-table :size 31000)
+(defvar *code-points-to-names* (make-hash-table :size 20000)
   "A hash table which maps code points to the corresponding character
 names.")
 
@@ -85,6 +85,10 @@
   "A hash table which maps code points to their Jamo short names.
 Needed to compute Hangul syllable names - see COMPUTE-HANGUL-NAME.")
 
+(defvar *hangul-syllables-to-code-points* (make-hash-table :test 'equalp :size 12000)
+  "A hash table which \(case-insensitively) maps Hangul syllable name
+parts to their code points.")
+
 (defvar *try-unicode1-names-p* t
   "This is the default value for the :TRY-UNICODE1-NAMES-P keyword
 argument to CHARACTER-NAMED.")

Modified: trunk/thirdparty/cl-unicode/util.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/util.lisp	2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/util.lisp	2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/util.lisp,v 1.27 2008/07/23 14:11:40 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/util.lisp,v 1.29 2008/07/24 14:46:20 edi Exp $
 
 ;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
 
@@ -166,45 +166,75 @@
     (when (cjk-unified-ideograph-p code-point)
       code-point)))
 
-(defconstant +s-base+ #xac00
-  "The constant `SBase' from chapter 3 of the Unicode book.")
-(defconstant +l-base+ #x1100
-  "The constant `LBase' from chapter 3 of the Unicode book.")
-(defconstant +v-base+ #x1161
-  "The constant `VBase' from chapter 3 of the Unicode book.")
-(defconstant +t-base+ #x11a7
-  "The constant `TBase' from chapter 3 of the Unicode book.")
-(defconstant +v-count+ 21
-  "The constant `VCount' from chapter 3 of the Unicode book.")
-(defconstant +t-count+ 28
-  "The constant `TCount' from chapter 3 of the Unicode book.")
-(define-symbol-macro +n-count+
-  ;; the constant `NCount' from chapter 3 of the Unicode book
-  (* +v-count+ +t-count+))
+(defmacro define-hangul-constant (name value)
+  (flet ((create-symbol (name)
+           (intern (format nil "+~:@(~C-~A~)+" (char name 0) (subseq name 1)) :cl-unicode)))
+    `(eval-when (:compile-toplevel :load-toplevel :execute)
+       (defconstant ,(create-symbol name) ,value
+         ,(format nil "The constant `~A' from chapter 3 of the Unicode book." name)))))
 
+(define-hangul-constant "SBase" #xac00)
+(define-hangul-constant "LBase" #x1100)
+(define-hangul-constant "VBase" #x1161)
+(define-hangul-constant "TBase" #x11a7)
+(define-hangul-constant "VCount" 21)
+(define-hangul-constant "TCount" 28)
+(define-hangul-constant "NCount" (* +v-count+ +t-count+))
+
+(declaim (inline compute-hangul-name))
 (defun compute-hangul-name (code-point)
-  "Algorithmically derives the Hangul syllable name of the character
-with code point CODE-POINT as described in section 3.12 of the Unicode
-book."
+  "Algorithmically derives the Hangul syllable name \(the part behind
+\"HANGUL SYLLABLE \") of the character with code point CODE-POINT as
+described in section 3.12 of the Unicode book."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum code-point))
   (let* ((s-index (- code-point +s-base+))
          (l-value (+ +l-base+ (floor s-index +n-count+)))
          (v-value (+ +v-base+ (floor (mod s-index +n-count+) +t-count+)))
          (t-value (+ +t-base+ (mod s-index +t-count+))))
-    (format nil "HANGUL SYLLABLE ~A~A~@[~A~]"
+    (declare (fixnum s-index t-value))
+    (format nil "~A~A~@[~A~]"
             (gethash l-value *jamo-short-names*)
             (gethash v-value *jamo-short-names*)
             (and (/= t-value +t-base+)
                  (gethash t-value *jamo-short-names*)))))
 
+(defconstant +first-hangul-syllable+ #xac00
+  "The code point of the first Hangul syllable the name of which can
+be algorithmically derived.")
+(defconstant +last-hangul-syllable+ #xd7a3
+  "The code point of the last Hangul syllable the name of which can be
+algorithmically derived.")
+
 (defun add-hangul-names ()
   "Computes the names for all Hangul syllables and registers them in
-the appropriate hash tables."
+the *HANGUL-SYLLABLES-TO-CODE-POINTS* hash table.  Used for
+CHARACTER-NAMED."
+  (declare #.*standard-optimize-settings*)
   (format t "~&;;; Computing Hangul syllable names")
-  (loop for code-point from #xac00 to #xd7a3
+  (loop for code-point from +first-hangul-syllable+ to +last-hangul-syllable+
         for name = (compute-hangul-name code-point)
-        do (setf (gethash (canonicalize-name name) *names-to-code-points*) code-point
-                 (gethash code-point *code-points-to-names*) name)))
+        do (setf (gethash name *hangul-syllables-to-code-points*) code-point)))
 
+(defun hangul-syllable-p (code-point)
+  "Returns a true value if CODE-POINT is the code point of a Hangul
+syllable for which we can algorithmically derive the name."
+  (<= +first-hangul-syllable+ code-point +last-hangul-syllable+))
+
+(defun maybe-compute-hangul-syllable-name (code-point)
+  "Computes the name for CODE-POINT if CODE-POINT denotes a Hangul
+syllable the name of which can be algorithmically derived."
+  (when (hangul-syllable-p code-point)
+    (format nil "HANGUL SYLLABLE ~X" (compute-hangul-name code-point))))
+
+(defun maybe-find-hangul-syllable-code-point (name)
+  "Computes the code point for NAME if NAME is the name of a Hangul
+syllable the name of which can be algorithmically derived."
+  (ppcre:register-groups-bind (name)
+      ;; canonicalized
+      ("(?i)^HANGULSYLLABLE([A-Z]*)$" name)
+    (gethash name *hangul-syllables-to-code-points*)))
+
 (defmacro ensure-code-point (c)
   "Helper macro so that C can be treated like a code point even if it
 is a Lisp character."
@@ -214,7 +244,7 @@
        (character (char-code ,c)))))
 
 (defun unicode-name-reader (stream char arg)
-  "The reader functino used when the alternative character syntax is
+  "The reader function used when the alternative character syntax is
 enabled."
   (declare (ignore char arg))
   (let ((name (with-output-to-string (out)




More information about the Bknr-cvs mailing list