[Cmucl-cvs] [git] CMU Common Lisp branch master updated. snapshot-2014-11-3-g2b1925b
Raymond Toy
rtoy at common-lisp.net
Thu Nov 6 03:53:04 UTC 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".
The branch, master has been updated
via 2b1925b64fac2d5bd5b46826a2f2678b52c1683c (commit)
from 4c0e29679bad7b315f9da448280b3a7061549ac3 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 2b1925b64fac2d5bd5b46826a2f2678b52c1683c
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Wed Nov 5 19:52:57 2014 -0800
Add iterators and loop paths for codepoints and glyphs
* src/code/string.lisp:
* Add WITH-STRING-CODEPOINT-ITERATOR to allow iterating over the
codepoints in a string.
* Add WITH-STRING-GLYPO-ITERATOR to allow iterating over the
glpyhs in a string.
* src/code/exports.lisp:
* Export WITH-STRING-CODEPOINT-ITERATOR and
WITH-STRING-GLYPH-ITERATOR.
* src/code/loop.lisp:
* Support (loop for cp being the codepoint of string ...) which
extracts the consecutive codepoints from the string. Allow
codepoints, code-point, and code-points as aliases of codepoint.
* Support (loop for g-string being the glyph of string ...) which
extracts each glyph (as a string) from the string. Allow glpyhs
as an alias.
* tests/extended-loop.lisp:
* New file of tests for the new loop paths.
* src/i18n/locale/cmucl.pot:
* Update.
diff --git a/src/code/exports.lisp b/src/code/exports.lisp
index 455894c..e75e5d7 100644
--- a/src/code/exports.lisp
+++ b/src/code/exports.lisp
@@ -919,7 +919,9 @@
"+UNICODE-CATEGORY-UPPER+"
"+UNICODE-CATEGORY-TITLE+"
"LOAD-ALL-UNICODE-DATA"
- "SURROGATES"))
+ "SURROGATES"
+ "WITH-STRING-CODEPOINT-ITERATOR"
+ "WITH-STRING-GLYPH-ITERATOR"))
(defpackage "EVAL"
(:export "*EVAL-STACK-TRACE*" "*INTERNAL-APPLY-NODE-TRACE*"
diff --git a/src/code/loop.lisp b/src/code/loop.lisp
index 0ca34c2..29b9c23 100644
--- a/src/code/loop.lisp
+++ b/src/code/loop.lisp
@@ -45,6 +45,9 @@
;;;> United States of America
;;;> +1-617-221-1000
+;;;> See page 391 of http://bitsavers.trailing-edge.com/pdf/symbolics/software/genera_8/Symbolics_Common_Lisp_Language_Concepts.pdf
+;;;> for more information on how to extend loop.
+
;; $aclHeader: loop.cl,v 1.5 91/12/04 01:13:48 cox acl4_1 $
#+cmu
@@ -2153,6 +2156,77 @@ collected result will be returned as the value of the LOOP."
,variable)
(,next-fn)))
())))
+
+
+;; Supports
+;;
+;; (loop for cp being the codepoint of string ...)
+;;
+;; where cp is the codepoint of charactor (or surrogate pair) in the
+;; string.
+(defun loop-string-codepoint-iteration-path (variable data-type prep-phrases)
+ (cond ((or (cdr prep-phrases) (not (member (caar prep-phrases) '(:in :of))))
+ (loop-error _N"Too many prepositions!"))
+ ((null prep-phrases) (loop-error _N"Missing OF or IN in ~S iteration path.")))
+ (let ((string-var (loop-gentemp 'loop-codepoint-string-))
+ (next-fn (loop-gentemp 'loop-codepoint-next-))
+ (post-steps nil))
+ (let* ((val-var nil)
+ (temp-val-var (loop-gentemp 'loop-codepoint-val-temp-))
+ (temp-predicate-var (loop-gentemp 'loop-codepoint-predicate-var-))
+ (variable (or variable (loop-gentemp)))
+ (bindings `((,variable nil ,data-type)
+ (,string-var ,(cadar prep-phrases)))))
+ (push `(lisp::with-string-codepoint-iterator (,next-fn ,string-var)) *loop-wrappers*)
+ (setq val-var variable)
+ `(,bindings ;bindings
+ () ;prologue
+ () ;pre-test
+ () ;parallel steps
+ (not
+ (multiple-value-bind (,temp-predicate-var ,temp-val-var)
+ (,next-fn)
+ (when ,temp-predicate-var
+ (setq ,val-var ,temp-val-var))
+ ,temp-predicate-var
+ )) ;post-test
+ ,post-steps))))
+
+;; Supports
+;; (loop for g-string being the glyph of string ...)
+;;
+;; where g-string is a string consisting of a character and all
+;; trailing combining characters.
+(defun loop-string-glyph-string-iteration-path (variable data-type prep-phrases)
+ (cond ((or (cdr prep-phrases) (not (member (caar prep-phrases) '(:in :of))))
+ (loop-error _N"Too many prepositions!"))
+ ((null prep-phrases) (loop-error _N"Missing OF or IN in ~S iteration path.")))
+ (let ((string-var (loop-gentemp 'loop-glyph-string-))
+ (next-fn (loop-gentemp 'loop-glyph-next-))
+ (post-steps nil))
+ (let* ((val-var nil)
+ (temp-index-var (loop-gentemp 'loop-glyph-index-temp-))
+ (temp-len-var (loop-gentemp 'loop-glyph-len-temp-))
+ (temp-predicate-var (loop-gentemp 'loop-glyph-predicate-var-))
+ (variable (or variable (loop-gentemp)))
+ (bindings `((,variable nil ,data-type)
+ (,string-var ,(cadar prep-phrases)))))
+ (push `(lisp::with-string-glyph-iterator (,next-fn ,string-var)) *loop-wrappers*)
+ (setq val-var variable)
+ `(,bindings ;bindings
+ () ;prologue
+ () ;pre-test
+ () ;parallel steps
+ (not
+ (multiple-value-bind (,temp-predicate-var ,temp-index-var ,temp-len-var)
+ (,next-fn)
+ (when ,temp-predicate-var
+ (setq ,val-var (subseq ,string-var ,temp-index-var ,temp-len-var)))
+ ,temp-predicate-var
+ )) ;post-test
+ ,post-steps))))
+
+
;;;; ANSI Loop
@@ -2235,12 +2309,22 @@ collected result will be returned as the value of the LOOP."
:preposition-groups '((:of :in))
:inclusive-permitted nil
:user-data '(:symbol-types (:internal :external)))
+ (when extended-p
+ (add-loop-path '(code-point code-points codepoint codepoints)
+ 'loop-string-codepoint-iteration-path
+ w
+ :preposition-groups '((:of :in))
+ :inclusive-permitted nil)
+ (add-loop-path '(glyph glyphs)
+ 'loop-string-glyph-string-iteration-path
+ w
+ :preposition-groups '((:of :in))
+ :inclusive-permitted nil))
w))
(defparameter *loop-ansi-universe*
- (make-ansi-loop-universe nil))
-
+ (make-ansi-loop-universe t))
(defun loop-standard-expansion (keywords-and-forms environment universe)
(if (and keywords-and-forms (symbolp (car keywords-and-forms)))
diff --git a/src/code/string.lisp b/src/code/string.lisp
index 8005cda..bfaffb4 100644
--- a/src/code/string.lisp
+++ b/src/code/string.lisp
@@ -87,6 +87,30 @@
(values (truly-the codepoint code) nil))))
(t (values (truly-the codepoint code) nil)))))
+(defmacro with-string-codepoint-iterator ((next string) &body body)
+ _N"WITH-STRING-CODEPOINT-ITERATOR ((next string) &body body)
+ provides a method of looping through a string from the beginning to
+ the end of the string prodcucing successive codepoints from the
+ string. NEXT is bound to a generator macro that, within the scope
+ of the invocation, returns one or two values. The first value tells
+ whether any objects remain in the string. When the first value is
+ non-NIL, the second value is the codepoint of the next object."
+ (let ((n-next (gensym "WITH-STRING-CODEPOINT-ITERATOR-")))
+ `(let ((,n-next
+ (let* ((s ,string)
+ (len (length s))
+ (index 0))
+ (labels
+ ((,next ()
+ (when (< index len)
+ (multiple-value-bind (cp surrogatep)
+ (codepoint s index)
+ (incf index (if surrogatep 2 1))
+ (values t cp)))))
+ #',next))))
+ (macrolet ((,next () '(funcall ,n-next)))
+ , at body))))
+
(defun surrogates (codepoint)
"Return the high and low surrogate characters for Codepoint. If
Codepoint is in the BMP, the first return value is the corresponding
@@ -1013,6 +1037,31 @@
(values (subseq string n index) (and (> n 0) n))
(values (subseq string index n) (and (< n (length string)) n)))))
+(defmacro with-string-glyph-iterator ((next string) &body body)
+ _N"WITH-STRING-GLYPH-ITERATOR ((next string) &body body)
+ provides a method of looping through a string from the beginning to
+ the end of the string prodcucing successive glyphs from the string.
+ NEXT is bound to a generator macro that, within the scope of the
+ invocation, returns one or three values. The first value tells
+ whether any objects remain in the string. When the first value is
+ non-NIL, the second value is the index into the string of the glyph
+ and the third value is index of the next glyph."
+ (let ((n-next (gensym "WITH-STRING-GLYPH-ITERATOR-")))
+ `(let ((,n-next
+ (let* ((s ,string)
+ (len (length s))
+ (index 0))
+ (labels
+ ((,next ()
+ (when (< index len)
+ (let ((glyph-end (%glyph-f s index)))
+ (multiple-value-prog1
+ (values t index glyph-end)
+ (setf index glyph-end))))))
+ #',next))))
+ (macrolet ((,next () '(funcall ,n-next)))
+ , at body))))
+
#+unicode
(defun string-reverse* (sequence)
(declare (optimize (speed 3) (space 0) (safety 0))
diff --git a/src/i18n/locale/cmucl.pot b/src/i18n/locale/cmucl.pot
index a885634..608de17 100644
--- a/src/i18n/locale/cmucl.pot
+++ b/src/i18n/locale/cmucl.pot
@@ -3703,6 +3703,17 @@ msgstr ""
#: src/code/string.lisp
msgid ""
+"WITH-STRING-CODEPOINT-ITERATOR ((next string) &body body)\n"
+" provides a method of looping through a string from the beginning to\n"
+" the end of the string prodcucing successive codepoints from the\n"
+" string. NEXT is bound to a generator macro that, within the scope\n"
+" of the invocation, returns one or two values. The first value tells\n"
+" whether any objects remain in the string. When the first value is\n"
+" non-NIL, the second value is the codepoint of the next object."
+msgstr ""
+
+#: src/code/string.lisp
+msgid ""
"Return the high and low surrogate characters for Codepoint. If\n"
" Codepoint is in the BMP, the first return value is the corresponding\n"
" character and the second is NIL."
@@ -3961,6 +3972,18 @@ msgstr ""
#: src/code/string.lisp
msgid ""
+"WITH-STRING-GLYPH-ITERATOR ((next string) &body body)\n"
+" provides a method of looping through a string from the beginning to\n"
+" the end of the string prodcucing successive glyphs from the string.\n"
+" NEXT is bound to a generator macro that, within the scope of the\n"
+" invocation, returns one or three values. The first value tells\n"
+" whether any objects remain in the string. When the first value is\n"
+" non-NIL, the second value is the index into the string of the glyph\n"
+" and the third value is index of the next glyph."
+msgstr ""
+
+#: src/code/string.lisp
+msgid ""
"Convert String to Unicode Normalization Form D (NFD) using the\n"
" canonical decomposition. The NFD string is returned"
msgstr ""
diff --git a/tests/extended-loop.lisp b/tests/extended-loop.lisp
new file mode 100644
index 0000000..c184540
--- /dev/null
+++ b/tests/extended-loop.lisp
@@ -0,0 +1,30 @@
+(defpackage :extended-loop-tests
+ (:use :cl :lisp-unit))
+
+(in-package "EXTENDED-LOOP-TESTS")
+
+(define-test loop-codepoint
+ (:tag :extended-loop)
+ (let ((codepoints (mapcar #'(lambda (c)
+ (if (characterp c)
+ (char-code c)
+ c))
+ '(#\a #\b #\greek_capital_letter_gamma
+ ;; This is a random code point that
+ ;; requires a surrogate pair in our
+ ;; UTF-16 string represntation.
+ 65536
+ #\c))))
+ (assert-equal codepoints
+ (loop for c being the codepoints of (lisp::codepoints-string codepoints)
+ collect c))))
+
+(define-test loop-glyph-string
+ (:tag :extended-loop)
+ (let* ((s (string #\Latin_Small_Letter_A_With_Diaeresis_and_macron))
+ (d (lisp::string-to-nfkd s)))
+ (assert-equal (list s)
+ (loop for g being the glyphs of s collect g))
+ (assert-equal (list d)
+ (loop for g being the glyphs of d collect g))))
+
-----------------------------------------------------------------------
Summary of changes:
src/code/exports.lisp | 4 ++-
src/code/loop.lisp | 88 +++++++++++++++++++++++++++++++++++++++++++++--
src/code/string.lisp | 49 ++++++++++++++++++++++++++
src/i18n/locale/cmucl.pot | 23 +++++++++++++
tests/extended-loop.lisp | 30 ++++++++++++++++
5 files changed, 191 insertions(+), 3 deletions(-)
create mode 100644 tests/extended-loop.lisp
hooks/post-receive
--
CMU Common Lisp
More information about the cmucl-cvs
mailing list