[cmucl-cvs] [git] CMU Common Lisp branch master updated. snapshot-2013-03-12-g90b155a
Raymond Toy
rtoy at common-lisp.net
Thu Mar 7 03:25:47 UTC 2013
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMU Common Lisp".
The branch, master has been updated
via 90b155a2a8cbf269e022f191f9b8566da8ace0da (commit)
via e129c45a44b1dc1bd8806f19caf5782ca5f60f78 (commit)
via 58b88ebd2133e51ad084ae6835dc65137138cfb3 (commit)
via cae10dd1d8688fdbcd1e4c3a16d0130b8e8cdb41 (commit)
via b735224c492e7ff7a2dcd4fe1804a950401e8a65 (commit)
via 424edfe8570cd4eb38086d6bdbaa8cd7b0030772 (commit)
from 10ebd126e43b344377d384c55c1c611a82e9f4ae (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 90b155a2a8cbf269e022f191f9b8566da8ace0da
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Wed Mar 6 19:18:04 2013 -0800
Update.
diff --git a/src/general-info/release-20e.txt b/src/general-info/release-20e.txt
index a438863..231c85d 100644
--- a/src/general-info/release-20e.txt
+++ b/src/general-info/release-20e.txt
@@ -41,6 +41,8 @@ New in this release:
derivation.)
* :I486 and :PENTIUM (Always assume we're running on at least a
Pentium.)
+ * Update unicode to support Unicode 6.2.
+
* ANSI compliance fixes:
* Attempts to modify the standard readtable or the standard pprint
dispatch table will now signal a continuable error.
commit e129c45a44b1dc1bd8806f19caf5782ca5f60f78
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Wed Mar 6 00:37:50 2013 -0800
Reindent STRING-NEXT-WORD-BREAK neatly.
diff --git a/src/code/string.lisp b/src/code/string.lisp
index 7c6e3c1..4b90930 100644
--- a/src/code/string.lisp
+++ b/src/code/string.lisp
@@ -1586,80 +1586,80 @@
2
1))
(cat (char-word-break-category c)))
- (case cat
- ((:extend-or-format)
- (case context
- ((:cr :sep) j)
- (otherwise (lookup (+ j next-j) context))))
- (otherwise
- (case context
- ((:cr)
- (if (= c (char-code #\linefeed))
- ;; Rule WB3: Don't break CRLF, continue looking
- (lookup (+ j next-j) cat)
- j))
- ((:aletter)
- (case cat
- ((:aletter :numeric :extendnumlet)
- ;; Rules WB5, WB9, ?
- (lookup (+ j next-j) cat))
- ((:midletter :midnumlet)
- ;; Rule WB6, need to keep looking
- (lookup (+ j next-j) :aletter-midletter))
- (otherwise j)))
- ((:aletter-midletter)
- (case cat
- ((:aletter)
- ;; Rule WB7
- (lookup (+ j next-j) cat))
- (otherwise
- ;; Rule WB6 and WB7 were extended, but the
- ;; region didn't end with :aletter. So
- ;; backup and break at that point.
- (let ((j2 (index-of-previous-non-ignored j)))
- (if (< i j2) j2 j)))))
- ((:numeric)
- (case cat
- ((:numeric :aletter :extendnumlet)
- ;; Rules WB8, WB10, ?
- (lookup (+ j next-j) cat))
- ((:midnum :midnumlet)
- ;; Rules WB11, need to keep looking
- (lookup (+ j next-j) :numeric-midnum))
- (otherwise j)))
- ((:numeric-midnum)
- (case cat
- ((:numeric)
- ;; Rule WB11, keep looking
- (lookup (+ j next-j) cat))
- (otherwise
- ;; Rule WB11, WB12 were extended, but the
- ;; region didn't end with :numeric, so
- ;; backup and break at that point.
- (let ((j2 (index-of-previous-non-ignored j)))
- (if (< i j2) j2 j)))))
- ((:midletter :midnum :midnumlet)
- ;; Rule WB14
- j)
- ((:katakana)
- (case cat
- ((:katakana :extendnumlet)
- ;; Rule WB13, WB13a
- (lookup (+ j next-j) cat))
- (otherwise j)))
- ((:extendnumlet)
- (case cat
- ((:extendnumlet :aletter :numeric :katakana)
- ;; Rule WB13a, WB13b
- (lookup (+ j next-j) cat))
- (otherwise j)))
- ((:regional_indicator)
- (case cat
- ((:regional_indicator)
- ;; Rule WB13c
- (lookup (+ j next-j) cat))
- (otherwise j)))
- (otherwise j)))))))))
+ (case cat
+ ((:extend-or-format)
+ (case context
+ ((:cr :sep) j)
+ (otherwise (lookup (+ j next-j) context))))
+ (otherwise
+ (case context
+ ((:cr)
+ (if (= c (char-code #\linefeed))
+ ;; Rule WB3: Don't break CRLF, continue looking
+ (lookup (+ j next-j) cat)
+ j))
+ ((:aletter)
+ (case cat
+ ((:aletter :numeric :extendnumlet)
+ ;; Rules WB5, WB9, ?
+ (lookup (+ j next-j) cat))
+ ((:midletter :midnumlet)
+ ;; Rule WB6, need to keep looking
+ (lookup (+ j next-j) :aletter-midletter))
+ (otherwise j)))
+ ((:aletter-midletter)
+ (case cat
+ ((:aletter)
+ ;; Rule WB7
+ (lookup (+ j next-j) cat))
+ (otherwise
+ ;; Rule WB6 and WB7 were extended, but the
+ ;; region didn't end with :aletter. So
+ ;; backup and break at that point.
+ (let ((j2 (index-of-previous-non-ignored j)))
+ (if (< i j2) j2 j)))))
+ ((:numeric)
+ (case cat
+ ((:numeric :aletter :extendnumlet)
+ ;; Rules WB8, WB10, ?
+ (lookup (+ j next-j) cat))
+ ((:midnum :midnumlet)
+ ;; Rules WB11, need to keep looking
+ (lookup (+ j next-j) :numeric-midnum))
+ (otherwise j)))
+ ((:numeric-midnum)
+ (case cat
+ ((:numeric)
+ ;; Rule WB11, keep looking
+ (lookup (+ j next-j) cat))
+ (otherwise
+ ;; Rule WB11, WB12 were extended, but the
+ ;; region didn't end with :numeric, so
+ ;; backup and break at that point.
+ (let ((j2 (index-of-previous-non-ignored j)))
+ (if (< i j2) j2 j)))))
+ ((:midletter :midnum :midnumlet)
+ ;; Rule WB14
+ j)
+ ((:katakana)
+ (case cat
+ ((:katakana :extendnumlet)
+ ;; Rule WB13, WB13a
+ (lookup (+ j next-j) cat))
+ (otherwise j)))
+ ((:extendnumlet)
+ (case cat
+ ((:extendnumlet :aletter :numeric :katakana)
+ ;; Rule WB13a, WB13b
+ (lookup (+ j next-j) cat))
+ (otherwise j)))
+ ((:regional_indicator)
+ (case cat
+ ((:regional_indicator)
+ ;; Rule WB13c
+ (lookup (+ j next-j) cat))
+ (otherwise j)))
+ (otherwise j)))))))))
(declare (notinline lookup left-context))
(cond ((< i 0)
;; Rule WB1
commit 58b88ebd2133e51ad084ae6835dc65137138cfb3
Merge: cae10dd 10ebd12
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Wed Mar 6 00:30:11 2013 -0800
Merge branch 'master' into rtoy-unicode-6.2
commit cae10dd1d8688fdbcd1e4c3a16d0130b8e8cdb41
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Wed Mar 6 00:13:22 2013 -0800
Implement Rule WB13c for regional indicators.
diff --git a/src/code/string.lisp b/src/code/string.lisp
index 5a1a814..7c6e3c1 100644
--- a/src/code/string.lisp
+++ b/src/code/string.lisp
@@ -1653,6 +1653,12 @@
;; Rule WB13a, WB13b
(lookup (+ j next-j) cat))
(otherwise j)))
+ ((:regional_indicator)
+ (case cat
+ ((:regional_indicator)
+ ;; Rule WB13c
+ (lookup (+ j next-j) cat))
+ (otherwise j)))
(otherwise j)))))))))
(declare (notinline lookup left-context))
(cond ((< i 0)
commit b735224c492e7ff7a2dcd4fe1804a950401e8a65
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Tue Mar 5 22:15:43 2013 -0800
Fix PARSE-WORD-BREAK-LINE to handle codepoints outside the BMP. The
count needs to be incremented by one to adjust for the UTF-16 encoding of
strings that we use.
diff --git a/src/i18n/tests/word-break-test.lisp b/src/i18n/tests/word-break-test.lisp
index 2fefec7..899b5a2 100644
--- a/src/i18n/tests/word-break-test.lisp
+++ b/src/i18n/tests/word-break-test.lisp
@@ -33,8 +33,12 @@
(let ((c (read s nil nil)))
(unless c
(return))
+ ;; Handle codepoints outside the BMP carefully.
(if (> c #xffff)
(let ((s (lisp::codepoints-string (list c))))
+ ;; Need to increment the count because of our
+ ;; UTF-16 encoding of strings.
+ (incf count)
(vector-push-extend (aref s 0) string)
(vector-push-extend (aref s 1) string))
(vector-push-extend (code-char c) string))
commit 424edfe8570cd4eb38086d6bdbaa8cd7b0030772
Author: Raymond Toy <toy.raymond at gmail.com>
Date: Mon Mar 4 21:54:28 2013 -0800
Update to Unicode 6.2.
Still needs work because the word-break tests fail.
diff --git a/src/code/unidata.lisp b/src/code/unidata.lisp
index 37134cc..55e3a28 100644
--- a/src/code/unidata.lisp
+++ b/src/code/unidata.lisp
@@ -22,7 +22,7 @@
(defvar *unidata-path* #p"ext-formats:unidata.bin")
-(defvar *unidata-version* "$Revision: 1.28 $")
+(defvar *unidata-version* "$Revision: 1.29 $")
(defstruct unidata
range
@@ -61,7 +61,7 @@
;; The expected Unicode version. This needs to be synced with
;; build-unidata.lisp.
(defconstant +unicode-major-version+ 6)
-(defconstant +unicode-minor-version+ 1)
+(defconstant +unicode-minor-version+ 2)
(defconstant +unicode-update-version+ 0)
;;; These need to be synched with tools/build-unidata.lisp
@@ -1163,7 +1163,7 @@
;; pack-word-break in tools/build-unidata.lisp!
(aref #(:other :cr :lf :newline :extend :format
:katakana :aletter :midnumlet :midletter :midnum
- :numeric :extendnumlet)
+ :numeric :extendnumlet :regional_indicator)
(unicode-word-break-code code)))
;; Support for character name completion for slime.
diff --git a/src/i18n/BidiMirroring.txt b/src/i18n/BidiMirroring.txt
index 2e719bc..ec41b76 100644
--- a/src/i18n/BidiMirroring.txt
+++ b/src/i18n/BidiMirroring.txt
@@ -1,19 +1,19 @@
-# BidiMirroring-6.1.0.txt
-# Date: 2011-12-20, 19:31:00 GMT [KW, LI]
+# BidiMirroring-6.2.0.txt
+# Date: 2012-05-15, 24:19:00 GMT [KW, LI]
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the Bidi_Mirrored=Yes property
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
-# The repertoire covered by the file is Unicode 6.1.0.
+# The repertoire covered by the file is Unicode 6.2.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
@@ -30,16 +30,8 @@
# characters exist with mirrored glyphs, are
# listed as comments at the end of the file.
#
-# Note: (2011-12-19) There is an inconsistency between the
-# following statement about the default value
-# of the Bidi_Mirroring_Glyph property and the
-# value of the @missing line for Bidi_Mirroring_Glyph in
-# PropertyValueAliases.txt. This inconsistency was discovered too
-# late in the release process to be resolved by
-# the UTC. The inconsistency will be resolved in a future revision.
-#
# Formally, the default value of the Bidi_Mirroring_Glyph property
-# for each code point is the code point itself, unless a mapping to
+# for each code point is <none>, unless a mapping to
# some other character is specified in this data file. When a code
# point has the default value for the Bidi_Mirroring_Glyph property,
# that means that no other character exists whose glyph is suitable
@@ -50,12 +42,13 @@
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
-# and for Unicode 6.1 by Ken Whistler and Laurentiu Iancu.
+# and for Unicode 6.1 and 6.2 by Ken Whistler and Laurentiu Iancu.
#
# ############################################################
#
# Property: Bidi_Mirroring_Glyph
#
+# @missing: 0000..10FFFF; <none>
0028; 0029 # LEFT PARENTHESIS
0029; 0028 # RIGHT PARENTHESIS
diff --git a/src/i18n/CaseFolding.txt b/src/i18n/CaseFolding.txt
index 0d9a409..df1813d 100644
--- a/src/i18n/CaseFolding.txt
+++ b/src/i18n/CaseFolding.txt
@@ -1,8 +1,8 @@
-# CaseFolding-6.1.0.txt
-# Date: 2011-07-25, 21:21:56 GMT [MD]
+# CaseFolding-6.2.0.txt
+# Date: 2012-08-14, 17:54:49 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -1222,3 +1222,5 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW
+#
+# EOF
diff --git a/src/i18n/CompositionExclusions.txt b/src/i18n/CompositionExclusions.txt
index f12f7d6..cd19f42 100644
--- a/src/i18n/CompositionExclusions.txt
+++ b/src/i18n/CompositionExclusions.txt
@@ -1,5 +1,5 @@
-# CompositionExclusions-6.1.0.txt
-# Date: 2011-07-12, 00:13:00 GMT [KW, LI]
+# CompositionExclusions-6.2.0.txt
+# Date: 2012-05-15, 22:21:00 GMT [KW, LI]
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
@@ -7,7 +7,7 @@
# This file is a normative contributory data file in the
# Unicode Character Database.
#
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For more information, see
@@ -203,3 +203,4 @@ FB4E # HEBREW LETTER PE WITH RAFE
# Total code points: 4
+# EOF
diff --git a/src/i18n/DerivedNormalizationProps.txt b/src/i18n/DerivedNormalizationProps.txt
index 2d71747..2ecd8e2 100644
--- a/src/i18n/DerivedNormalizationProps.txt
+++ b/src/i18n/DerivedNormalizationProps.txt
@@ -1,8 +1,8 @@
-# DerivedNormalizationProps-6.1.0.txt
-# Date: 2011-07-26, 04:18:07 GMT [MD]
+# DerivedNormalizationProps-6.2.0.txt
+# Date: 2012-05-23, 20:34:48 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
diff --git a/src/i18n/NameAliases.txt b/src/i18n/NameAliases.txt
index 3992620..482fb92 100644
--- a/src/i18n/NameAliases.txt
+++ b/src/i18n/NameAliases.txt
@@ -1,5 +1,5 @@
-# NameAliases-6.1.0.txt
-# Date: 2012-01-03, 21:52:00 GMT [KW]
+# NameAliases-6.2.0.txt
+# Date: 2012-05-15, 18:44:00 GMT [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
@@ -216,6 +216,7 @@
01A2;LATIN CAPITAL LETTER GHA;correction
01A3;LATIN SMALL LETTER GHA;correction
034F;CGJ;abbreviation
+0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction
0CDE;KANNADA LETTER LLLA;correction
0E9D;LAO LETTER FO FON;correction
0E9F;LAO LETTER FO FAY;correction
diff --git a/src/i18n/NormalizationCorrections.txt b/src/i18n/NormalizationCorrections.txt
index 61800b8..b53bb40 100644
--- a/src/i18n/NormalizationCorrections.txt
+++ b/src/i18n/NormalizationCorrections.txt
@@ -1,10 +1,10 @@
-# NormalizationCorrections-6.1.0.txt
-# Date: 2011-06-23, 00:46:00 GMT [KW, LI]
+# NormalizationCorrections-6.2.0.txt
+# Date: 2012-05-15, 22:25:00 GMT [KW, LI]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The normalization stability policy of the Unicode Consortium
@@ -46,3 +46,5 @@ F951;96FB;964B;3.2.0 # Corrigendum 3
2F91F;43AB;243AB;4.0.0 # Corrigendum 4
2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4
2F9BF;4D57;45D7;4.0.0 # Corrigendum 4
+
+# EOF
diff --git a/src/i18n/SpecialCasing.txt b/src/i18n/SpecialCasing.txt
index d650b6d..994043f 100644
--- a/src/i18n/SpecialCasing.txt
+++ b/src/i18n/SpecialCasing.txt
@@ -1,8 +1,8 @@
-# SpecialCasing-6.1.0.txt
-# Date: 2011-11-27, 05:10:51 GMT [MD]
+# SpecialCasing-6.2.0.txt
+# Date: 2012-05-23, 20:35:15 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
diff --git a/src/i18n/UnicodeData.txt b/src/i18n/UnicodeData.txt
index 9f20405..086379e 100644
--- a/src/i18n/UnicodeData.txt
+++ b/src/i18n/UnicodeData.txt
@@ -7190,6 +7190,7 @@
20B7;SPESMILO SIGN;Sc;0;ET;;;;;N;;;;;
20B8;TENGE SIGN;Sc;0;ET;;;;;N;;;;;
20B9;INDIAN RUPEE SIGN;Sc;0;ET;;;;;N;;;;;
+20BA;TURKISH LIRA SIGN;Sc;0;ET;;;;;N;;;;;
20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;;
20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;;
20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;;
@@ -18703,8 +18704,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1242F;CUNEIFORM NUMERIC SIGN THREE SHARU VARIANT FORM;Nl;0;L;;;;3;N;;;;;
12430;CUNEIFORM NUMERIC SIGN FOUR SHARU;Nl;0;L;;;;4;N;;;;;
12431;CUNEIFORM NUMERIC SIGN FIVE SHARU;Nl;0;L;;;;5;N;;;;;
-12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;;N;;;;;
-12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;;N;;;;;
+12432;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH;Nl;0;L;;;;216000;N;;;;;
+12433;CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN;Nl;0;L;;;;432000;N;;;;;
12434;CUNEIFORM NUMERIC SIGN ONE BURU;Nl;0;L;;;;1;N;;;;;
12435;CUNEIFORM NUMERIC SIGN TWO BURU;Nl;0;L;;;;2;N;;;;;
12436;CUNEIFORM NUMERIC SIGN THREE BURU;Nl;0;L;;;;3;N;;;;;
@@ -18739,8 +18740,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
12453;CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM;Nl;0;L;;;;4;N;;;;;
12454;CUNEIFORM NUMERIC SIGN FIVE BAN2;Nl;0;L;;;;5;N;;;;;
12455;CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM;Nl;0;L;;;;5;N;;;;;
-12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;;N;;;;;
-12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;;N;;;;;
+12456;CUNEIFORM NUMERIC SIGN NIGIDAMIN;Nl;0;L;;;;-1;N;;;;;
+12457;CUNEIFORM NUMERIC SIGN NIGIDAESH;Nl;0;L;;;;-1;N;;;;;
12458;CUNEIFORM NUMERIC SIGN ONE ESHE3;Nl;0;L;;;;1;N;;;;;
12459;CUNEIFORM NUMERIC SIGN TWO ESHE3;Nl;0;L;;;;2;N;;;;;
1245A;CUNEIFORM NUMERIC SIGN ONE THIRD DISH;Nl;0;L;;;;1/3;N;;;;;
diff --git a/src/i18n/WordBreakProperty.txt b/src/i18n/WordBreakProperty.txt
index 7f3225c..2caa16b 100644
--- a/src/i18n/WordBreakProperty.txt
+++ b/src/i18n/WordBreakProperty.txt
@@ -1,8 +1,8 @@
-# WordBreakProperty-6.1.0.txt
-# Date: 2011-11-27, 05:10:51 GMT [MD]
+# WordBreakProperty-6.2.0.txt
+# Date: 2012-08-13, 19:12:09 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@@ -395,6 +395,12 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# ================================================
+1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
+
+# Total code points: 26
+
+# ================================================
+
00AD ; Format # Cf SOFT HYPHEN
0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
06DD ; Format # Cf ARABIC END OF AYAH
diff --git a/src/i18n/tests/NormalizationTest.txt b/src/i18n/tests/NormalizationTest.txt
index 68e5f07..806021a 100644
--- a/src/i18n/tests/NormalizationTest.txt
+++ b/src/i18n/tests/NormalizationTest.txt
@@ -1,8 +1,8 @@
-# NormalizationTest-6.1.0.txt
-# Date: 2011-11-27, 05:10:33 GMT [MD]
+# NormalizationTest-6.2.0.txt
+# Date: 2012-08-14, 17:54:58 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -18428,4 +18428,4 @@ D750 0334 11B5;D750 0334 11B5;1112 1173 0334 11B5;D750 0334 11B5;1112 1173 0334
11131 0334 11127;11131 0334 11127;11131 0334 11127;11131 0334 11127;11131 0334 11127; # (◌𑄱◌̴◌𑄧; ◌𑄱◌̴◌𑄧; ◌𑄱◌̴◌𑄧; ◌𑄱◌̴◌𑄧; ◌𑄱◌̴◌𑄧; ) CHAKMA O MARK, COMBINING TILDE OVERLAY, CHAKMA VOWEL SIGN A
11132 0334 11127;11132 0334 11127;11132 0334 11127;11132 0334 11127;11132 0334 11127; # (◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ◌𑄲◌̴◌𑄧; ) CHAKMA AU MARK, COMBINING TILDE OVERLAY, CHAKMA VOWEL SIGN A
#
-# END OF FILE
+# EOF
diff --git a/src/i18n/tests/WordBreakTest.txt b/src/i18n/tests/WordBreakTest.txt
index 7957ea3..864dbce 100644
--- a/src/i18n/tests/WordBreakTest.txt
+++ b/src/i18n/tests/WordBreakTest.txt
@@ -1,8 +1,8 @@
-# WordBreakTest-6.1.0.txt
-# Date: 2011-12-07, 23:28:40 GMT [MD]
+# WordBreakTest-6.2.0.txt
+# Date: 2012-08-22, 12:41:18 GMT [MD]
#
# Unicode Character Database
-# Copyright (c) 1991-2011 Unicode, Inc.
+# Copyright (c) 1991-2012 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -15,7 +15,7 @@
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
-# - (x) the Word_Break property* for the sample character
+# - (x) the Word_Break property value for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.
@@ -42,6 +42,8 @@
÷ 0001 × 0308 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0001 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0001 × 0308 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0001 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -86,6 +88,8 @@
÷ 000D ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000D ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -130,6 +134,8 @@
÷ 000A ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000A ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -174,6 +180,8 @@
÷ 000B ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 000B ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 000B ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000B ÷ 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000B ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 000B ÷ 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -218,6 +226,8 @@
÷ 3031 × 0308 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 3031 × 0308 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 3031 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 3031 × 0308 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 3031 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -262,6 +272,8 @@
÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0041 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0041 × 0308 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0041 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -306,6 +318,8 @@
÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 003A ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 003A × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -350,6 +364,8 @@
÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 002C ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -394,6 +410,8 @@
÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -438,6 +456,8 @@
÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0030 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0030 × 0308 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0030 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -482,6 +502,8 @@
÷ 005F × 0308 × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 005F × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 005F × 0308 × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 005F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 005F × 0308 × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 005F × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -504,6 +526,52 @@
÷ 005F × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
÷ 005F × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 005F × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+÷ 1F1E6 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 1F1E6 × 0308 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.3] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 1F1E6 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 00AD × 0308 ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
÷ 00AD ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -526,6 +594,8 @@
÷ 00AD × 0308 ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 00AD ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 00AD × 0308 ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 00AD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -570,6 +640,8 @@
÷ 0300 × 0308 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0300 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0300 × 0308 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -614,6 +686,8 @@
÷ 0061 × 2060 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 × 2060 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0061 × 2060 × 0308 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -658,6 +732,8 @@
÷ 0061 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 ÷ 003A ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0061 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 003A × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -702,6 +778,8 @@
÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -746,6 +824,8 @@
÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -790,6 +870,8 @@
÷ 0061 ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0061 ÷ 002C ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0061 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0061 ÷ 002C × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -834,6 +916,8 @@
÷ 0031 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0031 ÷ 003A ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0031 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 003A × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -878,6 +962,8 @@
÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -922,6 +1008,8 @@
÷ 0031 × 002C × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0031 ÷ 002C ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0031 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 002C × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -966,6 +1054,8 @@
÷ 0031 × 002E × 2060 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
÷ 0031 ÷ 002E × 2060 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
÷ 0031 ÷ 002E × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 002E × 2060 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
÷ 0031 ÷ 002E × 2060 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
@@ -998,4 +1088,17 @@
÷ 2060 ÷ 0061 × 2060 × 0062 × 2060 × 00AD × 2060 × 0062 × 2060 × 0079 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER Y (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 ÷ 0061 × 2060 ÷ 0024 × 2060 ÷ 002D × 2060 ÷ 0033 × 2060 × 0034 × 2060 × 002C × 2060 × 0035 × 2060 × 0036 × 2060 × 0037 × 2060 × 002E × 2060 × 0031 × 2060 × 0034 × 2060 ÷ 0025 × 2060 ÷ 0062 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DOLLAR SIGN (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HYPHEN-MINUS (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT THREE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT FOUR (Numeric) × [4.0] WORD JOINER (Format_FE) × [12.0] COMMA (MidNum) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT FIVE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT SIX (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT SEVEN (Numeric) × [4.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT FOUR (Numeric) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] PERCENT SIGN (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
÷ 2060 ÷ 0033 × 2060 × 0061 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] DIGIT THREE (Numeric) × [4.0] WORD JOINER (Format_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-# Lines: 978
+÷ 0061 ÷ 1F1E6 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 1F1F7 × 1F1FA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [0.3]
+÷ 1F1F7 × 1F1FA × 1F1F8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) ÷ [0.3]
+÷ 1F1F7 × 1F1FA × 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3]
+÷ 1F1F7 × 1F1FA ÷ 200B ÷ 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [999.0] ZERO WIDTH SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
+÷ 1F1E6 × 200D × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [4.0] ZERO WIDTH JOINER (Extend_FE) × [13.3] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 × 200D × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [13.3] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [4.0] ZERO WIDTH JOINER (Extend_FE) × [13.3] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [4.0] ZERO WIDTH JOINER (Extend_FE) ÷ [999.0] ARABIC LETTER NOON (ALetter) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (ALetter) × [4.0] ZERO WIDTH JOINER (Extend_FE) ÷ [999.0] SPACE (Other) ÷ [0.3]
+#
+# Lines: 1078
+#
+# EOF
diff --git a/src/i18n/tests/word-break-test.lisp b/src/i18n/tests/word-break-test.lisp
index 42d961a..2fefec7 100644
--- a/src/i18n/tests/word-break-test.lisp
+++ b/src/i18n/tests/word-break-test.lisp
@@ -33,7 +33,11 @@
(let ((c (read s nil nil)))
(unless c
(return))
- (vector-push-extend (code-char c) string)
+ (if (> c #xffff)
+ (let ((s (lisp::codepoints-string (list c))))
+ (vector-push-extend (aref s 0) string)
+ (vector-push-extend (aref s 1) string))
+ (vector-push-extend (code-char c) string))
(let ((c (read s)))
(handle-break c))
(incf count)))))
diff --git a/src/i18n/unidata.bin b/src/i18n/unidata.bin
index 0ee2dd9..30816cf 100644
Binary files a/src/i18n/unidata.bin and b/src/i18n/unidata.bin differ
diff --git a/src/tools/build-unidata.lisp b/src/tools/build-unidata.lisp
index 363d095..0f5bc42 100644
--- a/src/tools/build-unidata.lisp
+++ b/src/tools/build-unidata.lisp
@@ -54,7 +54,7 @@
;; The expected Unicode version
(defconstant +unicode-major-version+ 6)
-(defconstant +unicode-minor-version+ 1)
+(defconstant +unicode-minor-version+ 2)
(defconstant +unicode-update-version+ 0)
;;; These need to be synched with code/unidata.lisp
@@ -281,11 +281,14 @@
(cdr x))))
(mapc (lambda (x) (pass2 (cdr x))) (rest trie))))
(format t "~& Initializing...~%")
+ (force-output)
(let ((trie (cons nil nil)))
(loop for (name . code) in entries do (add-to-trie trie name code))
(format t "~& Pass 1...~%")
+ (force-output)
(pass1 trie 0)
(format t "~& Sorting...~%")
+ (force-output)
(dolist (key (sort (loop for k being the hash-keys of khash
collect k)
#'> :key #'length))
@@ -316,8 +319,10 @@
vec2 (make-array top :element-type '(unsigned-byte 32))
vec3 (make-array top :element-type '(unsigned-byte 32)))
(format t "~& Pass 2...~%")
+ (force-output)
(pass2 trie)
(format t "~& Finalizing~%")
+ (force-output)
(dotimes (i top)
(let ((xxx (aref vec2 i)))
(dotimes (j (aref keyl (ash xxx -18)))
@@ -614,9 +619,10 @@
;; ucd-directory should be the directory where UnicodeData.txt is
;; located.
(defun foreach-ucd (name ucd-directory fn)
- (format t "~& ~A~%" name)
+ (format t "~& ~A~%" name)
(with-open-file (s (make-pathname :name name :type "txt"
:defaults ucd-directory))
+ (format t "file = ~s~%" s)
(cond
((string= name "Unihan")
(loop for line = (read-line s nil) while line do
@@ -811,6 +817,7 @@
ucd-directory
(lambda (min max prop)
(let ((code (intern (string-upcase prop) "KEYWORD")))
+ (format t "~X-~X code = ~S~%" min max code)
(loop for i from min to max
as ent = (find-ucd i) do
(when ent
@@ -941,16 +948,18 @@
(or (position (ucdent-word-break ucdent)
'(:other :cr :lf :newline :extend :format
:katakana :aletter :midnumlet :midletter :midnum
- :numeric :extendnumlet))
+ :numeric :extendnumlet :regional_indicator))
0))
;; ucd-directory should be the directory where UnicodeData.txt is
;; located.
(defun build-unidata (&optional (ucd-directory "target:i18n/"))
(format t "~&Reading data from ~S~%" (probe-file ucd-directory))
+ (force-output)
(multiple-value-bind (ucd range) (read-data ucd-directory)
(setf (unidata-range *unicode-data*) range)
(format t "~&Building character name tables~%")
+ (force-output)
(let* ((data (loop for ent across ucd
when (char/= (char (ucdent-name ent) 0) #\<)
collect (cons (ucdent-name ent) (ucdent-code ent))
@@ -965,6 +974,7 @@
(make-ntrie32 :split #x54 :hvec hvec :mvec mvec :lvec lvec))))
(format t "~&Building Unicode 1.0 character name tables~%")
+ (force-output)
(let* ((data (loop for ent across ucd
when (plusp (length (ucdent-name1 ent)))
collect (cons (ucdent-name1 ent) (ucdent-code ent))))
@@ -976,12 +986,14 @@
(make-ntrie32 :split #x54 :hvec hvec :mvec mvec :lvec lvec))))
(format t "~&Building general category table~%")
+ (force-output)
(multiple-value-bind (hvec mvec lvec)
(pack ucd range #'ucdent-cat 0 8 #x53)
(setf (unidata-category *unicode-data*)
(make-ntrie8 :split #x53 :hvec hvec :mvec mvec :lvec lvec)))
(format t "~&Building simple case-conversion table~%")
+ (force-output)
(let ((svec (make-array 100 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t)))
(vector-push-extend 0 svec)
@@ -993,12 +1005,14 @@
:svec (copy-seq svec)))))
(format t "~&Building numeric-values table~%")
+ (force-output)
(multiple-value-bind (hvec mvec lvec)
(pack ucd range #'pack-numeric 0 32 #x63)
(setf (unidata-numeric *unicode-data*)
(make-ntrie32 :split #x63 :hvec hvec :mvec mvec :lvec lvec)))
(format t "~&Building decomposition table~%")
+ (force-output)
(let ((tabl (make-array 6000 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t)))
(multiple-value-bind (hvec mvec lvec)
@@ -1009,12 +1023,14 @@
:tabl (copy-seq tabl)))))
(format t "~&Building combining-class table~%")
+ (force-output)
(multiple-value-bind (hvec mvec lvec)
(pack ucd range #'ucdent-comb 0 8 #x64)
(setf (unidata-combining *unicode-data*)
(make-ntrie8 :split #x64 :hvec hvec :mvec mvec :lvec lvec)))
(format t "~&Building bidi information table~%")
+ (force-output)
(let ((tabl (make-array 10 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t)))
(multiple-value-bind (hvec mvec lvec)
@@ -1025,6 +1041,7 @@
:tabl (copy-seq tabl)))))
(format t "~&Building normalization quick-check tables~%")
+ (force-output)
(progn
(multiple-value-bind (hvec mvec lvec)
(pack ucd range (lambda (x)
@@ -1056,6 +1073,7 @@
(make-ntrie2 :split #x55 :hvec hvec :mvec mvec :lvec lvec))))
(format t "~&Building composition exclusion table~%")
+ (force-output)
(let ((exclusions (make-array 1 :element-type '(unsigned-byte 32)
:adjustable t
:fill-pointer 0)))
@@ -1065,8 +1083,10 @@
(setf (unidata-comp-exclusions *unicode-data*) (copy-seq exclusions)))
(format t "~&Building full case mapping tables~%")
+ (force-output)
(progn
(format t "~& Lower...~%")
+ (force-output)
(let ((tabl (make-array 100 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t))
(split #x65))
@@ -1077,6 +1097,7 @@
(make-full-case :split split :hvec hvec :mvec mvec :lvec lvec
:tabl (copy-seq tabl)))))
(format t "~& Title...~%")
+ (force-output)
(let ((tabl (make-array 100 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t))
(split #x65))
@@ -1087,6 +1108,7 @@
(make-full-case :split split :hvec hvec :mvec mvec :lvec lvec
:tabl (copy-seq tabl)))))
(format t "~& Upper...~%")
+ (force-output)
(let ((tabl (make-array 100 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t))
(split #x65))
@@ -1098,8 +1120,10 @@
:tabl (copy-seq tabl))))))
(format t "~&Building case-folding tables~%")
+ (force-output)
(progn
(format t "~& Simple...~%")
+ (force-output)
(let ((split #x54))
(multiple-value-bind (hvec mvec lvec)
(pack ucd range (lambda (x) (pack-case-folding-simple x))
@@ -1107,6 +1131,7 @@
(setf (unidata-case-fold-simple *unicode-data*)
(make-ntrie32 :split split :hvec hvec :mvec mvec :lvec lvec))))
(format t "~& Full...~%")
+ (force-output)
(let ((tabl (make-array 100 :element-type '(unsigned-byte 16)
:fill-pointer 0 :adjustable t))
(split #x65))
@@ -1118,6 +1143,7 @@
:tabl (copy-seq tabl))))))
(format t "~&Building word-break table~%")
+ (force-output)
(let ((split #x66))
(multiple-value-bind (hvec mvec lvec)
(pack ucd range (lambda (x) (pack-word-break x))
-----------------------------------------------------------------------
Summary of changes:
src/code/string.lisp | 142 +++++++++++++++++---------------
src/code/unidata.lisp | 6 +-
src/general-info/release-20e.txt | 2 +
src/i18n/BidiMirroring.txt | 21 ++---
src/i18n/CaseFolding.txt | 8 +-
src/i18n/CompositionExclusions.txt | 7 +-
src/i18n/DerivedNormalizationProps.txt | 6 +-
src/i18n/NameAliases.txt | 5 +-
src/i18n/NormalizationCorrections.txt | 8 +-
src/i18n/SpecialCasing.txt | 6 +-
src/i18n/UnicodeData.txt | 9 +-
src/i18n/WordBreakProperty.txt | 12 ++-
src/i18n/tests/NormalizationTest.txt | 8 +-
src/i18n/tests/WordBreakTest.txt | 113 ++++++++++++++++++++++++-
src/i18n/tests/word-break-test.lisp | 10 ++-
src/i18n/unidata.bin | Bin 1490793 -> 1490993 bytes
src/tools/build-unidata.lisp | 32 +++++++-
17 files changed, 273 insertions(+), 122 deletions(-)
hooks/post-receive
--
CMU Common Lisp
More information about the cmucl-cvs
mailing list