Revision: 3623
Author: edi
URL: http://bknr.net/trac/changeset/3623
Tag version 0.1.1
A tags/thirdparty/cl-unicode-0.1.1/
Copied: tags/thirdparty/cl-unicode-0.1.1 (from rev 3622, trunk/thirdparty/cl-unicode)
Revision: 3622
Author: edi
URL: http://bknr.net/trac/changeset/3622
Update to 0.1.1
U trunk/thirdparty/cl-unicode/CHANGELOG.txt
U trunk/thirdparty/cl-unicode/api.lisp
U trunk/thirdparty/cl-unicode/cl-unicode.asd
U trunk/thirdparty/cl-unicode/doc/index.html
U trunk/thirdparty/cl-unicode/packages.lisp
U trunk/thirdparty/cl-unicode/specials.lisp
U trunk/thirdparty/cl-unicode/util.lisp
Modified: trunk/thirdparty/cl-unicode/CHANGELOG.txt
===================================================================
--- trunk/thirdparty/cl-unicode/CHANGELOG.txt 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/CHANGELOG.txt 2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,3 +1,7 @@
+Version 0.1.1
+2008-07-24
+Make ADD-HANGUL-NAMES faster for ClozureCL
+
Version 0.1.0
2008-07-24
Initial release
Modified: trunk/thirdparty/cl-unicode/api.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/api.lisp 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/api.lisp 2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.30 2008/07/22 02:42:13 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.31 2008/07/24 14:46:20 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -39,6 +39,7 @@
(unicode-name (char-code char)))
(:method ((code-point integer))
(or (gethash code-point *code-points-to-names*)
+ (maybe-compute-hangul-syllable-name code-point)
(maybe-compute-cjk-name code-point))))
(defgeneric unicode1-name (c)
@@ -102,6 +103,7 @@
(setq scripts-to-try (list scripts-to-try)))
(let* ((canonicalized-name (canonicalize-name name))
(code-point (or (gethash canonicalized-name *names-to-code-points*)
+ (maybe-find-hangul-syllable-code-point canonicalized-name)
(maybe-find-cjk-code-point canonicalized-name)
(and try-unicode1-names-p
(gethash canonicalized-name *unicode1-names-to-code-points*))
Modified: trunk/thirdparty/cl-unicode/cl-unicode.asd
===================================================================
--- trunk/thirdparty/cl-unicode/cl-unicode.asd 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/cl-unicode.asd 2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/cl-unicode.asd,v 1.21 2008/07/21 22:09:22 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/cl-unicode.asd,v 1.22 2008/07/24 14:56:31 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -63,7 +63,7 @@
(call-next-method))
(defsystem :cl-unicode
- :version "0.1.0"
+ :version "0.1.1"
:serial t
:depends-on (:cl-ppcre)
:components ((:file "packages")
Modified: trunk/thirdparty/cl-unicode/doc/index.html
===================================================================
--- trunk/thirdparty/cl-unicode/doc/index.html 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/doc/index.html 2008-07-24 14:58:40 UTC (rev 3622)
@@ -72,7 +72,7 @@
CL-UNICODE together with this documentation can be downloaded from <a
href="http://weitz.de/files/cl-unicode.tar.gz">http://weitz.de/files/cl-unicode.tar.gz</a>. The
-current version is 0.1.0.
+current version is 0.1.1.
<p>
The library comes with a system definition
for <a href="http://www.cliki.net/asdf">ASDF</a> and you compile and
@@ -1270,7 +1270,7 @@
This documentation was prepared with <a href="http://weitz.de/documentation-template/">DOCUMENTATION-TEMPLATE</a>.
</p>
<p>
-$Header: /usr/local/cvsrep/cl-unicode/doc/index.html,v 1.12 2008/07/23 14:55:26 edi Exp $
+$Header: /usr/local/cvsrep/cl-unicode/doc/index.html,v 1.13 2008/07/24 14:56:33 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: trunk/thirdparty/cl-unicode/packages.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/packages.lisp 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/packages.lisp 2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/packages.lisp,v 1.23 2008/07/22 02:42:13 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/packages.lisp,v 1.24 2008/07/24 14:46:20 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -31,7 +31,9 @@
(defpackage :cl-unicode
(:use :cl)
- (:import-from :cl-ppcre :with-rebinding)
+ (:import-from :cl-ppcre
+ :*standard-optimize-settings*
+ :with-rebinding)
(:export :+code-point-limit+
:*scripts-to-try*
:*try-abbreviations-p*
Modified: trunk/thirdparty/cl-unicode/specials.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/specials.lisp 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/specials.lisp 2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/specials.lisp,v 1.14 2008/07/23 01:08:24 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/specials.lisp,v 1.16 2008/07/24 14:50:37 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -36,7 +36,7 @@
"A hash tables which maps property symbols \(see PROPERTY-SYMBOL) to
their \"canonical names\", i.e. to strings.")
-(defvar *names-to-code-points* (make-hash-table :test 'equalp :size 31000)
+(defvar *names-to-code-points* (make-hash-table :test 'equalp :size 20000)
"A hash table which \(case-insensitively) maps \"canonicalized\"
character names to their code points.")
@@ -44,7 +44,7 @@
"A hash table which \(case-insensitively) maps \"canonicalized\"
Unicode 1.0 character names to their code points.")
-(defvar *code-points-to-names* (make-hash-table :size 31000)
+(defvar *code-points-to-names* (make-hash-table :size 20000)
"A hash table which maps code points to the corresponding character
names.")
@@ -85,6 +85,10 @@
"A hash table which maps code points to their Jamo short names.
Needed to compute Hangul syllable names - see COMPUTE-HANGUL-NAME.")
+(defvar *hangul-syllables-to-code-points* (make-hash-table :test 'equalp :size 12000)
+ "A hash table which \(case-insensitively) maps Hangul syllable name
+parts to their code points.")
+
(defvar *try-unicode1-names-p* t
"This is the default value for the :TRY-UNICODE1-NAMES-P keyword
argument to CHARACTER-NAMED.")
Modified: trunk/thirdparty/cl-unicode/util.lisp
===================================================================
--- trunk/thirdparty/cl-unicode/util.lisp 2008-07-24 14:50:37 UTC (rev 3621)
+++ trunk/thirdparty/cl-unicode/util.lisp 2008-07-24 14:58:40 UTC (rev 3622)
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-unicode/util.lisp,v 1.27 2008/07/23 14:11:40 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-unicode/util.lisp,v 1.29 2008/07/24 14:46:20 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -166,45 +166,75 @@
(when (cjk-unified-ideograph-p code-point)
code-point)))
-(defconstant +s-base+ #xac00
- "The constant `SBase' from chapter 3 of the Unicode book.")
-(defconstant +l-base+ #x1100
- "The constant `LBase' from chapter 3 of the Unicode book.")
-(defconstant +v-base+ #x1161
- "The constant `VBase' from chapter 3 of the Unicode book.")
-(defconstant +t-base+ #x11a7
- "The constant `TBase' from chapter 3 of the Unicode book.")
-(defconstant +v-count+ 21
- "The constant `VCount' from chapter 3 of the Unicode book.")
-(defconstant +t-count+ 28
- "The constant `TCount' from chapter 3 of the Unicode book.")
-(define-symbol-macro +n-count+
- ;; the constant `NCount' from chapter 3 of the Unicode book
- (* +v-count+ +t-count+))
+(defmacro define-hangul-constant (name value)
+ (flet ((create-symbol (name)
+ (intern (format nil "+~:@(~C-~A~)+" (char name 0) (subseq name 1)) :cl-unicode)))
+ `(eval-when (:compile-toplevel :load-toplevel :execute)
+ (defconstant ,(create-symbol name) ,value
+ ,(format nil "The constant `~A' from chapter 3 of the Unicode book." name)))))
+(define-hangul-constant "SBase" #xac00)
+(define-hangul-constant "LBase" #x1100)
+(define-hangul-constant "VBase" #x1161)
+(define-hangul-constant "TBase" #x11a7)
+(define-hangul-constant "VCount" 21)
+(define-hangul-constant "TCount" 28)
+(define-hangul-constant "NCount" (* +v-count+ +t-count+))
+
+(declaim (inline compute-hangul-name))
(defun compute-hangul-name (code-point)
- "Algorithmically derives the Hangul syllable name of the character
-with code point CODE-POINT as described in section 3.12 of the Unicode
-book."
+ "Algorithmically derives the Hangul syllable name \(the part behind
+\"HANGUL SYLLABLE \") of the character with code point CODE-POINT as
+described in section 3.12 of the Unicode book."
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum code-point))
(let* ((s-index (- code-point +s-base+))
(l-value (+ +l-base+ (floor s-index +n-count+)))
(v-value (+ +v-base+ (floor (mod s-index +n-count+) +t-count+)))
(t-value (+ +t-base+ (mod s-index +t-count+))))
- (format nil "HANGUL SYLLABLE ~A~A~@[~A~]"
+ (declare (fixnum s-index t-value))
+ (format nil "~A~A~@[~A~]"
(gethash l-value *jamo-short-names*)
(gethash v-value *jamo-short-names*)
(and (/= t-value +t-base+)
(gethash t-value *jamo-short-names*)))))
+(defconstant +first-hangul-syllable+ #xac00
+ "The code point of the first Hangul syllable the name of which can
+be algorithmically derived.")
+(defconstant +last-hangul-syllable+ #xd7a3
+ "The code point of the last Hangul syllable the name of which can be
+algorithmically derived.")
+
(defun add-hangul-names ()
"Computes the names for all Hangul syllables and registers them in
-the appropriate hash tables."
+the *HANGUL-SYLLABLES-TO-CODE-POINTS* hash table. Used for
+CHARACTER-NAMED."
+ (declare #.*standard-optimize-settings*)
(format t "~&;;; Computing Hangul syllable names")
- (loop for code-point from #xac00 to #xd7a3
+ (loop for code-point from +first-hangul-syllable+ to +last-hangul-syllable+
for name = (compute-hangul-name code-point)
- do (setf (gethash (canonicalize-name name) *names-to-code-points*) code-point
- (gethash code-point *code-points-to-names*) name)))
+ do (setf (gethash name *hangul-syllables-to-code-points*) code-point)))
+(defun hangul-syllable-p (code-point)
+ "Returns a true value if CODE-POINT is the code point of a Hangul
+syllable for which we can algorithmically derive the name."
+ (<= +first-hangul-syllable+ code-point +last-hangul-syllable+))
+
+(defun maybe-compute-hangul-syllable-name (code-point)
+ "Computes the name for CODE-POINT if CODE-POINT denotes a Hangul
+syllable the name of which can be algorithmically derived."
+ (when (hangul-syllable-p code-point)
+ (format nil "HANGUL SYLLABLE ~X" (compute-hangul-name code-point))))
+
+(defun maybe-find-hangul-syllable-code-point (name)
+ "Computes the code point for NAME if NAME is the name of a Hangul
+syllable the name of which can be algorithmically derived."
+ (ppcre:register-groups-bind (name)
+ ;; canonicalized
+ ("(?i)^HANGULSYLLABLE([A-Z]*)$" name)
+ (gethash name *hangul-syllables-to-code-points*)))
+
(defmacro ensure-code-point (c)
"Helper macro so that C can be treated like a code point even if it
is a Lisp character."
@@ -214,7 +244,7 @@
(character (char-code ,c)))))
(defun unicode-name-reader (stream char arg)
- "The reader functino used when the alternative character syntax is
+ "The reader function used when the alternative character syntax is
enabled."
(declare (ignore char arg))
(let ((name (with-output-to-string (out)