Revision: 3622 Author: edi URL: http://bknr.net/trac/changeset/3622
Update to 0.1.1
U trunk/thirdparty/cl-unicode/CHANGELOG.txt U trunk/thirdparty/cl-unicode/api.lisp U trunk/thirdparty/cl-unicode/cl-unicode.asd U trunk/thirdparty/cl-unicode/doc/index.html U trunk/thirdparty/cl-unicode/packages.lisp U trunk/thirdparty/cl-unicode/specials.lisp U trunk/thirdparty/cl-unicode/util.lisp
Modified: trunk/thirdparty/cl-unicode/CHANGELOG.txt =================================================================== --- trunk/thirdparty/cl-unicode/CHANGELOG.txt 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/CHANGELOG.txt 2008-07-24 14:58:40 UTC (rev 3622) @@ -1,3 +1,7 @@ +Version 0.1.1 +2008-07-24 +Make ADD-HANGUL-NAMES faster for ClozureCL + Version 0.1.0 2008-07-24 Initial release
Modified: trunk/thirdparty/cl-unicode/api.lisp =================================================================== --- trunk/thirdparty/cl-unicode/api.lisp 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/api.lisp 2008-07-24 14:58:40 UTC (rev 3622) @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.30 2008/07/22 02:42:13 edi Exp $ +;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.31 2008/07/24 14:46:20 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -39,6 +39,7 @@ (unicode-name (char-code char))) (:method ((code-point integer)) (or (gethash code-point *code-points-to-names*) + (maybe-compute-hangul-syllable-name code-point) (maybe-compute-cjk-name code-point))))
(defgeneric unicode1-name (c) @@ -102,6 +103,7 @@ (setq scripts-to-try (list scripts-to-try))) (let* ((canonicalized-name (canonicalize-name name)) (code-point (or (gethash canonicalized-name *names-to-code-points*) + (maybe-find-hangul-syllable-code-point canonicalized-name) (maybe-find-cjk-code-point canonicalized-name) (and try-unicode1-names-p (gethash canonicalized-name *unicode1-names-to-code-points*))
Modified: trunk/thirdparty/cl-unicode/cl-unicode.asd =================================================================== --- trunk/thirdparty/cl-unicode/cl-unicode.asd 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/cl-unicode.asd 2008-07-24 14:58:40 UTC (rev 3622) @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/cl-unicode/cl-unicode.asd,v 1.21 2008/07/21 22:09:22 edi Exp $ +;;; $Header: /usr/local/cvsrep/cl-unicode/cl-unicode.asd,v 1.22 2008/07/24 14:56:31 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -63,7 +63,7 @@ (call-next-method))
(defsystem :cl-unicode - :version "0.1.0" + :version "0.1.1" :serial t :depends-on (:cl-ppcre) :components ((:file "packages")
Modified: trunk/thirdparty/cl-unicode/doc/index.html =================================================================== --- trunk/thirdparty/cl-unicode/doc/index.html 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/doc/index.html 2008-07-24 14:58:40 UTC (rev 3622) @@ -72,7 +72,7 @@
CL-UNICODE together with this documentation can be downloaded from <a href="http://weitz.de/files/cl-unicode.tar.gz">http://weitz.de/files/cl-unicode.tar.gz</a>. The -current version is 0.1.0. +current version is 0.1.1. <p> The library comes with a system definition for <a href="http://www.cliki.net/asdf">ASDF</a> and you compile and @@ -1270,7 +1270,7 @@ This documentation was prepared with <a href="http://weitz.de/documentation-template/">DOCUMENTATION-TEMPLATE</a>. </p> <p> -$Header: /usr/local/cvsrep/cl-unicode/doc/index.html,v 1.12 2008/07/23 14:55:26 edi Exp $ +$Header: /usr/local/cvsrep/cl-unicode/doc/index.html,v 1.13 2008/07/24 14:56:33 edi Exp $ <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: trunk/thirdparty/cl-unicode/packages.lisp =================================================================== --- trunk/thirdparty/cl-unicode/packages.lisp 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/packages.lisp 2008-07-24 14:58:40 UTC (rev 3622) @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/cl-unicode/packages.lisp,v 1.23 2008/07/22 02:42:13 edi Exp $ +;;; $Header: /usr/local/cvsrep/cl-unicode/packages.lisp,v 1.24 2008/07/24 14:46:20 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -31,7 +31,9 @@
(defpackage :cl-unicode (:use :cl) - (:import-from :cl-ppcre :with-rebinding) + (:import-from :cl-ppcre + :*standard-optimize-settings* + :with-rebinding) (:export :+code-point-limit+ :*scripts-to-try* :*try-abbreviations-p*
Modified: trunk/thirdparty/cl-unicode/specials.lisp =================================================================== --- trunk/thirdparty/cl-unicode/specials.lisp 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/specials.lisp 2008-07-24 14:58:40 UTC (rev 3622) @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/cl-unicode/specials.lisp,v 1.14 2008/07/23 01:08:24 edi Exp $ +;;; $Header: /usr/local/cvsrep/cl-unicode/specials.lisp,v 1.16 2008/07/24 14:50:37 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -36,7 +36,7 @@ "A hash tables which maps property symbols (see PROPERTY-SYMBOL) to their "canonical names", i.e. to strings.")
-(defvar *names-to-code-points* (make-hash-table :test 'equalp :size 31000) +(defvar *names-to-code-points* (make-hash-table :test 'equalp :size 20000) "A hash table which (case-insensitively) maps "canonicalized" character names to their code points.")
@@ -44,7 +44,7 @@ "A hash table which (case-insensitively) maps "canonicalized" Unicode 1.0 character names to their code points.")
-(defvar *code-points-to-names* (make-hash-table :size 31000) +(defvar *code-points-to-names* (make-hash-table :size 20000) "A hash table which maps code points to the corresponding character names.")
@@ -85,6 +85,10 @@ "A hash table which maps code points to their Jamo short names. Needed to compute Hangul syllable names - see COMPUTE-HANGUL-NAME.")
+(defvar *hangul-syllables-to-code-points* (make-hash-table :test 'equalp :size 12000) + "A hash table which (case-insensitively) maps Hangul syllable name +parts to their code points.") + (defvar *try-unicode1-names-p* t "This is the default value for the :TRY-UNICODE1-NAMES-P keyword argument to CHARACTER-NAMED.")
Modified: trunk/thirdparty/cl-unicode/util.lisp =================================================================== --- trunk/thirdparty/cl-unicode/util.lisp 2008-07-24 14:50:37 UTC (rev 3621) +++ trunk/thirdparty/cl-unicode/util.lisp 2008-07-24 14:58:40 UTC (rev 3622) @@ -1,5 +1,5 @@ ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*- -;;; $Header: /usr/local/cvsrep/cl-unicode/util.lisp,v 1.27 2008/07/23 14:11:40 edi Exp $ +;;; $Header: /usr/local/cvsrep/cl-unicode/util.lisp,v 1.29 2008/07/24 14:46:20 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
@@ -166,45 +166,75 @@ (when (cjk-unified-ideograph-p code-point) code-point)))
-(defconstant +s-base+ #xac00 - "The constant `SBase' from chapter 3 of the Unicode book.") -(defconstant +l-base+ #x1100 - "The constant `LBase' from chapter 3 of the Unicode book.") -(defconstant +v-base+ #x1161 - "The constant `VBase' from chapter 3 of the Unicode book.") -(defconstant +t-base+ #x11a7 - "The constant `TBase' from chapter 3 of the Unicode book.") -(defconstant +v-count+ 21 - "The constant `VCount' from chapter 3 of the Unicode book.") -(defconstant +t-count+ 28 - "The constant `TCount' from chapter 3 of the Unicode book.") -(define-symbol-macro +n-count+ - ;; the constant `NCount' from chapter 3 of the Unicode book - (* +v-count+ +t-count+)) +(defmacro define-hangul-constant (name value) + (flet ((create-symbol (name) + (intern (format nil "+~:@(~C-~A~)+" (char name 0) (subseq name 1)) :cl-unicode))) + `(eval-when (:compile-toplevel :load-toplevel :execute) + (defconstant ,(create-symbol name) ,value + ,(format nil "The constant `~A' from chapter 3 of the Unicode book." name)))))
+(define-hangul-constant "SBase" #xac00) +(define-hangul-constant "LBase" #x1100) +(define-hangul-constant "VBase" #x1161) +(define-hangul-constant "TBase" #x11a7) +(define-hangul-constant "VCount" 21) +(define-hangul-constant "TCount" 28) +(define-hangul-constant "NCount" (* +v-count+ +t-count+)) + +(declaim (inline compute-hangul-name)) (defun compute-hangul-name (code-point) - "Algorithmically derives the Hangul syllable name of the character -with code point CODE-POINT as described in section 3.12 of the Unicode -book." + "Algorithmically derives the Hangul syllable name (the part behind +"HANGUL SYLLABLE ") of the character with code point CODE-POINT as +described in section 3.12 of the Unicode book." + (declare #.*standard-optimize-settings*) + (declare (fixnum code-point)) (let* ((s-index (- code-point +s-base+)) (l-value (+ +l-base+ (floor s-index +n-count+))) (v-value (+ +v-base+ (floor (mod s-index +n-count+) +t-count+))) (t-value (+ +t-base+ (mod s-index +t-count+)))) - (format nil "HANGUL SYLLABLE ~A~A~@[~A~]" + (declare (fixnum s-index t-value)) + (format nil "~A~A~@[~A~]" (gethash l-value *jamo-short-names*) (gethash v-value *jamo-short-names*) (and (/= t-value +t-base+) (gethash t-value *jamo-short-names*)))))
+(defconstant +first-hangul-syllable+ #xac00 + "The code point of the first Hangul syllable the name of which can +be algorithmically derived.") +(defconstant +last-hangul-syllable+ #xd7a3 + "The code point of the last Hangul syllable the name of which can be +algorithmically derived.") + (defun add-hangul-names () "Computes the names for all Hangul syllables and registers them in -the appropriate hash tables." +the *HANGUL-SYLLABLES-TO-CODE-POINTS* hash table. Used for +CHARACTER-NAMED." + (declare #.*standard-optimize-settings*) (format t "~&;;; Computing Hangul syllable names") - (loop for code-point from #xac00 to #xd7a3 + (loop for code-point from +first-hangul-syllable+ to +last-hangul-syllable+ for name = (compute-hangul-name code-point) - do (setf (gethash (canonicalize-name name) *names-to-code-points*) code-point - (gethash code-point *code-points-to-names*) name))) + do (setf (gethash name *hangul-syllables-to-code-points*) code-point)))
+(defun hangul-syllable-p (code-point) + "Returns a true value if CODE-POINT is the code point of a Hangul +syllable for which we can algorithmically derive the name." + (<= +first-hangul-syllable+ code-point +last-hangul-syllable+)) + +(defun maybe-compute-hangul-syllable-name (code-point) + "Computes the name for CODE-POINT if CODE-POINT denotes a Hangul +syllable the name of which can be algorithmically derived." + (when (hangul-syllable-p code-point) + (format nil "HANGUL SYLLABLE ~X" (compute-hangul-name code-point)))) + +(defun maybe-find-hangul-syllable-code-point (name) + "Computes the code point for NAME if NAME is the name of a Hangul +syllable the name of which can be algorithmically derived." + (ppcre:register-groups-bind (name) + ;; canonicalized + ("(?i)^HANGULSYLLABLE([A-Z]*)$" name) + (gethash name *hangul-syllables-to-code-points*))) + (defmacro ensure-code-point (c) "Helper macro so that C can be treated like a code point even if it is a Lisp character." @@ -214,7 +244,7 @@ (character (char-code ,c)))))
(defun unicode-name-reader (stream char arg) - "The reader functino used when the alternative character syntax is + "The reader function used when the alternative character syntax is enabled." (declare (ignore char arg)) (let ((name (with-output-to-string (out)