Raymond Toy pushed to branch issue-120-software-type-in-c at cmucl / cmucl

Commits:

8 changed files:

Changes:

  • src/code/pathname.lisp
    ... ... @@ -252,6 +252,14 @@
    252 252
     ;;; This constructor is used to make an instance of the correct type
    
    253 253
     ;;; from parsed arguments.
    
    254 254
     
    
    255
    +#+darwin
    
    256
    +(defvar *enable-darwin-path-normalization* nil
    
    257
    +  "When non-NIL, pathnames are on Darwin are normalized when created.
    
    258
    +  Otherwise, the pathnames are unchanged.
    
    259
    +
    
    260
    +  This must be NIL during bootstrapping because Unicode is not yet
    
    261
    +  available.")
    
    262
    +
    
    255 263
     (defun %make-pathname-object (host device directory name type version)
    
    256 264
       (if (typep host 'logical-host)
    
    257 265
           (flet ((upcasify (thing)
    
    ... ... @@ -271,7 +279,30 @@
    271 279
     				(upcasify name)
    
    272 280
     				(upcasify type)
    
    273 281
     				(upcasify version)))
    
    274
    -      (%make-pathname host device directory name type version)))
    
    282
    +      #-darwin
    
    283
    +      (%make-pathname host device directory name type version)
    
    284
    +      #+darwin
    
    285
    +      (flet ((normalize-name (piece)
    
    286
    +	       ;; Normalize Darwin pathnames by converting Hangul
    
    287
    +	       ;; syllables to conjoining jamo, and converting the
    
    288
    +	       ;; string to NFD form, but skipping over a range of
    
    289
    +	       ;; characters.
    
    290
    +	       (typecase piece
    
    291
    +		 (string
    
    292
    +		  (if *enable-darwin-path-normalization*
    
    293
    +		      (decompose (unicode::decompose-hangul piece)
    
    294
    +				 :compatibility nil
    
    295
    +				 :darwinp t)
    
    296
    +		      piece))
    
    297
    +		 (t
    
    298
    +		  ;; What should we do about lisp::pattern objects
    
    299
    +		  ;; that occur in the name component?
    
    300
    +		  piece))))
    
    301
    +	(%make-pathname host device
    
    302
    +			(mapcar #'normalize-name directory)
    
    303
    +			(normalize-name name)
    
    304
    +			(normalize-name type)
    
    305
    +			version))))
    
    275 306
     
    
    276 307
     ;;; *LOGICAL-HOSTS* --internal.
    
    277 308
     ;;;
    

  • src/code/save.lisp
    ... ... @@ -202,7 +202,7 @@
    202 202
     				 (site-init "library:site-init")
    
    203 203
     				 (print-herald t)
    
    204 204
     				 (process-command-line t)
    
    205
    -		                  #+:executable
    
    205
    +		                 #+:executable
    
    206 206
     		                 (executable nil)
    
    207 207
     				 (batch-mode nil)
    
    208 208
     				 (quiet nil))
    

  • src/code/string.lisp
    ... ... @@ -1097,7 +1097,10 @@
    1097 1097
     
    
    1098 1098
     #+unicode
    
    1099 1099
     (progn
    
    1100
    -(defun decompose (string &optional (compatibility t))
    
    1100
    +(defun decompose (string &key (compatibility t) (start 0) end darwinp)
    
    1101
    +  "Convert STRING to NFD (or NFKD).  If :darwinp is non-NIL, then
    
    1102
    +  characters in the ranges U2000-U2FFF, UF900-UFA6A, and U2F800-U2FA1D
    
    1103
    +  are not decomposed, as specified for Darwin pathnames."
    
    1101 1104
       (declare (type string string))
    
    1102 1105
       (let ((result (make-string (cond ((< (length string) 40)
    
    1103 1106
     				    (* 5 (length string)))
    
    ... ... @@ -1113,8 +1116,13 @@
    1113 1116
     		 (declare (type kernel:index i))
    
    1114 1117
     		 (multiple-value-bind (code wide) (codepoint string i)
    
    1115 1118
     		   (when wide (incf i))
    
    1116
    -		   (let ((decomp (unicode-decomp code compatibility)))
    
    1117
    -		     (if decomp (rec decomp 0 (length decomp)) (out code))))))
    
    1119
    +		   (if (and darwinp
    
    1120
    +			    (or (<= #x2000 code #x2fff)
    
    1121
    +				(<= #xf900 code #xfa6a)
    
    1122
    +				(<= #x2f800 code #x2fa1d)))
    
    1123
    +		       (out code)
    
    1124
    +		       (let ((decomp (unicode-decomp code compatibility)))
    
    1125
    +			 (if decomp (rec decomp 0 (length decomp)) (out code)))))))
    
    1118 1126
     	     (out (code)
    
    1119 1127
     	       (multiple-value-bind (hi lo) (surrogates code)
    
    1120 1128
     		 (outch hi)
    
    ... ... @@ -1151,7 +1159,7 @@
    1151 1159
     					  (schar result (1+ last)))))
    
    1152 1160
     			    (decf last (if wide2 2 1)))
    
    1153 1161
     			   (t (return))))))))
    
    1154
    -      (with-string string
    
    1162
    +      (with-one-string string start end offset-var
    
    1155 1163
     	(rec string start end))
    
    1156 1164
           (shrink-vector result fillptr))))
    
    1157 1165
     
    
    ... ... @@ -1251,12 +1259,12 @@
    1251 1259
     (defun string-to-nfd (string)
    
    1252 1260
       _N"Convert String to Unicode Normalization Form D (NFD) using the
    
    1253 1261
       canonical decomposition.  The NFD string is returned"
    
    1254
    -  (decompose string nil))
    
    1262
    +  (decompose string :compatibility nil))
    
    1255 1263
     
    
    1256 1264
     (defun string-to-nfkd (string)
    
    1257 1265
       _N"Convert String to Unicode Normalization Form KD (NFKD) uisng the
    
    1258 1266
       compatible decomposition form.  The NFKD string is returned."
    
    1259
    -  (decompose string t))
    
    1267
    +  (decompose string :compatibility t))
    
    1260 1268
     
    
    1261 1269
     (defun string-to-nfc (string)
    
    1262 1270
       _N"Convert String to Unicode Normalization Form C (NFC).  If the
    

  • src/code/unicode.lisp
    ... ... @@ -517,3 +517,55 @@
    517 517
           (if (eq casing :simple)
    
    518 518
     	  (cl:string-capitalize string :start start :end end)
    
    519 519
     	  (string-capitalize-full string :start start :end end :casing casing))))
    
    520
    +
    
    521
    +
    
    522
    +(defun decompose-hangul-syllable (cp stream)
    
    523
    +  "Decompose the Hangul syllable codepoint CP to an equivalent sequence
    
    524
    +  of conjoining jamo and print the decomposed result to the stream
    
    525
    +  STREAM."
    
    526
    +  (let* ((s-base #xac00)
    
    527
    +	 (l-base #x1100)
    
    528
    +	 (v-base #x1161)
    
    529
    +	 (t-base #x11a7)
    
    530
    +	 (v-count 21)
    
    531
    +	 (t-count 28)
    
    532
    +	 (n-count (* v-count t-count)))
    
    533
    +    ;; Step 1: Compute index of the syllable S
    
    534
    +    (let ((s-index (- cp s-base)))
    
    535
    +      ;; Step 2: If s is in the range 0 <= s <= s-count, the compute
    
    536
    +      ;; the components.
    
    537
    +      (let ((l (+ l-base (truncate s-index n-count)))
    
    538
    +	    (v (+ v-base (truncate (mod s-index n-count) t-count)))
    
    539
    +	    (tt (+ t-base (mod s-index t-count))))
    
    540
    +	;; Step 3: If tt = t-base, then there is no trailing character
    
    541
    +	;; so replace s by the sequence <l,v>.  Otherwise there is a
    
    542
    +	;; trailing character, so replace s by the sequence <l,v,tt>.
    
    543
    +	(princ (code-char l) stream)
    
    544
    +	(princ (code-char v) stream)
    
    545
    +	(unless (= tt t-base)
    
    546
    +	  (princ (code-char tt) stream)))))
    
    547
    +  (values))
    
    548
    +
    
    549
    +(defun is-hangul-syllable (codepoint)
    
    550
    +  "Test if CODEPOINT is a Hangul syllable"
    
    551
    +  (let* ((s-base #xac00)
    
    552
    +	 (l-count 19)
    
    553
    +	 (v-count 21)
    
    554
    +	 (t-count 28)
    
    555
    +	 (n-count (* v-count t-count))
    
    556
    +	 (number-of-syllables (* l-count n-count)))
    
    557
    +    (<= 0 (- codepoint s-base) number-of-syllables)))
    
    558
    +
    
    559
    +(defun decompose-hangul (string)
    
    560
    +  "Decompose any Hangul syllables in STRING to an equivalent sequence of
    
    561
    +  conjoining jamo characters."
    
    562
    +  (with-output-to-string (s)
    
    563
    +    (loop for cp being the codepoints of string
    
    564
    +	  do
    
    565
    +	     (if (is-hangul-syllable cp)
    
    566
    +		 (decompose-hangul-syllable cp s)
    
    567
    +		 (multiple-value-bind (high low)
    
    568
    +		     (surrogates cp)
    
    569
    +		   (princ high s)
    
    570
    +		   (when low
    
    571
    +		     (princ low s)))))))

  • src/general-info/release-21e.md
    ... ... @@ -22,63 +22,68 @@ public domain.
    22 22
       * Feature enhancements
    
    23 23
       * Changes
    
    24 24
         * Update to ASDF 3.3.6
    
    25
    -    * The default external format is `:utf-8` instead of `:iso8859-1`
    
    25
    +    * The default external format is `:utf-8` instead of `:iso8859-1`.
    
    26 26
       * ANSI compliance fixes:
    
    27 27
       * Bug fixes:
    
    28 28
         * ~~#97~~ Fixes stepping through the source forms in the debugger.  This has been broken for quite some time, but it works now.
    
    29 29
     
    
    30 30
       * Gitlab tickets:
    
    31
    -    * ~~#68~~ gcc8.1.1 can't build lisp.  Change optimization from `-O2` to `-O1`
    
    32
    -    * ~~#72~~ CMU user manual now part of cmucl-site
    
    33
    -    * ~~#73~~ Update clx from upstream clx
    
    34
    -    * ~~#77~~ Added tests for sqrt for exceptional values
    
    31
    +    * ~~#68~~ gcc8.1.1 can't build lisp.  Change optimization from `-O2` to `-O1`.
    
    32
    +    * ~~#72~~ CMU user manual now part of cmucl-site.
    
    33
    +    * ~~#73~~ Update clx from upstream clx.
    
    34
    +    * ~~#77~~ Added tests for sqrt for exceptional values.
    
    35 35
         * ~~#79~~ Autoload ASDF when calling `REQUIRE` the first time.  User's no longer have to explicitly load ASDF anymore.
    
    36 36
         * ~~#80~~ Use ASDF to load contribs.  cmu-contribs still exists but does nothing.  The contrib names are the same, except it's best to use a keyword instead of a string.  So, `:contrib-demos` instead of `"contrib-demos"`.
    
    37
    -    * ~~#81~~ Added contribs from Eric Marsden
    
    38
    -    * ~~#82~~ Replace bc with expr in GNUMakefile
    
    39
    -    * ~~#86~~ Building with gcc 8 and later works when using -O2 optimization
    
    37
    +    * ~~#81~~ Added contribs from Eric Marsden.
    
    38
    +    * ~~#82~~ Replace bc with expr in GNUMakefile.
    
    39
    +    * ~~#86~~ Building with gcc 8 and later works when using -O2 optimization.
    
    40 40
         * ~~#90~~ Some static symbols have been removed.  This probably makes the fasl files incompatible with older versions.
    
    41
    -    * ~~#91~~ Loop destructuring no longer incorrectly signals an error
    
    42
    -    * ~~#95~~ Disassembler syntax of x86 je and movzx is incorrect
    
    41
    +    * ~~#91~~ Loop destructuring no longer incorrectly signals an error.
    
    42
    +    * ~~#95~~ Disassembler syntax of x86 je and movzx is incorrect.
    
    43 43
         * ~~#97~~ Define and use ud2 instruction instead of int3.  Fixes single-stepping.
    
    44
    -    * ~~#98~~ fstpd is not an Intel instruction; disassemble as `fstp dword ptr [addr]`
    
    44
    +    * ~~#98~~ fstpd is not an Intel instruction; disassemble as `fstp dword ptr [addr]`.
    
    45 45
         * ~~#100~~ ldb prints out Unicode base-chars correctly instead of just the low 8 bits.
    
    46
    -    * ~~#103~~ RANDOM-MT19937-UPDATE assembly routine still exists
    
    46
    +    * ~~#103~~ RANDOM-MT19937-UPDATE assembly routine still exists.
    
    47 47
         * ~~#104~~ Single-stepping broken (fixed via #97).
    
    48
    -    * ~~#107~~ Replace u_int8_t with uint8_t
    
    49
    -    * ~~#108~~ Update ASDF
    
    50
    -    * ~~#112~~ CLX can't connect to X server via inet sockets
    
    48
    +    * ~~#107~~ Replace u_int8_t with uint8_t.
    
    49
    +    * ~~#108~~ Update ASDF.
    
    50
    +    * ~~#112~~ CLX can't connect to X server via inet sockets.
    
    51 51
         * ~~#113~~ REQUIRE on contribs can pull in the wrong things via ASDF.
    
    52
    -    * ~~#120~~ `SOFTWARE-TYPE` and `SOFTWARE-VERSION` are implemented in C.
    
    52
    +    * ~~#120~~ `SOFTWARE-VERSION` is implemented in C.
    
    53 53
         * ~~#121~~ Wrong column index in FILL-POINTER-OUTPUT-STREAM
    
    54 54
         * ~~#122~~ gcc 11 can't build cmucl
    
    55 55
         * ~~#124~~ directory with `:wild-inferiors` doesn't descend subdirectories 
    
    56 56
         * ~~#125~~ Linux `unix-stat` returning incorrect values
    
    57 57
         * ~~#127~~ Linux unix-getpwuid segfaults when given non-existent uid.
    
    58
    -    * ~~#128~~ `QUIT` accepts an exit code
    
    59
    -    * ~~#130~~ Move file-author to C 
    
    60
    -    * ~~#132~~ Ansi test `RENAME-FILE.1` no longer fails
    
    61
    -    * ~~#134~~ Handle the case of `(expt complex complex-rational)`
    
    62
    -    * ~~#136~~ `ensure-directories-exist` should return the given pathspec
    
    63
    -    * #139 `*default-external-format*` defaults to `:utf-8`; add alias for `:locale` external format
    
    64
    -    * ~~#140~~ External format for streams that are not `file-stream`'s
    
    65
    -    * ~~#141~~ Disallow locales that are pathnames to a localedef file
    
    66
    -    * ~~#142~~ `(random 0)` signals incorrect error
    
    67
    -    * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream`
    
    68
    -    * ~~#149~~ Call setlocale(3C) on startup
    
    69
    -    * ~~#150~~ Add aliases for external format cp949 and euckr
    
    58
    +    * ~~#128~~ `QUIT` accepts an exit code.
    
    59
    +    * ~~#130~~ Move file-author to C.
    
    60
    +    * ~~#132~~ Ansi test `RENAME-FILE.1` no longer fails.
    
    61
    +    * ~~#134~~ Handle the case of `(expt complex complex-rational)`.
    
    62
    +    * ~~#136~~ `ensure-directories-exist` should return the given pathspec.
    
    63
    +    * #139 `*default-external-format*` defaults to `:utf-8`; add alias for `:locale` external format.
    
    64
    +    * ~~#140~~ External format for streams that are not `file-stream`'s.
    
    65
    +    * ~~#141~~ Disallow locales that are pathnames to a localedef file.
    
    66
    +    * ~~#142~~ `(random 0)` signals incorrect error.
    
    67
    +    * ~~#147~~ `stream-line-column` method missing for `fundamental-character-output-stream`.
    
    68
    +    * ~~#149~~ Call setlocale(3C) on startup.
    
    69
    +    * ~~#150~~ Add aliases for external format cp949 and euckr.
    
    70 70
         * ~~#151~~ Change `*default-external-format*` to `:utf-8`.
    
    71
    -    * ~~#155~~ Wrap help strings neatly
    
    72
    -    * ~~#157~~ `(directory "foo/**/")` only returns directories now
    
    73
    -    * ~~#163~~ Add command-line option `-version` and `--version` to get lisp version
    
    74
    -    * ~~#165~~ Avoid inserting NIL into simple `LOOP` from `FORMAT`
    
    75
    -    * ~~#166~~ Fix incorrect type declaration for exponent from `integer-decode-float`
    
    71
    +    * ~~#152~~ Add new external format, `:locale` as an alias to the codeset from LANG and friends.
    
    72
    +    * ~~#!53~~ Terminals default to an encoding of `:locale`.
    
    73
    +    * ~~#155~~ Wrap help strings neatly.
    
    74
    +    * ~~#157~~ `(directory "foo/**/")` only returns directories now.
    
    75
    +    * #158 Darwin uses utf-8, but we don't support all the rules for pathnames.
    
    76
    +    * ~~#162~~ `*filename-encoding*` defaults to `:null` to mean no encoding.
    
    77
    +    * ~~#163~~ Add command-line option `-version` and `--version` to get lisp version.
    
    78
    +    * ~~#165~~ Avoid inserting NIL into simple `LOOP` from `FORMAT`.
    
    79
    +    * ~~#166~~ Fix incorrect type declaration for exponent from `integer-decode-float`.
    
    76 80
         * ~~#167~~ Low bound for `decode-float-exponent` type was off by one.
    
    77
    -    * ~~#168~~ Don't use negated forms for jmp instructions when possible
    
    78
    -    * ~~#169~~ Add pprinter for `define-vop` and `sc-case`
    
    81
    +    * ~~#168~~ Don't use negated forms for jmp instructions when possible.
    
    82
    +    * ~~#169~~ Add pprinter for `define-vop` and `sc-case`.
    
    79 83
         * ~~#172~~ Declare `pathname-match-p` as returning `nil` or `pathname`.
    
    80
    -    * ~~#173~~ Add pprinter for `define-assembly-routine`
    
    84
    +    * ~~#173~~ Add pprinter for `define-assembly-routine`.
    
    81 85
         * ~~#176~~ `SHORT-SITE-NAME` and `LONG-SITE-NAME` return `NIL`.
    
    86
    +    * ~~#177~~ Add pprinter for `deftransform` and `defoptimizer`.
    
    82 87
       * Other changes:
    
    83 88
       * Improvements to the PCL implementation of CLOS:
    
    84 89
       * Changes to building procedure:
    

  • src/i18n/locale/cmucl.pot
    ... ... @@ -4012,6 +4012,14 @@ msgid ""
    4012 4012
     "  string is returned."
    
    4013 4013
     msgstr ""
    
    4014 4014
     
    
    4015
    +#: src/code/string.lisp
    
    4016
    +msgid ""
    
    4017
    +"Convert String to NFD (or NFKD).  If :darwinp is non-NIL, then\n"
    
    4018
    +"    characters in the ranges U2000-U2FFF, UF900-UFA6A, and\n"
    
    4019
    +"    U2F800-U2FA1D are not decomposed, as specified for Darwin\n"
    
    4020
    +"    pathnames."
    
    4021
    +msgstr ""
    
    4022
    +
    
    4015 4023
     #: src/code/string.lisp
    
    4016 4024
     msgid ""
    
    4017 4025
     "Convert a sequence of codepoints to a string.  Codepoints outside\n"
    
    ... ... @@ -15267,6 +15275,23 @@ msgid ""
    15267 15275
     "  delimited by non-case-modifiable chars.  "
    
    15268 15276
     msgstr ""
    
    15269 15277
     
    
    15278
    +#: src/code/unicode.lisp
    
    15279
    +msgid ""
    
    15280
    +"Decompose the Hangul syllable codepoint CP to an equivalent sequence\n"
    
    15281
    +"  of conjoining jamo and print the decomposed result to the stream\n"
    
    15282
    +"  STREAM."
    
    15283
    +msgstr ""
    
    15284
    +
    
    15285
    +#: src/code/unicode.lisp
    
    15286
    +msgid "Test if CODEPOINT is a Hangul syllable"
    
    15287
    +msgstr ""
    
    15288
    +
    
    15289
    +#: src/code/unicode.lisp
    
    15290
    +msgid ""
    
    15291
    +"Decompose any Hangul syllables in STRING to an equivalent sequence of\n"
    
    15292
    +"  conjoining jamo characters."
    
    15293
    +msgstr ""
    
    15294
    +
    
    15270 15295
     #: src/compiler/macros.lisp
    
    15271 15296
     msgid ""
    
    15272 15297
     "Policy Node Condition*\n"
    

  • tests/issues.lisp
    ... ... @@ -832,6 +832,54 @@
    832 832
     
    
    833 833
     
    
    834 834
     
    
    835
    +(define-test issue.158
    
    836
    +    (:tag :issues)
    
    837
    +  (let* ((name (string #\Hangul_Syllable_Gyek))
    
    838
    +	 (path (make-pathname :directory (list :relative name)
    
    839
    +			      :name name
    
    840
    +			      :type name)))
    
    841
    +    ;; Enable this when we implement normalization for Darwin
    
    842
    +    #+(and nil darwin)
    
    843
    +    (let ((expected '(4352 4456 4543)))
    
    844
    +      ;; Tests that on Darwin the Hangul pathname has been normalized
    
    845
    +      ;; correctly.  We fill in the directory, name, and type components
    
    846
    +      ;; with the same thing since it shouldn't really matter.
    
    847
    +      ;;
    
    848
    +      ;; The expected value is the conjoining jamo for the character
    
    849
    +      ;; #\Hangul_Syllable_Gyek.
    
    850
    +      (assert-equal (map 'list #'char-code (second (pathname-directory path)))
    
    851
    +		    expected)
    
    852
    +      (assert-equal (map 'list #'char-code (pathname-name path))
    
    853
    +		    expected)
    
    854
    +      (assert-equal (map 'list #'char-code (pathname-type path))
    
    855
    +		    expected))
    
    856
    +    #-darwin
    
    857
    +    (let ((expected (list (char-code #\Hangul_Syllable_Gyek))))
    
    858
    +      ;; For other OSes, just assume that the pathname is unchanged.
    
    859
    +      (assert-equal (map 'list #'char-code (second (pathname-directory path)))
    
    860
    +		    expected)
    
    861
    +      (assert-equal (map 'list #'char-code (pathname-name path))
    
    862
    +		    expected)
    
    863
    +      (assert-equal (map 'list #'char-code (pathname-type path))
    
    864
    +		    expected))))
    
    865
    +
    
    866
    +(define-test issue.158.dir
    
    867
    +    (:tag :issues)
    
    868
    +  (flet ((get-file ()
    
    869
    +	   ;; This assumes that there is only one file in resources/darwin
    
    870
    +	   (let ((files (directory (merge-pathnames "resources/darwin/*.txt" *test-path*))))
    
    871
    +	     (assert-equal (length files) 1)
    
    872
    +	     (first files))))
    
    873
    +    (let ((f (get-file))
    
    874
    +	  (expected-name "안녕하십니까"))
    
    875
    +      #+darwin
    
    876
    +      (assert-equal (pathname-name f)
    
    877
    +		    (unicode::decompose-hangul expected-name))
    
    878
    +      #-darwin
    
    879
    +      (assert-equal (pathname-name f) expected-name))))
    
    880
    +    
    
    881
    +
    
    882
    +
    
    835 883
     (define-test issue.166
    
    836 884
         (:tag :issues)
    
    837 885
       ;; While this tests for the correct return value, the problem was
    
    ... ... @@ -896,4 +944,3 @@
    896 944
         (assert-true (typep idf-max-expo 'kernel:double-float-int-exponent))
    
    897 945
         (assert-true (typep (1- idf-max-expo) 'kernel:double-float-int-exponent))
    
    898 946
         (assert-false (typep (1+ idf-max-expo) 'kernel:double-float-int-exponent))))
    899
    -    

  • tests/resources/darwin/안녕하십니까.txt
    1
    +The file name of this file is "안녕하십니까.txt" ("Hello" in Korean.)
    
    2
    +
    
    3
    +