Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl

Commits:

3 changed files:

Changes:

  • src/pcl/simple-streams/external-formats/utf-16.lisp
    ... ... @@ -158,16 +158,16 @@ Unicode replacement character.")
    158 158
         ;; The state is list. Copy it
    
    159 159
     	      `(copy-list ,state))
    
    160 160
       (octet-count (code state error)
    
    161
    -    `(progn
    
    162
    -       #+nil
    
    161
    +    `(let ((bom-count 0))
    
    163 162
            (unless ,state
    
    164 163
     	 ;; Output BOM
    
    165
    -	 (output #xFEFF)
    
    164
    +	 (setf bom-count 2)
    
    166 165
     	 (setf ,state t))
    
    167
    -       (cond ((< ,code #x10000)
    
    168
    -	      2)
    
    169
    -	     ((< ,code #x110000)
    
    170
    -	      4)
    
    171
    -	     (t
    
    172
    -	      ;; Replacement character is 2 octets
    
    173
    -	      2)))))
    166
    +       (+ bom-count
    
    167
    +	  (cond ((< ,code #x10000)
    
    168
    +		 2)
    
    169
    +		((< ,code #x110000)
    
    170
    +		 4)
    
    171
    +		(t
    
    172
    +		 ;; Replacement character is 2 octets
    
    173
    +		 2))))))

  • src/pcl/simple-streams/external-formats/utf-32.lisp
    ... ... @@ -116,11 +116,9 @@ Unicode replacement character.")
    116 116
         ;; The state is either NIL or T, so we can just return that.
    
    117 117
         `(progn ,state))
    
    118 118
       (octet-count (code state error)
    
    119
    -    `(progn
    
    120
    -       ;; Should we count the BOM?
    
    121
    -       #+nil
    
    119
    +    `(let ((bom-count 0))
    
    122 120
            (unless ,state
    
    123
    -	 (out #xFEFF)
    
    121
    +	 (setf bom-count 4)
    
    124 122
     	 (setf ,state t))
    
    125 123
            (cond ((lisp::surrogatep ,code)
    
    126 124
     	      (if ,error
    
    ... ... @@ -130,6 +128,6 @@ Unicode replacement character.")
    130 128
     		    (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
    
    131 129
     			     ,code))
    
    132 130
     		  ;; Replacement character is 2 octets
    
    133
    -		  2))
    
    131
    +		  (+ 2 bom-count)))
    
    134 132
     	     (t
    
    135
    -	      4)))))
    133
    +	      (+ 4 bom-count))))))

  • tests/external-formats.lisp
    ... ... @@ -36,7 +36,6 @@
    36 36
         (:tag :octet-count)
    
    37 37
       (test-octet-count *test-unicode* :utf-8))
    
    38 38
     
    
    39
    -#+nil
    
    40 39
     (define-test octet-count.utf-16
    
    41 40
         (:tag :octet-count)
    
    42 41
       (test-octet-count *test-unicode* :utf-16))
    
    ... ... @@ -49,7 +48,6 @@
    49 48
         (:tag :octet-count)
    
    50 49
       (test-octet-count *test-unicode* :utf-16-le))
    
    51 50
     
    
    52
    -#+nil
    
    53 51
     (define-test octet-count.utf-32
    
    54 52
         (:tag :octet-count)
    
    55 53
       (test-octet-count *test-unicode* :utf-32))