Raymond Toy pushed to branch issue-367-count-octets-for-encoding at cmucl / cmucl

Commits:

9 changed files:

Changes:

  • src/pcl/simple-streams/external-formats/ascii.lisp
    ... ... @@ -33,5 +33,15 @@ replaced with a question mark.")
    33 33
     			  (declare (optimize (ext:inhibit-warnings 3)))
    
    34 34
     			(funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code))
    
    35 35
     		      #x3F)
    
    36
    -		  ,code))))
    
    36
    +		  ,code)))
    
    37
    +  ()
    
    38
    +  ()
    
    39
    +  (octet-count (code state error)
    
    40
    +    `(if (> ,code #x7f)
    
    41
    +	 (if ,error
    
    42
    +	     (locally
    
    43
    +		 (declare (optimize (ext:inhibit-warnings 3)))
    
    44
    +	       (funcall ,error "Cannot output codepoint #x~X to ASCII stream" ,code))
    
    45
    +	     1)
    
    46
    +	 1)))
    
    37 47
     

  • src/pcl/simple-streams/external-formats/iso8859-1.lisp
    ... ... @@ -31,4 +31,17 @@ character and illegal outputs are replaced by a question mark.")
    31 31
     		     (funcall ,error "Cannot output codepoint #x~X to ISO8859-1 stream"
    
    32 32
     			      ,code 1))
    
    33 33
     		      #x3F)
    
    34
    -		  ,code))))
    34
    +		  ,code)))
    
    35
    +  ()
    
    36
    +  ()
    
    37
    +  (octet-count (code state error)
    
    38
    +    `(if (> ,code 255)
    
    39
    +	 (if ,error
    
    40
    +	     (locally
    
    41
    +		 ;; No warnings about fdefinition
    
    42
    +		 (declare (optimize (ext:inhibit-warnings 3)))
    
    43
    +	       (funcall ,error
    
    44
    +			(intl:gettext "Cannot output codepoint #x~X to ISO8859-1 stream")
    
    45
    +			,code 1))
    
    46
    +	     1)
    
    47
    +	 1)))

  • src/pcl/simple-streams/external-formats/utf-16-be.lisp
    ... ... @@ -110,4 +110,12 @@ Unicode replacement character.")
    110 110
       (copy-state (state)
    
    111 111
         ;; The state is either NIL or a codepoint, so nothing really
    
    112 112
         ;; special is needed to copy it.
    
    113
    -    `(progn ,state)))
    113
    +    `(progn ,state))
    
    114
    +  (code-to-octets (code state error)
    
    115
    +    `(cond ((< ,code #x10000)
    
    116
    +	    2)
    
    117
    +	   ((< ,code #x110000)
    
    118
    +	    4)
    
    119
    +	   (t
    
    120
    +	    ;; Replacement character is 2 octets
    
    121
    +	    2))))

  • src/pcl/simple-streams/external-formats/utf-16-le.lisp
    ... ... @@ -111,4 +111,12 @@ Unicode replacement character.")
    111 111
       (copy-state (state)
    
    112 112
         ;; The state is either NIL or a codepoint, so nothing really
    
    113 113
         ;; special is needed.
    
    114
    -    `(progn ,state)))
    114
    +    `(progn ,state))
    
    115
    +  (code-to-octets (code state error)
    
    116
    +    `(cond ((< ,code #x10000)
    
    117
    +	    2)
    
    118
    +	   ((< ,code #x110000)
    
    119
    +	    4)
    
    120
    +	   (t
    
    121
    +	    ;; Replacement character is 2 octets
    
    122
    +	    2))))

  • src/pcl/simple-streams/external-formats/utf-16.lisp
    ... ... @@ -156,4 +156,18 @@ Unicode replacement character.")
    156 156
     		    ,c))))))
    
    157 157
       (copy-state (state)
    
    158 158
         ;; The state is list. Copy it
    
    159
    -    `(copy-list ,state)))
    159
    +	      `(copy-list ,state))
    
    160
    +  (code-to-octets (code state error)
    
    161
    +    `(progn
    
    162
    +       #+nil
    
    163
    +       (unless ,state
    
    164
    +	 ;; Output BOM
    
    165
    +	 (output #xFEFF)
    
    166
    +	 (setf ,state t))
    
    167
    +       (cond ((< ,code #x10000)
    
    168
    +	      2)
    
    169
    +	     ((< ,code #x110000)
    
    170
    +	      4)
    
    171
    +	     (t
    
    172
    +	      ;; Replacement character is 2 octets
    
    173
    +	      2)))))

  • src/pcl/simple-streams/external-formats/utf-32-be.lisp
    ... ... @@ -61,4 +61,18 @@ Unicode replacement character.")
    61 61
     				  ,code))
    
    62 62
     		       +replacement-character-code+)))
    
    63 63
     	     (t
    
    64
    -	      (out ,code))))))
    64
    +	      (out ,code)))))
    
    65
    +  ()
    
    66
    +  ()
    
    67
    +  (code-to-octets (code state error)
    
    68
    +    `(cond ((lisp::surrogatep ,code)
    
    69
    +	    (if ,error
    
    70
    +		(locally
    
    71
    +		    ;; No warnings about fdefinition
    
    72
    +		    (declare (optimize (ext:inhibit-warnings 3)))
    
    73
    +		  (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
    
    74
    +			   ,code))
    
    75
    +		;; Replacement character is 2 octets
    
    76
    +		2))
    
    77
    +	   (t
    
    78
    +	    4)))))

  • src/pcl/simple-streams/external-formats/utf-32-le.lisp
    ... ... @@ -62,4 +62,18 @@ Unicode replacement character.")
    62 62
     				  ,code))
    
    63 63
     		       +replacement-character-code+)))
    
    64 64
     	     (t
    
    65
    -	      (out ,code))))))
    65
    +	      (out ,code)))))
    
    66
    +  ()
    
    67
    +  ()
    
    68
    +  (code-to-octets (code state error)
    
    69
    +    `(cond ((lisp::surrogatep ,code)
    
    70
    +	    (if ,error
    
    71
    +		(locally
    
    72
    +		    ;; No warnings about fdefinition
    
    73
    +		    (declare (optimize (ext:inhibit-warnings 3)))
    
    74
    +		  (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
    
    75
    +			   ,code))
    
    76
    +		;; Replacement character is 2 octets
    
    77
    +		2))
    
    78
    +	   (t
    
    79
    +	    4))))

  • src/pcl/simple-streams/external-formats/utf-32.lisp
    ... ... @@ -114,4 +114,22 @@ Unicode replacement character.")
    114 114
       nil
    
    115 115
       (copy-state (state)
    
    116 116
         ;; The state is either NIL or T, so we can just return that.
    
    117
    -    `(progn ,state)))
    117
    +    `(progn ,state))
    
    118
    +  (code-to-octets (code state output error i c)
    
    119
    +    `(progn
    
    120
    +       ;; Should we count the BOM?
    
    121
    +       #+nil
    
    122
    +       (unless ,state
    
    123
    +	 (out #xFEFF)
    
    124
    +	 (setf ,state t))
    
    125
    +       (cond ((lisp::surrogatep ,code)
    
    126
    +	      (if ,error
    
    127
    +		  (locally
    
    128
    +		      ;; No warnings about fdefinition
    
    129
    +		      (declare (optimize (ext:inhibit-warnings 3)))
    
    130
    +		    (funcall ,error "Surrogate code #x~4,'0X is illegal for UTF32 output"
    
    131
    +			     ,code))
    
    132
    +		  ;; Replacement character is 2 octets
    
    133
    +		  2))
    
    134
    +	     (t
    
    135
    +	      4)))))

  • src/pcl/simple-streams/external-formats/utf-8.lisp
    ... ... @@ -127,4 +127,14 @@ replacement character.")
    127 127
              ((< ,code #x800) (utf8 ,code 1))
    
    128 128
              ((< ,code #x10000) (utf8 ,code 2))
    
    129 129
              ((< ,code #x110000) (utf8 ,code 3))
    
    130
    -         (t (error "How did this happen?  Codepoint U+~X is illegal" ,code))))))
    130
    +         (t (error "How did this happen?  Codepoint U+~X is illegal" ,code)))))
    
    131
    +  ()
    
    132
    +  ()
    
    133
    +  (octet-count (code state error)
    
    134
    +    `(locally
    
    135
    +	 (declare (optimize (ext:inhibit-warnings 3)))
    
    136
    +       (cond ((< ,code #x80) 1)
    
    137
    +             ((< ,code #x800) 2)
    
    138
    +             ((< ,code #x10000) 3)
    
    139
    +             ((< ,code #x110000) 4)
    
    140
    +             (t (error "How did this happen?  Codepoint U+~X is illegal" ,code))))))