This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "CMU Common Lisp".
The branch, file-attribute has been created at 81f65db452ae1c7c1b8907d10f5bb8bbd4cff37f (commit)
- Log ----------------------------------------------------------------- commit 81f65db452ae1c7c1b8907d10f5bb8bbd4cff37f Merge: 7ed7451 8a9d1d8 Author: Raymond Toy toy.raymond@gmail.com Date: Mon Jun 25 20:46:20 2012 -0700
Merge branch 'master' into file-attribute
commit 7ed745111ae7a478ba867db257a1888d2948b524 Author: Raymond Toy toy.raymond@gmail.com Date: Thu May 24 21:39:39 2012 -0700
First cut at :file-attribute external format (mostly from Douglas.)
diff --git a/src/code/exports.lisp b/src/code/exports.lisp index a46d5bb..d0cfe58 100644 --- a/src/code/exports.lisp +++ b/src/code/exports.lisp @@ -1511,6 +1511,7 @@ "DESCRIBE-EXTERNAL-FORMAT") ;; Unicode (:export "STRING-TO-OCTETS" "OCTETS-TO-STRING" "*DEFAULT-EXTERNAL-FORMAT*" + "*DEFAULT-SOURCE-EXTERNAL-FORMAT*" "DESCRIBE-EXTERNAL-FORMAT" "LIST-ALL-EXTERNAL-FORMATS" "STRING-ENCODE" "STRING-DECODE" diff --git a/src/code/fd-stream.lisp b/src/code/fd-stream.lisp index 45b5847..e854a73 100644 --- a/src/code/fd-stream.lisp +++ b/src/code/fd-stream.lisp @@ -1362,6 +1362,169 @@ ;;;; Utility functions (misc routines, etc)
+(defvar *stream-encoding-file-attribute-translations* + '(;; Emacs specific codings. + (:iso8859-1 "latin-1" "latin-1-unix" "iso-latin-1-unix" "iso-8859-1-unix") + (:utf-8 "utf-8-unix") + (:euc-jp "euc-jp-unix") + ) + "List of coding translations used by 'stream-encoding-file-attribute to map + the read file coding into a native external-format. Each element is a list of + a native external-format followed byte a list of coding strings that are to be + mapped to this native format.") + +;;; stream-encoding-file-attribute -- Internal +;;; +;;; Read the encoding file option from the stream 's which is expected to be a +;;; character stream with an external-format of :iso8859-1. +;;; +(defun stream-encoding-file-attribute (s) + (let* ((initial-encoding nil) + (declared-encoding nil) + (buffer (make-array 1024 :element-type '(unsigned-byte 8))) + (available (do ((i 0 (1+ i))) + ((>= i 1024) i) + (declare (fixnum i)) + (let ((ch (read-char s nil nil))) + (unless ch (return i)) + (setf (aref buffer i) (char-code ch)))))) + (labels ((decode-ascii (start size offset) + (declare (type fixnum start) + (type (integer 1 4) size) + (type (integer 0 3) offset)) + (let ((ascii (make-array 64 :element-type 'character + :adjustable t :fill-pointer 0))) + (do () + ((< available (+ start size))) + (let* ((code (ecase size + (1 (aref buffer start)) + (2 (let ((b0 (aref buffer start)) + (b1 (aref buffer (1+ start)))) + (ecase offset + (0 (logior (ash b1 8) b0)) + (1 (logior (ash b0 8) b1))))) + (4 + (let ((b0 (aref buffer start)) + (b1 (aref buffer (+ start 1))) + (b2 (aref buffer (+ start 2))) + (b3 (aref buffer (+ start 3)))) + (ecase offset + (0 (logior (ash b3 24) (ash b2 16) (ash b1 8) b0)) + (1 (logior (ash b1 24) (ash b0 16) (ash b3 8) b2)) + (2 (logior (ash b2 24) (ash b3 16) (ash b0 8) b1)) + (3 (logior (ash b0 24) (ash b1 16) (ash b2 8) b3)))))))) + (incf start size) + (let ((ch (if (< 0 code #x80) (code-char code) #?))) + (vector-push-extend ch ascii)))) + ascii)) + (parse-file-option (ascii) + ;; Parse the file options. + (let ((found (search "-*-" ascii)) + (options nil)) + (when found + (block do-file-options + (let* ((start (+ found 3)) + (end (search "-*-" ascii :start2 start))) + (unless end + (return-from do-file-options)) + (unless (find #: ascii :start start :end end) + (return-from do-file-options)) + (do ((opt-start start (1+ semi)) colon semi) + (nil) + (setf colon (position #: ascii :start opt-start :end end)) + (unless colon + (return-from do-file-options)) + (setf semi (or (position #; ascii :start colon :end end) end)) + (let ((option (string-trim '(#\space #\tab) + (subseq ascii opt-start colon))) + (value (string-trim '(#\space #\tab) + (subseq ascii (1+ colon) semi)))) + (push (cons option value) options) + (when (= semi end) (return nil))))))) + (setf declared-encoding + (cond ((cdr (assoc "external-format" options :test 'equalp))) + ((cdr (assoc "encoding" options :test 'equalp))) + ((cdr (assoc "coding" options :test 'equalp)))))))) + (cond ((>= available 4) + (let ((b1 (aref buffer 0)) + (b2 (aref buffer 1)) + (b3 (aref buffer 2)) + (b4 (aref buffer 3))) + (cond ((and (= b1 #x00) (= b2 #x00) (= b3 #xFE) (= b4 #xFF)) + (setf initial-encoding :ucs-4be) + (parse-file-option (decode-ascii 4 4 3))) + ((and (= b1 #xff) (= b2 #xfe)) + (cond ((and (= b3 #x00) (= b4 #x00)) + (setf initial-encoding :ucs-4le) + (parse-file-option (decode-ascii 4 4 0))) + (t + (setf initial-encoding :utf-16le) + (parse-file-option (decode-ascii 2 2 0))))) + ((and (= b1 #x00) (= b2 #x00) (= b3 #xFF) (= b4 #xFE)) + (parse-file-option (decode-ascii 4 4 2))) + ((and (= b1 #xfe) (= b2 #xff)) + (cond ((and (= b3 #x00) (= b4 #x00)) + (parse-file-option (decode-ascii 4 4 1))) + (t + (setf initial-encoding :utf-16be) + (parse-file-option (decode-ascii 2 2 1))))) + ((and (= b1 #xEF) (= b2 #xBB) (= b3 #xBF)) + (setf initial-encoding :utf-8)) + ((and (> b1 0) (= b2 0) (= b3 0) (= b4 0)) + (setf initial-encoding :ucs-4le) + (parse-file-option (decode-ascii 0 4 0))) + ((and (= b1 0) (> b2 0) (= b3 0) (= b4 0)) + (parse-file-option (decode-ascii 0 4 1))) + ((and (= b1 0) (= b2 0) (> b3 0) (= b4 0)) + (parse-file-option (decode-ascii 0 4 2))) + ((and (= b1 0) (= b2 0) (= b3 0) (> b4 0)) + (setf initial-encoding :ucs-4be) + (parse-file-option (decode-ascii 0 4 3))) + ((and (> b1 0) (= b2 0) (> b3 0) (= b4 0)) + (setf initial-encoding :utf-16le) + (parse-file-option (decode-ascii 0 2 0))) + ((and (= b1 0) (> b2 0) (= b3 0) (> b4 0)) + (setf initial-encoding :utf-16be) + (parse-file-option (decode-ascii 0 2 1))) + ((and (= b1 #x2B) (= b2 #x41) + (or (= b3 #x43) (= b3 #x44))) + (setf initial-encoding :utf-7)) + ((and (= b1 #x2F) (= b2 #x2B) (= b3 #x41)) + (setf initial-encoding :utf-7)) + (t + (parse-file-option (decode-ascii 0 1 0)))))) + ((= available 3) + (when (and (= (aref buffer 0) #xEF) + (= (aref buffer 1) #xBB) + (= (aref buffer 2) #xBF)) + (setf initial-encoding :utf-8))) + ((= available 2) + (let ((b1 (aref buffer 0)) + (b2 (aref buffer 1))) + (cond ((and (= b1 #xff) (= b2 #xfe)) + (setf initial-encoding :utf-16le)) + ((and (= b1 #xfe) (= b2 #xff)) + (setf initial-encoding :utf-16be))))))) + ;; + ;; + (cond ((and (not initial-encoding) (not declared-encoding)) + :default) + (t + (let ((encoding (or declared-encoding initial-encoding))) + (when (stringp encoding) + (setf encoding (string-upcase encoding)) + (dolist (translations *stream-encoding-file-attribute-translations*) + (when (member encoding (rest translations) :test 'equalp) + (setf encoding (first translations)) + (return)))) + (let ((external-format + (cond ((eq encoding :default) :default) + ((stringp encoding) + (intern encoding :keyword)) + (t + encoding)))) + external-format)))))) + ;;; SET-ROUTINES -- internal ;;; ;;; Fill in the various routine slots for the given type. Input-p and @@ -1916,20 +2079,7 @@ (setf (fd-stream-flags stream) #b001)) (t (setf (fd-stream-flags stream) #b010))) - - ;; FIXME: setting the external format here should be better - ;; integrated into set-routines. We do it before so that - ;; set-routines can create an in-buffer if appropriate. But we - ;; need to do it after to put the correct input routines for the - ;; external format. ;; - ;;#-unicode-bootstrap ; fails in stream-reinit otherwise - #+(and unicode (not unicode-bootstrap)) - (%set-fd-stream-external-format stream external-format nil) - (set-routines stream element-type input output input-buffer-p - :binary-stream-p binary-stream-p) - #+(and unicode (not unicode-bootstrap)) - (%set-fd-stream-external-format stream external-format nil) (when (and auto-close (fboundp 'finalize)) (finalize stream #'(lambda () @@ -1937,6 +2087,46 @@ (format *terminal-io* (intl:gettext "** Closed ~A~%") name) (when original (revert-file file original))))) + ;; + ;; FIXME: setting the external format here should be better + ;; integrated into set-routines. We do it before so that + ;; set-routines can create an in-buffer if appropriate. But we + ;; need to do it after to put the correct input routines for the + ;; external format. + ;; + ;;#-unicode-bootstrap ; fails in stream-reinit otherwise + #+(and unicode (not unicode-bootstrap)) + (cond ((and (eq external-format :file-attribute) input) + ;; Read the encoding file option with the external-format set to + ;; :iso8859-1, and then change the external-format if necessary. + #+(and unicode (not unicode-bootstrap)) + (%set-fd-stream-external-format stream :iso8859-1 nil) + (set-routines stream element-type input output input-buffer-p + :binary-stream-p binary-stream-p) + #+(and unicode (not unicode-bootstrap)) + (%set-fd-stream-external-format stream :iso8859-1 nil) + (let ((encoding (stream-encoding-file-attribute stream))) + (unless (file-position stream :start) + (error (intl:gettext "The ~A external-format requires a file stream.") + external-format)) + (unless (member encoding '(:iso8859-1 :iso-8859-1)) + (setf (stream-external-format stream) (or encoding :default))))) + ((eq external-format :file-attribute) + ;; Non-input stream, so can not read the file attributes, so use the + ;; :default. + #+(and unicode (not unicode-bootstrap)) + (%set-fd-stream-external-format stream :default nil) + (set-routines stream element-type input output input-buffer-p + :binary-stream-p binary-stream-p) + #+(and unicode (not unicode-bootstrap)) + (%set-fd-stream-external-format stream :default nil)) + (t + #+(and unicode (not unicode-bootstrap)) + (%set-fd-stream-external-format stream external-format nil) + (set-routines stream element-type input output input-buffer-p + :binary-stream-p binary-stream-p) + #+(and unicode (not unicode-bootstrap)) + (%set-fd-stream-external-format stream external-format nil))) stream))
diff --git a/src/code/load.lisp b/src/code/load.lisp index 832853b..89f7705 100644 --- a/src/code/load.lisp +++ b/src/code/load.lisp @@ -19,7 +19,7 @@
(in-package "EXTENSIONS") (export '(*load-if-source-newer* *load-source-types* *load-object-types* - invalid-fasl)) + invalid-fasl *default-source-external-format*))
(in-package "SYSTEM") (export '(foreign-symbol-address alternate-get-global-address)) @@ -94,6 +94,12 @@ (invalid-fasl-pathname condition) (invalid-fasl-version condition) (invalid-fasl-expected-version condition))))) + +(defvar *default-source-external-format* :default + "The external-format that 'load and 'compile-file use when given an + external-format of :default. The default value is :default which will open + the file using the 'ext:*default-external-format*") + ;;; LOAD-FRESH-LINE -- internal. ;;; @@ -523,6 +529,10 @@ defaulting. Probably only necessary if you have source files with a "fasl" type.
+ :EXTERNAL-FORMAT + The external-format to use when opening the FILENAME. The default is + :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*. + The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER* determine the defaults for the corresponding keyword arguments. These variables are also bound to the specified argument values, so specifying a @@ -604,6 +614,8 @@ (*load-pathname* pathname)) (case contents (:source + (when (eq external-format :default) + (setf external-format *default-source-external-format*)) (with-open-file (file truename :external-format external-format :direction :input :if-does-not-exist if-does-not-exist) diff --git a/src/compiler/main.lisp b/src/compiler/main.lisp index 544ccb0..066ff2d 100644 --- a/src/compiler/main.lisp +++ b/src/compiler/main.lisp @@ -738,12 +738,12 @@ :write-date (file-write-date x) :language :lisp)) files))) - + (when (eq external-format :default) + (setf external-format *default-source-external-format*)) (make-source-info :files file-info :current-file file-info #+unicode :external-format - #+unicode (stream::ef-name - (stream::find-external-format external-format)) + #+unicode external-format #+unicode :decoding-error #+unicode decoding-error)))
diff --git a/src/i18n/locale/cmucl.pot b/src/i18n/locale/cmucl.pot index 5b38108..bb807f4 100644 --- a/src/i18n/locale/cmucl.pot +++ b/src/i18n/locale/cmucl.pot @@ -9187,6 +9187,16 @@ msgid "Error reading ~S: ~A" msgstr ""
#: src/code/fd-stream.lisp +msgid "" +"List of coding translations used by 'stream-encoding-file-attribute to map\n" +" the read file coding into a native external-format. Each element is a " +"list of\n" +" a native external-format followed byte a list of coding strings that are " +"to be\n" +" mapped to this native format." +msgstr "" + +#: src/code/fd-stream.lisp msgid "Could not find any input routine for ~S" msgstr ""
@@ -9263,6 +9273,10 @@ msgid "** Closed ~A~%" msgstr ""
#: src/code/fd-stream.lisp +msgid "The ~A external-format requires a file stream." +msgstr "" + +#: src/code/fd-stream.lisp msgid "" "This is a string that OPEN tacks on the end of a file namestring to produce\n" " a name for the :if-exists :rename-and-delete and :rename options. Also,\n" @@ -10064,6 +10078,14 @@ msgid "" msgstr ""
#: src/code/load.lisp +msgid "" +"The external-format that 'load and 'compile-file use when given an\n" +" external-format of :default. The default value is :default which will " +"open\n" +" the file using the 'ext:*default-external-format*" +msgstr "" + +#: src/code/load.lisp msgid "List of free fop tables for the fasloader." msgstr ""
@@ -10133,6 +10155,10 @@ msgid "" " defaulting. Probably only necessary if you have source files with a\n" " "fasl" type. \n" "\n" +" :EXTERNAL-FORMAT\n" +" The external-format to use when opening the FILENAME. The default is\n" +" :default which uses the EXT:*DEFAULT-SOURCE-EXTERNAL-FORMAT*.\n" +"\n" " The variables *LOAD-VERBOSE*, *LOAD-PRINT* and EXT:*LOAD-IF-SOURCE-NEWER" "*\n" " determine the defaults for the corresponding keyword arguments. These\n"
-----------------------------------------------------------------------
hooks/post-receive