
Update of /project/climacs/cvsroot/climacs In directory clnet:/tmp/cvs-serv12086 Added Files: java-syntax.lisp java-syntax-commands.lisp Log Message: Initial checkin. --- /project/climacs/cvsroot/climacs/java-syntax.lisp 2007/05/01 17:46:38 NONE +++ /project/climacs/cvsroot/climacs/java-syntax.lisp 2007/05/01 17:46:38 1.1 ;; -*- Mode: Lisp; Package: CLIMACS-JAVA-SYNTAX -*- ;;; (c) copyright 2005 by ;;; Robert Strandh (strandh@labri.fr) ;;; (c) copyright 2006 by ;;; Troels Henriksen (athas@sigkill.dk) ;;; (c) copyright 2007 by ;;; John Q Splittist (splittist@gmail.com) ;;; ;;; This library is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Library General Public ;;; License as published by the Free Software Foundation; either ;;; version 2 of the License, or (at your option) any later version. ;;; ;;; This library is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Library General Public License for more details. ;;; ;;; You should have received a copy of the GNU Library General Public ;;; License along with this library; if not, write to the ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;;; Boston, MA 02111-1307 USA. ;;;# Syntax module for analysing Java(TM) (in-package :climacs-java-syntax) ;;;# The command table. (define-syntax-command-table java-table :errorp nil) ;;;# The syntax object. ;;; ;;; We could add options here. (define-syntax java-syntax (lr-syntax-mixin fundamental-syntax) ((package :accessor package-of :documentation "A list of strings being the components of the `package' definition, if any.")) (:name "Java") (:pathname-types "java" "jav") (:command-table java-table) (:default-initargs :initial-state |initial-state |)) ;;; Now some ways to indicate what the syntax is. Extra details could be ;;; added. For now we'll show the package, if any. (defmethod name-for-info-pane ((syntax java-syntax) &key pane) (declare (ignore pane)) (format nil "Java~@[:~{~A~^.~}~]" (package-of syntax))) ;;;# Lexing. ;;; ;;; First we define the different states the lexer can be in (as triggered ;;; by the parser.) (define-lexer-state lexer-string-state () () (:documentation "In this state, the lexer is working inside a string delimited by double quote characters.")) (define-lexer-state lexer-line-comment-state () () (:documentation "In this state, the lexer is working inside a line comment starting with //.")) (define-lexer-state lexer-long-comment-state () () (:documentation "In this state, the lexer is working inside a long comment delimited by /* and */.")) ;;; And then we define the various elements of the language. ;;; ;;; First, some high-level concepts: (defclass java-nonterminal (nonterminal) ()) (defclass form (java-nonterminal) ()) ;;; Since we're dealing with things that might not be finished, ;;; we allow for incomplete forms at the end of the buffer. (defclass complete-form-mixin () ()) (defclass incomplete-form-mixin () ()) (defclass comment (java-nonterminal) ()) (defclass line-comment (java-comment) ()) (defclass long-comment (java-comment) ()) ;;; Of course, sometimes people type things that don't (yet) comply ;;; with the language specification. (defclass error-symbol (java-nonterminal) ()) ;;; Finally, we define the relevant lexeme. We will check the `ink' and ;;; and the `face' later during redisplay. (defclass java-lexeme (lexeme) ((ink) (face))) (defclass form-lexeme (form java-lexeme) ()) ;;; Keywords come in various flavours. (defclass keyword-lexeme (form-lexeme) ()) (defclass basic-type () ()) (defclass modifier () ()) (defclass operator () ()) (eval-when (:compile-toplevel :load-toplevel :execute) (defun spelling-to-symbol (name) (intern (concatenate 'string name "-LEXEME") #.*package*))) (defmacro define-keywords (&rest keyword-names) `(progn ,@(loop for (name . supers) in keyword-names for real-name = (spelling-to-symbol name) collecting `(defclass ,real-name (,@ supers keyword-lexeme) ()) into defclasses collecting name into names finally (return (cons `(defparameter *keyword-spellings* ',names) defclasses))))) (define-keywords ("abstract" modifier) ("assert" operator) ("boolean" basic-type) ("break" operator) ("byte" basic-type) ("case" operator) ("catch" operator) ("char" basic-type) ("class" operator) ("const") ; reserved but not used ("continue" operator) ("default" operator) ("do" operator) ("double" basic-type) ("else" operator) ("enum" operator) ("extends" operator) ("final" modifier) ("finally" operator) ("float" basic-type) ("for" operator) ("if" operator) ("int" basic-type) ("goto") ; reserved but not used ("implements" operator) ("import" operator) ("instanceof" operator) ("interface" operator) ("long" basic-type) ("native" basic-type) ("new" operator) ("package" operator) ("private" operator) ("package" operator) ("private" modifier) ("protected" modifier) ("public" modifier) ("return" operator) ("short" basic-type) ("static" modifier) ("striftfp" modifier) ("super" operator) ("switch" operator) ("synchronized" modifier) ("this" operator) ("throw" operator) ("throws" operator) ("transient" modifier) ("try" operator) ("void" operator) ("volatile" modifier) ("while" operator)) (defclass identifier-lexeme (form-lexeme) ()) (defclass literal-lexeme (form-lexeme) ()) (defclass integer-literal-lexeme (literal-lexeme) ()) (defclass decimal-integer-literal-lexeme (integer-literal-lexeme) ()) (defclass octal-integer-literal-lexeme (integer-literal-lexeme) ()) (defclass hex-integer-literal-lexeme (integer-literal-lexeme) ()) (defclass floating-point-literal-lexeme (literal-lexeme) ()) (defclass decimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ()) (defclass hexidecimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ()) ;;; A badly formed, or perhaps unfinished, number. (defclass bad-number-literal-lexeme (literal-lexeme) ()) (defclass boolean-literal-lexeme (literal-lexeme) ()) (defclass character-literal-lexeme (literal-lexeme) ()) (defclass incomplete-character-literal-lexeme (literal-lexeme incomplete-form-mixin) ()) (defclass string-literal-lexeme (literal-lexeme) ()) (defclass null-literal-lexeme (literal-lexeme) ()) (defclass separator-lexeme (form-lexeme) ()) (defclass punctuator-lexeme (form-lexeme) ()) ;;; Separators: ( ) { } [ ] ; , . (defclass semi-colon-lexeme (separator-lexeme) ()) (defclass comma-lexeme (separator-lexeme) ()) (defclass dot-lexeme (separator-lexeme) ()) (defclass delimiter-mixin () ()) (defclass opening-delimiter-mixin (delimiter-mixin) ()) (defclass closing-delimiter-mixin (delimiter-mixin) ()) (defclass left-bracket-lexeme (separator-lexeme opening-delimiter-mixin) ()) (defclass right-bracket-lexeme (separator-lexeme closing-delimiter-mixin) ()) (defclass left-parenthesis-lexeme (separator-lexeme opening-delimiter-mixin) ()) (defclass right-parenthesis-lexeme (separator-lexeme closing-delimiter-mixin) ()) (defclass left-brace-lexeme (separator-lexeme opening-delimiter-mixin) ()) (defclass right-brace-lexeme (separator-lexeme closing-delimiter-mixin) ()) ;;; Operators: ;;; = < > ! ~ ? : ;;; == <= >= != && || ++ -- ;;; + - * / & | ^ % << >> >>> ;;; += -= *= /= &= |= ^= %= <<= >>= >>>= (defmacro define-operators (&rest punctuator-names) `(progn ,@(loop for name in punctuator-names for real-name = (intern (concatenate 'string (string name) "-LEXEME") #.*package*) collecting `(defclass ,real-name (punctuator-lexeme) ())))) (define-operators equal left-angle-bracket right-angle-bracket exclamation tilde question colon eq leq geq neq and-and or-or increment decrement plus minus asterisk slash ampersand pipe circumflex percent left-shift right-shift unsigned-right-shift plus-equal minus-equal asterisk-equal slash-equal ampersand-equal pipe-equal circumflex-equal percent-equal left-shift-equal right-shift-equal unsigned-right-shift-equal) ;;; This for annotated interfaces. (defclass ampersand-lexeme (punctuator-lexeme) ()) ;;; And something for when we come across something completely wrong. (defclass error-lexeme (java-lexeme) ()) ;;; Some lexemes that will drive the parser and lexer. (defclass line-comment-start-lexeme (java-lexeme) ()) (defclass long-comment-start-lexeme (java-lexeme) ()) (defclass comment-end-lexeme (java-lexeme) ()) (defclass string-start-lexeme (java-lexeme) ()) (defclass string-end-lexeme (java-lexeme) ()) ;;; And some lexemes used inside strings and comments. (defclass word-lexeme (java-lexeme) ()) (defclass delimiter-lexeme (java-lexeme) ()) (defclass text-lexeme (java-lexeme) ()) ;;; Some predicates for recognizing the constituents of identifiers. ;;; "The $ character should be used only in mechanically generated ;;; source code or, rarely, to access preexisting names on legacy ;;; systems." (defun java-letter-p (ch) (and (characterp ch) (or (alpha-char-p ch) (char= ch #\_) (char= ch #\$)))) (defun java-letter-or-digit-p (ch) (and (characterp ch) (or (alphanumericp ch) (char= ch #\_) (char= ch #\$)))) ;;; Something to recognise escapes, including unicode escapes (which may ;;; have multiple #\u characters). (defun eat-escape (scan) "Advance over an escape (after the #\\), returning T if valid so far, or NIL." (macrolet ((fo () `(forward-object scan))) (case (object-after scan) ((#\b #\t #\n #\f #\r #\" #\' #\\) (fo) t) (#\u (loop until (end-of-buffer-p scan) while (eql (object-after scan) #\u) do (fo)) (loop until (end-of-buffer-p scan) for char = (object-after scan) with count = 0 while (and (characterp char) (digit-char-p char 16)) do (fo) (incf count) finally (return (or (and (end-of-buffer-p scan) (< count 4)) (= count 4))))) ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7) (loop repeat 3 until (end-of-buffer-p scan) for char = (object-after scan) while (and (characterp char) (digit-char-p char 8)) do (fo)) t) (t nil)))) ;;; The default method for skipping whitespace. (defmethod skip-inter ((syntax java-syntax) state scan) (macrolet ((fo () `(forward-object scan))) (loop when (end-of-buffer-p scan) do (return nil) until (not (whitespacep syntax (object-after scan))) do (fo) finally (return t)))) ;;; The lexing procedure used at the toplevel. Dispatches to lex-token ;;; at the appropriate time - except for standalone dots (where the lexer ;;; doesn't know whether it's looking at a potential number or the ;;; separator in a QualifiedIdentifier). (defmethod lex ((syntax java-syntax) (state lexer-toplevel-state) scan) (macrolet ((fo () `(forward-object scan))) (let ((object (object-after scan))) (case object (#\" (fo) (make-instance 'string-start-lexeme)) (#\' (fo) (cond ((end-of-buffer-p scan) (make-instance 'incomplete-character-literal-lexeme)) (t (cond ((eql (object-after scan) #\\) (fo) (if (not (end-of-buffer-p scan)) (unless (eat-escape scan) (return-from lex (make-instance 'error-lexeme))))) (t (fo))) (cond ((end-of-buffer-p scan) (make-instance 'incomplete-character-literal-lexeme)) ((eql (object-after scan) #\') (fo) (make-instance 'character-literal-lexeme)) (t (make-instance 'error-lexeme)))))) (#\[ (fo) (make-instance 'left-bracket-lexeme)) (#\] (fo) (make-instance 'right-bracket-lexeme)) (#\( (fo) (make-instance 'left-parenthesis-lexeme)) (#\) (fo) (make-instance 'right-parenthesis-lexeme)) (#\{ (fo) (make-instance 'left-brace-lexeme)) (#\} (fo) (make-instance 'right-brace-lexeme)) (#\@ (fo) (make-instance 'ampersand-lexeme)) (#\. (fo) (if (end-of-buffer-p scan) (make-instance 'dot-lexeme) (cond ((and (characterp (object-after scan)) (digit-char-p (object-after scan))) (backward-object scan) (lex-token syntax scan)) (t (make-instance 'dot-lexeme))))) (#\- (fo) (if (end-of-buffer-p scan) (make-instance 'minus-lexeme) (case (object-after scan) (#\- (fo) (make-instance 'decrement-lexeme)) (#\= (fo) (make-instance 'minus-equal-lexeme)) (t (make-instance 'minus-lexeme))))) (#\+ (fo) (if (end-of-buffer-p scan) (make-instance 'plus-lexeme) (case (object-after scan) (#\+ (fo) (make-instance 'increment-lexeme)) (#\= (fo) (make-instance 'plus-equal-lexeme)) (t (make-instance 'plus-lexeme))))) (#\& (fo) (if (end-of-buffer-p scan) (make-instance 'ampersand-lexeme) (case (object-after scan) (#\& (fo) (make-instance 'and-and-lexeme)) (#\= (fo) (make-instance 'ampersand-equal-lexeme)) (t (make-instance 'ampersand-lexeme))))) (#\* (fo) (if (end-of-buffer-p scan) (make-instance 'asterisk-lexeme) (cond ((eql (object-after scan) #\=) (fo) (make-instance 'asterisk-equal-lexeme)) (t (make-instance 'asterisk-lexeme))))) (#\~ (fo) (make-instance 'tilde-lexeme)) (#\! (fo) (if (end-of-buffer-p scan) (make-instance 'exclamation-lexeme) (cond ((eql (object-after scan) #\=) (fo) (make-instance 'neq-lexeme)) (t (make-instance 'exclamation-lexeme))))) (#\/ (fo) (if (end-of-buffer-p scan) (make-instance 'slash-lexeme) (case (object-after scan) (#\= (fo) (make-instance 'slash-equal-lexeme)) (#\* (fo) (make-instance 'long-comment-start-lexeme)) (#\/ (fo) (make-instance 'line-comment-start-lexeme)) (t (make-instance 'slash-lexeme))))) (#\% (fo) (if (end-of-buffer-p scan) [945 lines skipped] --- /project/climacs/cvsroot/climacs/java-syntax-commands.lisp 2007/05/01 17:46:38 NONE +++ /project/climacs/cvsroot/climacs/java-syntax-commands.lisp 2007/05/01 17:46:38 1.1 [1090 lines skipped]