Update of /project/climacs/cvsroot/climacs In directory clnet:/tmp/cvs-serv12086
Added Files: java-syntax.lisp java-syntax-commands.lisp Log Message: Initial checkin.
--- /project/climacs/cvsroot/climacs/java-syntax.lisp 2007/05/01 17:46:38 NONE +++ /project/climacs/cvsroot/climacs/java-syntax.lisp 2007/05/01 17:46:38 1.1 ;; -*- Mode: Lisp; Package: CLIMACS-JAVA-SYNTAX -*-
;;; (c) copyright 2005 by ;;; Robert Strandh (strandh@labri.fr) ;;; (c) copyright 2006 by ;;; Troels Henriksen (athas@sigkill.dk) ;;; (c) copyright 2007 by ;;; John Q Splittist (splittist@gmail.com) ;;; ;;; This library is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Library General Public ;;; License as published by the Free Software Foundation; either ;;; version 2 of the License, or (at your option) any later version. ;;; ;;; This library is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Library General Public License for more details. ;;; ;;; You should have received a copy of the GNU Library General Public ;;; License along with this library; if not, write to the ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;;; Boston, MA 02111-1307 USA.
;;;# Syntax module for analysing Java(TM)
(in-package :climacs-java-syntax)
;;;# The command table.
(define-syntax-command-table java-table :errorp nil)
;;;# The syntax object. ;;; ;;; We could add options here.
(define-syntax java-syntax (lr-syntax-mixin fundamental-syntax) ((package :accessor package-of :documentation "A list of strings being the components of the `package' definition, if any.")) (:name "Java") (:pathname-types "java" "jav") (:command-table java-table) (:default-initargs :initial-state |initial-state |))
;;; Now some ways to indicate what the syntax is. Extra details could be ;;; added. For now we'll show the package, if any.
(defmethod name-for-info-pane ((syntax java-syntax) &key pane) (declare (ignore pane)) (format nil "Java~@[:~{~A~^.~}~]" (package-of syntax)))
;;;# Lexing. ;;; ;;; First we define the different states the lexer can be in (as triggered ;;; by the parser.)
(define-lexer-state lexer-string-state () () (:documentation "In this state, the lexer is working inside a string delimited by double quote characters."))
(define-lexer-state lexer-line-comment-state () () (:documentation "In this state, the lexer is working inside a line comment starting with //."))
(define-lexer-state lexer-long-comment-state () () (:documentation "In this state, the lexer is working inside a long comment delimited by /* and */."))
;;; And then we define the various elements of the language. ;;; ;;; First, some high-level concepts:
(defclass java-nonterminal (nonterminal) ())
(defclass form (java-nonterminal) ())
;;; Since we're dealing with things that might not be finished, ;;; we allow for incomplete forms at the end of the buffer.
(defclass complete-form-mixin () ()) (defclass incomplete-form-mixin () ())
(defclass comment (java-nonterminal) ()) (defclass line-comment (java-comment) ()) (defclass long-comment (java-comment) ())
;;; Of course, sometimes people type things that don't (yet) comply ;;; with the language specification.
(defclass error-symbol (java-nonterminal) ())
;;; Finally, we define the relevant lexeme. We will check the `ink' and ;;; and the `face' later during redisplay.
(defclass java-lexeme (lexeme) ((ink) (face)))
(defclass form-lexeme (form java-lexeme) ())
;;; Keywords come in various flavours.
(defclass keyword-lexeme (form-lexeme) ())
(defclass basic-type () ()) (defclass modifier () ()) (defclass operator () ())
(eval-when (:compile-toplevel :load-toplevel :execute) (defun spelling-to-symbol (name) (intern (concatenate 'string name "-LEXEME") #.*package*)))
(defmacro define-keywords (&rest keyword-names) `(progn ,@(loop for (name . supers) in keyword-names for real-name = (spelling-to-symbol name) collecting `(defclass ,real-name (,@ supers keyword-lexeme) ()) into defclasses collecting name into names finally (return (cons `(defparameter *keyword-spellings* ',names) defclasses)))))
(define-keywords ("abstract" modifier) ("assert" operator) ("boolean" basic-type) ("break" operator) ("byte" basic-type) ("case" operator) ("catch" operator) ("char" basic-type) ("class" operator) ("const") ; reserved but not used ("continue" operator) ("default" operator) ("do" operator) ("double" basic-type) ("else" operator) ("enum" operator) ("extends" operator) ("final" modifier) ("finally" operator) ("float" basic-type) ("for" operator) ("if" operator) ("int" basic-type) ("goto") ; reserved but not used ("implements" operator) ("import" operator) ("instanceof" operator) ("interface" operator) ("long" basic-type) ("native" basic-type) ("new" operator) ("package" operator) ("private" operator) ("package" operator) ("private" modifier) ("protected" modifier) ("public" modifier) ("return" operator) ("short" basic-type) ("static" modifier) ("striftfp" modifier) ("super" operator) ("switch" operator) ("synchronized" modifier) ("this" operator) ("throw" operator) ("throws" operator) ("transient" modifier) ("try" operator) ("void" operator) ("volatile" modifier) ("while" operator))
(defclass identifier-lexeme (form-lexeme) ()) (defclass literal-lexeme (form-lexeme) ()) (defclass integer-literal-lexeme (literal-lexeme) ()) (defclass decimal-integer-literal-lexeme (integer-literal-lexeme) ()) (defclass octal-integer-literal-lexeme (integer-literal-lexeme) ()) (defclass hex-integer-literal-lexeme (integer-literal-lexeme) ()) (defclass floating-point-literal-lexeme (literal-lexeme) ()) (defclass decimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ()) (defclass hexidecimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ()) ;;; A badly formed, or perhaps unfinished, number. (defclass bad-number-literal-lexeme (literal-lexeme) ()) (defclass boolean-literal-lexeme (literal-lexeme) ()) (defclass character-literal-lexeme (literal-lexeme) ()) (defclass incomplete-character-literal-lexeme (literal-lexeme incomplete-form-mixin) ()) (defclass string-literal-lexeme (literal-lexeme) ()) (defclass null-literal-lexeme (literal-lexeme) ()) (defclass separator-lexeme (form-lexeme) ()) (defclass punctuator-lexeme (form-lexeme) ())
;;; Separators: ( ) { } [ ] ; , .
(defclass semi-colon-lexeme (separator-lexeme) ()) (defclass comma-lexeme (separator-lexeme) ()) (defclass dot-lexeme (separator-lexeme) ()) (defclass delimiter-mixin () ()) (defclass opening-delimiter-mixin (delimiter-mixin) ()) (defclass closing-delimiter-mixin (delimiter-mixin) ())
(defclass left-bracket-lexeme (separator-lexeme opening-delimiter-mixin) ()) (defclass right-bracket-lexeme (separator-lexeme closing-delimiter-mixin) ()) (defclass left-parenthesis-lexeme (separator-lexeme opening-delimiter-mixin) ()) (defclass right-parenthesis-lexeme (separator-lexeme closing-delimiter-mixin) ()) (defclass left-brace-lexeme (separator-lexeme opening-delimiter-mixin) ()) (defclass right-brace-lexeme (separator-lexeme closing-delimiter-mixin) ())
;;; Operators: ;;; = < > ! ~ ? : ;;; == <= >= != && || ++ -- ;;; + - * / & | ^ % << >> >>> ;;; += -= *= /= &= |= ^= %= <<= >>= >>>=
(defmacro define-operators (&rest punctuator-names) `(progn ,@(loop for name in punctuator-names for real-name = (intern (concatenate 'string (string name) "-LEXEME") #.*package*) collecting `(defclass ,real-name (punctuator-lexeme) ()))))
(define-operators equal left-angle-bracket right-angle-bracket exclamation tilde question colon eq leq geq neq and-and or-or increment decrement plus minus asterisk slash ampersand pipe circumflex percent left-shift right-shift unsigned-right-shift plus-equal minus-equal asterisk-equal slash-equal ampersand-equal pipe-equal circumflex-equal percent-equal left-shift-equal right-shift-equal unsigned-right-shift-equal)
;;; This for annotated interfaces. (defclass ampersand-lexeme (punctuator-lexeme) ())
;;; And something for when we come across something completely wrong.
(defclass error-lexeme (java-lexeme) ())
;;; Some lexemes that will drive the parser and lexer.
(defclass line-comment-start-lexeme (java-lexeme) ()) (defclass long-comment-start-lexeme (java-lexeme) ()) (defclass comment-end-lexeme (java-lexeme) ()) (defclass string-start-lexeme (java-lexeme) ()) (defclass string-end-lexeme (java-lexeme) ())
;;; And some lexemes used inside strings and comments.
(defclass word-lexeme (java-lexeme) ()) (defclass delimiter-lexeme (java-lexeme) ()) (defclass text-lexeme (java-lexeme) ())
;;; Some predicates for recognizing the constituents of identifiers. ;;; "The $ character should be used only in mechanically generated ;;; source code or, rarely, to access preexisting names on legacy ;;; systems."
(defun java-letter-p (ch) (and (characterp ch) (or (alpha-char-p ch) (char= ch #_) (char= ch #$))))
(defun java-letter-or-digit-p (ch) (and (characterp ch) (or (alphanumericp ch) (char= ch #_) (char= ch #$))))
;;; Something to recognise escapes, including unicode escapes (which may ;;; have multiple #\u characters).
(defun eat-escape (scan) "Advance over an escape (after the #\), returning T if valid so far, or NIL." (macrolet ((fo () `(forward-object scan))) (case (object-after scan) ((#\b #\t #\n #\f #\r #" #' #\) (fo) t) (#\u (loop until (end-of-buffer-p scan) while (eql (object-after scan) #\u) do (fo)) (loop until (end-of-buffer-p scan) for char = (object-after scan) with count = 0 while (and (characterp char) (digit-char-p char 16)) do (fo) (incf count) finally (return (or (and (end-of-buffer-p scan) (< count 4)) (= count 4))))) ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7) (loop repeat 3 until (end-of-buffer-p scan) for char = (object-after scan) while (and (characterp char) (digit-char-p char 8)) do (fo)) t) (t nil))))
;;; The default method for skipping whitespace.
(defmethod skip-inter ((syntax java-syntax) state scan) (macrolet ((fo () `(forward-object scan))) (loop when (end-of-buffer-p scan) do (return nil) until (not (whitespacep syntax (object-after scan))) do (fo) finally (return t))))
;;; The lexing procedure used at the toplevel. Dispatches to lex-token ;;; at the appropriate time - except for standalone dots (where the lexer ;;; doesn't know whether it's looking at a potential number or the ;;; separator in a QualifiedIdentifier).
(defmethod lex ((syntax java-syntax) (state lexer-toplevel-state) scan) (macrolet ((fo () `(forward-object scan))) (let ((object (object-after scan))) (case object (#" (fo) (make-instance 'string-start-lexeme)) (#' (fo) (cond ((end-of-buffer-p scan) (make-instance 'incomplete-character-literal-lexeme)) (t (cond ((eql (object-after scan) #\) (fo) (if (not (end-of-buffer-p scan)) (unless (eat-escape scan) (return-from lex (make-instance 'error-lexeme))))) (t (fo))) (cond ((end-of-buffer-p scan) (make-instance 'incomplete-character-literal-lexeme)) ((eql (object-after scan) #') (fo) (make-instance 'character-literal-lexeme)) (t (make-instance 'error-lexeme)))))) (#[ (fo) (make-instance 'left-bracket-lexeme)) (#] (fo) (make-instance 'right-bracket-lexeme)) (#( (fo) (make-instance 'left-parenthesis-lexeme)) (#) (fo) (make-instance 'right-parenthesis-lexeme)) (#{ (fo) (make-instance 'left-brace-lexeme)) (#} (fo) (make-instance 'right-brace-lexeme)) (#@ (fo) (make-instance 'ampersand-lexeme)) (#. (fo) (if (end-of-buffer-p scan) (make-instance 'dot-lexeme) (cond ((and (characterp (object-after scan)) (digit-char-p (object-after scan))) (backward-object scan) (lex-token syntax scan)) (t (make-instance 'dot-lexeme))))) (#- (fo) (if (end-of-buffer-p scan) (make-instance 'minus-lexeme) (case (object-after scan) (#- (fo) (make-instance 'decrement-lexeme)) (#= (fo) (make-instance 'minus-equal-lexeme)) (t (make-instance 'minus-lexeme))))) (#+ (fo) (if (end-of-buffer-p scan) (make-instance 'plus-lexeme) (case (object-after scan) (#+ (fo) (make-instance 'increment-lexeme)) (#= (fo) (make-instance 'plus-equal-lexeme)) (t (make-instance 'plus-lexeme))))) (#& (fo) (if (end-of-buffer-p scan) (make-instance 'ampersand-lexeme) (case (object-after scan) (#& (fo) (make-instance 'and-and-lexeme)) (#= (fo) (make-instance 'ampersand-equal-lexeme)) (t (make-instance 'ampersand-lexeme))))) (#* (fo) (if (end-of-buffer-p scan) (make-instance 'asterisk-lexeme) (cond ((eql (object-after scan) #=) (fo) (make-instance 'asterisk-equal-lexeme)) (t (make-instance 'asterisk-lexeme))))) (#~ (fo) (make-instance 'tilde-lexeme)) (#! (fo) (if (end-of-buffer-p scan) (make-instance 'exclamation-lexeme) (cond ((eql (object-after scan) #=) (fo) (make-instance 'neq-lexeme)) (t (make-instance 'exclamation-lexeme))))) (#/ (fo) (if (end-of-buffer-p scan) (make-instance 'slash-lexeme) (case (object-after scan) (#= (fo) (make-instance 'slash-equal-lexeme)) (#* (fo) (make-instance 'long-comment-start-lexeme)) (#/ (fo) (make-instance 'line-comment-start-lexeme)) (t (make-instance 'slash-lexeme))))) (#% (fo) (if (end-of-buffer-p scan)
[945 lines skipped] --- /project/climacs/cvsroot/climacs/java-syntax-commands.lisp 2007/05/01 17:46:38 NONE +++ /project/climacs/cvsroot/climacs/java-syntax-commands.lisp 2007/05/01 17:46:38 1.1
[1090 lines skipped]