[climacs-cvs] CVS climacs
dmurray
dmurray at common-lisp.net
Tue May 1 17:46:38 UTC 2007
Update of /project/climacs/cvsroot/climacs
In directory clnet:/tmp/cvs-serv12086
Added Files:
java-syntax.lisp java-syntax-commands.lisp
Log Message:
Initial checkin.
--- /project/climacs/cvsroot/climacs/java-syntax.lisp 2007/05/01 17:46:38 NONE
+++ /project/climacs/cvsroot/climacs/java-syntax.lisp 2007/05/01 17:46:38 1.1
;; -*- Mode: Lisp; Package: CLIMACS-JAVA-SYNTAX -*-
;;; (c) copyright 2005 by
;;; Robert Strandh (strandh at labri.fr)
;;; (c) copyright 2006 by
;;; Troels Henriksen (athas at sigkill.dk)
;;; (c) copyright 2007 by
;;; John Q Splittist (splittist at gmail.com)
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA 02111-1307 USA.
;;;# Syntax module for analysing Java(TM)
(in-package :climacs-java-syntax)
;;;# The command table.
(define-syntax-command-table java-table
:errorp nil)
;;;# The syntax object.
;;;
;;; We could add options here.
(define-syntax java-syntax (lr-syntax-mixin fundamental-syntax)
((package :accessor package-of
:documentation "A list of strings being the components of
the `package' definition, if any."))
(:name "Java")
(:pathname-types "java" "jav")
(:command-table java-table)
(:default-initargs :initial-state |initial-state |))
;;; Now some ways to indicate what the syntax is. Extra details could be
;;; added. For now we'll show the package, if any.
(defmethod name-for-info-pane ((syntax java-syntax) &key pane)
(declare (ignore pane))
(format nil "Java~@[:~{~A~^.~}~]"
(package-of syntax)))
;;;# Lexing.
;;;
;;; First we define the different states the lexer can be in (as triggered
;;; by the parser.)
(define-lexer-state lexer-string-state ()
()
(:documentation "In this state, the lexer is working inside a string
delimited by double quote characters."))
(define-lexer-state lexer-line-comment-state ()
()
(:documentation "In this state, the lexer is working inside a line
comment starting with //."))
(define-lexer-state lexer-long-comment-state ()
()
(:documentation "In this state, the lexer is working inside a long
comment delimited by /* and */."))
;;; And then we define the various elements of the language.
;;;
;;; First, some high-level concepts:
(defclass java-nonterminal (nonterminal) ())
(defclass form (java-nonterminal) ())
;;; Since we're dealing with things that might not be finished,
;;; we allow for incomplete forms at the end of the buffer.
(defclass complete-form-mixin () ())
(defclass incomplete-form-mixin () ())
(defclass comment (java-nonterminal) ())
(defclass line-comment (java-comment) ())
(defclass long-comment (java-comment) ())
;;; Of course, sometimes people type things that don't (yet) comply
;;; with the language specification.
(defclass error-symbol (java-nonterminal) ())
;;; Finally, we define the relevant lexeme. We will check the `ink' and
;;; and the `face' later during redisplay.
(defclass java-lexeme (lexeme)
((ink)
(face)))
(defclass form-lexeme (form java-lexeme) ())
;;; Keywords come in various flavours.
(defclass keyword-lexeme (form-lexeme) ())
(defclass basic-type () ())
(defclass modifier () ())
(defclass operator () ())
(eval-when (:compile-toplevel :load-toplevel :execute)
(defun spelling-to-symbol (name)
(intern (concatenate 'string name "-LEXEME") #.*package*)))
(defmacro define-keywords (&rest keyword-names)
`(progn
,@(loop for (name . supers) in keyword-names
for real-name = (spelling-to-symbol name)
collecting `(defclass ,real-name (,@ supers keyword-lexeme) ())
into defclasses
collecting name into names
finally (return (cons `(defparameter *keyword-spellings* ',names)
defclasses)))))
(define-keywords
("abstract" modifier)
("assert" operator)
("boolean" basic-type)
("break" operator)
("byte" basic-type)
("case" operator)
("catch" operator)
("char" basic-type)
("class" operator)
("const") ; reserved but not used
("continue" operator)
("default" operator)
("do" operator)
("double" basic-type)
("else" operator)
("enum" operator)
("extends" operator)
("final" modifier)
("finally" operator)
("float" basic-type)
("for" operator)
("if" operator)
("int" basic-type)
("goto") ; reserved but not used
("implements" operator)
("import" operator)
("instanceof" operator)
("interface" operator)
("long" basic-type)
("native" basic-type)
("new" operator)
("package" operator)
("private" operator)
("package" operator)
("private" modifier)
("protected" modifier)
("public" modifier)
("return" operator)
("short" basic-type)
("static" modifier)
("striftfp" modifier)
("super" operator)
("switch" operator)
("synchronized" modifier)
("this" operator)
("throw" operator)
("throws" operator)
("transient" modifier)
("try" operator)
("void" operator)
("volatile" modifier)
("while" operator))
(defclass identifier-lexeme (form-lexeme) ())
(defclass literal-lexeme (form-lexeme) ())
(defclass integer-literal-lexeme (literal-lexeme) ())
(defclass decimal-integer-literal-lexeme (integer-literal-lexeme) ())
(defclass octal-integer-literal-lexeme (integer-literal-lexeme) ())
(defclass hex-integer-literal-lexeme (integer-literal-lexeme) ())
(defclass floating-point-literal-lexeme (literal-lexeme) ())
(defclass decimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ())
(defclass hexidecimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ())
;;; A badly formed, or perhaps unfinished, number.
(defclass bad-number-literal-lexeme (literal-lexeme) ())
(defclass boolean-literal-lexeme (literal-lexeme) ())
(defclass character-literal-lexeme (literal-lexeme) ())
(defclass incomplete-character-literal-lexeme (literal-lexeme incomplete-form-mixin) ())
(defclass string-literal-lexeme (literal-lexeme) ())
(defclass null-literal-lexeme (literal-lexeme) ())
(defclass separator-lexeme (form-lexeme) ())
(defclass punctuator-lexeme (form-lexeme) ())
;;; Separators: ( ) { } [ ] ; , .
(defclass semi-colon-lexeme (separator-lexeme) ())
(defclass comma-lexeme (separator-lexeme) ())
(defclass dot-lexeme (separator-lexeme) ())
(defclass delimiter-mixin () ())
(defclass opening-delimiter-mixin (delimiter-mixin) ())
(defclass closing-delimiter-mixin (delimiter-mixin) ())
(defclass left-bracket-lexeme (separator-lexeme opening-delimiter-mixin) ())
(defclass right-bracket-lexeme (separator-lexeme closing-delimiter-mixin) ())
(defclass left-parenthesis-lexeme (separator-lexeme opening-delimiter-mixin) ())
(defclass right-parenthesis-lexeme (separator-lexeme closing-delimiter-mixin) ())
(defclass left-brace-lexeme (separator-lexeme opening-delimiter-mixin) ())
(defclass right-brace-lexeme (separator-lexeme closing-delimiter-mixin) ())
;;; Operators:
;;; = < > ! ~ ? :
;;; == <= >= != && || ++ --
;;; + - * / & | ^ % << >> >>>
;;; += -= *= /= &= |= ^= %= <<= >>= >>>=
(defmacro define-operators (&rest punctuator-names)
`(progn
,@(loop for name in punctuator-names
for real-name = (intern (concatenate 'string
(string name) "-LEXEME")
#.*package*)
collecting `(defclass ,real-name (punctuator-lexeme) ()))))
(define-operators
equal left-angle-bracket right-angle-bracket exclamation tilde question
colon
eq leq geq neq and-and or-or increment decrement
plus minus asterisk slash ampersand pipe circumflex percent
left-shift right-shift unsigned-right-shift
plus-equal minus-equal asterisk-equal slash-equal ampersand-equal pipe-equal
circumflex-equal percent-equal left-shift-equal right-shift-equal
unsigned-right-shift-equal)
;;; This for annotated interfaces.
(defclass ampersand-lexeme (punctuator-lexeme) ())
;;; And something for when we come across something completely wrong.
(defclass error-lexeme (java-lexeme) ())
;;; Some lexemes that will drive the parser and lexer.
(defclass line-comment-start-lexeme (java-lexeme) ())
(defclass long-comment-start-lexeme (java-lexeme) ())
(defclass comment-end-lexeme (java-lexeme) ())
(defclass string-start-lexeme (java-lexeme) ())
(defclass string-end-lexeme (java-lexeme) ())
;;; And some lexemes used inside strings and comments.
(defclass word-lexeme (java-lexeme) ())
(defclass delimiter-lexeme (java-lexeme) ())
(defclass text-lexeme (java-lexeme) ())
;;; Some predicates for recognizing the constituents of identifiers.
;;; "The $ character should be used only in mechanically generated
;;; source code or, rarely, to access preexisting names on legacy
;;; systems."
(defun java-letter-p (ch)
(and (characterp ch)
(or (alpha-char-p ch)
(char= ch #\_)
(char= ch #\$))))
(defun java-letter-or-digit-p (ch)
(and (characterp ch)
(or (alphanumericp ch)
(char= ch #\_)
(char= ch #\$))))
;;; Something to recognise escapes, including unicode escapes (which may
;;; have multiple #\u characters).
(defun eat-escape (scan)
"Advance over an escape (after the #\\), returning T if valid so far, or NIL."
(macrolet ((fo () `(forward-object scan)))
(case (object-after scan)
((#\b #\t #\n #\f #\r #\" #\' #\\)
(fo) t)
(#\u
(loop until (end-of-buffer-p scan)
while (eql (object-after scan) #\u)
do (fo))
(loop until (end-of-buffer-p scan)
for char = (object-after scan)
with count = 0
while (and (characterp char)
(digit-char-p char 16))
do (fo) (incf count)
finally (return (or (and (end-of-buffer-p scan)
(< count 4))
(= count 4)))))
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
(loop repeat 3
until (end-of-buffer-p scan)
for char = (object-after scan)
while (and (characterp char)
(digit-char-p char 8))
do (fo))
t)
(t nil))))
;;; The default method for skipping whitespace.
(defmethod skip-inter ((syntax java-syntax) state scan)
(macrolet ((fo () `(forward-object scan)))
(loop when (end-of-buffer-p scan)
do (return nil)
until (not (whitespacep syntax (object-after scan)))
do (fo)
finally (return t))))
;;; The lexing procedure used at the toplevel. Dispatches to lex-token
;;; at the appropriate time - except for standalone dots (where the lexer
;;; doesn't know whether it's looking at a potential number or the
;;; separator in a QualifiedIdentifier).
(defmethod lex ((syntax java-syntax) (state lexer-toplevel-state) scan)
(macrolet ((fo () `(forward-object scan)))
(let ((object (object-after scan)))
(case object
(#\" (fo) (make-instance 'string-start-lexeme))
(#\' (fo)
(cond ((end-of-buffer-p scan)
(make-instance 'incomplete-character-literal-lexeme))
(t (cond ((eql (object-after scan) #\\)
(fo)
(if (not (end-of-buffer-p scan))
(unless (eat-escape scan)
(return-from lex
(make-instance 'error-lexeme)))))
(t (fo)))
(cond ((end-of-buffer-p scan)
(make-instance 'incomplete-character-literal-lexeme))
((eql (object-after scan) #\')
(fo)
(make-instance 'character-literal-lexeme))
(t (make-instance 'error-lexeme))))))
(#\[ (fo) (make-instance 'left-bracket-lexeme))
(#\] (fo) (make-instance 'right-bracket-lexeme))
(#\( (fo) (make-instance 'left-parenthesis-lexeme))
(#\) (fo) (make-instance 'right-parenthesis-lexeme))
(#\{ (fo) (make-instance 'left-brace-lexeme))
(#\} (fo) (make-instance 'right-brace-lexeme))
(#\@ (fo) (make-instance 'ampersand-lexeme))
(#\. (fo) (if (end-of-buffer-p scan)
(make-instance 'dot-lexeme)
(cond ((and (characterp (object-after scan))
(digit-char-p (object-after scan)))
(backward-object scan)
(lex-token syntax scan))
(t (make-instance 'dot-lexeme)))))
(#\- (fo) (if (end-of-buffer-p scan)
(make-instance 'minus-lexeme)
(case (object-after scan)
(#\- (fo) (make-instance 'decrement-lexeme))
(#\= (fo) (make-instance 'minus-equal-lexeme))
(t (make-instance 'minus-lexeme)))))
(#\+ (fo) (if (end-of-buffer-p scan)
(make-instance 'plus-lexeme)
(case (object-after scan)
(#\+ (fo) (make-instance 'increment-lexeme))
(#\= (fo) (make-instance 'plus-equal-lexeme))
(t (make-instance 'plus-lexeme)))))
(#\& (fo) (if (end-of-buffer-p scan)
(make-instance 'ampersand-lexeme)
(case (object-after scan)
(#\& (fo) (make-instance 'and-and-lexeme))
(#\= (fo) (make-instance 'ampersand-equal-lexeme))
(t (make-instance 'ampersand-lexeme)))))
(#\* (fo) (if (end-of-buffer-p scan)
(make-instance 'asterisk-lexeme)
(cond ((eql (object-after scan) #\=)
(fo)
(make-instance 'asterisk-equal-lexeme))
(t (make-instance 'asterisk-lexeme)))))
(#\~ (fo) (make-instance 'tilde-lexeme))
(#\! (fo) (if (end-of-buffer-p scan)
(make-instance 'exclamation-lexeme)
(cond ((eql (object-after scan) #\=)
(fo)
(make-instance 'neq-lexeme))
(t (make-instance 'exclamation-lexeme)))))
(#\/ (fo) (if (end-of-buffer-p scan)
(make-instance 'slash-lexeme)
(case (object-after scan)
(#\= (fo) (make-instance 'slash-equal-lexeme))
(#\* (fo) (make-instance 'long-comment-start-lexeme))
(#\/ (fo) (make-instance 'line-comment-start-lexeme))
(t (make-instance 'slash-lexeme)))))
(#\% (fo) (if (end-of-buffer-p scan)
[945 lines skipped]
--- /project/climacs/cvsroot/climacs/java-syntax-commands.lisp 2007/05/01 17:46:38 NONE
+++ /project/climacs/cvsroot/climacs/java-syntax-commands.lisp 2007/05/01 17:46:38 1.1
[1090 lines skipped]
More information about the Climacs-cvs
mailing list