[climacs-cvs] CVS climacs

dmurray dmurray at common-lisp.net
Tue May 1 17:46:38 UTC 2007


Update of /project/climacs/cvsroot/climacs
In directory clnet:/tmp/cvs-serv12086

Added Files:
	java-syntax.lisp java-syntax-commands.lisp 
Log Message:
Initial checkin.



--- /project/climacs/cvsroot/climacs/java-syntax.lisp	2007/05/01 17:46:38	NONE
+++ /project/climacs/cvsroot/climacs/java-syntax.lisp	2007/05/01 17:46:38	1.1
;; -*- Mode: Lisp; Package: CLIMACS-JAVA-SYNTAX -*-

;;;  (c) copyright 2005 by
;;;           Robert Strandh (strandh at labri.fr)
;;;  (c) copyright 2006 by
;;;           Troels Henriksen (athas at sigkill.dk)
;;;  (c) copyright 2007 by
;;;           John Q Splittist (splittist at gmail.com)
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA  02111-1307  USA.

;;;# Syntax module for analysing Java(TM)

(in-package :climacs-java-syntax)

;;;# The command table.

(define-syntax-command-table java-table
    :errorp nil)

;;;# The syntax object.
;;;
;;; We could add options here.

(define-syntax java-syntax (lr-syntax-mixin fundamental-syntax)
  ((package :accessor package-of
	    :documentation "A list of strings being the components of
the `package' definition, if any."))
  (:name "Java")
  (:pathname-types "java" "jav")
  (:command-table java-table)
  (:default-initargs :initial-state |initial-state |))

;;; Now some ways to indicate what the syntax is. Extra details could be
;;; added. For now we'll show the package, if any.

(defmethod name-for-info-pane ((syntax java-syntax) &key pane)
  (declare (ignore pane))
  (format nil "Java~@[:~{~A~^.~}~]"
	  (package-of syntax)))

;;;# Lexing.
;;;
;;; First we define the different states the lexer can be in (as triggered
;;; by the parser.)

(define-lexer-state lexer-string-state ()
  ()
  (:documentation "In this state, the lexer is working inside a string
    delimited by double quote characters."))

(define-lexer-state lexer-line-comment-state ()
  ()
  (:documentation "In this state, the lexer is working inside a line
    comment starting with //."))

(define-lexer-state lexer-long-comment-state ()
  ()
  (:documentation "In this state, the lexer is working inside a long
    comment delimited by /* and */."))

;;; And then we define the various elements of the language.
;;;
;;; First, some high-level concepts:

(defclass java-nonterminal (nonterminal) ())

(defclass form (java-nonterminal) ())

;;; Since we're dealing with things that might not be finished,
;;; we allow for incomplete forms at the end of the buffer.

(defclass complete-form-mixin () ())
(defclass incomplete-form-mixin () ())

(defclass comment (java-nonterminal) ())
(defclass line-comment (java-comment) ())
(defclass long-comment (java-comment) ())

;;; Of course, sometimes people type things that don't (yet) comply
;;; with the language specification.

(defclass error-symbol (java-nonterminal) ())

;;; Finally, we define the relevant lexeme. We will check the `ink' and
;;; and the `face' later during redisplay.

(defclass java-lexeme (lexeme)
  ((ink)
   (face)))

(defclass form-lexeme (form java-lexeme) ())

;;; Keywords come in various flavours.

(defclass keyword-lexeme (form-lexeme) ())

(defclass basic-type () ())
(defclass modifier () ())
(defclass operator () ())

(eval-when (:compile-toplevel :load-toplevel :execute)
(defun spelling-to-symbol (name)
  (intern (concatenate 'string name "-LEXEME") #.*package*)))

(defmacro define-keywords (&rest keyword-names)
  `(progn
     ,@(loop for (name . supers) in keyword-names
	     for real-name = (spelling-to-symbol name)
	     collecting `(defclass ,real-name (,@ supers keyword-lexeme) ())
	       into defclasses
	     collecting name into names
	     finally (return (cons `(defparameter *keyword-spellings* ',names)
				   defclasses)))))

(define-keywords 
    ("abstract" modifier)
    ("assert" operator)
    ("boolean" basic-type)
    ("break" operator)
    ("byte" basic-type)
    ("case" operator)
    ("catch" operator)
    ("char" basic-type)
    ("class" operator)
    ("const") 				; reserved but not used
    ("continue" operator)
    ("default" operator)
    ("do" operator)
    ("double" basic-type)
    ("else" operator)
    ("enum" operator)
    ("extends" operator)
    ("final" modifier)
    ("finally" operator)
    ("float" basic-type)
    ("for" operator)
    ("if" operator)
    ("int" basic-type)
    ("goto")				; reserved but not used
    ("implements" operator)
    ("import" operator)
    ("instanceof" operator)
    ("interface" operator)
    ("long" basic-type)
    ("native" basic-type)
    ("new" operator)
    ("package" operator)
    ("private" operator)
    ("package" operator)
    ("private" modifier)
    ("protected" modifier)
    ("public" modifier)
    ("return" operator)
    ("short" basic-type)
    ("static" modifier)
    ("striftfp" modifier)
    ("super" operator)
    ("switch" operator)
    ("synchronized" modifier)
    ("this" operator)
    ("throw" operator)
    ("throws" operator)
    ("transient" modifier)
    ("try" operator)
    ("void" operator)
    ("volatile" modifier)
    ("while" operator))

(defclass identifier-lexeme (form-lexeme) ())
(defclass literal-lexeme (form-lexeme) ())
(defclass integer-literal-lexeme (literal-lexeme) ())
(defclass decimal-integer-literal-lexeme (integer-literal-lexeme) ())
(defclass octal-integer-literal-lexeme (integer-literal-lexeme) ())
(defclass hex-integer-literal-lexeme (integer-literal-lexeme) ())
(defclass floating-point-literal-lexeme (literal-lexeme) ())
(defclass decimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ())
(defclass hexidecimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ())
;;; A badly formed, or perhaps unfinished, number.
(defclass bad-number-literal-lexeme (literal-lexeme) ())
(defclass boolean-literal-lexeme (literal-lexeme) ())
(defclass character-literal-lexeme (literal-lexeme) ())
(defclass incomplete-character-literal-lexeme (literal-lexeme incomplete-form-mixin) ())
(defclass string-literal-lexeme (literal-lexeme) ())
(defclass null-literal-lexeme (literal-lexeme) ())
(defclass separator-lexeme (form-lexeme) ())
(defclass punctuator-lexeme (form-lexeme) ())

;;; Separators: ( ) { } [ ] ; , .

(defclass semi-colon-lexeme (separator-lexeme) ())
(defclass comma-lexeme (separator-lexeme) ())
(defclass dot-lexeme (separator-lexeme) ())
(defclass delimiter-mixin () ())
(defclass opening-delimiter-mixin (delimiter-mixin) ())
(defclass closing-delimiter-mixin (delimiter-mixin) ())

(defclass left-bracket-lexeme (separator-lexeme opening-delimiter-mixin) ())
(defclass right-bracket-lexeme (separator-lexeme closing-delimiter-mixin) ())
(defclass left-parenthesis-lexeme (separator-lexeme opening-delimiter-mixin) ())
(defclass right-parenthesis-lexeme (separator-lexeme closing-delimiter-mixin) ())
(defclass left-brace-lexeme (separator-lexeme opening-delimiter-mixin) ())
(defclass right-brace-lexeme (separator-lexeme closing-delimiter-mixin) ())

;;; Operators:
;;; = < > ! ~ ? :
;;; == <= >= != && || ++ --
;;; +  -  *  /  &  |  ^  %  <<  >>  >>>
;;; += -= *= /= &= |= ^= %= <<= >>= >>>=

(defmacro define-operators (&rest punctuator-names)
  `(progn
     ,@(loop for name in punctuator-names
	     for real-name = (intern (concatenate 'string 
						 (string name) "-LEXEME")
				   #.*package*)
	     collecting `(defclass ,real-name (punctuator-lexeme) ()))))

(define-operators 
    equal left-angle-bracket right-angle-bracket exclamation tilde question 
    colon
    eq leq geq neq and-and or-or increment decrement
    plus minus asterisk slash ampersand pipe circumflex percent
    left-shift right-shift unsigned-right-shift
    plus-equal minus-equal asterisk-equal slash-equal ampersand-equal pipe-equal
    circumflex-equal percent-equal left-shift-equal right-shift-equal
    unsigned-right-shift-equal)

;;; This for annotated interfaces.
(defclass ampersand-lexeme (punctuator-lexeme) ())

;;; And something for when we come across something completely wrong.

(defclass error-lexeme (java-lexeme) ())

;;; Some lexemes that will drive the parser and lexer.

(defclass line-comment-start-lexeme (java-lexeme) ())
(defclass long-comment-start-lexeme (java-lexeme) ())
(defclass comment-end-lexeme (java-lexeme) ())
(defclass string-start-lexeme (java-lexeme) ())
(defclass string-end-lexeme (java-lexeme) ())

;;; And some lexemes used inside strings and comments.

(defclass word-lexeme (java-lexeme) ())
(defclass delimiter-lexeme (java-lexeme) ())
(defclass text-lexeme (java-lexeme) ())

;;; Some predicates for recognizing the constituents of identifiers.
;;; "The $ character should be used only in mechanically generated
;;;  source code or, rarely, to access preexisting names on legacy
;;;  systems."

(defun java-letter-p (ch)
  (and (characterp ch)
       (or (alpha-char-p ch)
	   (char= ch #\_)
	   (char= ch #\$))))

(defun java-letter-or-digit-p (ch)
  (and (characterp ch)
       (or (alphanumericp ch)
	   (char= ch #\_)
	   (char= ch #\$))))

;;; Something to recognise escapes, including unicode escapes (which may
;;; have multiple #\u characters).

(defun eat-escape (scan)
  "Advance over an escape (after the #\\), returning T if valid so far, or NIL."
  (macrolet ((fo () `(forward-object scan)))
    (case (object-after scan)
      ((#\b #\t #\n #\f #\r #\" #\' #\\)
       (fo) t)
      (#\u
       (loop until (end-of-buffer-p scan)
	     while (eql (object-after scan) #\u)
	     do (fo))
       (loop until (end-of-buffer-p scan)
	     for char = (object-after scan)
	     with count = 0
	     while (and (characterp char)
			(digit-char-p char 16))
	     do (fo) (incf count)
	     finally (return (or (and (end-of-buffer-p scan)
				      (< count 4))
				 (= count 4)))))
      ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
       (loop repeat 3
	     until (end-of-buffer-p scan)
	     for char = (object-after scan)
	     while (and (characterp char)
			(digit-char-p char 8))
	     do (fo))
       t)
      (t nil))))

;;; The default method for skipping whitespace.

(defmethod skip-inter ((syntax java-syntax) state scan)
  (macrolet ((fo () `(forward-object scan)))
    (loop when (end-of-buffer-p scan)
	    do (return nil)
	  until (not (whitespacep syntax (object-after scan)))
	  do (fo)
	  finally (return t))))

;;; The lexing procedure used at the toplevel. Dispatches to lex-token
;;; at the appropriate time - except for standalone dots (where the lexer
;;; doesn't know whether it's looking at a potential number or the
;;; separator in a QualifiedIdentifier).

(defmethod lex ((syntax java-syntax) (state lexer-toplevel-state) scan)
  (macrolet ((fo () `(forward-object scan)))
    (let ((object (object-after scan)))
      (case object
	(#\" (fo) (make-instance 'string-start-lexeme))
	(#\' (fo)
	     (cond ((end-of-buffer-p scan)
		    (make-instance 'incomplete-character-literal-lexeme))
		   (t (cond ((eql (object-after scan) #\\)
			     (fo)
			     (if (not (end-of-buffer-p scan))
				 (unless (eat-escape scan)
				   (return-from lex 
				     (make-instance 'error-lexeme)))))
			    (t (fo)))
		      (cond ((end-of-buffer-p scan)
			     (make-instance 'incomplete-character-literal-lexeme))
			    ((eql (object-after scan) #\')
			     (fo)
			     (make-instance 'character-literal-lexeme))
			    (t (make-instance 'error-lexeme))))))
	(#\[ (fo) (make-instance 'left-bracket-lexeme))
	(#\] (fo) (make-instance 'right-bracket-lexeme))
	(#\( (fo) (make-instance 'left-parenthesis-lexeme))
	(#\) (fo) (make-instance 'right-parenthesis-lexeme))
	(#\{ (fo) (make-instance 'left-brace-lexeme))
	(#\} (fo) (make-instance 'right-brace-lexeme))
	(#\@ (fo) (make-instance 'ampersand-lexeme))
	(#\. (fo) (if (end-of-buffer-p scan)
		      (make-instance 'dot-lexeme)
		      (cond ((and (characterp (object-after scan))
				  (digit-char-p (object-after scan)))
			     (backward-object scan)
			     (lex-token syntax scan))
			    (t (make-instance 'dot-lexeme)))))
	(#\- (fo) (if (end-of-buffer-p scan)
		      (make-instance 'minus-lexeme)
		      (case (object-after scan)
			(#\- (fo) (make-instance 'decrement-lexeme))
			(#\= (fo) (make-instance 'minus-equal-lexeme)) 
			(t (make-instance 'minus-lexeme)))))
	(#\+ (fo) (if (end-of-buffer-p scan)
		      (make-instance 'plus-lexeme)
		      (case (object-after scan)
			(#\+ (fo) (make-instance 'increment-lexeme))
			(#\= (fo) (make-instance 'plus-equal-lexeme))
			(t (make-instance 'plus-lexeme)))))
	(#\& (fo) (if (end-of-buffer-p scan)
		      (make-instance 'ampersand-lexeme)
		      (case (object-after scan)
			(#\& (fo) (make-instance 'and-and-lexeme))
			(#\= (fo) (make-instance 'ampersand-equal-lexeme))
			(t (make-instance 'ampersand-lexeme)))))
	(#\* (fo) (if (end-of-buffer-p scan)
		      (make-instance 'asterisk-lexeme)
		      (cond ((eql (object-after scan) #\=)
			     (fo)
			     (make-instance 'asterisk-equal-lexeme))
			    (t (make-instance 'asterisk-lexeme)))))
	(#\~ (fo) (make-instance 'tilde-lexeme))
	(#\! (fo) (if (end-of-buffer-p scan)
		      (make-instance 'exclamation-lexeme)
		      (cond ((eql (object-after scan) #\=)
			     (fo)
			     (make-instance 'neq-lexeme))
			    (t (make-instance 'exclamation-lexeme)))))
	(#\/ (fo) (if (end-of-buffer-p scan)
		      (make-instance 'slash-lexeme)
		      (case (object-after scan)
			(#\= (fo) (make-instance 'slash-equal-lexeme))
			(#\* (fo) (make-instance 'long-comment-start-lexeme))
			(#\/ (fo) (make-instance 'line-comment-start-lexeme))
			(t (make-instance 'slash-lexeme)))))
	(#\% (fo) (if (end-of-buffer-p scan)

[945 lines skipped]
--- /project/climacs/cvsroot/climacs/java-syntax-commands.lisp	2007/05/01 17:46:38	NONE
+++ /project/climacs/cvsroot/climacs/java-syntax-commands.lisp	2007/05/01 17:46:38	1.1

[1090 lines skipped]



More information about the Climacs-cvs mailing list