;;; -*-lisp-*-
;;;
;;; Protocol for lexical analysis
;;;
;;; (c) 2009 Straylight/Edgeware
;;;

;;;----- Licensing notice ---------------------------------------------------
;;;
;;; This file is part of the Sensible Object Design, an object system for C.
;;;
;;; SOD is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 2 of the License, or
;;; (at your option) any later version.
;;;
;;; SOD is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with SOD; if not, write to the Free Software Foundation,
;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

(cl:in-package #:sod)

;;;--------------------------------------------------------------------------
;;; Accessors.

(export 'lexer-char)
(defgeneric lexer-char (lexer)
  (:documentation
   "Return the current lookahead character from the LEXER.

   When the lexer is first created, there is no lookahead character: you
   must `prime the pump' by calling NEXT-CHAR.  The lexer represents
   encountering the end of its input stream by setting the lookahead
   character to nil.  At this point it is still possible to push back
   characters."))

;;;--------------------------------------------------------------------------
;;; Formatting tokens.

(defgeneric format-token (token-type &optional token-value)
  (:documentation
   "Return a string describing a token with the specified type and value.

   The description is meant for human readers: REQUIRE-TOKEN uses it to
   build its `Expected ... but found ...' message.  TOKEN-VALUE is only
   consulted for identifier tokens.")

  ;; Token classes with no useful printed value get a fixed description in
  ;; angle brackets, matching the `<...>' convention used for non-graphic
  ;; characters below.
  (:method ((token-type (eql :eof)) &optional token-value)
    (declare (ignore token-value))
    "<end-of-file>")
  (:method ((token-type (eql :string)) &optional token-value)
    (declare (ignore token-value))
    "<string-literal>")
  (:method ((token-type (eql :char)) &optional token-value)
    (declare (ignore token-value))
    "<character-literal>")

  ;; Identifiers: `~@[...~]' includes the identifier's name only when
  ;; TOKEN-VALUE is non-nil, so the method also works without a value.
  (:method ((token-type (eql :id)) &optional token-value)
    (format nil "<identifier~@[ `~A'~]>" token-value))

  ;; Any other symbol must be a keyword; it is shown lowercased in quotes.
  (:method ((token-type symbol) &optional token-value)
    (declare (ignore token-value))
    (check-type token-type keyword)
    (format nil "`~(~A~)'" token-type))

  ;; Characters: graphic characters other than space are quoted literally;
  ;; anything else is spelled out by name (`~:C') inside angle brackets.
  (:method ((token-type character) &optional token-value)
    (declare (ignore token-value))
    (format nil "~:[<~:C>~;`~C'~]"
            (and (graphic-char-p token-type)
                 (char/= token-type #\space))
            token-type)))

;;;--------------------------------------------------------------------------
;;; Reading and pushing back characters.

(export 'next-char)
(defgeneric next-char (lexer)
  (:documentation
   "Fetch the next character from the LEXER's input stream.

   Read a character from the input stream, and store it in the LEXER's CHAR
   slot.  The character stored is returned.  If characters have been pushed
   back then pushed-back characters are used instead of the input stream.  If
   there are no more characters to be read then the lookahead character is
   nil.  Returns the new lookahead character.

   (This function is primarily intended for the use of lexer subclasses.)"))

(export 'pushback-char)
(defgeneric pushback-char (lexer char)
  (:documentation
   "Push the CHAR back into the lexer.

   Make CHAR be the current lookahead character (stored in the LEXER's CHAR
   slot).  The previous lookahead character is pushed down, and will be made
   available again once this character is consumed by NEXT-CHAR.

   (This function is primarily intended for the use of lexer subclasses.)"))

(defgeneric fixup-stream* (lexer thunk)
  (:documentation
   "Helper function for WITH-LEXER-STREAM.

   This function does the main work for WITH-LEXER-STREAM.  The THUNK is
   invoked on a single argument, the LEXER's underlying STREAM."))

(export 'with-lexer-stream)
(defmacro with-lexer-stream ((streamvar lexer) &body body)
  "Evaluate BODY with STREAMVAR bound to the LEXER's input stream.

   The STREAM is fixed up so that the next character read (e.g., using
   READ-CHAR) will be the lexer's current lookahead character.  Once the BODY
   completes, the next character in the stream is read and set as the
   lookahead character.  It is an error if the lexer has pushed-back
   characters (since these can't be pushed back into the input stream
   properly)."

  ;; The body is wrapped in a one-argument closure and handed to
  ;; FIXUP-STREAM*, which does the actual work.
  `(fixup-stream* ,lexer
                  (lambda (,streamvar)
                    ,@body)))

;;;--------------------------------------------------------------------------
;;; Reading and pushing back tokens.

(export 'scan-token)
(defgeneric scan-token (lexer)
  (:documentation
   "Internal protocol for scanning tokens from an input stream.

   Implementing a method on this function is the main responsibility of LEXER
   subclasses; it is called by the user-facing NEXT-TOKEN function.

   The method should consume characters (using NEXT-CHAR) as necessary, and
   return two values: a token type and token value.  These will be stored in
   the corresponding slots in the lexer object in order to provide the user
   with one-token lookahead."))

(export 'next-token)
(defgeneric next-token (lexer)
  (:documentation
   "Scan a token from an input stream.

   This function scans a token from an input stream.  Two values are
   returned: a `token type' and a `token value'.  These are opaque to the
   LEXER base class, but the intent is that the token type be significant to
   determining the syntax of the input, while the token value carries any
   additional information about the token's semantic content.  The token
   type and token value are also made available for lookahead via accessors
   TOKEN-TYPE and TOKEN-VALUE on the LEXER object.

   The new lookahead token type and value are returned as two separate
   values.

   If tokens have been pushed back (see PUSHBACK-TOKEN) then they are
   returned one by one instead of scanning the stream."))

(export 'pushback-token)
(defgeneric pushback-token (lexer token-type &optional token-value location)
  (:documentation
   "Push a token back into the lexer.

   Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token.
   The previous lookahead token is pushed down, and will be made available
   again once this new token is consumed by NEXT-TOKEN.  If LOCATION is
   non-nil then FILE-LOCATION is saved and replaced by LOCATION.  The
   TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need
   not be values which can actually be returned by NEXT-TOKEN."))

;;;--------------------------------------------------------------------------
;;; Utilities.

(export 'skip-spaces)
(defgeneric skip-spaces (lexer)
  (:documentation
   "Skip over whitespace characters in the LEXER.

   There must be a lookahead character; when the function returns, the
   lookahead character will be a non-whitespace character or nil if there
   were no non-whitespace characters remaining.  Returns the new lookahead
   character."))

(export 'require-token)
(defun require-token
    (lexer wanted-token-type &key (errorp t) (consumep t) default)
  "Require a particular token to appear.

   If the LEXER's current lookahead token has type WANTED-TOKEN-TYPE then
   consume it (using NEXT-TOKEN) and return its value.  Otherwise, if the
   token doesn't have the requested type then signal a continuable error
   describing the situation and return DEFAULT (which defaults to nil).

   If ERRORP is false then no error is signalled; this is useful for
   consuming or checking for optional punctuation.  If CONSUMEP is false then
   a matching token is not consumed; non-matching tokens are never consumed."

  (with-slots (token-type token-value) lexer
    (cond ((eql token-type wanted-token-type)
           ;; PROG1 captures the matched token's value before NEXT-TOKEN
           ;; is called to advance the lookahead.
           (prog1 token-value
             (when consumep (next-token lexer))))
          (errorp
           ;; Report the mismatch, describing both tokens via FORMAT-TOKEN,
           ;; and fall back to DEFAULT.
           (cerror* "Expected ~A but found ~A"
                    (format-token wanted-token-type)
                    (format-token token-type token-value))
           default)
          (t
           ;; Mismatch with ERRORP false: hand back DEFAULT without
           ;; signalling anything.
           default))))

;;;----- That's all, folks --------------------------------------------------