| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Protocol for lexical analysis |
| 4 | ;;; |
| 5 | ;;; (c) 2009 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensble Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (cl:in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Accessors. |
| 30 | |
| 31 | (export 'lexer-char) |
| 32 | (defgeneric lexer-char (lexer) |
| 33 | (:documentation |
| 34 | "Return the current lookahead character from the LEXER. |
| 35 | |
| 36 | When the lexer is first created, there is no lookahead character: you must |
| 37 | `prime the pump' by calling NEXT-CHAR. The lexer represents encountering |
| 38 | the end of its input stream by setting the lookahead character to nil. At |
| 39 | this point it is still possible to push back characters.")) |
| 40 | |
| 41 | ;;;-------------------------------------------------------------------------- |
| 42 | ;;; Formatting tokens. |
| 43 | |
| 44 | (defgeneric format-token (token-type &optional token-value) |
| 45 | (:documentation |
| 46 | "Return a string describing a token with the specified type and value.") |
| 47 | (:method ((token-type (eql :eof)) &optional token-value) |
| 48 | (declare (ignore token-value)) |
| 49 | "<end-of-file>") |
| 50 | (:method ((token-type (eql :string)) &optional token-value) |
| 51 | (declare (ignore token-value)) |
| 52 | "<string-literal>") |
| 53 | (:method ((token-type (eql :char)) &optional token-value) |
| 54 | (declare (ignore token-value)) |
| 55 | "<character-literal>") |
| 56 | (:method ((token-type (eql :id)) &optional token-value) |
| 57 | (format nil "<identifier~@[ `~A'~]>" token-value)) |
| 58 | (:method ((token-type symbol) &optional token-value) |
| 59 | (declare (ignore token-value)) |
| 60 | (check-type token-type keyword) |
| 61 | (format nil "`~(~A~)'" token-type)) |
| 62 | (:method ((token-type character) &optional token-value) |
| 63 | (declare (ignore token-value)) |
| 64 | (format nil "~:[<~:C>~;`~C'~]" |
| 65 | (and (graphic-char-p token-type) |
| 66 | (char/= token-type #\space)) |
| 67 | token-type))) |
| 68 | |
| 69 | ;;;-------------------------------------------------------------------------- |
| 70 | ;;; Reading and pushing back characters. |
| 71 | |
| 72 | (export 'next-char) |
| 73 | (defgeneric next-char (lexer) |
| 74 | (:documentation |
| 75 | "Fetch the next character from the LEXER's input stream. |
| 76 | |
| 77 | Read a character from the input stream, and store it in the LEXER's CHAR |
| 78 | slot. The character stored is returned. If characters have been pushed |
| 79 | back then pushed-back characters are used instead of the input stream. If |
| 80 | there are no more characters to be read then the lookahead character is |
| 81 | nil. Returns the new lookahead character. |
| 82 | |
| 83 | (This function is primarily intended for the use of lexer subclasses.)")) |
| 84 | |
| 85 | (export 'pushback-char) |
| 86 | (defgeneric pushback-char (lexer char) |
| 87 | (:documentation |
| 88 | "Push the CHAR back into the lexer. |
| 89 | |
| 90 | Make CHAR be the current lookahead character (stored in the LEXER's CHAR |
| 91 | slot). The previous lookahead character is pushed down, and will be made |
| 92 | available again once this character is consumed by NEXT-CHAR. |
| 93 | |
| 94 | (This function is primarily intended for the use of lexer subclasses.)")) |
| 95 | |
| 96 | (defgeneric fixup-stream* (lexer thunk) |
| 97 | (:documentation |
| 98 | "Helper function for WITH-LEXER-STREAM. |
| 99 | |
| 100 | This function does the main work for WITH-LEXER-STREAM. The THUNK is |
| 101 | invoked on a single argument, the LEXER's underlying STREAM.")) |
| 102 | |
| 103 | (export 'with-lexer-stream) |
| 104 | (defmacro with-lexer-stream ((streamvar lexer) &body body) |
| 105 | "Evaluate BODY with STREAMVAR bound to the LEXER's input stream. |
| 106 | |
| 107 | The STREAM is fixed up so that the next character read (e.g., using |
| 108 | READ-CHAR) will be the lexer's current lookahead character. Once the BODY |
| 109 | completes, the next character in the stream is read and set as the |
| 110 | lookahead character. It is an error if the lexer has pushed-back |
| 111 | characters (since these can't be pushed back into the input stream |
| 112 | properly)." |
| 113 | |
| 114 | `(fixup-stream* ,lexer (lambda (,streamvar) ,@body))) |
| 115 | |
| 116 | ;;;-------------------------------------------------------------------------- |
| 117 | ;;; Reading and pushing back tokens. |
| 118 | |
| 119 | (export 'scan-token) |
| 120 | (defgeneric scan-token (lexer) |
| 121 | (:documentation |
| 122 | "Internal protocol for scanning tokens from an input stream. |
| 123 | |
| 124 | Implementing a method on this function is the main responsibility of LEXER |
| 125 | subclasses; it is called by the user-facing NEXT-TOKEN function. |
| 126 | |
| 127 | The method should consume characters (using NEXT-CHAR) as necessary, and |
| 128 | return two values: a token type and token value. These will be stored in |
| 129 | the corresponding slots in the lexer object in order to provide the user |
| 130 | with one-token lookahead.")) |
| 131 | |
| 132 | (export 'next-token) |
| 133 | (defgeneric next-token (lexer) |
| 134 | (:documentation |
| 135 | "Scan a token from an input stream. |
| 136 | |
| 137 | This function scans a token from an input stream. Two values are |
| 138 | returned: a `token type' and a `token value'. These are opaque to the |
| 139 | LEXER base class, but the intent is that the token type be significant to |
| 140 | determining the syntax of the input, while the token value carries any |
| 141 | additional information about the token's semantic content. The token type |
| 142 | and token value are also made available for lookahead via accessors |
| 143 | TOKEN-TYPE and TOKEN-VALUE on the LEXER object. |
| 144 | |
| 145 | The new lookahead token type and value are returned as two separate |
| 146 | values. |
| 147 | |
| 148 | If tokens have been pushed back (see PUSHBACK-TOKEN) then they are |
| 149 | returned one by one instead of scanning the stream.")) |
| 150 | |
| 151 | (export 'pushback-token) |
| 152 | (defgeneric pushback-token (lexer token-type &optional token-value location) |
| 153 | (:documentation |
| 154 | "Push a token back into the lexer. |
| 155 | |
| 156 | Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token. |
| 157 | The previous lookahead token is pushed down, and will be made available |
| 158 | agan once this new token is consumed by NEXT-TOKEN. If LOCATION is |
| 159 | non-nil then FILE-LOCATION is saved and replaced by LOCATION. The |
| 160 | TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need |
| 161 | not be values which can actually be returned by NEXT-TOKEN.")) |
| 162 | |
| 163 | ;;;-------------------------------------------------------------------------- |
| 164 | ;;; Utilities. |
| 165 | |
| 166 | (export 'skip-spaces) |
| 167 | (defgeneric skip-spaces (lexer) |
| 168 | (:documentation |
| 169 | "Skip over whitespace characters in the LEXER. |
| 170 | |
| 171 | There must be a lookahead character; when the function returns, the |
| 172 | lookahead character will be a non-whitespace character or nil if there |
| 173 | were no non-whitespace characters remaining. Returns the new lookahead |
| 174 | character.")) |
| 175 | |
| 176 | (export 'require-token) |
| 177 | (defun require-token |
| 178 | (lexer wanted-token-type &key (errorp t) (consumep t) default) |
| 179 | "Require a particular token to appear. |
| 180 | |
| 181 | If the LEXER's current lookahead token has type WANTED-TOKEN-TYPE then |
| 182 | consume it (using NEXT-TOKEN) and return its value. Otherwise, if the |
| 183 | token doesn't have the requested type then signal a continuable error |
| 184 | describing the situation and return DEFAULT (which defaults to nil). |
| 185 | |
| 186 | If ERRORP is false then no error is signalled; this is useful for |
| 187 | consuming or checking for optional punctuation. If CONSUMEP is false then |
| 188 | a matching token is not consumed; non-matching tokens are never consumed." |
| 189 | |
| 190 | (with-slots (token-type token-value) lexer |
| 191 | (cond ((eql token-type wanted-token-type) |
| 192 | (prog1 token-value |
| 193 | (when consumep (next-token lexer)))) |
| 194 | (errorp |
| 195 | (cerror* "Expected ~A but found ~A" |
| 196 | (format-token wanted-token-type) |
| 197 | (format-token token-type token-value)) |
| 198 | default) |
| 199 | (t |
| 200 | default)))) |
| 201 | |
| 202 | ;;;----- That's all, folks -------------------------------------------------- |