| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Protocol for lexical analysis |
| 4 | ;;; |
| 5 | ;;; (c) 2009 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensble Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (cl:in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Accessors. |
| 30 | |
| 31 | (export 'lexer-char) |
| 32 | (defgeneric lexer-char (lexer) |
| 33 | (:documentation |
| 34 | "Return the current lookahead character from the LEXER. |
| 35 | |
| 36 | When the lexer is first created, there is no lookahead character: you must |
| 37 | `prime the pump' by calling `next-char'. The lexer represents |
| 38 | encountering the end of its input stream by setting the lookahead |
| 39 | character to nil. At this point it is still possible to push back |
| 40 | characters.")) |
| 41 | |
| 42 | ;;;-------------------------------------------------------------------------- |
| 43 | ;;; Formatting tokens. |
| 44 | |
| 45 | (defgeneric format-token (token-type &optional token-value) |
| 46 | (:documentation |
| 47 | "Return a string describing a token with the specified type and value.") |
| 48 | (:method ((token-type (eql :eof)) &optional token-value) |
| 49 | (declare (ignore token-value)) |
| 50 | "<end-of-file>") |
| 51 | (:method ((token-type (eql :string)) &optional token-value) |
| 52 | (declare (ignore token-value)) |
| 53 | "<string-literal>") |
| 54 | (:method ((token-type (eql :char)) &optional token-value) |
| 55 | (declare (ignore token-value)) |
| 56 | "<character-literal>") |
| 57 | (:method ((token-type (eql :id)) &optional token-value) |
| 58 | (format nil "<identifier~@[ `~A'~]>" token-value)) |
| 59 | (:method ((token-type symbol) &optional token-value) |
| 60 | (declare (ignore token-value)) |
| 61 | (check-type token-type keyword) |
| 62 | (format nil "`~(~A~)'" token-type)) |
| 63 | (:method ((token-type character) &optional token-value) |
| 64 | (declare (ignore token-value)) |
| 65 | (format nil "~:[<~:C>~;`~C'~]" |
| 66 | (and (graphic-char-p token-type) |
| 67 | (char/= token-type #\space)) |
| 68 | token-type))) |
| 69 | |
| 70 | ;;;-------------------------------------------------------------------------- |
| 71 | ;;; Reading and pushing back characters. |
| 72 | |
| 73 | (export 'next-char) |
| 74 | (defgeneric next-char (lexer) |
| 75 | (:documentation |
| 76 | "Fetch the next character from the LEXER's input stream. |
| 77 | |
| 78 | Read a character from the input stream, and store it in the LEXER's CHAR |
| 79 | slot. The character stored is returned. If characters have been pushed |
| 80 | back then pushed-back characters are used instead of the input stream. If |
| 81 | there are no more characters to be read then the lookahead character is |
| 82 | nil. Returns the new lookahead character. |
| 83 | |
| 84 | (This function is primarily intended for the use of lexer subclasses.)")) |
| 85 | |
| 86 | (export 'pushback-char) |
| 87 | (defgeneric pushback-char (lexer char) |
| 88 | (:documentation |
| 89 | "Push the CHAR back into the lexer. |
| 90 | |
| 91 | Make CHAR be the current lookahead character (stored in the LEXER's CHAR |
| 92 | slot). The previous lookahead character is pushed down, and will be made |
| 93 | available again once this character is consumed by NEXT-CHAR. |
| 94 | |
| 95 | (This function is primarily intended for the use of lexer subclasses.)")) |
| 96 | |
| 97 | (defgeneric fixup-stream* (lexer thunk) |
| 98 | (:documentation |
| 99 | "Helper function for `with-lexer-stream'. |
| 100 | |
| 101 | This function does the main work for `with-lexer-stream'. The THUNK is |
| 102 | invoked on a single argument, the LEXER's underlying STREAM.")) |
| 103 | |
| 104 | (export 'with-lexer-stream) |
| 105 | (defmacro with-lexer-stream ((streamvar lexer) &body body) |
| 106 | "Evaluate BODY with STREAMVAR bound to the LEXER's input stream. |
| 107 | |
| 108 | The STREAM is fixed up so that the next character read (e.g., using |
| 109 | `read-char') will be the lexer's current lookahead character. Once the |
| 110 | BODY completes, the next character in the stream is read and set as the |
| 111 | lookahead character. It is an error if the lexer has pushed-back |
| 112 | characters (since these can't be pushed back into the input stream |
| 113 | properly)." |
| 114 | |
| 115 | `(fixup-stream* ,lexer (lambda (,streamvar) ,@body))) |
| 116 | |
| 117 | ;;;-------------------------------------------------------------------------- |
| 118 | ;;; Reading and pushing back tokens. |
| 119 | |
| 120 | (export 'scan-token) |
| 121 | (defgeneric scan-token (lexer) |
| 122 | (:documentation |
| 123 | "Internal protocol for scanning tokens from an input stream. |
| 124 | |
| 125 | Implementing a method on this function is the main responsibility of LEXER |
| 126 | subclasses; it is called by the user-facing `next-token' function. |
| 127 | |
| 128 | The method should consume characters (using `next-char') as necessary, and |
| 129 | return two values: a token type and token value. These will be stored in |
| 130 | the corresponding slots in the lexer object in order to provide the user |
| 131 | with one-token lookahead.")) |
| 132 | |
| 133 | (export 'next-token) |
| 134 | (defgeneric next-token (lexer) |
| 135 | (:documentation |
| 136 | "Scan a token from an input stream. |
| 137 | |
| 138 | This function scans a token from an input stream. Two values are |
| 139 | returned: a `token type' and a `token value'. These are opaque to the |
| 140 | LEXER base class, but the intent is that the token type be significant to |
| 141 | determining the syntax of the input, while the token value carries any |
| 142 | additional information about the token's semantic content. The token type |
| 143 | and token value are also made available for lookahead via accessors |
| 144 | TOKEN-TYPE and TOKEN-VALUE on the `lexer' object. |
| 145 | |
| 146 | The new lookahead token type and value are returned as two separate |
| 147 | values. |
| 148 | |
| 149 | If tokens have been pushed back (see `pushback-token') then they are |
| 150 | returned one by one instead of scanning the stream.")) |
| 151 | |
| 152 | (export 'pushback-token) |
| 153 | (defgeneric pushback-token (lexer token-type &optional token-value location) |
| 154 | (:documentation |
| 155 | "Push a token back into the lexer. |
| 156 | |
| 157 | Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token. |
| 158 | The previous lookahead token is pushed down, and will be made available |
| 159 | agan once this new token is consumed by NEXT-TOKEN. If LOCATION is |
| 160 | non-nil then `file-location' is saved and replaced by LOCATION. The |
| 161 | TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need |
| 162 | not be values which can actually be returned by NEXT-TOKEN.")) |
| 163 | |
| 164 | ;;;-------------------------------------------------------------------------- |
| 165 | ;;; Utilities. |
| 166 | |
| 167 | (export 'skip-spaces) |
| 168 | (defgeneric skip-spaces (lexer) |
| 169 | (:documentation |
| 170 | "Skip over whitespace characters in the LEXER. |
| 171 | |
| 172 | There must be a lookahead character; when the function returns, the |
| 173 | lookahead character will be a non-whitespace character or nil if there |
| 174 | were no non-whitespace characters remaining. Returns the new lookahead |
| 175 | character.")) |
| 176 | |
| 177 | (export 'require-token) |
| 178 | (defun require-token |
| 179 | (lexer wanted-token-type &key (errorp t) (consumep t) default) |
| 180 | "Require a particular token to appear. |
| 181 | |
| 182 | If the LEXER's current lookahead token has type `wanted-token-type' then |
| 183 | consume it (using `next-token') and return its value. Otherwise, if the |
| 184 | token doesn't have the requested type then signal a continuable error |
| 185 | describing the situation and return DEFAULT (which defaults to nil). |
| 186 | |
| 187 | If ERRORP is false then no error is signalled; this is useful for |
| 188 | consuming or checking for optional punctuation. If CONSUMEP is false then |
| 189 | a matching token is not consumed; non-matching tokens are never consumed." |
| 190 | |
| 191 | (with-slots (token-type token-value) lexer |
| 192 | (cond ((eql token-type wanted-token-type) |
| 193 | (prog1 token-value |
| 194 | (when consumep (next-token lexer)))) |
| 195 | (errorp |
| 196 | (cerror* "Expected ~A but found ~A" |
| 197 | (format-token wanted-token-type) |
| 198 | (format-token token-type token-value)) |
| 199 | default) |
| 200 | (t |
| 201 | default)))) |
| 202 | |
| 203 | ;;;----- That's all, folks -------------------------------------------------- |