Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for lexical analysis | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensible Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (cl:in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Accessors. | |
30 | ||
31 | (export 'lexer-char) | |
32 | (defgeneric lexer-char (lexer) | |
33 | (:documentation | |
34 | "Return the current lookahead character from the LEXER. | |
35 | ||
36 | A newly created lexer has no lookahead character yet: you must `prime the | |
37 | pump' by calling NEXT-CHAR first. When the lexer reaches the end of its | |
38 | input stream, the lookahead character is nil; even then it is still | |
39 | possible to push characters back (see PUSHBACK-CHAR).")) | |
40 | ||
41 | (export '(token-type token-value)) | |
42 | (defgeneric token-type (lexer) | |
43 | (:documentation | |
44 | "Return the type of the LEXER's current lookahead token. | |
45 | ||
46 | When the lexer is first created, there is no lookahead token: you must | |
47 | `prime the pump' by calling NEXT-TOKEN. See TOKEN-VALUE for its value.")) | |
48 | (defgeneric token-value (lexer) | |
49 | (:documentation | |
50 | "Return the value of the LEXER's current lookahead token. | |
51 | ||
52 | When the lexer is first created, there is no lookahead token: you must | |
53 | `prime the pump' by calling NEXT-TOKEN. See TOKEN-TYPE for its type.")) | |
54 | ||
55 | ;;;-------------------------------------------------------------------------- | |
56 | ;;; Formatting tokens. | |
57 | ||
58 | (defgeneric format-token (token-type &optional token-value) | |
59 | (:documentation | |
60 | "Describe the token of TOKEN-TYPE (and optional TOKEN-VALUE) as a string.") | |
61 | ;; General cases: a character stands for itself, a keyword for its name. | |
62 | (:method ((token-type character) &optional token-value) | |
63 | (declare (ignore token-value)) | |
64 | (let ((visiblep (and (graphic-char-p token-type) | |
65 | (char/= token-type #\space)))) | |
66 | ;; Quote visible characters; show the rest by name in angle brackets. | |
67 | (format nil "~:[<~:C>~;`~C'~]" visiblep token-type))) | |
68 | (:method ((token-type symbol) &optional token-value) | |
69 | (declare (ignore token-value)) | |
70 | ;; Only keywords are acceptable as symbolic token types. | |
71 | (check-type token-type keyword) | |
72 | (format nil "`~(~A~)'" token-type)) | |
73 | ;; Token types with dedicated descriptions. | |
74 | (:method ((token-type (eql :id)) &optional token-value) | |
75 | (format nil "<identifier~@[ `~A'~]>" token-value)) | |
76 | (:method ((token-type (eql :char)) &optional token-value) | |
77 | (declare (ignore token-value)) "<character-literal>") | |
78 | (:method ((token-type (eql :string)) &optional token-value) | |
79 | (declare (ignore token-value)) "<string-literal>") | |
80 | (:method ((token-type (eql :eof)) &optional token-value) | |
81 | (declare (ignore token-value)) "<end-of-file>")) | |
82 | ||
83 | ;;;-------------------------------------------------------------------------- | |
84 | ;;; Reading and pushing back characters. | |
85 | ||
86 | (export 'next-char) | |
87 | (defgeneric next-char (lexer) | |
88 | (:documentation | |
89 | "Fetch the next character from the LEXER's input stream. | |
90 | ||
91 | Read a character from the input stream and store it in the LEXER's CHAR | |
92 | slot, making it the new lookahead character. If characters have been | |
93 | pushed back (see PUSHBACK-CHAR) then they are used before the input stream | |
94 | is read. If there are no more characters to be read then the lookahead | |
95 | character becomes nil. Returns the new lookahead character. | |
96 | ||
97 | (This function is primarily intended for the use of lexer subclasses.)")) | |
98 | ||
99 | (export 'pushback-char) | |
100 | (defgeneric pushback-char (lexer char) | |
101 | (:documentation | |
102 | "Push CHAR back into the LEXER. | |
103 | ||
104 | Make CHAR the current lookahead character (stored in the LEXER's CHAR | |
105 | slot). The previous lookahead character is pushed down, and becomes | |
106 | available again once CHAR has been consumed by NEXT-CHAR. | |
107 | ||
108 | (This function is primarily intended for the use of lexer subclasses.)")) | |
109 | ||
110 | (defgeneric fixup-stream* (lexer thunk) | |
111 | (:documentation | |
112 | "Helper function for WITH-LEXER-STREAM. | |
113 | ||
114 | This function does the main work for WITH-LEXER-STREAM, which passes its | |
115 | body as THUNK. THUNK is called with one argument: the LEXER's stream.")) | |
116 | ||
117 | (export 'with-lexer-stream) | |
118 | (defmacro with-lexer-stream ((streamvar lexer) &body body) | |
119 | "Run BODY with STREAMVAR bound to the underlying input stream of LEXER. | |
120 | ||
121 | Before BODY runs, the stream is arranged so that the first character read | |
122 | from it (e.g., by READ-CHAR) is the lexer's current lookahead character. | |
123 | When BODY finishes, the next character is read from the stream and becomes | |
124 | the new lookahead character. It is an error to use this while the lexer | |
125 | holds pushed-back characters, because they cannot be returned to the | |
126 | input stream properly." | |
127 | (let ((thunk `(lambda (,streamvar) ,@body))) ; BODY as a one-argument function | |
128 | `(fixup-stream* ,lexer ,thunk))) | |
129 | ||
130 | ;;;-------------------------------------------------------------------------- | |
131 | ;;; Reading and pushing back tokens. | |
132 | ||
133 | (export 'scan-token) | |
134 | (defgeneric scan-token (lexer) | |
135 | (:documentation | |
136 | "Internal protocol for scanning tokens from an input stream. | |
137 | ||
138 | Implementing a method on this function is the main responsibility of LEXER | |
139 | subclasses; it is called by the user-facing NEXT-TOKEN function. | |
140 | ||
141 | The method should consume characters (using NEXT-CHAR) as necessary, and | |
142 | return two values: a token type and a token value. These will be stored | |
143 | in the corresponding slots of the lexer object (see TOKEN-TYPE and | |
144 | TOKEN-VALUE) in order to provide the user with one-token lookahead.")) | |
145 | ||
146 | (export 'next-token) | |
147 | (defgeneric next-token (lexer) | |
148 | (:documentation | |
149 | "Scan a token from the LEXER's input stream. | |
150 | ||
151 | The token scanned becomes the new lookahead token. Two values are | |
152 | returned: a `token type' and a `token value'. These are opaque to the | |
153 | LEXER base class, but the intent is that the token type be significant to | |
154 | determining the syntax of the input, while the token value carries any | |
155 | additional information about the token's semantic content. The token type | |
156 | and token value are also made available for lookahead via accessors | |
157 | TOKEN-TYPE and TOKEN-VALUE on the LEXER object. | |
158 | ||
159 | Tokens are normally obtained by calling SCAN-TOKEN, which LEXER subclasses | |
160 | implement. | |
161 | ||
162 | If tokens have been pushed back (see PUSHBACK-TOKEN) then they are | |
163 | returned one by one instead of scanning the stream.")) | |
164 | ||
165 | (export 'pushback-token) | |
166 | (defgeneric pushback-token (lexer token-type &optional token-value location) | |
167 | (:documentation | |
168 | "Push a token back into the LEXER. | |
169 | ||
170 | Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token. | |
171 | The previous lookahead token is pushed down, and will be made available | |
172 | again once this new token is consumed by NEXT-TOKEN. If LOCATION is | |
173 | non-nil then FILE-LOCATION is saved and replaced by LOCATION. The | |
174 | TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need | |
175 | not be values which can actually be returned by NEXT-TOKEN.")) | |
176 | ||
177 | ;;;-------------------------------------------------------------------------- | |
178 | ;;; Utilities. | |
179 | ||
180 | (export 'skip-spaces) | |
181 | (defgeneric skip-spaces (lexer) | |
182 | (:documentation | |
183 | "Skip over whitespace characters in the LEXER. | |
184 | ||
185 | The lexer must already have been primed with a lookahead character. On | |
186 | return, the lookahead character is either a non-whitespace character, or | |
187 | nil if no non-whitespace characters remained. Returns the new lookahead | |
188 | character.")) | |
189 | ||
190 | (export 'require-token) | |
191 | (defun require-token | |
192 | (lexer wanted-token-type &key (errorp t) (consumep t) default) | |
193 | "Check that the LEXER's lookahead token has type WANTED-TOKEN-TYPE. | |
194 | ||
195 | On a match, return the token's value; the token is also consumed (using | |
196 | NEXT-TOKEN) unless CONSUMEP is false. A token of any other type is never | |
197 | consumed: DEFAULT (nil unless given) is returned instead, after signalling | |
198 | a continuable error describing the mismatch. | |
199 | ||
200 | If ERRORP is false then no error is signalled and DEFAULT is returned | |
201 | quietly; this is useful for consuming or checking optional punctuation." | |
202 | ||
203 | (with-slots ((found-type token-type) (found-value token-value)) lexer | |
204 | (if (eql found-type wanted-token-type) | |
205 | ;; Capture the value first: NEXT-TOKEN replaces the lookahead token. | |
206 | (let ((value found-value)) | |
207 | (when consumep (next-token lexer)) | |
208 | value) | |
209 | (progn | |
210 | (when errorp | |
211 | (cerror* "Expected ~A but found ~A" | |
212 | (format-token wanted-token-type) | |
213 | (format-token found-type found-value))) | |
214 | default)))) | |
215 | ||
216 | ;;;----- That's all, folks -------------------------------------------------- |