Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for lexical analysis | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensible Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (cl:in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Accessors. | |
30 | ||
(export 'lexer-char)
(defgeneric lexer-char (lexer)
  (:documentation
   "Answer the LEXER's current lookahead character.

   A freshly created lexer has no lookahead character yet: call `next-char'
   once to fetch the first one.  End of input is reported by a lookahead
   character of nil; characters may still be pushed back after that point."))
dea4d055 | 41 | |
dea4d055 MW |
42 | ;;;-------------------------------------------------------------------------- |
43 | ;;; Formatting tokens. | |
44 | ||
(defgeneric format-token (token-type &optional token-value)
  (:documentation
   "Answer a human-readable string naming a token.

   TOKEN-TYPE selects the description.  Of the methods defined here, only
   the one for `:id' tokens looks at TOKEN-VALUE; the others ignore it.")

  ;; Token classes with a fixed description: the value is irrelevant.
  (:method ((token-type (eql :eof)) &optional token-value)
    (declare (ignore token-value))
    "<end-of-file>")
  (:method ((token-type (eql :string)) &optional token-value)
    (declare (ignore token-value))
    "<string-literal>")
  (:method ((token-type (eql :char)) &optional token-value)
    (declare (ignore token-value))
    "<character-literal>")

  ;; Identifiers: mention the name too if the caller supplied one.
  (:method ((token-type (eql :id)) &optional token-value)
    (format nil "<identifier~@[ `~A'~]>" token-value))

  ;; Any other symbol must be a keyword; it is shown quoted, in lowercase.
  (:method ((token-type symbol) &optional token-value)
    (declare (ignore token-value))
    (check-type token-type keyword)
    (format nil "`~(~A~)'" token-type))

  ;; Single-character tokens: visible characters are shown quoted, while
  ;; the space and non-graphic characters are shown by name in angle
  ;; brackets.
  (:method ((token-type character) &optional token-value)
    (declare (ignore token-value))
    (let ((visiblep (and (graphic-char-p token-type)
                         (not (char= token-type #\space)))))
      (format nil "~:[<~:C>~;`~C'~]" visiblep token-type))))
69 | ||
70 | ;;;-------------------------------------------------------------------------- | |
71 | ;;; Reading and pushing back characters. | |
72 | ||
(export 'next-char)
(defgeneric next-char (lexer)
  (:documentation
   "Advance the LEXER by one character.

   The next character becomes the LEXER's lookahead character (stored in its
   CHAR slot) and is also returned.  Pushed-back characters, if there are
   any, are used up before the input stream is consulted.  When nothing
   remains to be read, the lookahead character -- and hence the return
   value -- is nil.

   (This function is primarily intended for the use of lexer subclasses.)"))
85 | ||
(export 'pushback-char)
(defgeneric pushback-char (lexer char)
  (:documentation
   "Return CHAR to the LEXER so that it will be read again.

   CHAR becomes the current lookahead character (stored in the LEXER's CHAR
   slot).  The lookahead character which it displaces is pushed down, and
   reappears once `next-char' has consumed CHAR.

   (This function is primarily intended for the use of lexer subclasses.)"))
96 | ||
(defgeneric fixup-stream* (lexer thunk)
  (:documentation
   "Do the main work of `with-lexer-stream'.

   THUNK is a function of one argument; it is called with the stream
   underlying the LEXER."))
103 | ||
(export 'with-lexer-stream)
(defmacro with-lexer-stream ((streamvar lexer) &body body)
  "Run BODY with STREAMVAR bound to the input stream underlying LEXER.

   Before BODY runs, the stream is arranged so that the next character read
   from it (e.g., by `read-char') is the lexer's current lookahead
   character.  When BODY finishes, the following character is read from the
   stream and becomes the new lookahead character.  The lexer must not have
   any pushed-back characters: there is no proper way to return them to the
   input stream, so this is an error."

  ;; Package BODY up as a one-argument function and hand it over to
  ;; `fixup-stream*', which does the real work.
  (let ((thunk `(lambda (,streamvar) ,@body)))
    `(fixup-stream* ,lexer ,thunk)))
116 | ||
117 | ;;;-------------------------------------------------------------------------- | |
118 | ;;; Reading and pushing back tokens. | |
119 | ||
(export 'scan-token)
(defgeneric scan-token (lexer)
  (:documentation
   "Scan one token from the input: the internal side of the token protocol.

   Providing a method on this function is the main responsibility of LEXER
   subclasses.  Users do not call it directly: they call `next-token', which
   in turn calls this function.

   A method consumes as many characters as it needs (using `next-char') and
   returns two values: the token type and the token value.  These are stored
   in the corresponding slots of the lexer object, giving the user one token
   of lookahead."))
132 | ||
(export 'next-token)
(defgeneric next-token (lexer)
  (:documentation
   "Advance the LEXER by one token.

   Returns two separate values: the `token type' and the `token value' of
   the new lookahead token.  The LEXER base class treats both as opaque.  The
   intent is that the token type be what matters for determining the syntax
   of the input, while the token value carries whatever further information
   describes the token's semantic content.  The same two values remain
   available for lookahead through the accessors TOKEN-TYPE and TOKEN-VALUE
   on the `lexer' object.

   Tokens which have been pushed back (see `pushback-token') are returned,
   one at a time, before any more of the stream is scanned."))
151 | ||
(export 'pushback-token)
(defgeneric pushback-token (lexer token-type &optional token-value location)
  (:documentation
   "Return a token to the LEXER so that it will be read again.

   TOKEN-TYPE and TOKEN-VALUE become the current lookahead token.  The
   lookahead token which they displace is pushed down, and reappears once
   `next-token' has consumed the new one.  If LOCATION is non-nil then the
   `file-location' is saved and replaced by LOCATION.  No restriction is
   placed on TOKEN-TYPE and TOKEN-VALUE: in particular, they need not be
   values which `next-token' could ever actually return."))
163 | ||
164 | ;;;-------------------------------------------------------------------------- | |
165 | ;;; Utilities. | |
166 | ||
(export 'skip-spaces)
(defgeneric skip-spaces (lexer)
  (:documentation
   "Advance the LEXER past any whitespace characters.

   A lookahead character must already be present.  On return, the lookahead
   character is a non-whitespace character, or nil if no non-whitespace
   characters remained.  The new lookahead character is also the return
   value."))
176 | ||
(export 'require-token)
(defun require-token
    (lexer wanted-token-type &key (errorp t) (consumep t) default)
  "Check that the LEXER's lookahead token has type WANTED-TOKEN-TYPE.

   On a match, answer the token's value; the token is also consumed (using
   `next-token') unless CONSUMEP is false.  On a mismatch, the token is left
   alone and DEFAULT (nil unless given) is returned.  Unless ERRORP is false,
   a mismatch also signals a continuable error, naming the expected and
   actual tokens, before returning.  Passing a false ERRORP is useful for
   consuming or checking for optional punctuation."

  (with-slots ((found-type token-type) (found-value token-value)) lexer
    (if (eql found-type wanted-token-type)

        ;; Matched.  Read the value out of the slot before consuming, so
        ;; that the answer does not depend on what `next-token' stores there.
        (let ((value found-value))
          (when consumep (next-token lexer))
          value)

        ;; Mismatched.  Complain if asked to; either way, leave the token in
        ;; place and fall back to DEFAULT.
        (progn
          (when errorp
            (cerror* "Expected ~A but found ~A"
                     (format-token wanted-token-type)
                     (format-token found-type found-value)))
          default))))
202 | ||
203 | ;;;----- That's all, folks -------------------------------------------------- |