Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for lexical analysis | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensble Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (cl:in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Accessors. | |
30 | ||
31 | (export 'lexer-char) | |
32 | (defgeneric lexer-char (lexer) | |
33 | (:documentation | |
34 | "Return the current lookahead character from the LEXER. | |
35 | ||
36 | When the lexer is first created, there is no lookahead character: you must | |
37 | `prime the pump' by calling NEXT-CHAR. The lexer represents encountering | |
38 | the end of its input stream by setting the lookahead character to nil. At | |
39 | this point it is still possible to push back characters.")) | |
40 | ||
dea4d055 MW |
41 | ;;;-------------------------------------------------------------------------- |
42 | ;;; Formatting tokens. | |
43 | ||
44 | (defgeneric format-token (token-type &optional token-value) | |
45 | (:documentation | |
46 | "Return a string describing a token with the specified type and value.") | |
47 | (:method ((token-type (eql :eof)) &optional token-value) | |
48 | (declare (ignore token-value)) | |
49 | "<end-of-file>") | |
50 | (:method ((token-type (eql :string)) &optional token-value) | |
51 | (declare (ignore token-value)) | |
52 | "<string-literal>") | |
53 | (:method ((token-type (eql :char)) &optional token-value) | |
54 | (declare (ignore token-value)) | |
55 | "<character-literal>") | |
56 | (:method ((token-type (eql :id)) &optional token-value) | |
57 | (format nil "<identifier~@[ `~A'~]>" token-value)) | |
58 | (:method ((token-type symbol) &optional token-value) | |
59 | (declare (ignore token-value)) | |
60 | (check-type token-type keyword) | |
61 | (format nil "`~(~A~)'" token-type)) | |
62 | (:method ((token-type character) &optional token-value) | |
63 | (declare (ignore token-value)) | |
64 | (format nil "~:[<~:C>~;`~C'~]" | |
65 | (and (graphic-char-p token-type) | |
66 | (char/= token-type #\space)) | |
67 | token-type))) | |
68 | ||
69 | ;;;-------------------------------------------------------------------------- | |
70 | ;;; Reading and pushing back characters. | |
71 | ||
72 | (export 'next-char) | |
73 | (defgeneric next-char (lexer) | |
74 | (:documentation | |
75 | "Fetch the next character from the LEXER's input stream. | |
76 | ||
77 | Read a character from the input stream, and store it in the LEXER's CHAR | |
78 | slot. The character stored is returned. If characters have been pushed | |
79 | back then pushed-back characters are used instead of the input stream. If | |
80 | there are no more characters to be read then the lookahead character is | |
81 | nil. Returns the new lookahead character. | |
82 | ||
83 | (This function is primarily intended for the use of lexer subclasses.)")) | |
84 | ||
85 | (export 'pushback-char) | |
86 | (defgeneric pushback-char (lexer char) | |
87 | (:documentation | |
88 | "Push the CHAR back into the lexer. | |
89 | ||
90 | Make CHAR be the current lookahead character (stored in the LEXER's CHAR | |
91 | slot). The previous lookahead character is pushed down, and will be made | |
92 | available again once this character is consumed by NEXT-CHAR. | |
93 | ||
94 | (This function is primarily intended for the use of lexer subclasses.)")) | |
95 | ||
96 | (defgeneric fixup-stream* (lexer thunk) | |
97 | (:documentation | |
98 | "Helper function for WITH-LEXER-STREAM. | |
99 | ||
100 | This function does the main work for WITH-LEXER-STREAM. The THUNK is | |
101 | invoked on a single argument, the LEXER's underlying STREAM.")) | |
102 | ||
103 | (export 'with-lexer-stream) | |
104 | (defmacro with-lexer-stream ((streamvar lexer) &body body) | |
105 | "Evaluate BODY with STREAMVAR bound to the LEXER's input stream. | |
106 | ||
107 | The STREAM is fixed up so that the next character read (e.g., using | |
108 | READ-CHAR) will be the lexer's current lookahead character. Once the BODY | |
109 | completes, the next character in the stream is read and set as the | |
110 | lookahead character. It is an error if the lexer has pushed-back | |
111 | characters (since these can't be pushed back into the input stream | |
112 | properly)." | |
113 | ||
114 | `(fixup-stream* ,lexer (lambda (,streamvar) ,@body))) | |
115 | ||
116 | ;;;-------------------------------------------------------------------------- | |
117 | ;;; Reading and pushing back tokens. | |
118 | ||
119 | (export 'scan-token) | |
120 | (defgeneric scan-token (lexer) | |
121 | (:documentation | |
122 | "Internal protocol for scanning tokens from an input stream. | |
123 | ||
124 | Implementing a method on this function is the main responsibility of LEXER | |
125 | subclasses; it is called by the user-facing NEXT-TOKEN function. | |
126 | ||
127 | The method should consume characters (using NEXT-CHAR) as necessary, and | |
128 | return two values: a token type and token value. These will be stored in | |
129 | the corresponding slots in the lexer object in order to provide the user | |
130 | with one-token lookahead.")) | |
131 | ||
132 | (export 'next-token) | |
133 | (defgeneric next-token (lexer) | |
134 | (:documentation | |
135 | "Scan a token from an input stream. | |
136 | ||
137 | This function scans a token from an input stream. Two values are | |
138 | returned: a `token type' and a `token value'. These are opaque to the | |
139 | LEXER base class, but the intent is that the token type be significant to | |
140 | determining the syntax of the input, while the token value carries any | |
141 | additional information about the token's semantic content. The token type | |
142 | and token value are also made available for lookahead via accessors | |
143 | TOKEN-TYPE and TOKEN-VALUE on the LEXER object. | |
144 | ||
145 | The new lookahead token type and value are returned as two separate | |
146 | values. | |
147 | ||
148 | If tokens have been pushed back (see PUSHBACK-TOKEN) then they are | |
149 | returned one by one instead of scanning the stream.")) | |
150 | ||
151 | (export 'pushback-token) | |
152 | (defgeneric pushback-token (lexer token-type &optional token-value location) | |
153 | (:documentation | |
154 | "Push a token back into the lexer. | |
155 | ||
156 | Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token. | |
157 | The previous lookahead token is pushed down, and will be made available | |
158 | agan once this new token is consumed by NEXT-TOKEN. If LOCATION is | |
159 | non-nil then FILE-LOCATION is saved and replaced by LOCATION. The | |
160 | TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need | |
161 | not be values which can actually be returned by NEXT-TOKEN.")) | |
162 | ||
163 | ;;;-------------------------------------------------------------------------- | |
164 | ;;; Utilities. | |
165 | ||
166 | (export 'skip-spaces) | |
167 | (defgeneric skip-spaces (lexer) | |
168 | (:documentation | |
169 | "Skip over whitespace characters in the LEXER. | |
170 | ||
171 | There must be a lookahead character; when the function returns, the | |
172 | lookahead character will be a non-whitespace character or nil if there | |
173 | were no non-whitespace characters remaining. Returns the new lookahead | |
174 | character.")) | |
175 | ||
176 | (export 'require-token) | |
177 | (defun require-token | |
178 | (lexer wanted-token-type &key (errorp t) (consumep t) default) | |
179 | "Require a particular token to appear. | |
180 | ||
181 | If the LEXER's current lookahead token has type WANTED-TOKEN-TYPE then | |
182 | consume it (using NEXT-TOKEN) and return its value. Otherwise, if the | |
183 | token doesn't have the requested type then signal a continuable error | |
184 | describing the situation and return DEFAULT (which defaults to nil). | |
185 | ||
186 | If ERRORP is false then no error is signalled; this is useful for | |
187 | consuming or checking for optional punctuation. If CONSUMEP is false then | |
188 | a matching token is not consumed; non-matching tokens are never consumed." | |
189 | ||
190 | (with-slots (token-type token-value) lexer | |
191 | (cond ((eql token-type wanted-token-type) | |
192 | (prog1 token-value | |
193 | (when consumep (next-token lexer)))) | |
194 | (errorp | |
195 | (cerror* "Expected ~A but found ~A" | |
196 | (format-token wanted-token-type) | |
197 | (format-token token-type token-value)) | |
198 | default) | |
199 | (t | |
200 | default)))) | |
201 | ||
202 | ;;;----- That's all, folks -------------------------------------------------- |