Don't write Lisp symbol names in uppercase: use `...' instead.
[sod] / src / lexer-proto.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Protocol for lexical analysis
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensble Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Accessors.
30
31(export 'lexer-char)
32(defgeneric lexer-char (lexer)
33 (:documentation
34 "Return the current lookahead character from the LEXER.
35
36 When the lexer is first created, there is no lookahead character: you must
3109662a
MW
37 `prime the pump' by calling `next-char'. The lexer represents
38 encountering the end of its input stream by setting the lookahead
39 character to nil. At this point it is still possible to push back
40 characters."))
dea4d055 41
dea4d055
MW
42;;;--------------------------------------------------------------------------
43;;; Formatting tokens.
44
45(defgeneric format-token (token-type &optional token-value)
46 (:documentation
47 "Return a string describing a token with the specified type and value.")
48 (:method ((token-type (eql :eof)) &optional token-value)
49 (declare (ignore token-value))
50 "<end-of-file>")
51 (:method ((token-type (eql :string)) &optional token-value)
52 (declare (ignore token-value))
53 "<string-literal>")
54 (:method ((token-type (eql :char)) &optional token-value)
55 (declare (ignore token-value))
56 "<character-literal>")
57 (:method ((token-type (eql :id)) &optional token-value)
58 (format nil "<identifier~@[ `~A'~]>" token-value))
59 (:method ((token-type symbol) &optional token-value)
60 (declare (ignore token-value))
61 (check-type token-type keyword)
62 (format nil "`~(~A~)'" token-type))
63 (:method ((token-type character) &optional token-value)
64 (declare (ignore token-value))
65 (format nil "~:[<~:C>~;`~C'~]"
66 (and (graphic-char-p token-type)
67 (char/= token-type #\space))
68 token-type)))
69
70;;;--------------------------------------------------------------------------
71;;; Reading and pushing back characters.
72
73(export 'next-char)
74(defgeneric next-char (lexer)
75 (:documentation
76 "Fetch the next character from the LEXER's input stream.
77
78 Read a character from the input stream, and store it in the LEXER's CHAR
79 slot. The character stored is returned. If characters have been pushed
80 back then pushed-back characters are used instead of the input stream. If
81 there are no more characters to be read then the lookahead character is
82 nil. Returns the new lookahead character.
83
84 (This function is primarily intended for the use of lexer subclasses.)"))
85
86(export 'pushback-char)
87(defgeneric pushback-char (lexer char)
88 (:documentation
89 "Push the CHAR back into the lexer.
90
91 Make CHAR be the current lookahead character (stored in the LEXER's CHAR
92 slot). The previous lookahead character is pushed down, and will be made
93 available again once this character is consumed by NEXT-CHAR.
94
95 (This function is primarily intended for the use of lexer subclasses.)"))
96
97(defgeneric fixup-stream* (lexer thunk)
98 (:documentation
3109662a 99 "Helper function for `with-lexer-stream'.
dea4d055 100
3109662a 101 This function does the main work for `with-lexer-stream'. The THUNK is
dea4d055
MW
102 invoked on a single argument, the LEXER's underlying STREAM."))
103
104(export 'with-lexer-stream)
105(defmacro with-lexer-stream ((streamvar lexer) &body body)
106 "Evaluate BODY with STREAMVAR bound to the LEXER's input stream.
107
108 The STREAM is fixed up so that the next character read (e.g., using
3109662a
MW
109 `read-char') will be the lexer's current lookahead character. Once the
110 BODY completes, the next character in the stream is read and set as the
dea4d055
MW
111 lookahead character. It is an error if the lexer has pushed-back
112 characters (since these can't be pushed back into the input stream
113 properly)."
114
115 `(fixup-stream* ,lexer (lambda (,streamvar) ,@body)))
116
117;;;--------------------------------------------------------------------------
118;;; Reading and pushing back tokens.
119
120(export 'scan-token)
121(defgeneric scan-token (lexer)
122 (:documentation
123 "Internal protocol for scanning tokens from an input stream.
124
125 Implementing a method on this function is the main responsibility of LEXER
3109662a 126 subclasses; it is called by the user-facing `next-token' function.
dea4d055 127
3109662a 128 The method should consume characters (using `next-char') as necessary, and
dea4d055
MW
129 return two values: a token type and token value. These will be stored in
130 the corresponding slots in the lexer object in order to provide the user
131 with one-token lookahead."))
132
133(export 'next-token)
134(defgeneric next-token (lexer)
135 (:documentation
136 "Scan a token from an input stream.
137
138 This function scans a token from an input stream. Two values are
139 returned: a `token type' and a `token value'. These are opaque to the
140 LEXER base class, but the intent is that the token type be significant to
141 determining the syntax of the input, while the token value carries any
142 additional information about the token's semantic content. The token type
143 and token value are also made available for lookahead via accessors
3109662a 144 TOKEN-TYPE and TOKEN-VALUE on the `lexer' object.
dea4d055
MW
145
146 The new lookahead token type and value are returned as two separate
147 values.
148
3109662a 149 If tokens have been pushed back (see `pushback-token') then they are
dea4d055
MW
150 returned one by one instead of scanning the stream."))
151
152(export 'pushback-token)
153(defgeneric pushback-token (lexer token-type &optional token-value location)
154 (:documentation
155 "Push a token back into the lexer.
156
157 Make the given TOKEN-TYPE and TOKEN-VALUE be the current lookahead token.
158 The previous lookahead token is pushed down, and will be made available
159 agan once this new token is consumed by NEXT-TOKEN. If LOCATION is
3109662a 160 non-nil then `file-location' is saved and replaced by LOCATION. The
dea4d055
MW
161 TOKEN-TYPE and TOKEN-VALUE can be anything at all: for instance, they need
162 not be values which can actually be returned by NEXT-TOKEN."))
163
164;;;--------------------------------------------------------------------------
165;;; Utilities.
166
167(export 'skip-spaces)
168(defgeneric skip-spaces (lexer)
169 (:documentation
170 "Skip over whitespace characters in the LEXER.
171
172 There must be a lookahead character; when the function returns, the
173 lookahead character will be a non-whitespace character or nil if there
174 were no non-whitespace characters remaining. Returns the new lookahead
175 character."))
176
177(export 'require-token)
178(defun require-token
179 (lexer wanted-token-type &key (errorp t) (consumep t) default)
180 "Require a particular token to appear.
181
3109662a
MW
182 If the LEXER's current lookahead token has type `wanted-token-type' then
183 consume it (using `next-token') and return its value. Otherwise, if the
dea4d055
MW
184 token doesn't have the requested type then signal a continuable error
185 describing the situation and return DEFAULT (which defaults to nil).
186
187 If ERRORP is false then no error is signalled; this is useful for
188 consuming or checking for optional punctuation. If CONSUMEP is false then
189 a matching token is not consumed; non-matching tokens are never consumed."
190
191 (with-slots (token-type token-value) lexer
192 (cond ((eql token-type wanted-token-type)
193 (prog1 token-value
194 (when consumep (next-token lexer))))
195 (errorp
196 (cerror* "Expected ~A but found ~A"
197 (format-token wanted-token-type)
198 (format-token token-type token-value))
199 default)
200 (t
201 default))))
202
203;;;----- That's all, folks --------------------------------------------------