Massive reorganization in progress.
[sod] / src / proto-lexer.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Protocol for lexical analysis
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensible Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Accessors.
30
(export 'lexer-char)
(defgeneric lexer-char (lexer)
  (:documentation
   "Answer the character which the LEXER is currently looking at.

   A freshly made lexer has no lookahead character at all, so NEXT-CHAR must
   be called once to `prime the pump' before this value means anything. End
   of input is reported as a lookahead character of nil; characters may
   still be pushed back after that has happened."))
40
(export '(token-type token-value))
(defgeneric token-type (lexer)
  (:documentation
   "Answer the type of the token which the LEXER is currently looking at.

   A freshly made lexer has no lookahead token, so NEXT-TOKEN must be called
   once to `prime the pump' before this value means anything."))
(defgeneric token-value (lexer)
  (:documentation
   "Answer the value of the token which the LEXER is currently looking at.

   A freshly made lexer has no lookahead token, so NEXT-TOKEN must be called
   once to `prime the pump' before this value means anything."))
54
55;;;--------------------------------------------------------------------------
56;;; Formatting tokens.
57
(defgeneric format-token (token-type &optional token-value)
  (:documentation
   "Describe the token with the given TOKEN-TYPE and TOKEN-VALUE as a string.

   Keyword token types are shown in lowercase between quotes, except that
   :EOF, :STRING, :CHAR and :ID have descriptions of their own (the last of
   these also showing TOKEN-VALUE when one is given). Character token types
   are shown between quotes if they are graphic and not a space, and
   otherwise by name between angle brackets.")

  ;; Punctuation and other single-character tokens.
  (:method ((token-type character) &optional token-value)
    (declare (ignore token-value))
    ;; A visible character is quoted literally; a space or non-graphic
    ;; character is spelled out by name so that it can be seen at all.
    (let ((visiblep (and (graphic-char-p token-type)
                         (char/= token-type #\space))))
      (format nil "~:[<~:C>~;`~C'~]" visiblep token-type)))

  ;; The general symbolic case: only keywords are acceptable here.
  (:method ((token-type symbol) &optional token-value)
    (declare (ignore token-value))
    (check-type token-type keyword)
    (format nil "`~(~A~)'" token-type))

  ;; Identifiers mention their name if the caller supplied it.
  (:method ((token-type (eql :id)) &optional token-value)
    (format nil "<identifier~@[ `~A'~]>" token-value))

  ;; Token types with fixed descriptions.
  (:method ((token-type (eql :char)) &optional token-value)
    (declare (ignore token-value))
    "<character-literal>")
  (:method ((token-type (eql :string)) &optional token-value)
    (declare (ignore token-value))
    "<string-literal>")
  (:method ((token-type (eql :eof)) &optional token-value)
    (declare (ignore token-value))
    "<end-of-file>"))
82
83;;;--------------------------------------------------------------------------
84;;; Reading and pushing back characters.
85
(export 'next-char)
(defgeneric next-char (lexer)
  (:documentation
   "Advance the LEXER to its next character, and return that character.

   The character is taken from the pushed-back characters if there are any,
   and from the input stream otherwise; either way it is stored in the
   LEXER's CHAR slot as the new lookahead character. Once there are no more
   characters to read, the lookahead character -- and hence the value
   returned -- is nil.

   (Lexer subclasses are the main intended callers of this function.)"))
98
(export 'pushback-char)
(defgeneric pushback-char (lexer char)
  (:documentation
   "Return CHAR to the LEXER so that it will be seen again.

   CHAR becomes the current lookahead character, held in the LEXER's CHAR
   slot. The lookahead character which it displaces is not lost: it is
   pushed down, and comes back as soon as NEXT-CHAR consumes CHAR.

   (Lexer subclasses are the main intended callers of this function.)"))
109
(defgeneric fixup-stream* (lexer thunk)
  (:documentation
   "Do the real work on behalf of the WITH-LEXER-STREAM macro.

   WITH-LEXER-STREAM expands into a call to this function. THUNK is a
   function of one argument; it is called with the STREAM underlying the
   LEXER."))
116
(export 'with-lexer-stream)
(defmacro with-lexer-stream ((streamvar lexer) &body body)
  "Run BODY with the LEXER's input stream bound to STREAMVAR.

   Before BODY runs, the STREAM is adjusted so that the first character read
   from it (e.g., by READ-CHAR) is the lexer's current lookahead character.
   After BODY finishes, the stream's next character is read and becomes the
   new lookahead character. The lexer must not have any pushed-back
   characters, since there is no proper way to return them to the input
   stream; it is an error if it does."

  ;; The work is done by FIXUP-STREAM*, which is handed BODY wrapped up as a
  ;; function of the stream.
  (list 'fixup-stream* lexer
        (list* 'lambda (list streamvar) body)))
129
130;;;--------------------------------------------------------------------------
131;;; Reading and pushing back tokens.
132
(export 'scan-token)
(defgeneric scan-token (lexer)
  (:documentation
   "Read one token from the input: the internal side of the token protocol.

   LEXER subclasses exist chiefly to provide a method on this function.
   Users do not call it themselves; they call NEXT-TOKEN, which calls this.

   A method consumes as many characters as it needs, using NEXT-CHAR, and
   returns a token type and a token value as two values. Both are then
   stored in the matching slots of the lexer object, which is what gives the
   user one token of lookahead."))
145
(export 'next-token)
(defgeneric next-token (lexer)
  (:documentation
   "Advance the LEXER to its next token.

   The new lookahead token is returned as two separate values, a `token
   type' and a `token value', and both remain available afterwards through
   the TOKEN-TYPE and TOKEN-VALUE accessors on the LEXER object. The LEXER
   base class treats both as opaque. The intended division of labour is
   that the token type is what matters for deciding the syntax of the input,
   while the token value holds whatever else is known about the token's
   semantic content.

   Tokens which have been pushed back (see PUSHBACK-TOKEN) are returned
   first, one at a time, instead of scanning the stream."))
164
(export 'pushback-token)
(defgeneric pushback-token (lexer token-type &optional token-value location)
  (:documentation
   "Return a token to the LEXER so that it will be seen again.

   The token described by TOKEN-TYPE and TOKEN-VALUE becomes the current
   lookahead token. The lookahead token which it displaces is not lost: it
   is pushed down, and comes back as soon as NEXT-TOKEN consumes the new
   one. A non-nil LOCATION causes FILE-LOCATION to be saved and LOCATION put
   in its place. No restriction is placed on TOKEN-TYPE and TOKEN-VALUE: in
   particular, they may be values which NEXT-TOKEN itself could never
   return."))
176
177;;;--------------------------------------------------------------------------
178;;; Utilities.
179
(export 'skip-spaces)
(defgeneric skip-spaces (lexer)
  (:documentation
   "Advance the LEXER past any whitespace characters.

   The lexer must already have a lookahead character. On return, the
   lookahead character is either not whitespace, or nil because no
   non-whitespace characters remained. The new lookahead character is
   returned."))
189
(export 'require-token)
(defun require-token
    (lexer wanted-token-type &key (errorp t) (consumep t) default)
  "Check that the LEXER's lookahead token has type WANTED-TOKEN-TYPE.

   On a match, the token's value is returned, and the token is consumed
   (using NEXT-TOKEN) unless CONSUMEP is false. On a mismatch, DEFAULT
   (nil unless specified) is returned and the token is left alone;
   additionally, unless ERRORP is false, a continuable error is first
   signalled to report what was expected and what was found.

   Passing a false ERRORP is handy for optional punctuation, which can be
   consumed or merely checked for without complaint if it is missing."

  (with-slots ((found-type token-type) (found-value token-value)) lexer
    (if (eql found-type wanted-token-type)

        ;; A match. Capture the value before advancing, since NEXT-TOKEN
        ;; replaces the lookahead token.
        (let ((value found-value))
          (when consumep (next-token lexer))
          value)

        ;; A mismatch. Complain if asked to, and fall back to DEFAULT
        ;; without consuming anything.
        (progn
          (when errorp
            (cerror* "Expected ~A but found ~A"
                     (format-token wanted-token-type)
                     (format-token found-type found-value)))
          default))))
215
216;;;----- That's all, folks --------------------------------------------------