Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Protocol for parsing. | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
e0808c47 | 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
dea4d055 MW |
11 | ;;; |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | ;;;-------------------------------------------------------------------------- | |
27 | ;;; Parser protocol discussion. | |
28 | ;;; | |
29 | ;;; Other languages, notably Haskell and ML, have `parser combinator | |
30 | ;;; libraries', which allow one to construct recursive descent parsers using | |
31 | ;;; approximately pleasant syntax. While attempts have been made to | |
32 | ;;; introduce the benefits of these libraries to Lisp, they've not been | |
33 | ;;; altogether successful; this seems due to Lisp's lack of features such as | |
34 | ;;; pattern matching, currying and lazy evaluation. Rather than fight with | |
35 | ;;; Lisp's weaknesses, this library plays to its strength, making heavy use | |
36 | ;;; of macros. Effectively, the `combinators' we build here are /compile- | |
37 | ;;; time/ combinators, not run-time ones. | |
38 | ;;; | |
39 | ;;; A `parser' is simply an expression which returns three values. | |
40 | ;;; | |
41 | ;;; * If the second value is nil, then the parser is said to have /failed/, | |
42 | ;;; and the first value is a list describing the things that the parser | |
43 | ;;; expected to find but didn't. (The precise details of the list items | |
44 | ;;; are important to error-reporting functions, but not to the low-level | |
45 | ;;; machinery, and are left up to higher-level protocols to nail down | |
46 | ;;; harder.) | |
47 | ;;; | |
48 | ;;; * If the second value is not nil, then the parser is said to have | |
49 | ;;; /succeeded/, and the first value is its /result/. | |
50 | ;;; | |
51 | ;;; * The third value indicates whether the parser consumed any of its | |
52 | ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and | |
53 | ;;; bad performance), so the `consumedp' return value is used to decide | |
54 | ;;; whether the parser has `committed' to a particular branch. If the | |
55 | ;;; parser context supports place-capture (many do) then `peek' can be | |
56 | ;;; used to suppress consumption of input in the case of parser failure. | |
57 | ;;; | |
58 | ;;; The functions and macros here are simply ways of gluing together | |
59 | ;;; expressions which obey this protocol. | |
60 | ;;; | |
bf090e02 | 61 | ;;; The main contribution of this file is a macro `with-parser-context' which |
dea4d055 | 62 | ;;; embeds a parsing-specific S-expressions language entered using the new |
bf090e02 MW |
63 | ;;; macro `parse'. The behaviour of this macro is controlled by a pair of |
64 | ;;; compile-time generic functions `expand-parser-spec' and | |
65 | ;;; `expand-parser-form'. As well as the parser expression they're meant to | |
66 | ;;; process, these functions dispatch on a `context' argument, which is | |
67 | ;;; intended to help `leaf' parsers find the terminal symbols which they're | |
68 | ;;; meant to process. | |
dea4d055 | 69 | ;;; |
bf090e02 MW |
70 | ;;; Note that the context is a compile-time object, constructed by the |
71 | ;;; `parse' macro expansion function, though the idea is that it will contain | |
72 | ;;; the name or names of variables holding the run-time parser state (which | |
73 | ;;; will typically be a lexical analyser or an input stream or suchlike). | |
dea4d055 MW |
74 | |
75 | (cl:in-package #:sod-parser) | |
76 | ||
77 | ;;;-------------------------------------------------------------------------- | |
78 | ;;; Utilities. | |
79 | ||
348f3bfe | 80 | (export 'combine-parser-failures) |
dea4d055 MW |
81 | (defun combine-parser-failures (failures) |
82 | "Combine the failure indicators listed in FAILURES. | |
83 | ||
84 | (Note that this means that FAILURES is a list of lists.)" | |
85 | ||
86 | (reduce (lambda (f ff) (union f ff :test #'equal)) | |
87 | failures | |
88 | :initial-value nil)) | |
89 | ||
da901cf2 MW |
90 | (export 'parse-empty) |
91 | (defun parse-empty (&optional value) | |
92 | "Return a parser which parses nothing, successfully. | |
93 | ||
94 | The parser returns VALUE and consumes nothing." | |
95 | (lambda () (values value t nil))) | |
96 | ||
97 | (export 'parse-fail) | |
98 | (defun parse-fail (indicator &optional consumedp) | |
99 | "Return a parser which fails. | |
100 | ||
101 | The parser reports the INDICATOR and (falsely) claims to have consumed | |
102 | input if CONSUMEDP is true." | |
103 | (lambda () (values indicator nil consumedp))) | |
104 | ||
dea4d055 MW |
105 | ;;;-------------------------------------------------------------------------- |
106 | ;;; Basic protocol. | |
107 | ||
108 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
109 | ||
110 | (export 'expand-parser-spec) | |
111 | (defgeneric expand-parser-spec (context spec) | |
112 | (:documentation | |
113 | "Expand a parser specifier SPEC in a particular parser CONTEXT.") | |
114 | (:method (context (spec list)) | |
115 | (expand-parser-form context (car spec) (cdr spec)))) | |
116 | ||
117 | (export 'expand-parser-form) | |
118 | (defgeneric expand-parser-form (context head tail) | |
119 | (:documentation | |
120 | "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.") | |
121 | (:method (context head tail) | |
1d8cc67a | 122 | (declare (ignore context)) |
dea4d055 MW |
123 | (cons head tail))) |
124 | ||
125 | (export 'wrap-parser) | |
126 | (defgeneric wrap-parser (context form) | |
127 | (:documentation | |
128 | "Enclose FORM in whatever is necessary to make the parser work.") | |
1d8cc67a MW |
129 | (:method (context form) |
130 | (declare (ignore context)) | |
131 | form))) | |
dea4d055 MW |
132 | |
133 | (export 'defparse) | |
134 | (defmacro defparse (name bvl &body body) | |
135 | "Define a new parser form. | |
136 | ||
137 | The full syntax is hairier than it looks: | |
138 | ||
139 | defparse NAME ( [[ :context (CTX SPEC) ]] . BVL ) | |
140 | { FORM }* | |
141 | ||
142 | The macro defines a new parser form (NAME ...) which is expanded by the | |
143 | body FORMs. The BVL is a destructuring lambda-list to be applied to the | |
144 | tail of the form. The body forms are enclosed in a block called NAME. | |
145 | ||
dea4d055 MW |
146 | If the :context key is provided, then the parser form is specialized on a |
147 | particular class of parser contexts SPEC; specialized expanders take | |
148 | priority over less specialized or unspecialized expanders -- so you can | |
149 | use this to override the built-in forms safely if they don't seem to be | |
150 | doing the right thing for you. Also, the context -- which is probably | |
151 | interesting to you if you've bothered to specialize -- is bound to the | |
152 | variable CTX." | |
153 | ||
154 | ;; BUG! misplaces declarations: if you declare the CONTEXT argument | |
155 | ;; `special' it won't be bound properly. I'm really not at all sure I know | |
156 | ;; how to fix this. | |
157 | ||
158 | (with-gensyms (head tail context) | |
159 | (let ((ctxclass t)) | |
160 | (loop | |
161 | (unless (and bvl (keywordp (car bvl))) (return)) | |
162 | (ecase (pop bvl) | |
163 | (:context (destructuring-bind (name spec) (pop bvl) | |
164 | (setf ctxclass spec context name))))) | |
165 | (multiple-value-bind (doc decls body) (parse-body body) | |
166 | `(defmethod expand-parser-form | |
167 | ((,context ,ctxclass) (,head (eql ',name)) ,tail) | |
168 | ,@doc | |
1d8cc67a | 169 | (declare (ignorable ,context)) |
fc09e191 MW |
170 | (destructuring-bind ,bvl ,tail |
171 | ,@decls | |
172 | (block ,name ,@body))))))) | |
dea4d055 MW |
173 | |
174 | (export '(with-parser-context parse)) | |
175 | (defmacro with-parser-context ((class &rest initargs) &body body) | |
176 | "Evaluate BODY with a macro `parse' which expands parser forms. | |
177 | ||
178 | Evaluate BODY as an implicit progn. At compile time, a parser context is | |
179 | constructed by (apply #'make-instance CLASS INITARGS). The BODY can make | |
180 | use of the macro `parse': | |
181 | ||
182 | parse SPEC | |
183 | ||
184 | which parses the input in the manner described by SPEC, in the context of | |
185 | the parser context." | |
186 | ||
187 | (let ((context (apply #'make-instance class initargs))) | |
188 | (wrap-parser context | |
189 | `(macrolet ((parse (form) | |
190 | (expand-parser-spec ',context form))) | |
191 | ,@body)))) | |
192 | ||
193 | ;;;-------------------------------------------------------------------------- | |
194 | ;;; Common parser context protocol. | |
195 | ||
196 | (export 'parser-at-eof-p) | |
197 | (defgeneric parser-at-eof-p (context) | |
198 | (:documentation | |
199 | "Return whether the parser has reached the end of its input. | |
200 | ||
201 | Be careful: all of this is happening at macro expansion time.")) | |
202 | ||
203 | (export 'parser-step) | |
204 | (defgeneric parser-step (context) | |
205 | (:documentation | |
206 | "Advance the parser to the next character. | |
207 | ||
208 | Be careful: all of this is happening at macro-expansion time.")) | |
209 | ||
210 | (defmethod expand-parser-spec (context (spec (eql :eof))) | |
211 | "Tests succeeds if the parser has reached the end of its input. | |
212 | ||
213 | The failure indicator is the keyword `:eof'." | |
214 | ||
215 | `(if ,(parser-at-eof-p context) | |
216 | (values :eof t nil) | |
217 | (values '(:eof) nil nil))) | |
218 | ||
219 | ;;;-------------------------------------------------------------------------- | |
220 | ;;; Useful macros for dealing with parsers. | |
221 | ||
222 | (export 'it) | |
223 | (export 'if-parse) | |
224 | (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) | |
225 | parser consequent &optional (alternative nil altp)) | |
226 | "Conditional parsing construction. | |
227 | ||
228 | If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the | |
229 | result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the | |
230 | expected-item list. If ALTERNATIVE is omitted, then propagate the failure | |
231 | following the parser protocol." | |
232 | ||
233 | (with-gensyms (value win) | |
234 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
235 | (declare (ignorable ,consumedp)) | |
236 | (if ,win | |
237 | (let ((,result ,value)) | |
238 | (declare (ignorable ,result)) | |
239 | ,consequent) | |
240 | ,(cond ((not altp) | |
241 | `(values ,value nil ,consumedp)) | |
242 | (expected | |
243 | `(let ((,expected ,value)) ,alternative)) | |
244 | (t | |
245 | alternative)))))) | |
246 | ||
247 | (export 'when-parse) | |
248 | (defmacro when-parse ((&optional (result 'it)) parser &body body) | |
249 | "Convenience macro for conditional parsing. | |
250 | ||
251 | If PARSER succeeds then evaluate BODY with RESULT bound to the result; | |
252 | otherwise propagate the failure." | |
253 | `(if-parse (:result ,result) ,parser (progn ,@body))) | |
254 | ||
255 | (export 'cond-parse) | |
256 | (defmacro cond-parse ((&key (result 'it) expected | |
257 | (consumedp (gensym "CP"))) | |
258 | &body clauses) | |
259 | "Frightening conditional parsing construct. | |
260 | ||
261 | Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake' | |
262 | parser form `t' may be used to describe a default action. If the PARSER | |
263 | succeeds then evaluate FORMS in order with RESULT bound to the parser | |
264 | result (if there are no forms, then propagate the success); if the PARSER | |
265 | fails without consuming input, then move onto the next clause. | |
266 | ||
267 | If the default clause (if any) is reached, or a parser fails after | |
268 | consuming input, then EXPECTED is bound to a list of failure indicators | |
269 | and the default clause's FORMS are evaluated and with CONSUMEDP bound to a | |
270 | generalized boolean indicating whether any input was consumed. If there | |
271 | is no default clause, and either some parser fails after consuming input, | |
272 | or all of the parsers fail without consuming, then a failure is reported | |
273 | and the input-consumption indicator is propagated. | |
274 | ||
275 | If a parser fails after consuming input, then the failure indicators are | |
276 | whatever that parser reported; if all the parsers fail without consuming | |
277 | then the failure indicators are the union of the indicators reported by | |
278 | the various parsers." | |
279 | ||
280 | (with-gensyms (block fail failarg) | |
281 | (labels ((walk (clauses failures) | |
282 | (cond ((null clauses) | |
283 | (values `(,fail nil (list ,@(reverse failures))) | |
284 | `(values (combine-parser-failures ,failarg) | |
285 | nil | |
286 | ,consumedp))) | |
287 | ((eq (caar clauses) t) | |
288 | (values `(,fail nil (list ,@(reverse failures))) | |
289 | `(,@(if expected | |
290 | `(let ((,expected | |
291 | (combine-parser-failures | |
292 | ,failarg)))) | |
293 | `(progn)) | |
294 | ,@(cdar clauses)))) | |
295 | (t | |
296 | (with-gensyms (value win cp) | |
297 | (multiple-value-bind (inner failbody) | |
298 | (walk (cdr clauses) (cons value failures)) | |
299 | (values `(multiple-value-bind (,value ,win ,cp) | |
300 | (parse ,(caar clauses)) | |
301 | (when ,win | |
302 | (return-from ,block | |
303 | (let ((,result ,value) | |
304 | (,consumedp ,cp)) | |
305 | (declare (ignorable ,result | |
306 | ,consumedp)) | |
307 | ,@(cdar clauses)))) | |
308 | (when ,cp | |
309 | (,fail t (list ,value))) | |
310 | ,inner) | |
311 | failbody))))))) | |
312 | (multiple-value-bind (inner failbody) (walk clauses nil) | |
313 | `(block ,block | |
314 | (flet ((,fail (,consumedp ,failarg) | |
315 | (declare (ignorable ,consumedp ,failarg)) | |
316 | ,failbody)) | |
317 | ,inner)))))) | |
318 | ||
319 | (export 'parser) | |
320 | (defmacro parser (bvl &body parser) | |
321 | "Functional abstraction for parsers." | |
322 | (multiple-value-bind (doc decls body) (parse-body parser) | |
323 | `(lambda ,bvl ,@doc ,@decls (parse ,@body)))) | |
324 | ||
325 | ;;;-------------------------------------------------------------------------- | |
326 | ;;; Standard parser forms. | |
327 | ||
328 | (export 'label) | |
329 | (defparse label (label parser) | |
330 | "If PARSER fails, use LABEL as the expected outcome. | |
331 | ||
332 | The LABEL is only evaluated if necessary." | |
333 | (with-gensyms (value win consumedp) | |
334 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
335 | (if ,win | |
336 | (values ,value ,win ,consumedp) | |
337 | (values (list ,label) nil ,consumedp))))) | |
338 | ||
339 | (defparse t (value) | |
340 | "Succeed, without consuming input, with result VALUE." | |
341 | `(values ,value t nil)) | |
342 | ||
bf090e02 MW |
343 | (defparse nil (indicator) |
344 | "Fail, without consuming input, with indicator VALUE." | |
345 | `(values (list ,indicator) nil nil)) | |
346 | ||
dea4d055 MW |
347 | (defparse when (cond &body parser) |
348 | "If CONDITION is true, then match PARSER; otherwise fail." | |
349 | `(if ,cond (parse ,@parser) (values nil nil nil))) | |
350 | ||
351 | (defmethod expand-parser-spec (context (spec (eql t))) | |
352 | "Always matches without consuming input." | |
1d8cc67a | 353 | (declare (ignore context)) |
dea4d055 MW |
354 | '(values t t nil)) |
355 | ||
bf090e02 MW |
356 | (defmethod expand-parser-spec (context (spec (eql nil))) |
357 | "Always fails without consuming input. The failure indicator is `:fail'." | |
1d8cc67a | 358 | (declare (ignore context)) |
bf090e02 MW |
359 | '(values '(:fail) nil nil)) |
360 | ||
dea4d055 MW |
361 | (export 'seq) |
362 | (defparse seq (binds &body body) | |
363 | "Parse a sequence of heterogeneous items. | |
364 | ||
365 | Syntax: | |
366 | ||
367 | seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* ) | |
368 | { FORM }* | |
369 | ||
370 | The behaviour is similar to `let*'. The PARSER-FORMs are processed in | |
371 | order, left to right. If a parser succeeds, then its value is bound to | |
372 | the corresponding VAR, and available within Lisp forms enclosed within | |
373 | subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then | |
374 | the entire sequence fails. If all of the parsers succeeds, then the FORMs | |
375 | are evaluated as an implicit progn, and the sequence will succeed with the | |
376 | result computed by the final FORM." | |
377 | ||
378 | (with-gensyms (block consumedp) | |
379 | (labels ((walk (binds lets ignores) | |
380 | (if (endp binds) | |
381 | `(let* ((,consumedp nil) | |
382 | ,@(nreverse lets)) | |
383 | ,@(and ignores | |
384 | `((declare (ignore ,@(nreverse ignores))))) | |
385 | (values (progn ,@body) t ,consumedp)) | |
386 | (destructuring-bind (x &optional (y nil yp)) | |
387 | (if (listp (car binds)) | |
388 | (car binds) | |
389 | (list (car binds))) | |
390 | (with-gensyms (var value win cp) | |
391 | (multiple-value-bind (var parser ignores) | |
392 | (if (and yp x) | |
393 | (values x y ignores) | |
394 | (values var (if yp y x) (cons var ignores))) | |
395 | (walk (cdr binds) | |
396 | (cons `(,var (multiple-value-bind | |
397 | (,value ,win ,cp) | |
398 | (parse ,parser) | |
399 | (when ,cp (setf ,consumedp t)) | |
400 | (unless ,win | |
401 | (return-from ,block | |
402 | (values ,value ,nil | |
403 | ,consumedp))) | |
404 | ,value)) | |
405 | lets) | |
406 | ignores))))))) | |
407 | `(block ,block ,(walk binds nil nil))))) | |
408 | ||
409 | (export 'and) | |
410 | (defparse and (:context (context t) &rest parsers) | |
411 | "Parse a sequence of heterogeneous items, but ignore their values. | |
412 | ||
413 | This is just like (and is implemented using) `seq' with all the bindings | |
414 | set to `nil'. The result is `nil'." | |
415 | (with-gensyms (last) | |
416 | (if (null parsers) | |
417 | '(seq () nil) | |
418 | (expand-parser-spec context | |
419 | `(seq (,@(mapcar (lambda (parser) | |
420 | `(nil ,parser)) | |
421 | (butlast parsers)) | |
422 | (,last ,(car (last parsers)))) | |
423 | ,last))))) | |
424 | ||
425 | (export 'lisp) | |
426 | (defparse lisp (&rest forms) | |
427 | "Evaluate FORMs, which should obey the parser protocol." | |
428 | `(progn ,@forms)) | |
429 | ||
430 | (export 'many) | |
431 | (defparse many ((acc init update | |
432 | &key (new 'it) (final acc) (min nil minp) max (commitp t)) | |
433 | parser &optional (sep nil sepp)) | |
434 | "Parse a sequence of homogeneous items. | |
435 | ||
436 | The behaviour is similar to `do'. Initially an accumulator ACC is | |
437 | established, and bound to the value of INIT. The PARSER is then evaluated | |
438 | repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults | |
439 | to `it') bound to the result of the parse, and the value returned by | |
440 | UPDATE is stored back into ACC. If the PARSER fails, then the parse | |
441 | ends. The scope of ACC includes the UPDATE and FINAL forms, and the | |
442 | PARSER and SEP parsers; it is updated by side effects, not rebound. | |
443 | ||
444 | If a SEP parser is provided, then the behaviour changes as follows. | |
445 | Before each attempt to parse a new item using PARSER, the parser SEP is | |
446 | invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP | |
447 | is true, then the PARSER must also succeed or the overall parse will | |
448 | fail. If COMMITP is false then a trailing SEP is permitted and ignored. | |
449 | ||
450 | If MAX (which will be evaluated) is not nil, then it must be a number: the | |
451 | parse ends automatically after PARSER has succeeded MAX times. When the | |
452 | parse has ended, if the PARSER succeeded fewer than MIN (which will be | |
453 | evaluated) times then the parse fails. Otherwise, the FINAL form (which | |
454 | defaults to simply returning ACC) is evaluated and its value becomes the | |
455 | result of the parse. MAX defaults to nil -- i.e., no maximum; MIN | |
456 | defaults to 1 if a SEP parser is given, or 0 if not. | |
457 | ||
458 | Note that `many' cannot fail if MIN is zero." | |
459 | ||
460 | ;; Once upon a time, this was a macro of almost infinite hairiness which | |
461 | ;; tried to do everything itself, including inspecting its arguments for | |
462 | ;; constant-ness to decide whether it could elide bits of code. This | |
463 | ;; became unsustainable. Nowadays, it packages up its parser arguments | |
464 | ;; into functions and calls some primitive functions to do the heavy | |
465 | ;; lifting. | |
466 | ;; | |
467 | ;; The precise protocol between this macro and the backend functions is | |
468 | ;; subject to change: don't rely on it. | |
469 | ||
470 | (let* ((accvar (or acc (gensym "ACC-"))) | |
471 | (func (if sepp '%many-sep '%many))) | |
472 | `(let ((,accvar ,init)) | |
473 | (declare (ignorable ,accvar)) | |
474 | (,func (lambda (,new) | |
475 | (declare (ignorable ,new)) | |
476 | (setf ,accvar ,update)) | |
048d0b2d | 477 | (lambda () ,final) |
dea4d055 MW |
478 | (parser () ,parser) |
479 | ,@(and sepp (list `(parser () ,sep))) | |
480 | ,@(and minp `(:min ,min)) | |
481 | ,@(and max `(:max ,max)) | |
482 | ,@(and (not (eq commitp t)) `(:commitp ,commitp)))))) | |
483 | ||
484 | (export 'list) | |
485 | (defparse list ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 486 | "Like `many', but simply returns a list of the parser results." |
dea4d055 MW |
487 | (with-gensyms (acc) |
488 | `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys) | |
489 | ,parser ,@(and sepp (list sep)))))) | |
490 | ||
491 | (export 'skip-many) | |
492 | (defparse skip-many ((&rest keys) parser &optional (sep nil sepp)) | |
3109662a | 493 | "Like `many', but ignores the results." |
dea4d055 MW |
494 | `(parse (many (nil nil nil ,@keys) |
495 | ,parser ,@(and sepp (list sep))))) | |
496 | ||
497 | (export 'or) | |
498 | (defparse or (&rest parsers) | |
499 | "Try a number of alternative parsers. | |
500 | ||
501 | Each of the PARSERS in turn is tried. If any succeeds, then its result | |
502 | becomes the result of the parse. If any parser fails after consuming | |
503 | input, or if all of the parsers fail, then the overall parse fails, with | |
504 | the union of the expected items from the individual parses." | |
505 | ||
506 | (with-gensyms (fail cp failarg) | |
507 | (labels ((walk (parsers failures) | |
508 | (if (null parsers) | |
509 | `(,fail nil (list ,@(reverse failures))) | |
510 | (with-gensyms (value win consumedp) | |
511 | `(multiple-value-bind (,value ,win ,consumedp) | |
512 | (parse ,(car parsers)) | |
513 | (cond (,win | |
514 | (values ,value ,win ,consumedp)) | |
515 | (,consumedp | |
516 | (,fail t (list ,value))) | |
517 | (t | |
518 | ,(walk (cdr parsers) | |
519 | (cons value failures))))))))) | |
520 | `(flet ((,fail (,cp ,failarg) | |
521 | (values (combine-parser-failures ,failarg) nil ,cp))) | |
522 | ,(walk parsers nil))))) | |
523 | ||
524 | (export '?) | |
525 | (defparse ? (parser &optional (value nil)) | |
526 | "Matches PARSER or nothing; fails if PARSER fails after consuming input." | |
527 | `(parse (or ,parser (t ,value)))) | |
528 | ||
529 | ;;;-------------------------------------------------------------------------- | |
530 | ;;; Pluggable parsers. | |
531 | ||
532 | (export 'call-pluggable-parser) | |
533 | (defun call-pluggable-parser (symbol &rest args) | |
534 | "Call the pluggable parser denoted by SYMBOL. | |
535 | ||
536 | A `pluggable parser' is an indirection point at which a number of | |
537 | alternative parsers can be attached dynamically. The parsers are tried in | |
538 | some arbitrary order, so one should be careful to avoid ambiguities; each | |
539 | is paseed the given ARGS. | |
540 | ||
541 | If any parser succeeds then it determines the result; if any parser fails | |
542 | having consumed input then the pluggable parser fails immediately. If all | |
543 | of the parsers fail without consuming input then the pluggable parser | |
544 | fails with the union of the individual failure indicators." | |
545 | ||
546 | (let ((expected nil)) | |
547 | (dolist (item (get symbol 'parser)) | |
548 | (multiple-value-bind (value winp consumedp) (apply (cdr item) args) | |
549 | (when (or winp consumedp) | |
550 | (return-from call-pluggable-parser (values value winp consumedp))) | |
551 | (push value expected))) | |
552 | (values (combine-parser-failures expected) nil nil))) | |
553 | ||
554 | (export 'plug) | |
555 | (defparse plug (symbol &rest args) | |
556 | "Call the pluggable parser denoted by SYMBOL. | |
557 | ||
558 | This is just like the function `call-pluggable-parser', but the SYMBOL is | |
559 | not evaluated." | |
560 | `(call-pluggable-parser ',symbol ,@args)) | |
561 | ||
562 | (export 'pluggable-parser-add) | |
563 | (defun pluggable-parser-add (symbol tag parser) | |
564 | "Adds an element to a pluggable parser. | |
565 | ||
566 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
567 | comparable object which identifies the element. The PARSER is a parser | |
568 | function; it will be passed arguments via `pluggable-parser'. | |
569 | ||
570 | If a parser with the given TAG is already attached to SYMBOL then the new | |
571 | parser replaces the old one; otherwise it is added to the collection." | |
572 | ||
573 | (let ((alist (get symbol 'parser))) | |
574 | (aif (assoc tag alist) | |
575 | (setf (cdr it) parser) | |
576 | (setf (get symbol 'parser) (acons tag parser alist))))) | |
577 | ||
578 | (export 'define-pluggable-parser) | |
579 | (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body) | |
580 | "Adds an element to a pluggable parser. | |
581 | ||
582 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- | |
583 | comparable object which identifies the element. Neither SYMBOL nor TAG is | |
584 | evaluated. The BODY is a parser expression; the BVL is a lambda list | |
bf090e02 | 585 | describing how to bind the arguments supplied via `pluggable-parser'. |
dea4d055 MW |
586 | |
587 | If a parser with the given TAG is already attached to SYMBOL then the new | |
588 | parser replaces the old one; otherwise it is added to the collection." | |
589 | ||
fc09e191 MW |
590 | (multiple-value-bind (docs decls body) (parse-body body) |
591 | `(pluggable-parser-add ',symbol ',tag | |
592 | (lambda ,bvl | |
593 | ,@docs ,@decls | |
594 | (block ,symbol ,@body))))) | |
dea4d055 MW |
595 | |
596 | ;;;-------------------------------------------------------------------------- | |
597 | ;;; Rewindable parser context protocol. | |
598 | ||
599 | (eval-when (:compile-toplevel :load-toplevel :execute) | |
600 | ||
601 | (export 'parser-capture-place) | |
602 | (defgeneric parser-capture-place (context) | |
603 | (:documentation | |
604 | "Capture the current position of a parser CONTEXT. | |
605 | ||
606 | The return value may later be used with `parser-restore-place'. Be | |
607 | careful: all of this is happening at macro-expansion time.") | |
608 | (:method (context) | |
a1985b3c | 609 | (error "Parser context ~S doesn't support rewinding" context))) |
dea4d055 MW |
610 | |
611 | (export 'parser-restore-place) | |
612 | (defgeneric parser-restore-place (context place) | |
613 | (:documentation | |
614 | "`Rewind' the parser CONTEXT back to the captured PLACE. | |
615 | ||
616 | The place was previously captured by `parser-capture-place'. Be careful: | |
617 | all of this is happening at macro-expansion time.")) | |
618 | ||
619 | (export 'parser-release-place) | |
620 | (defgeneric parser-release-place (context place) | |
621 | (:documentation | |
622 | "Release a PLACE captured from the parser CONTEXT. | |
623 | ||
624 | The place was previously captured by `parser-capture-place'. The | |
625 | underlying scanner can use this call to determine whether there are | |
626 | outstanding captured places, and thereby optimize its behaviour. Be | |
627 | careful: all of this is happening at macro-expansion time.") | |
1d8cc67a MW |
628 | (:method (context place) |
629 | (declare (ignore context place)) | |
630 | nil)) | |
dea4d055 MW |
631 | |
632 | (export 'parser-places-must-be-released-p) | |
633 | (defgeneric parser-places-must-be-released-p (context) | |
634 | (:documentation | |
635 | "Answer whether places captured from the parser CONTEXT need releasing. | |
636 | ||
637 | Some contexts -- well, actually, their run-time counterparts -- work | |
638 | better if they can keep track of which places are captured, or at least if | |
639 | there are captured places outstanding. If this function returns true | |
640 | (which is the default) then `with-parser-place' (and hence parser macros | |
641 | such as `peek') will expand to `unwind-protect' forms in order to perform | |
642 | the correct cleanup. If it returns false, then the `unwind-protect' is | |
643 | omitted so that the runtime code does't have to register cleanup | |
644 | handlers.") | |
1d8cc67a MW |
645 | (:method (context) |
646 | (declare (ignore context)) | |
647 | t))) | |
dea4d055 MW |
648 | |
649 | (export 'with-parser-place) | |
650 | (defmacro with-parser-place ((place context) &body body) | |
651 | "Evaluate BODY surrounded with a binding of PLACE to a captured place. | |
652 | ||
653 | The surrounding code will release the PLACE properly on exit from the body | |
654 | forms. This is all happening at macro-expansion time." | |
655 | ;; ... which means that it's a bit hairy. Fortunately, the nested | |
656 | ;; backquotes aren't that bad. | |
657 | (once-only (context) | |
658 | (with-gensyms (bodyfunc) | |
659 | `(with-gensyms (,place) | |
660 | (flet ((,bodyfunc () ,@body)) | |
661 | `(let ((,,place ,(parser-capture-place ,context))) | |
662 | ,(if (parser-places-must-be-released-p ,context) | |
663 | `(unwind-protect ,(,bodyfunc) | |
2b8759bf MW |
664 | (when ,,place |
665 | ,(parser-release-place ,context ,place))) | |
dea4d055 MW |
666 | (,bodyfunc)))))))) |
667 | ||
668 | (export 'peek) | |
669 | (defparse peek (:context (context t) parser) | |
670 | "Attempt to run PARSER, but rewind the underlying source if it fails." | |
671 | (with-gensyms (value win consumedp) | |
672 | (with-parser-place (place context) | |
2b8759bf MW |
673 | `(macrolet ((commit-peeked-place () |
674 | `(progn | |
675 | ,',(parser-release-place context place) | |
676 | (setf ,',place nil)))) | |
677 | (multiple-value-bind (,value ,win ,consumedp) (parse ,parser) | |
678 | (cond ((or ,win (null ,place)) | |
679 | (values ,value ,win ,consumedp)) | |
680 | (t | |
681 | ,(parser-restore-place context place) | |
682 | (values ,value ,win nil)))))))) | |
683 | ||
684 | (defun commit-peeked-place () | |
685 | "Called by `commit' not lexically within `peek'." | |
a1985b3c | 686 | (error "`commit' is not within `peek'")) |
2b8759bf MW |
687 | |
688 | (export 'commit) | |
689 | (defparse commit () | |
690 | "Commit to the current parse. | |
691 | ||
692 | This releases the place captured by the innermost lexically enclosing | |
693 | `peek'." | |
694 | '(progn | |
695 | (commit-peeked-place) | |
696 | (values nil t nil))) | |
dea4d055 MW |
697 | |
698 | ;;;-------------------------------------------------------------------------- | |
699 | ;;; Character parser context protocol. | |
700 | ||
701 | (export 'character-parser-context) | |
702 | (defclass character-parser-context () | |
703 | () | |
704 | (:documentation | |
705 | "Base class for parsers which read one character at a time.")) | |
706 | ||
707 | (export 'parser-current-char) | |
708 | (defgeneric parser-current-char (context) | |
709 | (:documentation | |
710 | "Return the parser's current character. | |
711 | ||
712 | It is an error to invoke this operation if the parser is at end-of-file; | |
713 | you must check this first. Be careful: all of this is happening at | |
714 | macro-expansion time.")) | |
715 | ||
bf090e02 | 716 | (export 'if-char) |
dea4d055 MW |
717 | (defparse if-char (:context (context character-parser-context) |
718 | (&optional (char 'it)) condition consequent alternative) | |
719 | "Basic character-testing parser. | |
720 | ||
721 | If there is a current character, bind it to CHAR and evaluate the | |
bf090e02 MW |
722 | CONDITION; if that is true, then evaluate CONSEQUENT and step the parser |
723 | (in that order); otherwise, if either we're at EOF or the CONDITION | |
724 | returns false, evaluate ALTERNATIVE. The result of `if-char' are the | |
725 | values returned by CONSEQUENT or ALTERNATIVE." | |
dea4d055 MW |
726 | |
727 | (with-gensyms (block) | |
728 | `(block ,block | |
729 | (unless ,(parser-at-eof-p context) | |
730 | (let ((,char ,(parser-current-char context))) | |
731 | (when ,condition | |
bf090e02 MW |
732 | (return-from ,block |
733 | (multiple-value-prog1 ,consequent | |
734 | ,(parser-step context)))))) | |
dea4d055 MW |
735 | ,alternative))) |
736 | ||
737 | (defmethod expand-parser-spec | |
738 | ((context character-parser-context) (spec (eql :any))) | |
739 | "Matches any character; result is the character. | |
740 | ||
741 | The failure indicator is the keyword `:any'." | |
742 | (expand-parser-spec context | |
743 | '(if-char () t | |
744 | (values it t t) | |
745 | (values '(:any) nil nil)))) | |
746 | ||
747 | (export 'char) | |
748 | (defparse char (:context (context character-parser-context) char) | |
749 | "Matches the character CHAR (evaluated); result is the character. | |
750 | ||
751 | The failure indicator is CHAR." | |
752 | ||
753 | (once-only (char) | |
754 | (with-gensyms (it) | |
755 | (expand-parser-spec context | |
756 | `(if-char (,it) (char= ,it ,char) | |
757 | (values ,it t t) | |
758 | (values (list ,char) nil nil)))))) | |
759 | ||
760 | (defmethod expand-parser-spec | |
761 | ((context character-parser-context) (char character)) | |
762 | (expand-parser-spec context `(char ,char))) | |
763 | ||
764 | (export 'satisfies) | |
765 | (defparse satisfies (:context (context character-parser-context) predicate) | |
766 | "Matches a character that satisfies the PREDICATE | |
767 | ||
768 | The PREDICATE is a function designator. On success, the result is the | |
769 | character. The failure indicator is PREDICATE; you probably want to apply | |
770 | a `label'." | |
771 | ||
772 | (with-gensyms (it) | |
773 | (expand-parser-spec context | |
774 | `(if-char (,it) (,predicate ,it) | |
775 | (values ,it t t) | |
776 | (values '(,predicate) nil nil))))) | |
777 | ||
778 | (export 'not) | |
779 | (defparse not (:context (context character-parser-context) char) | |
780 | "Matches any character other than CHAR; result is the character. | |
781 | ||
782 | The failure indicator is (not CHAR)." | |
783 | ||
784 | (once-only (char) | |
785 | (with-gensyms (it) | |
786 | (expand-parser-spec context | |
787 | `(if-char (,it) (char/= ,it ,char) | |
788 | (values ,it t t) | |
789 | (values `((not ,,char)) nil nil)))))) | |
790 | ||
791 | (export 'filter) | |
792 | (defparse filter (:context (context character-parser-context) predicate) | |
793 | "Matches a character that satisfies the PREDICATE; result is the output of | |
794 | PREDICATE. | |
795 | ||
796 | The failure indicator is PREDICATE; you probably want to apply a `label'." | |
797 | ||
798 | ;; Can't do this one with `if-char'. | |
799 | (with-gensyms (block value) | |
800 | `(block ,block | |
801 | (unless ,(parser-at-eof-p context) | |
802 | (let ((,value (,predicate ,(parser-current-char context)))) | |
803 | (when ,value | |
804 | ,(parser-step context) | |
805 | (return-from ,block (values ,value t t))))) | |
806 | (values '(,predicate) nil nil)))) | |
807 | ||
808 | (defmethod expand-parser-spec | |
809 | ((context character-parser-context) (spec (eql :whitespace))) | |
810 | "Matches any sequence of whitespace; result is nil. | |
811 | ||
812 | Cannot fail." | |
813 | ||
814 | `(progn | |
815 | (cond ((and (not ,(parser-at-eof-p context)) | |
816 | (whitespace-char-p ,(parser-current-char context))) | |
817 | (loop | |
818 | ,(parser-step context) | |
819 | (when (or ,(parser-at-eof-p context) | |
820 | (not (whitespace-char-p | |
821 | ,(parser-current-char context)))) | |
822 | (return (values nil t t))))) | |
823 | (t | |
824 | (values nil t nil))))) | |
825 | ||
826 | (defmethod expand-parser-spec | |
827 | ((context character-parser-context) (string string)) | |
828 | "Matches the constituent characters of STRING; result is the string. | |
829 | ||
830 | The failure indicator is STRING; on failure, the input is rewound, so this | |
831 | only works on rewindable contexts." | |
832 | ||
833 | (with-gensyms (i) | |
834 | (unless (typep string 'simple-string) | |
835 | (setf string (make-array (length string) :initial-contents string))) | |
836 | (with-parser-place (place context) | |
837 | `(dotimes (,i ,(length string) (values ,string t | |
838 | ,(plusp (length string)))) | |
839 | (when (or ,(parser-at-eof-p context) | |
840 | (char/= ,(parser-current-char context) | |
841 | (schar ,string ,i))) | |
842 | ,(parser-restore-place context place) | |
843 | (return (values '(,string) nil nil))) | |
844 | ,(parser-step context))))) | |
845 | ||
846 | ;;;-------------------------------------------------------------------------- | |
847 | ;;; Token parser context protocol. | |
848 | ||
849 | (export 'token-parser-context) | |
850 | (defclass token-parser-context () | |
851 | () | |
852 | (:documentation | |
853 | "Base class for parsers which read tokens with associated semantic values. | |
854 | ||
855 | A token, according to the model suggested by this class, has a /type/, | |
856 | which classifies the token and is the main contributer to guiding the | |
857 | parse, and a /value/, which carries additional semantic information. | |
858 | ||
859 | This may seem redundant given Lisp's dynamic type system; but it's not | |
860 | actually capable of drawing sufficiently fine distinctions easily. For | |
861 | example, we can represent a symbol either as a string or a symbol; but | |
862 | using strings conflicts with being able to represent string literals, and | |
863 | using symbols looks ugly and they don't get GCed. Similarly, it'd be | |
864 | convenient to represent punctuation as characters, but that conflicts with | |
865 | using them for character literals. So, we introduce our own notion of | |
866 | token type. | |
867 | ||
868 | Token scanners are expected to signal end-of-file with a token of type | |
869 | `:eof'.")) | |
870 | ||
871 | (export 'parser-token-type) | |
872 | (defgeneric parser-token-type (context) | |
873 | (:documentation | |
874 | "Return the parser's current token type.")) | |
875 | ||
876 | (export 'parser-token-value) | |
877 | (defgeneric parser-token-value (context) | |
878 | (:documentation | |
879 | "Return the parser's current token's semantic value.")) | |
880 | ||
881 | (export 'token) | |
211bf0df MW |
882 | (locally (declare #+sbcl (sb-ext:muffle-conditions style-warning)) |
883 | (defparse token (:context (context token-parser-context) | |
884 | type &optional (value nil valuep) &key peekp) | |
885 | "Match tokens of a particular type. | |
dea4d055 MW |
886 | |
887 | A token matches under the following conditions: | |
888 | ||
889 | * If the value of TYPE is `t' then the match succeeds if and only if the | |
c91b90c3 | 890 | parser is not at end-of-file. |
dea4d055 MW |
891 | |
892 | * If the value of TYPE is not `eql' to the token type then the match | |
893 | fails. | |
894 | ||
895 | * If VALUE is specified, and the value of VALUE is not `equal' to the | |
896 | token semantic value then the match fails. | |
897 | ||
898 | * Otherwise the match succeeds. | |
899 | ||
900 | If the match is successful and the parser is not at end-of-file, and the | |
901 | value of PEEKP is nil then the parser advances to the next token; the | |
902 | result of the match is the token's value. | |
903 | ||
904 | If the match fails then the failure indicator is either TYPE or (TYPE | |
905 | VALUE), depending on whether a VALUE was specified." | |
906 | ||
211bf0df MW |
907 | (once-only (type value peekp) |
908 | (with-gensyms (tokty tokval) | |
909 | `(let ((,tokty ,(parser-token-type context)) | |
910 | (,tokval ,(parser-token-value context))) | |
911 | (if ,(if (eq type t) | |
912 | `(not (eq ,tokty :eof)) | |
913 | (flet ((check-value (cond) | |
914 | (if valuep | |
915 | `(and ,cond (equal ,tokval ,value)) | |
916 | cond))) | |
917 | (if (constantp type) | |
918 | (check-value `(eql ,tokty ,type)) | |
919 | `(if (eq ,type t) | |
920 | (not (eq ,tokty :eof)) | |
921 | ,(check-value `(eql ,tokty ,type)))))) | |
922 | ,(let* ((result `(values ,tokval t ,(if (constantp peekp) | |
923 | (not peekp) | |
924 | `(not ,peekp)))) | |
925 | (step (parser-step context))) | |
12ec2c34 MW |
926 | (cond ((not (constantp peekp)) |
927 | `(multiple-value-prog1 ,result | |
928 | (unless ,peekp ,step))) | |
929 | (peekp | |
930 | result) | |
931 | (t | |
932 | `(multiple-value-prog1 ,result | |
933 | ,step)))) | |
211bf0df MW |
934 | (values (list ,(if valuep `(list ,type ,value) type)) |
935 | nil nil))))))) | |
dea4d055 MW |
936 | |
937 | (defmethod expand-parser-spec ((context token-parser-context) spec) | |
938 | (if (atom spec) | |
939 | (expand-parser-spec context `(token ,spec)) | |
940 | (call-next-method))) | |
941 | ||
942 | (defmethod expand-parser-spec ((context token-parser-context) (spec string)) | |
943 | (expand-parser-spec context `(token :id ,spec))) | |
944 | ||
945 | ;;;----- That's all, folks -------------------------------------------------- |