Massive reorganization in progress.
[sod] / src / parser / proto-parser.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Protocol for parsing.
4 ;;;
5 ;;; (c) 2009 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Sensble Object Design, an object system for C.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 ;;;--------------------------------------------------------------------------
27 ;;; Parser protocol discussion.
28 ;;;
29 ;;; Other languages, notably Haskell and ML, have `parser combinator
30 ;;; libraries', which allow one to construct recursive descent parsers using
31 ;;; approximately pleasant syntax. While attempts have been made to
32 ;;; introduce the benefits of these libraries to Lisp, they've not been
33 ;;; altogether successful; this seems due to Lisp's lack of features such as
34 ;;; pattern matching, currying and lazy evaluation. Rather than fight with
35 ;;; Lisp's weaknesses, this library plays to its strength, making heavy use
36 ;;; of macros. Effectively, the `combinators' we build here are /compile-
37 ;;; time/ combinators, not run-time ones.
38 ;;;
39 ;;; A `parser' is simply an expression which returns three values.
40 ;;;
41 ;;; * If the second value is nil, then the parser is said to have /failed/,
42 ;;; and the first value is a list describing the things that the parser
43 ;;; expected to find but didn't. (The precise details of the list items
44 ;;; are important to error-reporting functions, but not to the low-level
45 ;;; machinery, and are left up to higher-level protocols to nail down
46 ;;; harder.)
47 ;;;
48 ;;; * If the second value is not nil, then the parser is said to have
49 ;;; /succeeded/, and the first value is its /result/.
50 ;;;
51 ;;; * The third value indicates whether the parser consumed any of its
52 ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and
53 ;;; bad performance), so the `consumedp' return value is used to decide
54 ;;; whether the parser has `committed' to a particular branch. If the
55 ;;; parser context supports place-capture (many do) then `peek' can be
56 ;;; used to suppress consumption of input in the case of parser failure.
57 ;;;
58 ;;; The functions and macros here are simply ways of gluing together
59 ;;; expressions which obey this protocol.
60 ;;;
61 ;;; The main contribution of this file is a macro WITH-PARSER-CONTEXT which
62 ;;; embeds a parsing-specific S-expressions language entered using the new
63 ;;; macro PARSE. The behaviour of this macro is controlled by a pair of
64 ;;; compile-time generic functions EXPAND-PARSER-SPEC and EXPAND-PARSER-FORM.
65 ;;; As well as the parser expression they're meant to process, these
66 ;;; functions dispatch on a `context' argument, which is intended to help
67 ;;; `leaf' parsers find the terminal symbols which they're meant to process.
68 ;;;
69 ;;; Note that the context is a compile-time object, constructed by the PARSE
70 ;;; macro expansion function, though the idea is that it will contain the
71 ;;; name or names of variables holding the run-time parser state (which will
72 ;;; typically be a lexical analyser or an input stream or suchlike).
73
74 (cl:in-package #:sod-parser)
75
76 ;;;--------------------------------------------------------------------------
77 ;;; Utilities.
78
79 (defun combine-parser-failures (failures)
80 "Combine the failure indicators listed in FAILURES.
81
82 (Note that this means that FAILURES is a list of lists.)"
83
84 (reduce (lambda (f ff) (union f ff :test #'equal))
85 failures
86 :initial-value nil))
87
88 ;;;--------------------------------------------------------------------------
89 ;;; Basic protocol.
90
91 (eval-when (:compile-toplevel :load-toplevel :execute)
92
93 (export 'expand-parser-spec)
94 (defgeneric expand-parser-spec (context spec)
95 (:documentation
96 "Expand a parser specifier SPEC in a particular parser CONTEXT.")
97 (:method (context (spec list))
98 (expand-parser-form context (car spec) (cdr spec))))
99
100 (export 'expand-parser-form)
101 (defgeneric expand-parser-form (context head tail)
102 (:documentation
103 "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.")
104 (:method (context head tail)
105 (cons head tail)))
106
107 (export 'wrap-parser)
108 (defgeneric wrap-parser (context form)
109 (:documentation
110 "Enclose FORM in whatever is necessary to make the parser work.")
111 (:method (context form) form)))
112
113 (export 'defparse)
114 (defmacro defparse (name bvl &body body)
115 "Define a new parser form.
116
117 The full syntax is hairier than it looks:
118
119 defparse NAME ( [[ :context (CTX SPEC) ]] . BVL )
120 { FORM }*
121
122 The macro defines a new parser form (NAME ...) which is expanded by the
123 body FORMs. The BVL is a destructuring lambda-list to be applied to the
124 tail of the form. The body forms are enclosed in a block called NAME.
125
126 Within the FORMs, a function `expand' is available: it takes a parser
127 specifier as its argument and returns its expansion in the parser's
128 context.
129
130 If the :context key is provided, then the parser form is specialized on a
131 particular class of parser contexts SPEC; specialized expanders take
132 priority over less specialized or unspecialized expanders -- so you can
133 use this to override the built-in forms safely if they don't seem to be
134 doing the right thing for you. Also, the context -- which is probably
135 interesting to you if you've bothered to specialize -- is bound to the
136 variable CTX."
137
138 ;; BUG! misplaces declarations: if you declare the CONTEXT argument
139 ;; `special' it won't be bound properly. I'm really not at all sure I know
140 ;; how to fix this.
141
142 (with-gensyms (head tail context)
143 (let ((ctxclass t))
144 (loop
145 (unless (and bvl (keywordp (car bvl))) (return))
146 (ecase (pop bvl)
147 (:context (destructuring-bind (name spec) (pop bvl)
148 (setf ctxclass spec context name)))))
149 (multiple-value-bind (doc decls body) (parse-body body)
150 `(defmethod expand-parser-form
151 ((,context ,ctxclass) (,head (eql ',name)) ,tail)
152 ,@doc
153 (block ,name
154 (destructuring-bind ,bvl ,tail
155 ,@decls
156 ,@body)))))))
157
158 (export '(with-parser-context parse))
159 (defmacro with-parser-context ((class &rest initargs) &body body)
160 "Evaluate BODY with a macro `parse' which expands parser forms.
161
162 Evaluate BODY as an implicit progn. At compile time, a parser context is
163 constructed by (apply #'make-instance CLASS INITARGS). The BODY can make
164 use of the macro `parse':
165
166 parse SPEC
167
168 which parses the input in the manner described by SPEC, in the context of
169 the parser context."
170
171 (let ((context (apply #'make-instance class initargs)))
172 (wrap-parser context
173 `(macrolet ((parse (form)
174 (expand-parser-spec ',context form)))
175 ,@body))))
176
177 ;;;--------------------------------------------------------------------------
178 ;;; Common parser context protocol.
179
180 (export 'parser-at-eof-p)
181 (defgeneric parser-at-eof-p (context)
182 (:documentation
183 "Return whether the parser has reached the end of its input.
184
185 Be careful: all of this is happening at macro expansion time."))
186
187 (export 'parser-step)
188 (defgeneric parser-step (context)
189 (:documentation
190 "Advance the parser to the next character.
191
192 Be careful: all of this is happening at macro-expansion time."))
193
194 (defmethod expand-parser-spec (context (spec (eql :eof)))
195 "Tests succeeds if the parser has reached the end of its input.
196
197 The failure indicator is the keyword `:eof'."
198
199 `(if ,(parser-at-eof-p context)
200 (values :eof t nil)
201 (values '(:eof) nil nil)))
202
203 ;;;--------------------------------------------------------------------------
204 ;;; Useful macros for dealing with parsers.
205
206 (export 'it)
207 (export 'if-parse)
208 (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP")))
209 parser consequent &optional (alternative nil altp))
210 "Conditional parsing construction.
211
212 If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the
213 result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the
214 expected-item list. If ALTERNATIVE is omitted, then propagate the failure
215 following the parser protocol."
216
217 (with-gensyms (value win)
218 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
219 (declare (ignorable ,consumedp))
220 (if ,win
221 (let ((,result ,value))
222 (declare (ignorable ,result))
223 ,consequent)
224 ,(cond ((not altp)
225 `(values ,value nil ,consumedp))
226 (expected
227 `(let ((,expected ,value)) ,alternative))
228 (t
229 alternative))))))
230
231 (export 'when-parse)
232 (defmacro when-parse ((&optional (result 'it)) parser &body body)
233 "Convenience macro for conditional parsing.
234
235 If PARSER succeeds then evaluate BODY with RESULT bound to the result;
236 otherwise propagate the failure."
237 `(if-parse (:result ,result) ,parser (progn ,@body)))
238
239 (export 'cond-parse)
240 (defmacro cond-parse ((&key (result 'it) expected
241 (consumedp (gensym "CP")))
242 &body clauses)
243 "Frightening conditional parsing construct.
244
245 Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake'
246 parser form `t' may be used to describe a default action. If the PARSER
247 succeeds then evaluate FORMS in order with RESULT bound to the parser
248 result (if there are no forms, then propagate the success); if the PARSER
249 fails without consuming input, then move onto the next clause.
250
251 If the default clause (if any) is reached, or a parser fails after
252 consuming input, then EXPECTED is bound to a list of failure indicators
253 and the default clause's FORMS are evaluated and with CONSUMEDP bound to a
254 generalized boolean indicating whether any input was consumed. If there
255 is no default clause, and either some parser fails after consuming input,
256 or all of the parsers fail without consuming, then a failure is reported
257 and the input-consumption indicator is propagated.
258
259 If a parser fails after consuming input, then the failure indicators are
260 whatever that parser reported; if all the parsers fail without consuming
261 then the failure indicators are the union of the indicators reported by
262 the various parsers."
263
264 (with-gensyms (block fail failarg)
265 (labels ((walk (clauses failures)
266 (cond ((null clauses)
267 (values `(,fail nil (list ,@(reverse failures)))
268 `(values (combine-parser-failures ,failarg)
269 nil
270 ,consumedp)))
271 ((eq (caar clauses) t)
272 (values `(,fail nil (list ,@(reverse failures)))
273 `(,@(if expected
274 `(let ((,expected
275 (combine-parser-failures
276 ,failarg))))
277 `(progn))
278 ,@(cdar clauses))))
279 (t
280 (with-gensyms (value win cp)
281 (multiple-value-bind (inner failbody)
282 (walk (cdr clauses) (cons value failures))
283 (values `(multiple-value-bind (,value ,win ,cp)
284 (parse ,(caar clauses))
285 (when ,win
286 (return-from ,block
287 (let ((,result ,value)
288 (,consumedp ,cp))
289 (declare (ignorable ,result
290 ,consumedp))
291 ,@(cdar clauses))))
292 (when ,cp
293 (,fail t (list ,value)))
294 ,inner)
295 failbody)))))))
296 (multiple-value-bind (inner failbody) (walk clauses nil)
297 `(block ,block
298 (flet ((,fail (,consumedp ,failarg)
299 (declare (ignorable ,consumedp ,failarg))
300 ,failbody))
301 ,inner))))))
302
303 (export 'parser)
304 (defmacro parser (bvl &body parser)
305 "Functional abstraction for parsers."
306 (multiple-value-bind (doc decls body) (parse-body parser)
307 `(lambda ,bvl ,@doc ,@decls (parse ,@body))))
308
309 ;;;--------------------------------------------------------------------------
310 ;;; Standard parser forms.
311
312 (export 'label)
313 (defparse label (label parser)
314 "If PARSER fails, use LABEL as the expected outcome.
315
316 The LABEL is only evaluated if necessary."
317 (with-gensyms (value win consumedp)
318 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
319 (if ,win
320 (values ,value ,win ,consumedp)
321 (values (list ,label) nil ,consumedp)))))
322
323 (defparse t (value)
324 "Succeed, without consuming input, with result VALUE."
325 `(values ,value t nil))
326
327 (defparse when (cond &body parser)
328 "If CONDITION is true, then match PARSER; otherwise fail."
329 `(if ,cond (parse ,@parser) (values nil nil nil)))
330
331 (defmethod expand-parser-spec (context (spec (eql t)))
332 "Always matches without consuming input."
333 '(values t t nil))
334
335 (export 'seq)
336 (defparse seq (binds &body body)
337 "Parse a sequence of heterogeneous items.
338
339 Syntax:
340
341 seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* )
342 { FORM }*
343
344 The behaviour is similar to `let*'. The PARSER-FORMs are processed in
345 order, left to right. If a parser succeeds, then its value is bound to
346 the corresponding VAR, and available within Lisp forms enclosed within
347 subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then
348 the entire sequence fails. If all of the parsers succeeds, then the FORMs
349 are evaluated as an implicit progn, and the sequence will succeed with the
350 result computed by the final FORM."
351
352 (with-gensyms (block consumedp)
353 (labels ((walk (binds lets ignores)
354 (if (endp binds)
355 `(let* ((,consumedp nil)
356 ,@(nreverse lets))
357 ,@(and ignores
358 `((declare (ignore ,@(nreverse ignores)))))
359 (values (progn ,@body) t ,consumedp))
360 (destructuring-bind (x &optional (y nil yp))
361 (if (listp (car binds))
362 (car binds)
363 (list (car binds)))
364 (with-gensyms (var value win cp)
365 (multiple-value-bind (var parser ignores)
366 (if (and yp x)
367 (values x y ignores)
368 (values var (if yp y x) (cons var ignores)))
369 (walk (cdr binds)
370 (cons `(,var (multiple-value-bind
371 (,value ,win ,cp)
372 (parse ,parser)
373 (when ,cp (setf ,consumedp t))
374 (unless ,win
375 (return-from ,block
376 (values ,value ,nil
377 ,consumedp)))
378 ,value))
379 lets)
380 ignores)))))))
381 `(block ,block ,(walk binds nil nil)))))
382
383 (export 'and)
384 (defparse and (:context (context t) &rest parsers)
385 "Parse a sequence of heterogeneous items, but ignore their values.
386
387 This is just like (and is implemented using) `seq' with all the bindings
388 set to `nil'. The result is `nil'."
389 (with-gensyms (last)
390 (if (null parsers)
391 '(seq () nil)
392 (expand-parser-spec context
393 `(seq (,@(mapcar (lambda (parser)
394 `(nil ,parser))
395 (butlast parsers))
396 (,last ,(car (last parsers))))
397 ,last)))))
398
399 (export 'lisp)
400 (defparse lisp (&rest forms)
401 "Evaluate FORMs, which should obey the parser protocol."
402 `(progn ,@forms))
403
404 (export 'many)
405 (defparse many ((acc init update
406 &key (new 'it) (final acc) (min nil minp) max (commitp t))
407 parser &optional (sep nil sepp))
408 "Parse a sequence of homogeneous items.
409
410 The behaviour is similar to `do'. Initially an accumulator ACC is
411 established, and bound to the value of INIT. The PARSER is then evaluated
412 repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults
413 to `it') bound to the result of the parse, and the value returned by
414 UPDATE is stored back into ACC. If the PARSER fails, then the parse
415 ends. The scope of ACC includes the UPDATE and FINAL forms, and the
416 PARSER and SEP parsers; it is updated by side effects, not rebound.
417
418 If a SEP parser is provided, then the behaviour changes as follows.
419 Before each attempt to parse a new item using PARSER, the parser SEP is
420 invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP
421 is true, then the PARSER must also succeed or the overall parse will
422 fail. If COMMITP is false then a trailing SEP is permitted and ignored.
423
424 If MAX (which will be evaluated) is not nil, then it must be a number: the
425 parse ends automatically after PARSER has succeeded MAX times. When the
426 parse has ended, if the PARSER succeeded fewer than MIN (which will be
427 evaluated) times then the parse fails. Otherwise, the FINAL form (which
428 defaults to simply returning ACC) is evaluated and its value becomes the
429 result of the parse. MAX defaults to nil -- i.e., no maximum; MIN
430 defaults to 1 if a SEP parser is given, or 0 if not.
431
432 Note that `many' cannot fail if MIN is zero."
433
434 ;; Once upon a time, this was a macro of almost infinite hairiness which
435 ;; tried to do everything itself, including inspecting its arguments for
436 ;; constant-ness to decide whether it could elide bits of code. This
437 ;; became unsustainable. Nowadays, it packages up its parser arguments
438 ;; into functions and calls some primitive functions to do the heavy
439 ;; lifting.
440 ;;
441 ;; The precise protocol between this macro and the backend functions is
442 ;; subject to change: don't rely on it.
443
444 (let* ((accvar (or acc (gensym "ACC-")))
445 (func (if sepp '%many-sep '%many)))
446 `(let ((,accvar ,init))
447 (declare (ignorable ,accvar))
448 (,func (lambda (,new)
449 (declare (ignorable ,new))
450 (setf ,accvar ,update))
451 (lambda ()
452 ,final)
453 (parser () ,parser)
454 ,@(and sepp (list `(parser () ,sep)))
455 ,@(and minp `(:min ,min))
456 ,@(and max `(:max ,max))
457 ,@(and (not (eq commitp t)) `(:commitp ,commitp))))))
458
459 (export 'list)
460 (defparse list ((&rest keys) parser &optional (sep nil sepp))
461 "Like MANY, but simply returns a list of the parser results."
462 (with-gensyms (acc)
463 `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys)
464 ,parser ,@(and sepp (list sep))))))
465
466 (export 'skip-many)
467 (defparse skip-many ((&rest keys) parser &optional (sep nil sepp))
468 "Like MANY, but ignores the results."
469 `(parse (many (nil nil nil ,@keys)
470 ,parser ,@(and sepp (list sep)))))
471
472 (export 'or)
473 (defparse or (&rest parsers)
474 "Try a number of alternative parsers.
475
476 Each of the PARSERS in turn is tried. If any succeeds, then its result
477 becomes the result of the parse. If any parser fails after consuming
478 input, or if all of the parsers fail, then the overall parse fails, with
479 the union of the expected items from the individual parses."
480
481 (with-gensyms (fail cp failarg)
482 (labels ((walk (parsers failures)
483 (if (null parsers)
484 `(,fail nil (list ,@(reverse failures)))
485 (with-gensyms (value win consumedp)
486 `(multiple-value-bind (,value ,win ,consumedp)
487 (parse ,(car parsers))
488 (cond (,win
489 (values ,value ,win ,consumedp))
490 (,consumedp
491 (,fail t (list ,value)))
492 (t
493 ,(walk (cdr parsers)
494 (cons value failures)))))))))
495 `(flet ((,fail (,cp ,failarg)
496 (values (combine-parser-failures ,failarg) nil ,cp)))
497 ,(walk parsers nil)))))
498
499 (export '?)
500 (defparse ? (parser &optional (value nil))
501 "Matches PARSER or nothing; fails if PARSER fails after consuming input."
502 `(parse (or ,parser (t ,value))))
503
504 ;;;--------------------------------------------------------------------------
505 ;;; Pluggable parsers.
506
507 (export 'call-pluggable-parser)
508 (defun call-pluggable-parser (symbol &rest args)
509 "Call the pluggable parser denoted by SYMBOL.
510
511 A `pluggable parser' is an indirection point at which a number of
512 alternative parsers can be attached dynamically. The parsers are tried in
513 some arbitrary order, so one should be careful to avoid ambiguities; each
514 is paseed the given ARGS.
515
516 If any parser succeeds then it determines the result; if any parser fails
517 having consumed input then the pluggable parser fails immediately. If all
518 of the parsers fail without consuming input then the pluggable parser
519 fails with the union of the individual failure indicators."
520
521 (let ((expected nil))
522 (dolist (item (get symbol 'parser))
523 (multiple-value-bind (value winp consumedp) (apply (cdr item) args)
524 (when (or winp consumedp)
525 (return-from call-pluggable-parser (values value winp consumedp)))
526 (push value expected)))
527 (values (combine-parser-failures expected) nil nil)))
528
529 (export 'plug)
530 (defparse plug (symbol &rest args)
531 "Call the pluggable parser denoted by SYMBOL.
532
533 This is just like the function `call-pluggable-parser', but the SYMBOL is
534 not evaluated."
535 `(call-pluggable-parser ',symbol ,@args))
536
537 (export 'pluggable-parser-add)
538 (defun pluggable-parser-add (symbol tag parser)
539 "Adds an element to a pluggable parser.
540
541 The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
542 comparable object which identifies the element. The PARSER is a parser
543 function; it will be passed arguments via `pluggable-parser'.
544
545 If a parser with the given TAG is already attached to SYMBOL then the new
546 parser replaces the old one; otherwise it is added to the collection."
547
548 (let ((alist (get symbol 'parser)))
549 (aif (assoc tag alist)
550 (setf (cdr it) parser)
551 (setf (get symbol 'parser) (acons tag parser alist)))))
552
553 (export 'define-pluggable-parser)
554 (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body)
555 "Adds an element to a pluggable parser.
556
557 The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
558 comparable object which identifies the element. Neither SYMBOL nor TAG is
559 evaluated. The BODY is a parser expression; the BVL is a lambda list
560 describing how to bind the argumens supplied via `pluggable-parser'.
561
562 If a parser with the given TAG is already attached to SYMBOL then the new
563 parser replaces the old one; otherwise it is added to the collection."
564
565 `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body)))
566
567 ;;;--------------------------------------------------------------------------
568 ;;; Rewindable parser context protocol.
569
570 (eval-when (:compile-toplevel :load-toplevel :execute)
571
572 (export 'parser-capture-place)
573 (defgeneric parser-capture-place (context)
574 (:documentation
575 "Capture the current position of a parser CONTEXT.
576
577 The return value may later be used with `parser-restore-place'. Be
578 careful: all of this is happening at macro-expansion time.")
579 (:method (context)
580 (error "Parser context ~S doesn't support rewinding." context)))
581
582 (export 'parser-restore-place)
583 (defgeneric parser-restore-place (context place)
584 (:documentation
585 "`Rewind' the parser CONTEXT back to the captured PLACE.
586
587 The place was previously captured by `parser-capture-place'. Be careful:
588 all of this is happening at macro-expansion time."))
589
590 (export 'parser-release-place)
591 (defgeneric parser-release-place (context place)
592 (:documentation
593 "Release a PLACE captured from the parser CONTEXT.
594
595 The place was previously captured by `parser-capture-place'. The
596 underlying scanner can use this call to determine whether there are
597 outstanding captured places, and thereby optimize its behaviour. Be
598 careful: all of this is happening at macro-expansion time.")
599 (:method (context place) nil))
600
601 (export 'parser-places-must-be-released-p)
602 (defgeneric parser-places-must-be-released-p (context)
603 (:documentation
604 "Answer whether places captured from the parser CONTEXT need releasing.
605
606 Some contexts -- well, actually, their run-time counterparts -- work
607 better if they can keep track of which places are captured, or at least if
608 there are captured places outstanding. If this function returns true
609 (which is the default) then `with-parser-place' (and hence parser macros
610 such as `peek') will expand to `unwind-protect' forms in order to perform
611 the correct cleanup. If it returns false, then the `unwind-protect' is
612 omitted so that the runtime code does't have to register cleanup
613 handlers.")
614 (:method (context) t)))
615
616 (export 'with-parser-place)
617 (defmacro with-parser-place ((place context) &body body)
618 "Evaluate BODY surrounded with a binding of PLACE to a captured place.
619
620 The surrounding code will release the PLACE properly on exit from the body
621 forms. This is all happening at macro-expansion time."
622 ;; ... which means that it's a bit hairy. Fortunately, the nested
623 ;; backquotes aren't that bad.
624 (once-only (context)
625 (with-gensyms (bodyfunc)
626 `(with-gensyms (,place)
627 (flet ((,bodyfunc () ,@body))
628 `(let ((,,place ,(parser-capture-place ,context)))
629 ,(if (parser-places-must-be-released-p ,context)
630 `(unwind-protect ,(,bodyfunc)
631 ,(parser-release-place ,context ,place))
632 (,bodyfunc))))))))
633
634 (export 'peek)
635 (defparse peek (:context (context t) parser)
636 "Attempt to run PARSER, but rewind the underlying source if it fails."
637 (with-gensyms (value win consumedp)
638 (with-parser-place (place context)
639 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
640 (cond (,win
641 (values ,value ,win ,consumedp))
642 (t
643 ,(parser-restore-place context place)
644 (values ,value ,win nil)))))))
645
646 ;;;--------------------------------------------------------------------------
647 ;;; Character parser context protocol.
648
649 (export 'character-parser-context)
650 (defclass character-parser-context ()
651 ()
652 (:documentation
653 "Base class for parsers which read one character at a time."))
654
655 (export 'parser-current-char)
656 (defgeneric parser-current-char (context)
657 (:documentation
658 "Return the parser's current character.
659
660 It is an error to invoke this operation if the parser is at end-of-file;
661 you must check this first. Be careful: all of this is happening at
662 macro-expansion time."))
663
664 (defparse if-char (:context (context character-parser-context)
665 (&optional (char 'it)) condition consequent alternative)
666 "Basic character-testing parser.
667
668 If there is a current character, bind it to CHAR and evaluate the
669 CONDITION; if that is true, then step the parser and evaluate CONSEQUENT;
670 otherwise, if either we're at EOF or the CONDITION returns false, evaluate
671 ALTERNATIVE. The result of `if-char' are the values returned by
672 CONSEQUENT or ALTERNATIVE."
673
674 (with-gensyms (block)
675 `(block ,block
676 (unless ,(parser-at-eof-p context)
677 (let ((,char ,(parser-current-char context)))
678 (when ,condition
679 ,(parser-step context)
680 (return-from ,block ,consequent))))
681 ,alternative)))
682
683 (defmethod expand-parser-spec
684 ((context character-parser-context) (spec (eql :any)))
685 "Matches any character; result is the character.
686
687 The failure indicator is the keyword `:any'."
688 (expand-parser-spec context
689 '(if-char () t
690 (values it t t)
691 (values '(:any) nil nil))))
692
693 (export 'char)
694 (defparse char (:context (context character-parser-context) char)
695 "Matches the character CHAR (evaluated); result is the character.
696
697 The failure indicator is CHAR."
698
699 (once-only (char)
700 (with-gensyms (it)
701 (expand-parser-spec context
702 `(if-char (,it) (char= ,it ,char)
703 (values ,it t t)
704 (values (list ,char) nil nil))))))
705
706 (defmethod expand-parser-spec
707 ((context character-parser-context) (char character))
708 (expand-parser-spec context `(char ,char)))
709
710 (export 'satisfies)
711 (defparse satisfies (:context (context character-parser-context) predicate)
712 "Matches a character that satisfies the PREDICATE
713
714 The PREDICATE is a function designator. On success, the result is the
715 character. The failure indicator is PREDICATE; you probably want to apply
716 a `label'."
717
718 (with-gensyms (it)
719 (expand-parser-spec context
720 `(if-char (,it) (,predicate ,it)
721 (values ,it t t)
722 (values '(,predicate) nil nil)))))
723
724 (export 'not)
725 (defparse not (:context (context character-parser-context) char)
726 "Matches any character other than CHAR; result is the character.
727
728 The failure indicator is (not CHAR)."
729
730 (once-only (char)
731 (with-gensyms (it)
732 (expand-parser-spec context
733 `(if-char (,it) (char/= ,it ,char)
734 (values ,it t t)
735 (values `((not ,,char)) nil nil))))))
736
737 (export 'filter)
738 (defparse filter (:context (context character-parser-context) predicate)
739 "Matches a character that satisfies the PREDICATE; result is the output of
740 PREDICATE.
741
742 The failure indicator is PREDICATE; you probably want to apply a `label'."
743
744 ;; Can't do this one with `if-char'.
745 (with-gensyms (block value)
746 `(block ,block
747 (unless ,(parser-at-eof-p context)
748 (let ((,value (,predicate ,(parser-current-char context))))
749 (when ,value
750 ,(parser-step context)
751 (return-from ,block (values ,value t t)))))
752 (values '(,predicate) nil nil))))
753
754 (defmethod expand-parser-spec
755 ((context character-parser-context) (spec (eql :whitespace)))
756 "Matches any sequence of whitespace; result is nil.
757
758 Cannot fail."
759
760 `(progn
761 (cond ((and (not ,(parser-at-eof-p context))
762 (whitespace-char-p ,(parser-current-char context)))
763 (loop
764 ,(parser-step context)
765 (when (or ,(parser-at-eof-p context)
766 (not (whitespace-char-p
767 ,(parser-current-char context))))
768 (return (values nil t t)))))
769 (t
770 (values nil t nil)))))
771
772 (defmethod expand-parser-spec
773 ((context character-parser-context) (string string))
774 "Matches the constituent characters of STRING; result is the string.
775
776 The failure indicator is STRING; on failure, the input is rewound, so this
777 only works on rewindable contexts."
778
779 (with-gensyms (i)
780 (unless (typep string 'simple-string)
781 (setf string (make-array (length string) :initial-contents string)))
782 (with-parser-place (place context)
783 `(dotimes (,i ,(length string) (values ,string t
784 ,(plusp (length string))))
785 (when (or ,(parser-at-eof-p context)
786 (char/= ,(parser-current-char context)
787 (schar ,string ,i)))
788 ,(parser-restore-place context place)
789 (return (values '(,string) nil nil)))
790 ,(parser-step context)))))
791
792 ;;;--------------------------------------------------------------------------
793 ;;; Token parser context protocol.
794
795 (export 'token-parser-context)
796 (defclass token-parser-context ()
797 ()
798 (:documentation
799 "Base class for parsers which read tokens with associated semantic values.
800
801 A token, according to the model suggested by this class, has a /type/,
802 which classifies the token and is the main contributer to guiding the
803 parse, and a /value/, which carries additional semantic information.
804
805 This may seem redundant given Lisp's dynamic type system; but it's not
806 actually capable of drawing sufficiently fine distinctions easily. For
807 example, we can represent a symbol either as a string or a symbol; but
808 using strings conflicts with being able to represent string literals, and
809 using symbols looks ugly and they don't get GCed. Similarly, it'd be
810 convenient to represent punctuation as characters, but that conflicts with
811 using them for character literals. So, we introduce our own notion of
812 token type.
813
814 Token scanners are expected to signal end-of-file with a token of type
815 `:eof'."))
816
817 (export 'parser-token-type)
818 (defgeneric parser-token-type (context)
819 (:documentation
820 "Return the parser's current token type."))
821
822 (export 'parser-token-value)
823 (defgeneric parser-token-value (context)
824 (:documentation
825 "Return the parser's current token's semantic value."))
826
827 (export 'token)
828 (defparse token (:context (context token-parser-context)
829 type &optional (value nil valuep) &key peekp)
830 "Match tokens of a particular type.
831
832 A token matches under the following conditions:
833
834 * If the value of TYPE is `t' then the match succeeds if and only if the
835 parser it not at end-of-file.
836
837 * If the value of TYPE is not `eql' to the token type then the match
838 fails.
839
840 * If VALUE is specified, and the value of VALUE is not `equal' to the
841 token semantic value then the match fails.
842
843 * Otherwise the match succeeds.
844
845 If the match is successful and the parser is not at end-of-file, and the
846 value of PEEKP is nil then the parser advances to the next token; the
847 result of the match is the token's value.
848
849 If the match fails then the failure indicator is either TYPE or (TYPE
850 VALUE), depending on whether a VALUE was specified."
851
852 (once-only (type value peekp)
853 (with-gensyms (tokty tokval)
854 `(let ((,tokty ,(parser-token-type context))
855 (,tokval ,(parser-token-value context)))
856 (if ,(if (eq type t)
857 `(not (eq ,tokty :eof))
858 (flet ((check-value (cond)
859 (if valuep
860 `(and ,cond (equal ,tokval ,value))
861 cond)))
862 (if (constantp type)
863 (check-value `(eql ,tokty ,type))
864 `(if (eq ,type t)
865 (not (eq ,tokty :eof))
866 ,(check-value `(eql ,tokty ,type))))))
867 ,(let* ((result `(values ,tokval t ,(if (constantp peekp)
868 (not peekp)
869 `(not ,peekp))))
870 (step (parser-step context)))
871 (cond ((not (constantp peekp))
872 `(multiple-value-prog1 ,result
873 (unless ,peekp ,step)))
874 (peekp
875 result)
876 (t
877 `(multiple-value-prog1 ,result
878 ,step))))
879 (values (list ,(if valuep `(list ,type ,value) type))
880 nil nil))))))
881
882 (defmethod expand-parser-spec ((context token-parser-context) spec)
883 (if (atom spec)
884 (expand-parser-spec context `(token ,spec))
885 (call-next-method)))
886
887 (defmethod expand-parser-spec ((context token-parser-context) (spec string))
888 (expand-parser-spec context `(token :id ,spec)))
889
890 ;;;----- That's all, folks --------------------------------------------------