3 ;;; Protocol for parsing.
5 ;;; (c) 2009 Straylight/Edgeware
8 ;;;----- Licensing notice ---------------------------------------------------
10 ;;; This file is part of the Sensble Object Design, an object system for C.
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 ;;;--------------------------------------------------------------------------
27 ;;; Parser protocol discussion.
29 ;;; Other languages, notably Haskell and ML, have `parser combinator
30 ;;; libraries', which allow one to construct recursive descent parsers using
31 ;;; approximately pleasant syntax. While attempts have been made to
32 ;;; introduce the benefits of these libraries to Lisp, they've not been
33 ;;; altogether successful; this seems due to Lisp's lack of features such as
34 ;;; pattern matching, currying and lazy evaluation. Rather than fight with
35 ;;; Lisp's weaknesses, this library plays to its strength, making heavy use
36 ;;; of macros. Effectively, the `combinators' we build here are /compile-
37 ;;; time/ combinators, not run-time ones.
39 ;;; A `parser' is simply an expression which returns three values.
41 ;;; * If the second value is nil, then the parser is said to have /failed/,
42 ;;; and the first value is a list describing the things that the parser
43 ;;; expected to find but didn't. (The precise details of the list items
44 ;;; are important to error-reporting functions, but not to the low-level
45 ;;; machinery, and are left up to higher-level protocols to nail down
48 ;;; * If the second value is not nil, then the parser is said to have
49 ;;; /succeeded/, and the first value is its /result/.
51 ;;; * The third value indicates whether the parser consumed any of its
52 ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and
53 ;;; bad performance), so the `consumedp' return value is used to decide
54 ;;; whether the parser has `committed' to a particular branch. If the
55 ;;; parser context supports place-capture (many do) then `peek' can be
56 ;;; used to suppress consumption of input in the case of parser failure.
58 ;;; The functions and macros here are simply ways of gluing together
59 ;;; expressions which obey this protocol.
61 ;;; The main contribution of this file is a macro `with-parser-context' which
62 ;;; embeds a parsing-specific S-expressions language entered using the new
63 ;;; macro `parse'. The behaviour of this macro is controlled by a pair of
64 ;;; compile-time generic functions `expand-parser-spec' and
65 ;;; `expand-parser-form'. As well as the parser expression they're meant to
66 ;;; process, these functions dispatch on a `context' argument, which is
67 ;;; intended to help `leaf' parsers find the terminal symbols which they're
70 ;;; Note that the context is a compile-time object, constructed by the
71 ;;; `parse' macro expansion function, though the idea is that it will contain
72 ;;; the name or names of variables holding the run-time parser state (which
73 ;;; will typically be a lexical analyser or an input stream or suchlike).
75 (cl:in-package #:sod-parser)
77 ;;;--------------------------------------------------------------------------
80 (defun combine-parser-failures (failures)
81 "Combine the failure indicators listed in FAILURES.
83 (Note that this means that FAILURES is a list of lists.)"
85 (reduce (lambda (f ff) (union f ff :test #'equal))
89 ;;;--------------------------------------------------------------------------
92 (eval-when (:compile-toplevel :load-toplevel :execute)
94 (export 'expand-parser-spec)
95 (defgeneric expand-parser-spec (context spec)
97 "Expand a parser specifier SPEC in a particular parser CONTEXT.")
98 (:method (context (spec list))
99 (expand-parser-form context (car spec) (cdr spec))))
101 (export 'expand-parser-form)
102 (defgeneric expand-parser-form (context head tail)
104 "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.")
105 (:method (context head tail)
108 (export 'wrap-parser)
109 (defgeneric wrap-parser (context form)
111 "Enclose FORM in whatever is necessary to make the parser work.")
112 (:method (context form) form)))
115 (defmacro defparse (name bvl &body body)
116 "Define a new parser form.
118 The full syntax is hairier than it looks:
120 defparse NAME ( [[ :context (CTX SPEC) ]] . BVL )
123 The macro defines a new parser form (NAME ...) which is expanded by the
124 body FORMs. The BVL is a destructuring lambda-list to be applied to the
125 tail of the form. The body forms are enclosed in a block called NAME.
127 Within the FORMs, a function `expand' is available: it takes a parser
128 specifier as its argument and returns its expansion in the parser's
131 If the :context key is provided, then the parser form is specialized on a
132 particular class of parser contexts SPEC; specialized expanders take
133 priority over less specialized or unspecialized expanders -- so you can
134 use this to override the built-in forms safely if they don't seem to be
135 doing the right thing for you. Also, the context -- which is probably
136 interesting to you if you've bothered to specialize -- is bound to the
139 ;; BUG! misplaces declarations: if you declare the CONTEXT argument
140 ;; `special' it won't be bound properly. I'm really not at all sure I know
143 (with-gensyms (head tail context)
146 (unless (and bvl (keywordp (car bvl))) (return))
148 (:context (destructuring-bind (name spec) (pop bvl)
149 (setf ctxclass spec context name)))))
150 (multiple-value-bind (doc decls body) (parse-body body)
151 `(defmethod expand-parser-form
152 ((,context ,ctxclass) (,head (eql ',name)) ,tail)
155 (destructuring-bind ,bvl ,tail
159 (export '(with-parser-context parse))
160 (defmacro with-parser-context ((class &rest initargs) &body body)
161 "Evaluate BODY with a macro `parse' which expands parser forms.
163 Evaluate BODY as an implicit progn. At compile time, a parser context is
164 constructed by (apply #'make-instance CLASS INITARGS). The BODY can make
165 use of the macro `parse':
169 which parses the input in the manner described by SPEC, in the context of
172 (let ((context (apply #'make-instance class initargs)))
174 `(macrolet ((parse (form)
175 (expand-parser-spec ',context form)))
178 ;;;--------------------------------------------------------------------------
179 ;;; Common parser context protocol.
181 (export 'parser-at-eof-p)
182 (defgeneric parser-at-eof-p (context)
184 "Return whether the parser has reached the end of its input.
186 Be careful: all of this is happening at macro expansion time."))
188 (export 'parser-step)
189 (defgeneric parser-step (context)
191 "Advance the parser to the next character.
193 Be careful: all of this is happening at macro-expansion time."))
195 (defmethod expand-parser-spec (context (spec (eql :eof)))
196 "Tests succeeds if the parser has reached the end of its input.
198 The failure indicator is the keyword `:eof'."
200 `(if ,(parser-at-eof-p context)
202 (values '(:eof) nil nil)))
204 ;;;--------------------------------------------------------------------------
205 ;;; Useful macros for dealing with parsers.
209 (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP")))
210 parser consequent &optional (alternative nil altp))
211 "Conditional parsing construction.
213 If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the
214 result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the
215 expected-item list. If ALTERNATIVE is omitted, then propagate the failure
216 following the parser protocol."
218 (with-gensyms (value win)
219 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
220 (declare (ignorable ,consumedp))
222 (let ((,result ,value))
223 (declare (ignorable ,result))
226 `(values ,value nil ,consumedp))
228 `(let ((,expected ,value)) ,alternative))
233 (defmacro when-parse ((&optional (result 'it)) parser &body body)
234 "Convenience macro for conditional parsing.
236 If PARSER succeeds then evaluate BODY with RESULT bound to the result;
237 otherwise propagate the failure."
238 `(if-parse (:result ,result) ,parser (progn ,@body)))
241 (defmacro cond-parse ((&key (result 'it) expected
242 (consumedp (gensym "CP")))
244 "Frightening conditional parsing construct.
246 Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake'
247 parser form `t' may be used to describe a default action. If the PARSER
248 succeeds then evaluate FORMS in order with RESULT bound to the parser
249 result (if there are no forms, then propagate the success); if the PARSER
250 fails without consuming input, then move onto the next clause.
252 If the default clause (if any) is reached, or a parser fails after
253 consuming input, then EXPECTED is bound to a list of failure indicators
254 and the default clause's FORMS are evaluated and with CONSUMEDP bound to a
255 generalized boolean indicating whether any input was consumed. If there
256 is no default clause, and either some parser fails after consuming input,
257 or all of the parsers fail without consuming, then a failure is reported
258 and the input-consumption indicator is propagated.
260 If a parser fails after consuming input, then the failure indicators are
261 whatever that parser reported; if all the parsers fail without consuming
262 then the failure indicators are the union of the indicators reported by
263 the various parsers."
265 (with-gensyms (block fail failarg)
266 (labels ((walk (clauses failures)
267 (cond ((null clauses)
268 (values `(,fail nil (list ,@(reverse failures)))
269 `(values (combine-parser-failures ,failarg)
272 ((eq (caar clauses) t)
273 (values `(,fail nil (list ,@(reverse failures)))
276 (combine-parser-failures
281 (with-gensyms (value win cp)
282 (multiple-value-bind (inner failbody)
283 (walk (cdr clauses) (cons value failures))
284 (values `(multiple-value-bind (,value ,win ,cp)
285 (parse ,(caar clauses))
288 (let ((,result ,value)
290 (declare (ignorable ,result
294 (,fail t (list ,value)))
297 (multiple-value-bind (inner failbody) (walk clauses nil)
299 (flet ((,fail (,consumedp ,failarg)
300 (declare (ignorable ,consumedp ,failarg))
305 (defmacro parser (bvl &body parser)
306 "Functional abstraction for parsers."
307 (multiple-value-bind (doc decls body) (parse-body parser)
308 `(lambda ,bvl ,@doc ,@decls (parse ,@body))))
310 ;;;--------------------------------------------------------------------------
311 ;;; Standard parser forms.
314 (defparse label (label parser)
315 "If PARSER fails, use LABEL as the expected outcome.
317 The LABEL is only evaluated if necessary."
318 (with-gensyms (value win consumedp)
319 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
321 (values ,value ,win ,consumedp)
322 (values (list ,label) nil ,consumedp)))))
325 "Succeed, without consuming input, with result VALUE."
326 `(values ,value t nil))
328 (defparse nil (indicator)
329 "Fail, without consuming input, with indicator VALUE."
330 `(values (list ,indicator) nil nil))
332 (defparse when (cond &body parser)
333 "If CONDITION is true, then match PARSER; otherwise fail."
334 `(if ,cond (parse ,@parser) (values nil nil nil)))
336 (defmethod expand-parser-spec (context (spec (eql t)))
337 "Always matches without consuming input."
340 (defmethod expand-parser-spec (context (spec (eql nil)))
341 "Always fails without consuming input. The failure indicator is `:fail'."
342 '(values '(:fail) nil nil))
345 (defparse seq (binds &body body)
346 "Parse a sequence of heterogeneous items.
350 seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* )
353 The behaviour is similar to `let*'. The PARSER-FORMs are processed in
354 order, left to right. If a parser succeeds, then its value is bound to
355 the corresponding VAR, and available within Lisp forms enclosed within
356 subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then
357 the entire sequence fails. If all of the parsers succeeds, then the FORMs
358 are evaluated as an implicit progn, and the sequence will succeed with the
359 result computed by the final FORM."
361 (with-gensyms (block consumedp)
362 (labels ((walk (binds lets ignores)
364 `(let* ((,consumedp nil)
367 `((declare (ignore ,@(nreverse ignores)))))
368 (values (progn ,@body) t ,consumedp))
369 (destructuring-bind (x &optional (y nil yp))
370 (if (listp (car binds))
373 (with-gensyms (var value win cp)
374 (multiple-value-bind (var parser ignores)
377 (values var (if yp y x) (cons var ignores)))
379 (cons `(,var (multiple-value-bind
382 (when ,cp (setf ,consumedp t))
390 `(block ,block ,(walk binds nil nil)))))
393 (defparse and (:context (context t) &rest parsers)
394 "Parse a sequence of heterogeneous items, but ignore their values.
396 This is just like (and is implemented using) `seq' with all the bindings
397 set to `nil'. The result is `nil'."
401 (expand-parser-spec context
402 `(seq (,@(mapcar (lambda (parser)
405 (,last ,(car (last parsers))))
409 (defparse lisp (&rest forms)
410 "Evaluate FORMs, which should obey the parser protocol."
414 (defparse many ((acc init update
415 &key (new 'it) (final acc) (min nil minp) max (commitp t))
416 parser &optional (sep nil sepp))
417 "Parse a sequence of homogeneous items.
419 The behaviour is similar to `do'. Initially an accumulator ACC is
420 established, and bound to the value of INIT. The PARSER is then evaluated
421 repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults
422 to `it') bound to the result of the parse, and the value returned by
423 UPDATE is stored back into ACC. If the PARSER fails, then the parse
424 ends. The scope of ACC includes the UPDATE and FINAL forms, and the
425 PARSER and SEP parsers; it is updated by side effects, not rebound.
427 If a SEP parser is provided, then the behaviour changes as follows.
428 Before each attempt to parse a new item using PARSER, the parser SEP is
429 invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP
430 is true, then the PARSER must also succeed or the overall parse will
431 fail. If COMMITP is false then a trailing SEP is permitted and ignored.
433 If MAX (which will be evaluated) is not nil, then it must be a number: the
434 parse ends automatically after PARSER has succeeded MAX times. When the
435 parse has ended, if the PARSER succeeded fewer than MIN (which will be
436 evaluated) times then the parse fails. Otherwise, the FINAL form (which
437 defaults to simply returning ACC) is evaluated and its value becomes the
438 result of the parse. MAX defaults to nil -- i.e., no maximum; MIN
439 defaults to 1 if a SEP parser is given, or 0 if not.
441 Note that `many' cannot fail if MIN is zero."
443 ;; Once upon a time, this was a macro of almost infinite hairiness which
444 ;; tried to do everything itself, including inspecting its arguments for
445 ;; constant-ness to decide whether it could elide bits of code. This
446 ;; became unsustainable. Nowadays, it packages up its parser arguments
447 ;; into functions and calls some primitive functions to do the heavy
450 ;; The precise protocol between this macro and the backend functions is
451 ;; subject to change: don't rely on it.
453 (let* ((accvar (or acc (gensym "ACC-")))
454 (func (if sepp '%many-sep '%many)))
455 `(let ((,accvar ,init))
456 (declare (ignorable ,accvar))
457 (,func (lambda (,new)
458 (declare (ignorable ,new))
459 (setf ,accvar ,update))
463 ,@(and sepp (list `(parser () ,sep)))
464 ,@(and minp `(:min ,min))
465 ,@(and max `(:max ,max))
466 ,@(and (not (eq commitp t)) `(:commitp ,commitp))))))
469 (defparse list ((&rest keys) parser &optional (sep nil sepp))
470 "Like `many', but simply returns a list of the parser results."
472 `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys)
473 ,parser ,@(and sepp (list sep))))))
476 (defparse skip-many ((&rest keys) parser &optional (sep nil sepp))
477 "Like `many', but ignores the results."
478 `(parse (many (nil nil nil ,@keys)
479 ,parser ,@(and sepp (list sep)))))
482 (defparse or (&rest parsers)
483 "Try a number of alternative parsers.
485 Each of the PARSERS in turn is tried. If any succeeds, then its result
486 becomes the result of the parse. If any parser fails after consuming
487 input, or if all of the parsers fail, then the overall parse fails, with
488 the union of the expected items from the individual parses."
490 (with-gensyms (fail cp failarg)
491 (labels ((walk (parsers failures)
493 `(,fail nil (list ,@(reverse failures)))
494 (with-gensyms (value win consumedp)
495 `(multiple-value-bind (,value ,win ,consumedp)
496 (parse ,(car parsers))
498 (values ,value ,win ,consumedp))
500 (,fail t (list ,value)))
503 (cons value failures)))))))))
504 `(flet ((,fail (,cp ,failarg)
505 (values (combine-parser-failures ,failarg) nil ,cp)))
506 ,(walk parsers nil)))))
509 (defparse ? (parser &optional (value nil))
510 "Matches PARSER or nothing; fails if PARSER fails after consuming input."
511 `(parse (or ,parser (t ,value))))
513 ;;;--------------------------------------------------------------------------
514 ;;; Pluggable parsers.
516 (export 'call-pluggable-parser)
517 (defun call-pluggable-parser (symbol &rest args)
518 "Call the pluggable parser denoted by SYMBOL.
520 A `pluggable parser' is an indirection point at which a number of
521 alternative parsers can be attached dynamically. The parsers are tried in
522 some arbitrary order, so one should be careful to avoid ambiguities; each
523 is paseed the given ARGS.
525 If any parser succeeds then it determines the result; if any parser fails
526 having consumed input then the pluggable parser fails immediately. If all
527 of the parsers fail without consuming input then the pluggable parser
528 fails with the union of the individual failure indicators."
530 (let ((expected nil))
531 (dolist (item (get symbol 'parser))
532 (multiple-value-bind (value winp consumedp) (apply (cdr item) args)
533 (when (or winp consumedp)
534 (return-from call-pluggable-parser (values value winp consumedp)))
535 (push value expected)))
536 (values (combine-parser-failures expected) nil nil)))
539 (defparse plug (symbol &rest args)
540 "Call the pluggable parser denoted by SYMBOL.
542 This is just like the function `call-pluggable-parser', but the SYMBOL is
544 `(call-pluggable-parser ',symbol ,@args))
546 (export 'pluggable-parser-add)
547 (defun pluggable-parser-add (symbol tag parser)
548 "Adds an element to a pluggable parser.
550 The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
551 comparable object which identifies the element. The PARSER is a parser
552 function; it will be passed arguments via `pluggable-parser'.
554 If a parser with the given TAG is already attached to SYMBOL then the new
555 parser replaces the old one; otherwise it is added to the collection."
557 (let ((alist (get symbol 'parser)))
558 (aif (assoc tag alist)
559 (setf (cdr it) parser)
560 (setf (get symbol 'parser) (acons tag parser alist)))))
562 (export 'define-pluggable-parser)
563 (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body)
564 "Adds an element to a pluggable parser.
566 The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'-
567 comparable object which identifies the element. Neither SYMBOL nor TAG is
568 evaluated. The BODY is a parser expression; the BVL is a lambda list
569 describing how to bind the arguments supplied via `pluggable-parser'.
571 If a parser with the given TAG is already attached to SYMBOL then the new
572 parser replaces the old one; otherwise it is added to the collection."
574 `(pluggable-parser-add ',symbol ',tag (lambda ,bvl ,@body)))
576 ;;;--------------------------------------------------------------------------
577 ;;; Rewindable parser context protocol.
579 (eval-when (:compile-toplevel :load-toplevel :execute)
581 (export 'parser-capture-place)
582 (defgeneric parser-capture-place (context)
584 "Capture the current position of a parser CONTEXT.
586 The return value may later be used with `parser-restore-place'. Be
587 careful: all of this is happening at macro-expansion time.")
589 (error "Parser context ~S doesn't support rewinding." context)))
591 (export 'parser-restore-place)
592 (defgeneric parser-restore-place (context place)
594 "`Rewind' the parser CONTEXT back to the captured PLACE.
596 The place was previously captured by `parser-capture-place'. Be careful:
597 all of this is happening at macro-expansion time."))
599 (export 'parser-release-place)
600 (defgeneric parser-release-place (context place)
602 "Release a PLACE captured from the parser CONTEXT.
604 The place was previously captured by `parser-capture-place'. The
605 underlying scanner can use this call to determine whether there are
606 outstanding captured places, and thereby optimize its behaviour. Be
607 careful: all of this is happening at macro-expansion time.")
608 (:method (context place) nil))
610 (export 'parser-places-must-be-released-p)
611 (defgeneric parser-places-must-be-released-p (context)
613 "Answer whether places captured from the parser CONTEXT need releasing.
615 Some contexts -- well, actually, their run-time counterparts -- work
616 better if they can keep track of which places are captured, or at least if
617 there are captured places outstanding. If this function returns true
618 (which is the default) then `with-parser-place' (and hence parser macros
619 such as `peek') will expand to `unwind-protect' forms in order to perform
620 the correct cleanup. If it returns false, then the `unwind-protect' is
621 omitted so that the runtime code does't have to register cleanup
623 (:method (context) t)))
625 (export 'with-parser-place)
626 (defmacro with-parser-place ((place context) &body body)
627 "Evaluate BODY surrounded with a binding of PLACE to a captured place.
629 The surrounding code will release the PLACE properly on exit from the body
630 forms. This is all happening at macro-expansion time."
631 ;; ... which means that it's a bit hairy. Fortunately, the nested
632 ;; backquotes aren't that bad.
634 (with-gensyms (bodyfunc)
635 `(with-gensyms (,place)
636 (flet ((,bodyfunc () ,@body))
637 `(let ((,,place ,(parser-capture-place ,context)))
638 ,(if (parser-places-must-be-released-p ,context)
639 `(unwind-protect ,(,bodyfunc)
640 ,(parser-release-place ,context ,place))
644 (defparse peek (:context (context t) parser)
645 "Attempt to run PARSER, but rewind the underlying source if it fails."
646 (with-gensyms (value win consumedp)
647 (with-parser-place (place context)
648 `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser)
650 (values ,value ,win ,consumedp))
652 ,(parser-restore-place context place)
653 (values ,value ,win nil)))))))
655 ;;;--------------------------------------------------------------------------
656 ;;; Character parser context protocol.
658 (export 'character-parser-context)
659 (defclass character-parser-context ()
662 "Base class for parsers which read one character at a time."))
664 (export 'parser-current-char)
665 (defgeneric parser-current-char (context)
667 "Return the parser's current character.
669 It is an error to invoke this operation if the parser is at end-of-file;
670 you must check this first. Be careful: all of this is happening at
671 macro-expansion time."))
674 (defparse if-char (:context (context character-parser-context)
675 (&optional (char 'it)) condition consequent alternative)
676 "Basic character-testing parser.
678 If there is a current character, bind it to CHAR and evaluate the
679 CONDITION; if that is true, then evaluate CONSEQUENT and step the parser
680 (in that order); otherwise, if either we're at EOF or the CONDITION
681 returns false, evaluate ALTERNATIVE. The result of `if-char' are the
682 values returned by CONSEQUENT or ALTERNATIVE."
684 (with-gensyms (block)
686 (unless ,(parser-at-eof-p context)
687 (let ((,char ,(parser-current-char context)))
690 (multiple-value-prog1 ,consequent
691 ,(parser-step context))))))
694 (defmethod expand-parser-spec
695 ((context character-parser-context) (spec (eql :any)))
696 "Matches any character; result is the character.
698 The failure indicator is the keyword `:any'."
699 (expand-parser-spec context
702 (values '(:any) nil nil))))
705 (defparse char (:context (context character-parser-context) char)
706 "Matches the character CHAR (evaluated); result is the character.
708 The failure indicator is CHAR."
712 (expand-parser-spec context
713 `(if-char (,it) (char= ,it ,char)
715 (values (list ,char) nil nil))))))
717 (defmethod expand-parser-spec
718 ((context character-parser-context) (char character))
719 (expand-parser-spec context `(char ,char)))
722 (defparse satisfies (:context (context character-parser-context) predicate)
723 "Matches a character that satisfies the PREDICATE
725 The PREDICATE is a function designator. On success, the result is the
726 character. The failure indicator is PREDICATE; you probably want to apply
730 (expand-parser-spec context
731 `(if-char (,it) (,predicate ,it)
733 (values '(,predicate) nil nil)))))
736 (defparse not (:context (context character-parser-context) char)
737 "Matches any character other than CHAR; result is the character.
739 The failure indicator is (not CHAR)."
743 (expand-parser-spec context
744 `(if-char (,it) (char/= ,it ,char)
746 (values `((not ,,char)) nil nil))))))
749 (defparse filter (:context (context character-parser-context) predicate)
750 "Matches a character that satisfies the PREDICATE; result is the output of
753 The failure indicator is PREDICATE; you probably want to apply a `label'."
755 ;; Can't do this one with `if-char'.
756 (with-gensyms (block value)
758 (unless ,(parser-at-eof-p context)
759 (let ((,value (,predicate ,(parser-current-char context))))
761 ,(parser-step context)
762 (return-from ,block (values ,value t t)))))
763 (values '(,predicate) nil nil))))
765 (defmethod expand-parser-spec
766 ((context character-parser-context) (spec (eql :whitespace)))
767 "Matches any sequence of whitespace; result is nil.
772 (cond ((and (not ,(parser-at-eof-p context))
773 (whitespace-char-p ,(parser-current-char context)))
775 ,(parser-step context)
776 (when (or ,(parser-at-eof-p context)
777 (not (whitespace-char-p
778 ,(parser-current-char context))))
779 (return (values nil t t)))))
781 (values nil t nil)))))
783 (defmethod expand-parser-spec
784 ((context character-parser-context) (string string))
785 "Matches the constituent characters of STRING; result is the string.
787 The failure indicator is STRING; on failure, the input is rewound, so this
788 only works on rewindable contexts."
791 (unless (typep string 'simple-string)
792 (setf string (make-array (length string) :initial-contents string)))
793 (with-parser-place (place context)
794 `(dotimes (,i ,(length string) (values ,string t
795 ,(plusp (length string))))
796 (when (or ,(parser-at-eof-p context)
797 (char/= ,(parser-current-char context)
799 ,(parser-restore-place context place)
800 (return (values '(,string) nil nil)))
801 ,(parser-step context)))))
803 ;;;--------------------------------------------------------------------------
804 ;;; Token parser context protocol.
806 (export 'token-parser-context)
807 (defclass token-parser-context ()
810 "Base class for parsers which read tokens with associated semantic values.
812 A token, according to the model suggested by this class, has a /type/,
813 which classifies the token and is the main contributer to guiding the
814 parse, and a /value/, which carries additional semantic information.
816 This may seem redundant given Lisp's dynamic type system; but it's not
817 actually capable of drawing sufficiently fine distinctions easily. For
818 example, we can represent a symbol either as a string or a symbol; but
819 using strings conflicts with being able to represent string literals, and
820 using symbols looks ugly and they don't get GCed. Similarly, it'd be
821 convenient to represent punctuation as characters, but that conflicts with
822 using them for character literals. So, we introduce our own notion of
825 Token scanners are expected to signal end-of-file with a token of type
828 (export 'parser-token-type)
829 (defgeneric parser-token-type (context)
831 "Return the parser's current token type."))
833 (export 'parser-token-value)
834 (defgeneric parser-token-value (context)
836 "Return the parser's current token's semantic value."))
839 (defparse token (:context (context token-parser-context)
840 type &optional (value nil valuep) &key peekp)
841 "Match tokens of a particular type.
843 A token matches under the following conditions:
845 * If the value of TYPE is `t' then the match succeeds if and only if the
846 parser is not at end-of-file.
848 * If the value of TYPE is not `eql' to the token type then the match
851 * If VALUE is specified, and the value of VALUE is not `equal' to the
852 token semantic value then the match fails.
854 * Otherwise the match succeeds.
856 If the match is successful and the parser is not at end-of-file, and the
857 value of PEEKP is nil then the parser advances to the next token; the
858 result of the match is the token's value.
860 If the match fails then the failure indicator is either TYPE or (TYPE
861 VALUE), depending on whether a VALUE was specified."
863 (once-only (type value peekp)
864 (with-gensyms (tokty tokval)
865 `(let ((,tokty ,(parser-token-type context))
866 (,tokval ,(parser-token-value context)))
868 `(not (eq ,tokty :eof))
869 (flet ((check-value (cond)
871 `(and ,cond (equal ,tokval ,value))
874 (check-value `(eql ,tokty ,type))
876 (not (eq ,tokty :eof))
877 ,(check-value `(eql ,tokty ,type))))))
878 ,(let* ((result `(values ,tokval t ,(if (constantp peekp)
881 (step (parser-step context)))
882 (cond ((not (constantp peekp))
883 `(multiple-value-prog1 ,result
884 (unless ,peekp ,step)))
888 `(multiple-value-prog1 ,result
890 (values (list ,(if valuep `(list ,type ,value) type))
893 (defmethod expand-parser-spec ((context token-parser-context) spec)
895 (expand-parser-spec context `(token ,spec))
898 (defmethod expand-parser-spec ((context token-parser-context) (spec string))
899 (expand-parser-spec context `(token :id ,spec)))
901 ;;;----- That's all, folks --------------------------------------------------