| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Protocol for parsing. |
| 4 | ;;; |
| 5 | ;;; (c) 2009 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | ;;;-------------------------------------------------------------------------- |
| 27 | ;;; Parser protocol discussion. |
| 28 | ;;; |
| 29 | ;;; Other languages, notably Haskell and ML, have `parser combinator |
| 30 | ;;; libraries', which allow one to construct recursive descent parsers using |
| 31 | ;;; approximately pleasant syntax. While attempts have been made to |
| 32 | ;;; introduce the benefits of these libraries to Lisp, they've not been |
| 33 | ;;; altogether successful; this seems due to Lisp's lack of features such as |
| 34 | ;;; pattern matching, currying and lazy evaluation. Rather than fight with |
| 35 | ;;; Lisp's weaknesses, this library plays to its strength, making heavy use |
| 36 | ;;; of macros. Effectively, the `combinators' we build here are /compile- |
| 37 | ;;; time/ combinators, not run-time ones. |
| 38 | ;;; |
| 39 | ;;; A `parser' is simply an expression which returns three values. |
| 40 | ;;; |
| 41 | ;;; * If the second value is nil, then the parser is said to have /failed/, |
| 42 | ;;; and the first value is a list describing the things that the parser |
| 43 | ;;; expected to find but didn't. (The precise details of the list items |
| 44 | ;;; are important to error-reporting functions, but not to the low-level |
| 45 | ;;; machinery, and are left up to higher-level protocols to nail down |
| 46 | ;;; harder.) |
| 47 | ;;; |
| 48 | ;;; * If the second value is not nil, then the parser is said to have |
| 49 | ;;; /succeeded/, and the first value is its /result/. |
| 50 | ;;; |
| 51 | ;;; * The third value indicates whether the parser consumed any of its |
| 52 | ;;; input. Parsers don't backtrack implicitly (to avoid space leaks and |
| 53 | ;;; bad performance), so the `consumedp' return value is used to decide |
| 54 | ;;; whether the parser has `committed' to a particular branch. If the |
| 55 | ;;; parser context supports place-capture (many do) then `peek' can be |
| 56 | ;;; used to suppress consumption of input in the case of parser failure. |
| 57 | ;;; |
| 58 | ;;; The functions and macros here are simply ways of gluing together |
| 59 | ;;; expressions which obey this protocol. |
| 60 | ;;; |
| 61 | ;;; The main contribution of this file is a macro `with-parser-context' which |
| 62 | ;;; embeds a parsing-specific S-expressions language entered using the new |
| 63 | ;;; macro `parse'. The behaviour of this macro is controlled by a pair of |
| 64 | ;;; compile-time generic functions `expand-parser-spec' and |
| 65 | ;;; `expand-parser-form'. As well as the parser expression they're meant to |
| 66 | ;;; process, these functions dispatch on a `context' argument, which is |
| 67 | ;;; intended to help `leaf' parsers find the terminal symbols which they're |
| 68 | ;;; meant to process. |
| 69 | ;;; |
| 70 | ;;; Note that the context is a compile-time object, constructed by the |
| 71 | ;;; `parse' macro expansion function, though the idea is that it will contain |
| 72 | ;;; the name or names of variables holding the run-time parser state (which |
| 73 | ;;; will typically be a lexical analyser or an input stream or suchlike). |
| 74 | |
| 75 | (cl:in-package #:sod-parser) |
| 76 | |
| 77 | ;;;-------------------------------------------------------------------------- |
| 78 | ;;; Utilities. |
| 79 | |
| 80 | (export 'combine-parser-failures) |
| 81 | (defun combine-parser-failures (failures) |
| 82 | "Combine the failure indicators listed in FAILURES. |
| 83 | |
| 84 | (Note that this means that FAILURES is a list of lists.)" |
| 85 | |
| 86 | (reduce (lambda (f ff) (union f ff :test #'equal)) |
| 87 | failures |
| 88 | :initial-value nil)) |
| 89 | |
| 90 | (export 'parse-empty) |
| 91 | (defun parse-empty (&optional value) |
| 92 | "Return a parser which parses nothing, successfully. |
| 93 | |
| 94 | The parser returns VALUE and consumes nothing." |
| 95 | (lambda () (values value t nil))) |
| 96 | |
| 97 | (export 'parse-fail) |
| 98 | (defun parse-fail (indicator &optional consumedp) |
| 99 | "Return a parser which fails. |
| 100 | |
| 101 | The parser reports the INDICATOR and (falsely) claims to have consumed |
| 102 | input if CONSUMEDP is true." |
| 103 | (lambda () (values indicator nil consumedp))) |
| 104 | |
| 105 | ;;;-------------------------------------------------------------------------- |
| 106 | ;;; Basic protocol. |
| 107 | |
| 108 | (eval-when (:compile-toplevel :load-toplevel :execute) |
| 109 | |
| 110 | (export 'expand-parser-spec) |
| 111 | (defgeneric expand-parser-spec (context spec) |
| 112 | (:documentation |
| 113 | "Expand a parser specifier SPEC in a particular parser CONTEXT.") |
| 114 | (:method (context (spec list)) |
| 115 | (expand-parser-form context (car spec) (cdr spec)))) |
| 116 | |
| 117 | (export 'expand-parser-form) |
| 118 | (defgeneric expand-parser-form (context head tail) |
| 119 | (:documentation |
| 120 | "Expand a parser list-form given by HEAD and TAIL, in CONTEXT.") |
| 121 | (:method (context head tail) |
| 122 | (declare (ignore context)) |
| 123 | (cons head tail))) |
| 124 | |
| 125 | (export 'wrap-parser) |
| 126 | (defgeneric wrap-parser (context form) |
| 127 | (:documentation |
| 128 | "Enclose FORM in whatever is necessary to make the parser work.") |
| 129 | (:method (context form) |
| 130 | (declare (ignore context)) |
| 131 | form))) |
| 132 | |
| 133 | (export 'defparse) |
| 134 | (defmacro defparse (name bvl &body body) |
| 135 | "Define a new parser form. |
| 136 | |
| 137 | The full syntax is hairier than it looks: |
| 138 | |
| 139 | defparse NAME ( [[ :context (CTX SPEC) ]] . BVL ) |
| 140 | { FORM }* |
| 141 | |
| 142 | The macro defines a new parser form (NAME ...) which is expanded by the |
| 143 | body FORMs. The BVL is a destructuring lambda-list to be applied to the |
| 144 | tail of the form. The body forms are enclosed in a block called NAME. |
| 145 | |
| 146 | If the :context key is provided, then the parser form is specialized on a |
| 147 | particular class of parser contexts SPEC; specialized expanders take |
| 148 | priority over less specialized or unspecialized expanders -- so you can |
| 149 | use this to override the built-in forms safely if they don't seem to be |
| 150 | doing the right thing for you. Also, the context -- which is probably |
| 151 | interesting to you if you've bothered to specialize -- is bound to the |
| 152 | variable CTX." |
| 153 | |
| 154 | ;; BUG! misplaces declarations: if you declare the CONTEXT argument |
| 155 | ;; `special' it won't be bound properly. I'm really not at all sure I know |
| 156 | ;; how to fix this. |
| 157 | |
| 158 | (with-gensyms (head tail context) |
| 159 | (let ((ctxclass t)) |
| 160 | (loop |
| 161 | (unless (and bvl (keywordp (car bvl))) (return)) |
| 162 | (ecase (pop bvl) |
| 163 | (:context (destructuring-bind (name spec) (pop bvl) |
| 164 | (setf ctxclass spec context name))))) |
| 165 | (multiple-value-bind (doc decls body) (parse-body body) |
| 166 | `(defmethod expand-parser-form |
| 167 | ((,context ,ctxclass) (,head (eql ',name)) ,tail) |
| 168 | ,@doc |
| 169 | (declare (ignorable ,context)) |
| 170 | (destructuring-bind ,bvl ,tail |
| 171 | ,@decls |
| 172 | (block ,name ,@body))))))) |
| 173 | |
| 174 | (export '(with-parser-context parse)) |
| 175 | (defmacro with-parser-context ((class &rest initargs) &body body) |
| 176 | "Evaluate BODY with a macro `parse' which expands parser forms. |
| 177 | |
| 178 | Evaluate BODY as an implicit progn. At compile time, a parser context is |
| 179 | constructed by (apply #'make-instance CLASS INITARGS). The BODY can make |
| 180 | use of the macro `parse': |
| 181 | |
| 182 | parse SPEC |
| 183 | |
| 184 | which parses the input in the manner described by SPEC, in the context of |
| 185 | the parser context." |
| 186 | |
| 187 | (let ((context (apply #'make-instance class initargs))) |
| 188 | (wrap-parser context |
| 189 | `(macrolet ((parse (form) |
| 190 | (expand-parser-spec ',context form))) |
| 191 | ,@body)))) |
| 192 | |
| 193 | ;;;-------------------------------------------------------------------------- |
| 194 | ;;; Common parser context protocol. |
| 195 | |
| 196 | (export 'parser-at-eof-p) |
| 197 | (defgeneric parser-at-eof-p (context) |
| 198 | (:documentation |
| 199 | "Return whether the parser has reached the end of its input. |
| 200 | |
| 201 | Be careful: all of this is happening at macro expansion time.")) |
| 202 | |
| 203 | (export 'parser-step) |
| 204 | (defgeneric parser-step (context) |
| 205 | (:documentation |
| 206 | "Advance the parser to the next character. |
| 207 | |
| 208 | Be careful: all of this is happening at macro-expansion time.")) |
| 209 | |
| 210 | (defmethod expand-parser-spec (context (spec (eql :eof))) |
| 211 | "Tests succeeds if the parser has reached the end of its input. |
| 212 | |
| 213 | The failure indicator is the keyword `:eof'." |
| 214 | |
| 215 | `(if ,(parser-at-eof-p context) |
| 216 | (values :eof t nil) |
| 217 | (values '(:eof) nil nil))) |
| 218 | |
| 219 | ;;;-------------------------------------------------------------------------- |
| 220 | ;;; Useful macros for dealing with parsers. |
| 221 | |
| 222 | (export 'it) |
| 223 | (export 'if-parse) |
| 224 | (defmacro if-parse ((&key (result 'it) expected (consumedp (gensym "CP"))) |
| 225 | parser consequent &optional (alternative nil altp)) |
| 226 | "Conditional parsing construction. |
| 227 | |
| 228 | If PARSER succeeds, then evaluate CONSEQUENT with RESULT bound to the |
| 229 | result; otherwise evaluate ALTERNATIVE with EXPECTED bound to the |
| 230 | expected-item list. If ALTERNATIVE is omitted, then propagate the failure |
| 231 | following the parser protocol." |
| 232 | |
| 233 | (with-gensyms (value win) |
| 234 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) |
| 235 | (declare (ignorable ,consumedp)) |
| 236 | (if ,win |
| 237 | (let ((,result ,value)) |
| 238 | (declare (ignorable ,result)) |
| 239 | ,consequent) |
| 240 | ,(cond ((not altp) |
| 241 | `(values ,value nil ,consumedp)) |
| 242 | (expected |
| 243 | `(let ((,expected ,value)) ,alternative)) |
| 244 | (t |
| 245 | alternative)))))) |
| 246 | |
| 247 | (export 'when-parse) |
| 248 | (defmacro when-parse ((&optional (result 'it)) parser &body body) |
| 249 | "Convenience macro for conditional parsing. |
| 250 | |
| 251 | If PARSER succeeds then evaluate BODY with RESULT bound to the result; |
| 252 | otherwise propagate the failure." |
| 253 | `(if-parse (:result ,result) ,parser (progn ,@body))) |
| 254 | |
| 255 | (export 'cond-parse) |
| 256 | (defmacro cond-parse ((&key (result 'it) expected |
| 257 | (consumedp (gensym "CP"))) |
| 258 | &body clauses) |
| 259 | "Frightening conditional parsing construct. |
| 260 | |
| 261 | Each of the CLAUSES has the form (PARSER &rest FORMS); the special `fake' |
| 262 | parser form `t' may be used to describe a default action. If the PARSER |
| 263 | succeeds then evaluate FORMS in order with RESULT bound to the parser |
| 264 | result (if there are no forms, then propagate the success); if the PARSER |
| 265 | fails without consuming input, then move onto the next clause. |
| 266 | |
| 267 | If the default clause (if any) is reached, or a parser fails after |
| 268 | consuming input, then EXPECTED is bound to a list of failure indicators |
| 269 | and the default clause's FORMS are evaluated and with CONSUMEDP bound to a |
| 270 | generalized boolean indicating whether any input was consumed. If there |
| 271 | is no default clause, and either some parser fails after consuming input, |
| 272 | or all of the parsers fail without consuming, then a failure is reported |
| 273 | and the input-consumption indicator is propagated. |
| 274 | |
| 275 | If a parser fails after consuming input, then the failure indicators are |
| 276 | whatever that parser reported; if all the parsers fail without consuming |
| 277 | then the failure indicators are the union of the indicators reported by |
| 278 | the various parsers." |
| 279 | |
| 280 | (with-gensyms (block fail failarg) |
| 281 | (labels ((walk (clauses failures) |
| 282 | (cond ((null clauses) |
| 283 | (values `(,fail nil (list ,@(reverse failures))) |
| 284 | `(values (combine-parser-failures ,failarg) |
| 285 | nil |
| 286 | ,consumedp))) |
| 287 | ((eq (caar clauses) t) |
| 288 | (values `(,fail nil (list ,@(reverse failures))) |
| 289 | `(,@(if expected |
| 290 | `(let ((,expected |
| 291 | (combine-parser-failures |
| 292 | ,failarg)))) |
| 293 | `(progn)) |
| 294 | ,@(cdar clauses)))) |
| 295 | (t |
| 296 | (with-gensyms (value win cp) |
| 297 | (multiple-value-bind (inner failbody) |
| 298 | (walk (cdr clauses) (cons value failures)) |
| 299 | (values `(multiple-value-bind (,value ,win ,cp) |
| 300 | (parse ,(caar clauses)) |
| 301 | (when ,win |
| 302 | (return-from ,block |
| 303 | (let ((,result ,value) |
| 304 | (,consumedp ,cp)) |
| 305 | (declare (ignorable ,result |
| 306 | ,consumedp)) |
| 307 | ,@(cdar clauses)))) |
| 308 | (when ,cp |
| 309 | (,fail t (list ,value))) |
| 310 | ,inner) |
| 311 | failbody))))))) |
| 312 | (multiple-value-bind (inner failbody) (walk clauses nil) |
| 313 | `(block ,block |
| 314 | (flet ((,fail (,consumedp ,failarg) |
| 315 | (declare (ignorable ,consumedp ,failarg)) |
| 316 | ,failbody)) |
| 317 | ,inner)))))) |
| 318 | |
| 319 | (export 'parser) |
| 320 | (defmacro parser (bvl &body parser) |
| 321 | "Functional abstraction for parsers." |
| 322 | (multiple-value-bind (doc decls body) (parse-body parser) |
| 323 | `(lambda ,bvl ,@doc ,@decls (parse ,@body)))) |
| 324 | |
| 325 | ;;;-------------------------------------------------------------------------- |
| 326 | ;;; Standard parser forms. |
| 327 | |
| 328 | (export 'label) |
| 329 | (defparse label (label parser) |
| 330 | "If PARSER fails, use LABEL as the expected outcome. |
| 331 | |
| 332 | The LABEL is only evaluated if necessary." |
| 333 | (with-gensyms (value win consumedp) |
| 334 | `(multiple-value-bind (,value ,win ,consumedp) (parse ,parser) |
| 335 | (if ,win |
| 336 | (values ,value ,win ,consumedp) |
| 337 | (values (list ,label) nil ,consumedp))))) |
| 338 | |
| 339 | (defparse t (value) |
| 340 | "Succeed, without consuming input, with result VALUE." |
| 341 | `(values ,value t nil)) |
| 342 | |
| 343 | (defparse nil (indicator) |
| 344 | "Fail, without consuming input, with indicator VALUE." |
| 345 | `(values (list ,indicator) nil nil)) |
| 346 | |
| 347 | (defparse when (cond &body parser) |
| 348 | "If CONDITION is true, then match PARSER; otherwise fail." |
| 349 | `(if ,cond (parse ,@parser) (values nil nil nil))) |
| 350 | |
| 351 | (defmethod expand-parser-spec (context (spec (eql t))) |
| 352 | "Always matches without consuming input." |
| 353 | (declare (ignore context)) |
| 354 | '(values t t nil)) |
| 355 | |
| 356 | (defmethod expand-parser-spec (context (spec (eql nil))) |
| 357 | "Always fails without consuming input. The failure indicator is `:fail'." |
| 358 | (declare (ignore context)) |
| 359 | '(values '(:fail) nil nil)) |
| 360 | |
| 361 | (export 'seq) |
| 362 | (defparse seq (binds &body body) |
| 363 | "Parse a sequence of heterogeneous items. |
| 364 | |
| 365 | Syntax: |
| 366 | |
| 367 | seq ( { ATOMIC-PARSER-FORM | ([VAR] PARSER-FORM) }* ) |
| 368 | { FORM }* |
| 369 | |
| 370 | The behaviour is similar to `let*'. The PARSER-FORMs are processed in |
| 371 | order, left to right. If a parser succeeds, then its value is bound to |
| 372 | the corresponding VAR, and available within Lisp forms enclosed within |
| 373 | subsequent PARSER-FORMs and/or the body FORMs. If any parser fails, then |
| 374 | the entire sequence fails. If all of the parsers succeeds, then the FORMs |
| 375 | are evaluated as an implicit progn, and the sequence will succeed with the |
| 376 | result computed by the final FORM." |
| 377 | |
| 378 | (with-gensyms (block consumedp) |
| 379 | (labels ((walk (binds lets ignores) |
| 380 | (if (endp binds) |
| 381 | `(let* ((,consumedp nil) |
| 382 | ,@(nreverse lets)) |
| 383 | ,@(and ignores |
| 384 | `((declare (ignore ,@(nreverse ignores))))) |
| 385 | (values (progn ,@body) t ,consumedp)) |
| 386 | (destructuring-bind (x &optional (y nil yp)) |
| 387 | (if (listp (car binds)) |
| 388 | (car binds) |
| 389 | (list (car binds))) |
| 390 | (with-gensyms (var value win cp) |
| 391 | (multiple-value-bind (var parser ignores) |
| 392 | (if (and yp x) |
| 393 | (values x y ignores) |
| 394 | (values var (if yp y x) (cons var ignores))) |
| 395 | (walk (cdr binds) |
| 396 | (cons `(,var (multiple-value-bind |
| 397 | (,value ,win ,cp) |
| 398 | (parse ,parser) |
| 399 | (when ,cp (setf ,consumedp t)) |
| 400 | (unless ,win |
| 401 | (return-from ,block |
| 402 | (values ,value ,nil |
| 403 | ,consumedp))) |
| 404 | ,value)) |
| 405 | lets) |
| 406 | ignores))))))) |
| 407 | `(block ,block ,(walk binds nil nil))))) |
| 408 | |
| 409 | (export 'and) |
| 410 | (defparse and (:context (context t) &rest parsers) |
| 411 | "Parse a sequence of heterogeneous items, but ignore their values. |
| 412 | |
| 413 | This is just like (and is implemented using) `seq' with all the bindings |
| 414 | set to `nil'. The result is `nil'." |
| 415 | (with-gensyms (last) |
| 416 | (if (null parsers) |
| 417 | '(seq () nil) |
| 418 | (expand-parser-spec context |
| 419 | `(seq (,@(mapcar (lambda (parser) |
| 420 | `(nil ,parser)) |
| 421 | (butlast parsers)) |
| 422 | (,last ,(car (last parsers)))) |
| 423 | ,last))))) |
| 424 | |
| 425 | (export 'lisp) |
| 426 | (defparse lisp (&rest forms) |
| 427 | "Evaluate FORMs, which should obey the parser protocol." |
| 428 | `(progn ,@forms)) |
| 429 | |
| 430 | (export 'many) |
| 431 | (defparse many ((acc init update |
| 432 | &key (new 'it) (final acc) (min nil minp) max (commitp t)) |
| 433 | parser &optional (sep nil sepp)) |
| 434 | "Parse a sequence of homogeneous items. |
| 435 | |
| 436 | The behaviour is similar to `do'. Initially an accumulator ACC is |
| 437 | established, and bound to the value of INIT. The PARSER is then evaluated |
| 438 | repeatedly. Each time it succeeds, UPDATE is evaluated with NEW (defaults |
| 439 | to `it') bound to the result of the parse, and the value returned by |
| 440 | UPDATE is stored back into ACC. If the PARSER fails, then the parse |
| 441 | ends. The scope of ACC includes the UPDATE and FINAL forms, and the |
| 442 | PARSER and SEP parsers; it is updated by side effects, not rebound. |
| 443 | |
| 444 | If a SEP parser is provided, then the behaviour changes as follows. |
| 445 | Before each attempt to parse a new item using PARSER, the parser SEP is |
| 446 | invoked. If SEP fails then the parse ends; if SEP succeeds, and COMMITP |
| 447 | is true, then the PARSER must also succeed or the overall parse will |
| 448 | fail. If COMMITP is false then a trailing SEP is permitted and ignored. |
| 449 | |
| 450 | If MAX (which will be evaluated) is not nil, then it must be a number: the |
| 451 | parse ends automatically after PARSER has succeeded MAX times. When the |
| 452 | parse has ended, if the PARSER succeeded fewer than MIN (which will be |
| 453 | evaluated) times then the parse fails. Otherwise, the FINAL form (which |
| 454 | defaults to simply returning ACC) is evaluated and its value becomes the |
| 455 | result of the parse. MAX defaults to nil -- i.e., no maximum; MIN |
| 456 | defaults to 1 if a SEP parser is given, or 0 if not. |
| 457 | |
| 458 | Note that `many' cannot fail if MIN is zero." |
| 459 | |
| 460 | ;; Once upon a time, this was a macro of almost infinite hairiness which |
| 461 | ;; tried to do everything itself, including inspecting its arguments for |
| 462 | ;; constant-ness to decide whether it could elide bits of code. This |
| 463 | ;; became unsustainable. Nowadays, it packages up its parser arguments |
| 464 | ;; into functions and calls some primitive functions to do the heavy |
| 465 | ;; lifting. |
| 466 | ;; |
| 467 | ;; The precise protocol between this macro and the backend functions is |
| 468 | ;; subject to change: don't rely on it. |
| 469 | |
| 470 | (let* ((accvar (or acc (gensym "ACC-"))) |
| 471 | (func (if sepp '%many-sep '%many))) |
| 472 | `(let ((,accvar ,init)) |
| 473 | (declare (ignorable ,accvar)) |
| 474 | (,func (lambda (,new) |
| 475 | (declare (ignorable ,new)) |
| 476 | (setf ,accvar ,update)) |
| 477 | (lambda () ,final) |
| 478 | (parser () ,parser) |
| 479 | ,@(and sepp (list `(parser () ,sep))) |
| 480 | ,@(and minp `(:min ,min)) |
| 481 | ,@(and max `(:max ,max)) |
| 482 | ,@(and (not (eq commitp t)) `(:commitp ,commitp)))))) |
| 483 | |
| 484 | (export 'list) |
| 485 | (defparse list ((&rest keys) parser &optional (sep nil sepp)) |
| 486 | "Like `many', but simply returns a list of the parser results." |
| 487 | (with-gensyms (acc) |
| 488 | `(parse (many (,acc nil (cons it ,acc) :final (nreverse ,acc) ,@keys) |
| 489 | ,parser ,@(and sepp (list sep)))))) |
| 490 | |
| 491 | (export 'skip-many) |
| 492 | (defparse skip-many ((&rest keys) parser &optional (sep nil sepp)) |
| 493 | "Like `many', but ignores the results." |
| 494 | `(parse (many (nil nil nil ,@keys) |
| 495 | ,parser ,@(and sepp (list sep))))) |
| 496 | |
| 497 | (export 'or) |
| 498 | (defparse or (&rest parsers) |
| 499 | "Try a number of alternative parsers. |
| 500 | |
| 501 | Each of the PARSERS in turn is tried. If any succeeds, then its result |
| 502 | becomes the result of the parse. If any parser fails after consuming |
| 503 | input, or if all of the parsers fail, then the overall parse fails, with |
| 504 | the union of the expected items from the individual parses." |
| 505 | |
| 506 | (with-gensyms (fail cp failarg) |
| 507 | (labels ((walk (parsers failures) |
| 508 | (if (null parsers) |
| 509 | `(,fail nil (list ,@(reverse failures))) |
| 510 | (with-gensyms (value win consumedp) |
| 511 | `(multiple-value-bind (,value ,win ,consumedp) |
| 512 | (parse ,(car parsers)) |
| 513 | (cond (,win |
| 514 | (values ,value ,win ,consumedp)) |
| 515 | (,consumedp |
| 516 | (,fail t (list ,value))) |
| 517 | (t |
| 518 | ,(walk (cdr parsers) |
| 519 | (cons value failures))))))))) |
| 520 | `(flet ((,fail (,cp ,failarg) |
| 521 | (values (combine-parser-failures ,failarg) nil ,cp))) |
| 522 | ,(walk parsers nil))))) |
| 523 | |
| 524 | (export '?) |
| 525 | (defparse ? (parser &optional (value nil)) |
| 526 | "Matches PARSER or nothing; fails if PARSER fails after consuming input." |
| 527 | `(parse (or ,parser (t ,value)))) |
| 528 | |
| 529 | ;;;-------------------------------------------------------------------------- |
| 530 | ;;; Pluggable parsers. |
| 531 | |
| 532 | (export 'call-pluggable-parser) |
| 533 | (defun call-pluggable-parser (symbol &rest args) |
| 534 | "Call the pluggable parser denoted by SYMBOL. |
| 535 | |
| 536 | A `pluggable parser' is an indirection point at which a number of |
| 537 | alternative parsers can be attached dynamically. The parsers are tried in |
| 538 | some arbitrary order, so one should be careful to avoid ambiguities; each |
| 539 | is paseed the given ARGS. |
| 540 | |
| 541 | If any parser succeeds then it determines the result; if any parser fails |
| 542 | having consumed input then the pluggable parser fails immediately. If all |
| 543 | of the parsers fail without consuming input then the pluggable parser |
| 544 | fails with the union of the individual failure indicators." |
| 545 | |
| 546 | (let ((expected nil)) |
| 547 | (dolist (item (get symbol 'parser)) |
| 548 | (multiple-value-bind (value winp consumedp) (apply (cdr item) args) |
| 549 | (when (or winp consumedp) |
| 550 | (return-from call-pluggable-parser (values value winp consumedp))) |
| 551 | (push value expected))) |
| 552 | (values (combine-parser-failures expected) nil nil))) |
| 553 | |
| 554 | (export 'plug) |
| 555 | (defparse plug (symbol &rest args) |
| 556 | "Call the pluggable parser denoted by SYMBOL. |
| 557 | |
| 558 | This is just like the function `call-pluggable-parser', but the SYMBOL is |
| 559 | not evaluated." |
| 560 | `(call-pluggable-parser ',symbol ,@args)) |
| 561 | |
| 562 | (export 'pluggable-parser-add) |
| 563 | (defun pluggable-parser-add (symbol tag parser) |
| 564 | "Adds an element to a pluggable parser. |
| 565 | |
| 566 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- |
| 567 | comparable object which identifies the element. The PARSER is a parser |
| 568 | function; it will be passed arguments via `pluggable-parser'. |
| 569 | |
| 570 | If a parser with the given TAG is already attached to SYMBOL then the new |
| 571 | parser replaces the old one; otherwise it is added to the collection." |
| 572 | |
| 573 | (let ((alist (get symbol 'parser))) |
| 574 | (aif (assoc tag alist) |
| 575 | (setf (cdr it) parser) |
| 576 | (setf (get symbol 'parser) (acons tag parser alist))))) |
| 577 | |
| 578 | (export 'define-pluggable-parser) |
| 579 | (defmacro define-pluggable-parser (symbol tag (&rest bvl) &body body) |
| 580 | "Adds an element to a pluggable parser. |
| 581 | |
| 582 | The pluggable parser itself is denoted by SYMBOL; the TAG is any `eql'- |
| 583 | comparable object which identifies the element. Neither SYMBOL nor TAG is |
| 584 | evaluated. The BODY is a parser expression; the BVL is a lambda list |
| 585 | describing how to bind the arguments supplied via `pluggable-parser'. |
| 586 | |
| 587 | If a parser with the given TAG is already attached to SYMBOL then the new |
| 588 | parser replaces the old one; otherwise it is added to the collection." |
| 589 | |
| 590 | (multiple-value-bind (docs decls body) (parse-body body) |
| 591 | `(pluggable-parser-add ',symbol ',tag |
| 592 | (lambda ,bvl |
| 593 | ,@docs ,@decls |
| 594 | (block ,symbol ,@body))))) |
| 595 | |
| 596 | ;;;-------------------------------------------------------------------------- |
| 597 | ;;; Rewindable parser context protocol. |
| 598 | |
| 599 | (eval-when (:compile-toplevel :load-toplevel :execute) |
| 600 | |
| 601 | (export 'parser-capture-place) |
| 602 | (defgeneric parser-capture-place (context) |
| 603 | (:documentation |
| 604 | "Capture the current position of a parser CONTEXT. |
| 605 | |
| 606 | The return value may later be used with `parser-restore-place'. Be |
| 607 | careful: all of this is happening at macro-expansion time.") |
| 608 | (:method (context) |
| 609 | (error "Parser context ~S doesn't support rewinding" context))) |
| 610 | |
| 611 | (export 'parser-restore-place) |
| 612 | (defgeneric parser-restore-place (context place) |
| 613 | (:documentation |
| 614 | "`Rewind' the parser CONTEXT back to the captured PLACE. |
| 615 | |
| 616 | The place was previously captured by `parser-capture-place'. Be careful: |
| 617 | all of this is happening at macro-expansion time.")) |
| 618 | |
| 619 | (export 'parser-release-place) |
| 620 | (defgeneric parser-release-place (context place) |
| 621 | (:documentation |
| 622 | "Release a PLACE captured from the parser CONTEXT. |
| 623 | |
| 624 | The place was previously captured by `parser-capture-place'. The |
| 625 | underlying scanner can use this call to determine whether there are |
| 626 | outstanding captured places, and thereby optimize its behaviour. Be |
| 627 | careful: all of this is happening at macro-expansion time.") |
| 628 | (:method (context place) |
| 629 | (declare (ignore context place)) |
| 630 | nil)) |
| 631 | |
| 632 | (export 'parser-places-must-be-released-p) |
| 633 | (defgeneric parser-places-must-be-released-p (context) |
| 634 | (:documentation |
| 635 | "Answer whether places captured from the parser CONTEXT need releasing. |
| 636 | |
| 637 | Some contexts -- well, actually, their run-time counterparts -- work |
| 638 | better if they can keep track of which places are captured, or at least if |
| 639 | there are captured places outstanding. If this function returns true |
| 640 | (which is the default) then `with-parser-place' (and hence parser macros |
| 641 | such as `peek') will expand to `unwind-protect' forms in order to perform |
| 642 | the correct cleanup. If it returns false, then the `unwind-protect' is |
| 643 | omitted so that the runtime code does't have to register cleanup |
| 644 | handlers.") |
| 645 | (:method (context) |
| 646 | (declare (ignore context)) |
| 647 | t))) |
| 648 | |
| 649 | (export 'with-parser-place) |
| 650 | (defmacro with-parser-place ((place context) &body body) |
| 651 | "Evaluate BODY surrounded with a binding of PLACE to a captured place. |
| 652 | |
| 653 | The surrounding code will release the PLACE properly on exit from the body |
| 654 | forms. This is all happening at macro-expansion time." |
| 655 | ;; ... which means that it's a bit hairy. Fortunately, the nested |
| 656 | ;; backquotes aren't that bad. |
| 657 | (once-only (context) |
| 658 | (with-gensyms (bodyfunc) |
| 659 | `(with-gensyms (,place) |
| 660 | (flet ((,bodyfunc () ,@body)) |
| 661 | `(let ((,,place ,(parser-capture-place ,context))) |
| 662 | ,(if (parser-places-must-be-released-p ,context) |
| 663 | `(unwind-protect ,(,bodyfunc) |
| 664 | (when ,,place |
| 665 | ,(parser-release-place ,context ,place))) |
| 666 | (,bodyfunc)))))))) |
| 667 | |
| 668 | (export 'peek) |
| 669 | (defparse peek (:context (context t) parser) |
| 670 | "Attempt to run PARSER, but rewind the underlying source if it fails." |
| 671 | (with-gensyms (value win consumedp) |
| 672 | (with-parser-place (place context) |
| 673 | `(macrolet ((commit-peeked-place () |
| 674 | `(progn |
| 675 | ,',(parser-release-place context place) |
| 676 | (setf ,',place nil)))) |
| 677 | (multiple-value-bind (,value ,win ,consumedp) (parse ,parser) |
| 678 | (cond ((or ,win (null ,place)) |
| 679 | (values ,value ,win ,consumedp)) |
| 680 | (t |
| 681 | ,(parser-restore-place context place) |
| 682 | (values ,value ,win nil)))))))) |
| 683 | |
| 684 | (defun commit-peeked-place () |
| 685 | "Called by `commit' not lexically within `peek'." |
| 686 | (error "`commit' is not within `peek'")) |
| 687 | |
| 688 | (export 'commit) |
| 689 | (defparse commit () |
| 690 | "Commit to the current parse. |
| 691 | |
| 692 | This releases the place captured by the innermost lexically enclosing |
| 693 | `peek'." |
| 694 | '(progn |
| 695 | (commit-peeked-place) |
| 696 | (values nil t nil))) |
| 697 | |
| 698 | ;;;-------------------------------------------------------------------------- |
| 699 | ;;; Character parser context protocol. |
| 700 | |
| 701 | (export 'character-parser-context) |
| 702 | (defclass character-parser-context () |
| 703 | () |
| 704 | (:documentation |
| 705 | "Base class for parsers which read one character at a time.")) |
| 706 | |
| 707 | (export 'parser-current-char) |
| 708 | (defgeneric parser-current-char (context) |
| 709 | (:documentation |
| 710 | "Return the parser's current character. |
| 711 | |
| 712 | It is an error to invoke this operation if the parser is at end-of-file; |
| 713 | you must check this first. Be careful: all of this is happening at |
| 714 | macro-expansion time.")) |
| 715 | |
| 716 | (export 'if-char) |
| 717 | (defparse if-char (:context (context character-parser-context) |
| 718 | (&optional (char 'it)) condition consequent alternative) |
| 719 | "Basic character-testing parser. |
| 720 | |
| 721 | If there is a current character, bind it to CHAR and evaluate the |
| 722 | CONDITION; if that is true, then evaluate CONSEQUENT and step the parser |
| 723 | (in that order); otherwise, if either we're at EOF or the CONDITION |
| 724 | returns false, evaluate ALTERNATIVE. The result of `if-char' are the |
| 725 | values returned by CONSEQUENT or ALTERNATIVE." |
| 726 | |
| 727 | (with-gensyms (block) |
| 728 | `(block ,block |
| 729 | (unless ,(parser-at-eof-p context) |
| 730 | (let ((,char ,(parser-current-char context))) |
| 731 | (when ,condition |
| 732 | (return-from ,block |
| 733 | (multiple-value-prog1 ,consequent |
| 734 | ,(parser-step context)))))) |
| 735 | ,alternative))) |
| 736 | |
| 737 | (defmethod expand-parser-spec |
| 738 | ((context character-parser-context) (spec (eql :any))) |
| 739 | "Matches any character; result is the character. |
| 740 | |
| 741 | The failure indicator is the keyword `:any'." |
| 742 | (expand-parser-spec context |
| 743 | '(if-char () t |
| 744 | (values it t t) |
| 745 | (values '(:any) nil nil)))) |
| 746 | |
| 747 | (export 'char) |
| 748 | (defparse char (:context (context character-parser-context) char) |
| 749 | "Matches the character CHAR (evaluated); result is the character. |
| 750 | |
| 751 | The failure indicator is CHAR." |
| 752 | |
| 753 | (once-only (char) |
| 754 | (with-gensyms (it) |
| 755 | (expand-parser-spec context |
| 756 | `(if-char (,it) (char= ,it ,char) |
| 757 | (values ,it t t) |
| 758 | (values (list ,char) nil nil)))))) |
| 759 | |
| 760 | (defmethod expand-parser-spec |
| 761 | ((context character-parser-context) (char character)) |
| 762 | (expand-parser-spec context `(char ,char))) |
| 763 | |
| 764 | (export 'satisfies) |
| 765 | (defparse satisfies (:context (context character-parser-context) predicate) |
| 766 | "Matches a character that satisfies the PREDICATE |
| 767 | |
| 768 | The PREDICATE is a function designator. On success, the result is the |
| 769 | character. The failure indicator is PREDICATE; you probably want to apply |
| 770 | a `label'." |
| 771 | |
| 772 | (with-gensyms (it) |
| 773 | (expand-parser-spec context |
| 774 | `(if-char (,it) (,predicate ,it) |
| 775 | (values ,it t t) |
| 776 | (values '(,predicate) nil nil))))) |
| 777 | |
| 778 | (export 'not) |
| 779 | (defparse not (:context (context character-parser-context) char) |
| 780 | "Matches any character other than CHAR; result is the character. |
| 781 | |
| 782 | The failure indicator is (not CHAR)." |
| 783 | |
| 784 | (once-only (char) |
| 785 | (with-gensyms (it) |
| 786 | (expand-parser-spec context |
| 787 | `(if-char (,it) (char/= ,it ,char) |
| 788 | (values ,it t t) |
| 789 | (values `((not ,,char)) nil nil)))))) |
| 790 | |
| 791 | (export 'filter) |
| 792 | (defparse filter (:context (context character-parser-context) predicate) |
| 793 | "Matches a character that satisfies the PREDICATE; result is the output of |
| 794 | PREDICATE. |
| 795 | |
| 796 | The failure indicator is PREDICATE; you probably want to apply a `label'." |
| 797 | |
| 798 | ;; Can't do this one with `if-char'. |
| 799 | (with-gensyms (block value) |
| 800 | `(block ,block |
| 801 | (unless ,(parser-at-eof-p context) |
| 802 | (let ((,value (,predicate ,(parser-current-char context)))) |
| 803 | (when ,value |
| 804 | ,(parser-step context) |
| 805 | (return-from ,block (values ,value t t))))) |
| 806 | (values '(,predicate) nil nil)))) |
| 807 | |
| 808 | (defmethod expand-parser-spec |
| 809 | ((context character-parser-context) (spec (eql :whitespace))) |
| 810 | "Matches any sequence of whitespace; result is nil. |
| 811 | |
| 812 | Cannot fail." |
| 813 | |
| 814 | `(progn |
| 815 | (cond ((and (not ,(parser-at-eof-p context)) |
| 816 | (whitespace-char-p ,(parser-current-char context))) |
| 817 | (loop |
| 818 | ,(parser-step context) |
| 819 | (when (or ,(parser-at-eof-p context) |
| 820 | (not (whitespace-char-p |
| 821 | ,(parser-current-char context)))) |
| 822 | (return (values nil t t))))) |
| 823 | (t |
| 824 | (values nil t nil))))) |
| 825 | |
| 826 | (defmethod expand-parser-spec |
| 827 | ((context character-parser-context) (string string)) |
| 828 | "Matches the constituent characters of STRING; result is the string. |
| 829 | |
| 830 | The failure indicator is STRING; on failure, the input is rewound, so this |
| 831 | only works on rewindable contexts." |
| 832 | |
| 833 | (with-gensyms (i) |
| 834 | (unless (typep string 'simple-string) |
| 835 | (setf string (make-array (length string) :initial-contents string))) |
| 836 | (with-parser-place (place context) |
| 837 | `(dotimes (,i ,(length string) (values ,string t |
| 838 | ,(plusp (length string)))) |
| 839 | (when (or ,(parser-at-eof-p context) |
| 840 | (char/= ,(parser-current-char context) |
| 841 | (schar ,string ,i))) |
| 842 | ,(parser-restore-place context place) |
| 843 | (return (values '(,string) nil nil))) |
| 844 | ,(parser-step context))))) |
| 845 | |
| 846 | ;;;-------------------------------------------------------------------------- |
| 847 | ;;; Token parser context protocol. |
| 848 | |
| 849 | (export 'token-parser-context) |
| 850 | (defclass token-parser-context () |
| 851 | () |
| 852 | (:documentation |
| 853 | "Base class for parsers which read tokens with associated semantic values. |
| 854 | |
| 855 | A token, according to the model suggested by this class, has a /type/, |
| 856 | which classifies the token and is the main contributer to guiding the |
| 857 | parse, and a /value/, which carries additional semantic information. |
| 858 | |
| 859 | This may seem redundant given Lisp's dynamic type system; but it's not |
| 860 | actually capable of drawing sufficiently fine distinctions easily. For |
| 861 | example, we can represent a symbol either as a string or a symbol; but |
| 862 | using strings conflicts with being able to represent string literals, and |
| 863 | using symbols looks ugly and they don't get GCed. Similarly, it'd be |
| 864 | convenient to represent punctuation as characters, but that conflicts with |
| 865 | using them for character literals. So, we introduce our own notion of |
| 866 | token type. |
| 867 | |
| 868 | Token scanners are expected to signal end-of-file with a token of type |
| 869 | `:eof'.")) |
| 870 | |
| 871 | (export 'parser-token-type) |
| 872 | (defgeneric parser-token-type (context) |
| 873 | (:documentation |
| 874 | "Return the parser's current token type.")) |
| 875 | |
| 876 | (export 'parser-token-value) |
| 877 | (defgeneric parser-token-value (context) |
| 878 | (:documentation |
| 879 | "Return the parser's current token's semantic value.")) |
| 880 | |
| 881 | (export 'token) |
| 882 | (locally (declare #+sbcl (sb-ext:muffle-conditions style-warning)) |
| 883 | (defparse token (:context (context token-parser-context) |
| 884 | type &optional (value nil valuep) &key peekp) |
| 885 | "Match tokens of a particular type. |
| 886 | |
| 887 | A token matches under the following conditions: |
| 888 | |
| 889 | * If the value of TYPE is `t' then the match succeeds if and only if the |
| 890 | parser is not at end-of-file. |
| 891 | |
| 892 | * If the value of TYPE is not `eql' to the token type then the match |
| 893 | fails. |
| 894 | |
| 895 | * If VALUE is specified, and the value of VALUE is not `equal' to the |
| 896 | token semantic value then the match fails. |
| 897 | |
| 898 | * Otherwise the match succeeds. |
| 899 | |
| 900 | If the match is successful and the parser is not at end-of-file, and the |
| 901 | value of PEEKP is nil then the parser advances to the next token; the |
| 902 | result of the match is the token's value. |
| 903 | |
| 904 | If the match fails then the failure indicator is either TYPE or (TYPE |
| 905 | VALUE), depending on whether a VALUE was specified." |
| 906 | |
| 907 | (once-only (type value peekp) |
| 908 | (with-gensyms (tokty tokval) |
| 909 | `(let ((,tokty ,(parser-token-type context)) |
| 910 | (,tokval ,(parser-token-value context))) |
| 911 | (if ,(if (eq type t) |
| 912 | `(not (eq ,tokty :eof)) |
| 913 | (flet ((check-value (cond) |
| 914 | (if valuep |
| 915 | `(and ,cond (equal ,tokval ,value)) |
| 916 | cond))) |
| 917 | (if (constantp type) |
| 918 | (check-value `(eql ,tokty ,type)) |
| 919 | `(if (eq ,type t) |
| 920 | (not (eq ,tokty :eof)) |
| 921 | ,(check-value `(eql ,tokty ,type)))))) |
| 922 | ,(let* ((result `(values ,tokval t ,(if (constantp peekp) |
| 923 | (not peekp) |
| 924 | `(not ,peekp)))) |
| 925 | (step (parser-step context))) |
| 926 | (cond ((not (constantp peekp)) |
| 927 | `(multiple-value-prog1 ,result |
| 928 | (unless ,peekp ,step))) |
| 929 | (peekp |
| 930 | result) |
| 931 | (t |
| 932 | `(multiple-value-prog1 ,result |
| 933 | ,step)))) |
| 934 | (values (list ,(if valuep `(list ,type ,value) type)) |
| 935 | nil nil))))))) |
| 936 | |
| 937 | (defmethod expand-parser-spec ((context token-parser-context) spec) |
| 938 | (if (atom spec) |
| 939 | (expand-parser-spec context `(token ,spec)) |
| 940 | (call-next-method))) |
| 941 | |
| 942 | (defmethod expand-parser-spec ((context token-parser-context) (spec string)) |
| 943 | (expand-parser-spec context `(token :id ,spec))) |
| 944 | |
| 945 | ;;;----- That's all, folks -------------------------------------------------- |