X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/a07d8d005f69c0f9f5da2e09c6ee39cb1e1801aa..dea4d05507e59ab779ed4bb209e05971d87e260c:/src/parser/test-parser.lisp diff --git a/src/parser/test-parser.lisp b/src/parser/test-parser.lisp new file mode 100644 index 0000000..f25961e --- /dev/null +++ b/src/parser/test-parser.lisp @@ -0,0 +1,444 @@ +;;; -*-lisp-*- +;;; +;;; Test parser infrastructure +;;; +;;; (c) 2009 Straylight/Edgeware +;;; + +;;;----- Licensing notice --------------------------------------------------- +;;; +;;; This file is part of the Sensble Object Design, an object system for C. +;;; +;;; SOD is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; SOD is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with SOD; if not, write to the Free Software Foundation, +;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(cl:in-package #:sod-test) + +(defclass test-parser (test-case) + ()) +(add-test *sod-test-suite* (get-suite test-parser)) + +;;;-------------------------------------------------------------------------- +;;; Utilities. + +(defmacro assert-parse + ((string value winp consumedp &key (scanner (gensym "SCANNER-"))) + &body parser) + (once-only (string value winp consumedp) + (with-gensyms (my-value my-winp my-consumedp label what) + `(let ((,scanner (make-string-scanner ,string))) + (multiple-value-bind (,my-value ,my-winp ,my-consumedp) + (with-parser-context + (character-scanner-context :scanner ,scanner) + (parse ,@parser)) + (flet ((,label (,what) + (format nil "~A; parsing ~S with ~S" + ,what ,string ',@parser))) + (cond (,winp + (assert-true ,my-winp (,label "winp")) + (if (eq ,value t) + (assert-not-eql ,my-value nil + (,label "parser result")) + (assert-equal ,my-value ,value + (,label "parser result")))) + (t + (assert-false ,my-winp (,label "winp")) + (assert-true (and (null (set-difference ,my-value ,value + :test #'equal)) + (null (set-difference ,value ,my-value + :test #'equal))) + (,label "failure indicator")))) + (if ,consumedp + (assert-true ,my-consumedp (,label "consumedp")) + (assert-false ,my-consumedp (,label "consumedp"))))))))) + +;;;-------------------------------------------------------------------------- +;;; Simple parser tests. +;;; +;;; This lot causes SBCL to warn like a mad thing. It's too clever for us, +;;; and does half of the work at compile time! + +(def-test-method test-simple ((test test-parser) :run nil) + "Test simple atomic parsers, because we rely on them later." + + ;; Characters match themselves. For a character known only at run-time, + ;; use (char CH). + (assert-parse ("abcd" #\a t t) #\a) + (let ((ch #\b)) + (assert-parse ("abcd" '(#\b) nil nil) (char ch))) + + ;; A character can't match at EOF. + (assert-parse ("" '(#\z) nil nil) #\z) + + ;; All characters match :any; but EOF isn't a character. + (assert-parse ("z" #\z t t) :any) + (assert-parse ("" '(:any) nil nil) :any) + + ;; The parser (satisfies PREDICATE) succeeds if the PREDICATE returns + ;; true when applied to the current character. + (assert-parse ("a" #\a t t) (satisfies alpha-char-p)) + (assert-parse ("0" '(alpha-char-p) nil nil) (satisfies alpha-char-p)) + + ;; The parser (not CHAR) matches a character other than CHAR; but it won't + ;; match EOF. + (assert-parse ("a" #\a t t) (not #\b)) + (assert-parse ("b" '((not #\b)) nil nil) (not #\b)) + (assert-parse ("" '((not #\b)) nil nil) (not #\b)) + + ;; But :eof matches only at EOF. + (assert-parse ("" :eof t nil) :eof) + (assert-parse ("abcd" '(:eof) nil nil) :eof) + + ;; Strings match themselves without consuming if they fail. + (assert-parse ("abcd" "ab" t t) "ab") + (assert-parse ("abcd" '("cd") nil nil) "cd")) + +(def-test-method test-sequence ((test test-parser) :run nil) + + ;; An empty sequence always succeeds and never consumes. And provokes + ;; warnings: don't do this. + (assert-parse ("" :win t nil) (seq () :win)) + (assert-parse ("abcd" :win t nil) (seq () :win)) + + ;; A `seq' matches the individual parsers in order, and binds their results + ;; to variables -- if given. The result is the value of the body. If any + ;; parser fails having consumed input, then input stays consumed. There's + ;; no backtracking. + (assert-parse ("abcd" '(#\a . #\c) t t) + (seq ((foo #\a) #\b (bar #\c)) (cons foo bar))) + (assert-parse ("abcd" '(#\c) nil t) + (seq ((foo #\a) (bar #\c)) (cons foo bar))) + (assert-parse ("abcd" '(#\c) nil nil) + (seq ((bar #\c) (foo #\a)) (cons foo bar)))) + +(def-test-method test-repeat ((test test-parser) :run nil) + + ;; A `many' matches a bunch of similar things in a row. You can compute a + ;; result using `do'-like accumulation. + (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc)) #\a)) + + ;; The default minimum is zero; so the parser always succeeds. + (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc)) #\b)) + + ;; You can provide an explicit minimum. Then the match might fail. + (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min 2) #\a)) + (assert-parse ("aabb" '(#\a) nil t) (many (acc 0 (1+ acc) :min 3) #\a)) + + ;; You can also provide an explicit maximum. This will cause the parser to + ;; stop searching, but it can't make it fail. + (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max 3) #\a)) + + ;; You can provide both a maximum and a minimum at the same time. If + ;; they're consistent, you won't be surprised. If they aren't, then the + ;; maximum wins and the minimum is simply ignored (currently). + (assert-parse ("aaaaab" 4 t t) + (many (acc 0 (1+ acc) :min 3 :max 4) #\a)) + (assert-parse ("aabbbb" '(#\a) nil t) + (many (acc 0 (1+ acc) :min 3 :max 4) #\a)) + (assert-parse ("aaabbb" 3 t t) + (many (acc 0 (1+ acc) :min 3 :max 3) #\a)) + (assert-parse ("aaabbb" 3 t t) + (many (acc 0 (1+ acc) :min 17 :max 3) #\a)) + + ;; You can provide a separator. The `many' parser will look for the + ;; separator between each of the main items, but will ignore the results. + (assert-parse ("a,a,abc" 3 t t) (many (acc 0 (1+ acc)) #\a #\,)) + (assert-parse ("a,a,abc" 2 t t) (many (acc 0 (1+ acc) :max 2) #\a #\,)) + + ;; If `many' sees a separator then by default it commits to finding another + ;; item; so this can cause a parse to fail. + (assert-parse ("a,a,bc" '(#\a) nil t) (many (acc 0 (1+ acc)) #\a #\,)) + (assert-parse ("abc" 1 t t) (many (acc 0 (1+ acc)) #\a #\,)) + + ;; If you specify a separator then the default minimum number of + ;; repetitions becomes 1 rather than 0. But you can override this + ;; explicitly. + (assert-parse ("bc" '(#\a) nil nil) (many (acc 0 (1+ acc)) #\a #\,)) + (assert-parse ("bc" 0 t nil) (many (acc 0 (1+ acc) :min 0) #\a #\,)) + + ;; The parser will fail looking for a separator if there aren't enough + ;; items. + (assert-parse ("a,abc" '(#\,) nil t) + (many (acc 0 (1+ acc) :min 3) #\a #\,)) + + ;; You can override the commit-on-separator behaviour by using :commit. + ;; This makes a trailing separator legal (but optional), so it also affects + ;; the behaviour regarding maximum and minimum repetitions. (Commitment is + ;; irrelevant if you don't have a separator.) + (assert-parse ("a,a,bc" 2 t t) + (many (acc 0 (1+ acc) :commitp nil) #\a #\,)) + (assert-parse ("a,a,abc" 3 t t) + (many (acc 0 (1+ acc) :commitp nil) #\a #\,)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp t) #\a #\,)) + #\,) + n)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp nil) #\a #\,)) + #\b) + n)) + (assert-parse ("a,a,bc" '(#\a) nil t) + (many (acc 0 (1+ acc) :min 3 :commitp nil) #\a #\,)) + + ;; The `many' parser won't backtrack. The `many' eats as many `a's as + ;; possible; asking for another one is sure to fail. + (assert-parse ("aaaabc" '(#\a) nil t) (and (skip-many () #\a) #\a))) + +(def-test-method test-repeat-hairy ((test test-parser) :run nil) + ;; The `many' expander is very hairy and does magical things if it notices + ;; that some of its arguments are constants. So here we test a number of + ;; the above things again, using variables so that it has to produce code + ;; which makes decisions at run-time. (I've no doubt that SBCL will issue + ;; an infinite number of notes explaining how clever it is and how it can + ;; do it all at compile-time anyway. Of course, suppressing these notes is + ;; the main reason `many' is so hairy anyway.) + + (let ((zero 0) (two 2) (three 3) (yes t) (no nil)) + + ;; Minima. + (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :min zero) #\a)) + (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc) :min zero) #\b)) + (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min two) #\a)) + (assert-parse ("aabb" '(#\a) nil t) + (many (acc 0 (1+ acc) :min three) #\a)) + + ;; Maxima. + (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :max no) #\a)) + (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max three) #\a)) + + ;; And now together with separators and commitment. Oh, my. + (assert-parse ("a,a,a,bc" 3 t t) + (many (acc 0 (1+ acc) :commitp no) #\a #\,)) + (assert-parse ("a,a,a,bc" '(#\a) nil t) + (many (acc 0 (1+ acc) :commitp yes) #\a #\,)) + (assert-parse ("a,a,bc" '(#\a) nil t) + (many (acc 0 (1+ acc) :min three :commitp yes) #\a #\,)) + (assert-parse ("a,a,bc" '(#\a) nil t) + (many (acc 0 (1+ acc) :min 3 :commitp yes) #\a #\,)) + (assert-parse ("a,a,bc" '(#\a) nil t) + (many (acc 0 (1+ acc) :min three :commitp t) #\a #\,)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max three :commitp no) #\a #\,)) #\b) + n)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max three :commitp yes) #\a #\,)) #\,) + n)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp no) #\a #\,)) #\b) + n)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp yes) #\a #\,)) #\,) + n)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max three :commitp nil) #\a #\,)) #\b) + n)) + (assert-parse ("a,a,a,bc" 3 t t) + (seq ((n (many (acc 0 (1+ acc) :max three :commitp t) #\a #\,)) #\,) + n)))) + +(def-test-method test-alternate ((test test-parser) :run nil) + + ;; An `or' matches the first parser that either succeeds or fails having + ;; consumed input. + (assert-parse ("abcd" #\a t t) (or #\a #\b)) + (assert-parse ("abcd" #\a t t) (or #\b #\a)) + (assert-parse ("abcd" '(#\b #\c) nil nil) (or #\b #\c)) + + ;; Strings don't consume if they fail. + (assert-parse ("abcd" "ab" t t) (or "cd" "ab")) + (assert-parse ("abcd" "ab" t t) (or "ad" "ab")) + (assert-parse ("abcd" '("ad" "ac") nil nil) (or "ad" "ac")) + + ;; But `seq' will if some component consumes. + (assert-parse ("abcd" '(#\d) nil t) (or (and #\a #\d) "ab")) + (assert-parse ("abcd" "ab" t t) (or (and #\c #\d) "ab")) + + ;; We can tame this using `peek' which rewinds the source if its argument + ;; fails, so as to hide consumption of input. + (assert-parse ("abcd" "ab" t t) (or (peek (and #\a #\d)) "ab")) + (assert-parse ("abcd" '(#\a #\b "cd") t t) + (seq ((foo (peek (seq ((foo #\a) (bar #\b)) (list foo bar)))) + (bar "cd")) + (append foo (list bar)))) + + ;; Failure indicators are union'd if they all fail. + (assert-parse ("abcd" '(#\q #\x #\z) nil nil) + (or #\q (peek (and #\a (or #\x #\q))) #\z)) + + ;; But if any of them consumed input then you only get the indicators from + ;; the consuming branch, because we committed to it when we consumed the + ;; input. + (assert-parse ("abcd" '(#\x #\q) nil t) + (or #\q #\z (and #\a (or #\q #\x))))) + +;;;-------------------------------------------------------------------------- +;;; Some tests with a simple recursive parser. + +(defstruct (node + (:predicate nodep) + (:constructor make-node (left data right))) + "Structure type for a simple binary tree." + left data right) + +(defun parse-tree (scanner) + "Parse a textual representation into a simple binary tree. + + The syntax is simple: + + TREE ::= EMPTY | `(' TREE CHAR TREE `)' + + There's an ambiguity in this syntax, at least if you have limited + lookahead: suppose you've just parsed the opening `(' of a TREE, and you + see another `(' -- is it the start of the non-empty left sub-TREE, or is + it the CHAR following an empty left sub-TREE? We opt for the first choice + always." + + ;; This came from another project, although it isn't actually used there. + ;; It exposed the weakness in an earlier design which prompted the addition + ;; of the CONSUMEDP flags to the parser protocol. + + (with-parser-context (character-scanner-context :scanner scanner) + (labels ((tree () + (parse (or (seq (#\( + (left (tree)) + (data :any) + (right (tree)) + #\)) + (make-node left data right)) + (values nil t nil))))) + (parse (seq ((tree (tree)) :eof) + tree))))) + +(defun parse-tree-lookahead (scanner) + "Parse a textual representation into a simple binary tree. + + The syntax is simple, and, indeed, the grammar's the same as for + `sod-parse-tree': + + TREE ::= EMPTY | `(' TREE CHAR TREE `)' + + But the rules are different. Instead of resolving the `ambiguity' between + TREE and CHAR when we find another `(' after the opening `(' of a TREE + deterministically in favour of TREE as `parse-tree' does, we try that + first, and backtrack if necessary." + + ;; Bison can do this, but you have to persuade it to use the scary GLR + ;; parser algorithm + + (with-parser-context (character-scanner-context :scanner scanner) + (labels ((tree () + (parse (or (peek (seq (#\( + (left (tree)) + (data :any) + (right (tree)) + #\)) + (make-node left data right))) + (values nil t nil))))) + (parse (seq ((tree (tree)) :eof) + tree))))) + +(def-test-method test-simple-tree-parser ((test test-parser) :run nil) + (assert-parse ("" nil t nil :scanner sc) (parse-tree sc)) + (assert-parse ("((a)b((c)d(e)))" t t t :scanner sc) (parse-tree sc)) + (assert-parse ("((a)b((c)d(e)))z" '(:eof) nil t :scanner sc) + (parse-tree sc)) + (assert-parse ("((a)b((c)d(e))" '(#\)) nil t :scanner sc) (parse-tree sc)) + (assert-parse ("(([)*(]))" t t t :scanner sc) (parse-tree sc)) + (assert-parse ("((()-()))" '(#\)) nil t :scanner sc) (parse-tree sc)) + (assert-parse ("((()-()))" t t t :scanner sc) (parse-tree-lookahead sc))) + +;;;-------------------------------------------------------------------------- +;;; Test expression parser. + +(defparse token (:context (context character-parser-context) parser) + (with-gensyms (value) + (expand-parser-spec context + `(seq ((,value ,parser) :whitespace) ,value)))) + +(let ((add (binop "+" (x y 5) `(+ ,x ,y))) + (sub (binop "-" (x y 5) `(- ,x ,y))) + (mul (binop "*" (x y 7) `(* ,x ,y))) + (div (binop "/" (x y 7) `(/ ,x ,y))) + (eq (binop "=" (x y 3 :assoc nil) `(= ,x ,y))) + (ne (binop "/=" (x y 3 :assoc nil) `(/= ,x ,y))) + (lt (binop "<" (x y 3 :assoc nil) `(< ,x ,y))) + (gt (binop ">" (x y 3 :assoc nil) `(> ,x ,y))) + (and (binop "&" (x y 2) `(and ,x ,y))) + (or (binop "|" (x y 1) `(or ,x ,y))) + (expt (binop "**" (x y 8 :assoc :right) `(** ,x ,y))) + (neg (preop "-" (x 9) `(- ,x))) + (not (preop "!" (x 2) `(not ,x))) + (fact (postop "!" (x 10) `(! ,x))) + (lp (lparen #\))) (rp (rparen #\))) + (lb (lparen #\])) (rb (rparen #\]))) + (defun test-parse-expr (string) + (with-parser-context (string-parser :string string) + (parse (seq (:whitespace + (value (expr (:nestedp nestedp) + (token (many (a 0 (+ (* a 10) it) :min 1) + (filter digit-char-p))) + (token (or (seq ("**") expt) + (seq ("/=") ne) + (seq (#\+) add) + (seq (#\-) sub) + (seq (#\*) mul) + (seq (#\/) div) + (seq (#\=) eq) + (seq (#\<) lt) + (seq (#\>) gt) + (seq (#\&) and) + (seq (#\|) or))) + (token (or (seq (#\() lp) + (seq (#\-) neg) + (seq (#\!) not))) + (token (or (seq (#\!) fact) + (when nestedp (seq (#\)) rp)))))) + (next (or :any (t :eof)))) + (cons value next)))))) + +(defun assert-expr-parse (string value winp consumedp) + (multiple-value-bind (v w c) (test-parse-expr string) + (flet ((message (what) + (format nil "expression ~S; ~A" string what))) + (cond (winp (assert-true w (message "winp")) + (assert-equal v value (message "value"))) + (t (assert-false w (message "winp")) + (assert-equal v value (message "expected")))) + (assert-eql c consumedp (message "consumedp"))))) + +(def-test-method test-expression-parser ((test test-parser) :run nil) + (assert-expr-parse "1 + 2 + 3" '((+ (+ 1 2) 3) . :eof) t t) + (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t) + (assert-expr-parse "1 * 2 + 3" '((+ (* 1 2) 3) . :eof) t t) + (assert-expr-parse "(1 + 2) * 3" '((* (+ 1 2) 3) . :eof) t t) + (assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t) + (assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t) + (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t) + + (assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6" + '((or (not (= (+ 1 2) 3)) + (/= (- 6 3) (/ 12 6))) + . :eof) + t t) + (assert-expr-parse "! 1 > 2 & ! 4 < 6 | 3 < 4 & 9 > 10" + '((or (and (not (> 1 2)) (not (< 4 6))) + (and (< 3 4) (> 9 10))) + . :eof) + t t) + + (assert-condition 'simple-error (test-parse-expr "(1 + 2")) + (assert-condition 'simple-error (test-parse-expr "(1 + 2]")) + (assert-condition 'simple-error (test-parse-expr "1 < 2 < 3"))) + +;;;----- That's all, folks --------------------------------------------------