X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/bf090e021a5c20da452a4841cdfb8eb78e29544e..aa14a4cddcb96b681d5c19a2ec8bad382f43b264:/src/parser/test-parser.lisp

diff --git a/src/parser/test-parser.lisp b/src/parser/test-parser.lisp
deleted file mode 100644
index f25961e..0000000
--- a/src/parser/test-parser.lisp
+++ /dev/null
@@ -1,444 +0,0 @@
-;;; -*-lisp-*-
-;;;
-;;; Test parser infrastructure
-;;;
-;;; (c) 2009 Straylight/Edgeware
-;;;
-
-;;;----- Licensing notice ---------------------------------------------------
-;;;
-;;; This file is part of the Sensble Object Design, an object system for C.
-;;;
-;;; SOD is free software; you can redistribute it and/or modify
-;;; it under the terms of the GNU General Public License as published by
-;;; the Free Software Foundation; either version 2 of the License, or
-;;; (at your option) any later version.
-;;;
-;;; SOD is distributed in the hope that it will be useful,
-;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;;; GNU General Public License for more details.
-;;;
-;;; You should have received a copy of the GNU General Public License
-;;; along with SOD; if not, write to the Free Software Foundation,
-;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-(cl:in-package #:sod-test)
-
-(defclass test-parser (test-case)
-  ())
-(add-test *sod-test-suite* (get-suite test-parser))
-
-;;;--------------------------------------------------------------------------
-;;; Utilities.
-
-(defmacro assert-parse
-    ((string value winp consumedp &key (scanner (gensym "SCANNER-")))
-     &body parser)
-  (once-only (string value winp consumedp)
-    (with-gensyms (my-value my-winp my-consumedp label what)
-      `(let ((,scanner (make-string-scanner ,string)))
-	 (multiple-value-bind (,my-value ,my-winp ,my-consumedp)
-	     (with-parser-context
-		 (character-scanner-context :scanner ,scanner)
-	       (parse ,@parser))
-	   (flet ((,label (,what)
-		    (format nil "~A; parsing ~S with ~S"
-			    ,what ,string ',@parser)))
-	     (cond (,winp
-		    (assert-true ,my-winp (,label "winp"))
-		    (if (eq ,value t)
-			(assert-not-eql ,my-value nil
-					(,label "parser result"))
-			(assert-equal ,my-value ,value
-				      (,label "parser result"))))
-		   (t
-		    (assert-false ,my-winp (,label "winp"))
-		    (assert-true (and (null (set-difference ,my-value ,value
-							    :test #'equal))
-				      (null (set-difference ,value ,my-value
-							    :test #'equal)))
-				 (,label "failure indicator"))))
-	     (if ,consumedp
-		 (assert-true ,my-consumedp (,label "consumedp"))
-		 (assert-false ,my-consumedp (,label "consumedp")))))))))
-
-;;;--------------------------------------------------------------------------
-;;; Simple parser tests.
-;;;
-;;; This lot causes SBCL to warn like a mad thing.  It's too clever for us,
-;;; and does half of the work at compile time!
-
-(def-test-method test-simple ((test test-parser) :run nil)
-  "Test simple atomic parsers, because we rely on them later."
-
-  ;; Characters match themselves.  For a character known only at run-time,
-  ;; use (char CH).
-  (assert-parse ("abcd" #\a t t) #\a)
-  (let ((ch #\b))
-    (assert-parse ("abcd" '(#\b) nil nil) (char ch)))
-
-  ;; A character can't match at EOF.
-  (assert-parse ("" '(#\z) nil nil) #\z)
-
-  ;; All characters match :any; but EOF isn't a character.
-  (assert-parse ("z" #\z t t) :any)
-  (assert-parse ("" '(:any) nil nil) :any)
-
-  ;; The parser (satisfies PREDICATE) succeeds if the PREDICATE returns
-  ;; true when applied to the current character.
-  (assert-parse ("a" #\a t t) (satisfies alpha-char-p))
-  (assert-parse ("0" '(alpha-char-p) nil nil) (satisfies alpha-char-p))
-
-  ;; The parser (not CHAR) matches a character other than CHAR; but it won't
-  ;; match EOF.
-  (assert-parse ("a" #\a t t) (not #\b))
-  (assert-parse ("b" '((not #\b)) nil nil) (not #\b))
-  (assert-parse ("" '((not #\b)) nil nil) (not #\b))
-
-  ;; But :eof matches only at EOF.
-  (assert-parse ("" :eof t nil) :eof)
-  (assert-parse ("abcd" '(:eof) nil nil) :eof)
-
-  ;; Strings match themselves without consuming if they fail.
-  (assert-parse ("abcd" "ab" t t) "ab")
-  (assert-parse ("abcd" '("cd") nil nil) "cd"))
-
-(def-test-method test-sequence ((test test-parser) :run nil)
-
-  ;; An empty sequence always succeeds and never consumes.  And provokes
-  ;; warnings: don't do this.
-  (assert-parse ("" :win t nil) (seq () :win))
-  (assert-parse ("abcd" :win t nil) (seq () :win))
-
-  ;; A `seq' matches the individual parsers in order, and binds their results
-  ;; to variables -- if given.  The result is the value of the body.  If any
-  ;; parser fails having consumed input, then input stays consumed.  There's
-  ;; no backtracking.
-  (assert-parse ("abcd" '(#\a . #\c) t t)
-    (seq ((foo #\a) #\b (bar #\c)) (cons foo bar)))
-  (assert-parse ("abcd" '(#\c) nil t)
-    (seq ((foo #\a) (bar #\c)) (cons foo bar)))
-  (assert-parse ("abcd" '(#\c) nil nil)
-    (seq ((bar #\c) (foo #\a)) (cons foo bar))))
-
-(def-test-method test-repeat ((test test-parser) :run nil)
-
-  ;; A `many' matches a bunch of similar things in a row.  You can compute a
-  ;; result using `do'-like accumulation.
-  (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc)) #\a))
-
-  ;; The default minimum is zero; so the parser always succeeds.
-  (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc)) #\b))
-
-  ;; You can provide an explicit minimum.  Then the match might fail.
-  (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min 2) #\a))
-  (assert-parse ("aabb" '(#\a) nil t) (many (acc 0 (1+ acc) :min 3) #\a))
-
-  ;; You can also provide an explicit maximum.  This will cause the parser to
-  ;; stop searching, but it can't make it fail.
-  (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max 3) #\a))
-
-  ;; You can provide both a maximum and a minimum at the same time.  If
-  ;; they're consistent, you won't be surprised.  If they aren't, then the
-  ;; maximum wins and the minimum is simply ignored (currently).
-  (assert-parse ("aaaaab" 4 t t)
-    (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
-  (assert-parse ("aabbbb" '(#\a) nil t)
-    (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
-  (assert-parse ("aaabbb" 3 t t)
-    (many (acc 0 (1+ acc) :min 3 :max 3) #\a))
-  (assert-parse ("aaabbb" 3 t t)
-    (many (acc 0 (1+ acc) :min 17 :max 3) #\a))
-
-  ;; You can provide a separator.  The `many' parser will look for the
-  ;; separator between each of the main items, but will ignore the results.
-  (assert-parse ("a,a,abc" 3 t t) (many (acc 0 (1+ acc)) #\a #\,))
-  (assert-parse ("a,a,abc" 2 t t) (many (acc 0 (1+ acc) :max 2) #\a #\,))
-
-  ;; If `many' sees a separator then by default it commits to finding another
-  ;; item; so this can cause a parse to fail.
-  (assert-parse ("a,a,bc" '(#\a) nil t) (many (acc 0 (1+ acc)) #\a #\,))
-  (assert-parse ("abc" 1 t t) (many (acc 0 (1+ acc)) #\a #\,))
-
-  ;; If you specify a separator then the default minimum number of
-  ;; repetitions becomes 1 rather than 0.  But you can override this
-  ;; explicitly.
-  (assert-parse ("bc" '(#\a) nil nil) (many (acc 0 (1+ acc)) #\a #\,))
-  (assert-parse ("bc" 0 t nil) (many (acc 0 (1+ acc) :min 0) #\a #\,))
-
-  ;; The parser will fail looking for a separator if there aren't enough
-  ;; items.
-  (assert-parse ("a,abc" '(#\,) nil t)
-    (many (acc 0 (1+ acc) :min 3) #\a #\,))
-
-  ;; You can override the commit-on-separator behaviour by using :commit.
-  ;; This makes a trailing separator legal (but optional), so it also affects
-  ;; the behaviour regarding maximum and minimum repetitions.  (Commitment is
-  ;; irrelevant if you don't have a separator.)
-  (assert-parse ("a,a,bc" 2 t t)
-    (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
-  (assert-parse ("a,a,abc" 3 t t)
-    (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
-  (assert-parse ("a,a,a,bc" 3 t t)
-    (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp t) #\a #\,))
-	  #\,)
-      n))
-  (assert-parse ("a,a,a,bc" 3 t t)
-    (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp nil) #\a #\,))
-	  #\b)
-      n))
-  (assert-parse ("a,a,bc" '(#\a) nil t)
-    (many (acc 0 (1+ acc) :min 3 :commitp nil) #\a #\,))
-
-  ;; The `many' parser won't backtrack.  The `many' eats as many `a's as
-  ;; possible; asking for another one is sure to fail.
-  (assert-parse ("aaaabc" '(#\a) nil t) (and (skip-many () #\a) #\a)))
-
-(def-test-method test-repeat-hairy ((test test-parser) :run nil)
-  ;; The `many' expander is very hairy and does magical things if it notices
-  ;; that some of its arguments are constants.  So here we test a number of
-  ;; the above things again, using variables so that it has to produce code
-  ;; which makes decisions at run-time.  (I've no doubt that SBCL will issue
-  ;; an infinite number of notes explaining how clever it is and how it can
-  ;; do it all at compile-time anyway.  Of course, suppressing these notes is
-  ;; the main reason `many' is so hairy anyway.)
-
-  (let ((zero 0) (two 2) (three 3) (yes t) (no nil))
-
-    ;; Minima.
-    (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :min zero) #\a))
-    (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc) :min zero) #\b))
-    (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min two) #\a))
-    (assert-parse ("aabb" '(#\a) nil t)
-      (many (acc 0 (1+ acc) :min three) #\a))
-
-    ;; Maxima.
-    (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :max no) #\a))
-    (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max three) #\a))
-
-    ;; And now together with separators and commitment.  Oh, my.
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (many (acc 0 (1+ acc) :commitp no) #\a #\,))
-    (assert-parse ("a,a,a,bc" '(#\a) nil t)
-      (many (acc 0 (1+ acc) :commitp yes) #\a #\,))
-    (assert-parse ("a,a,bc" '(#\a) nil t)
-      (many (acc 0 (1+ acc) :min three :commitp yes) #\a #\,))
-    (assert-parse ("a,a,bc" '(#\a) nil t)
-      (many (acc 0 (1+ acc) :min 3 :commitp yes) #\a #\,))
-    (assert-parse ("a,a,bc" '(#\a) nil t)
-      (many (acc 0 (1+ acc) :min three :commitp t) #\a #\,))
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (seq ((n (many (acc 0 (1+ acc) :max three :commitp no) #\a #\,)) #\b)
-	n))
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (seq ((n (many (acc 0 (1+ acc) :max three :commitp yes) #\a #\,)) #\,)
-	n))
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp no) #\a #\,)) #\b)
-	n))
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp yes) #\a #\,)) #\,)
-	n))
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (seq ((n (many (acc 0 (1+ acc) :max three :commitp nil) #\a #\,)) #\b)
-	n))
-    (assert-parse ("a,a,a,bc" 3 t t)
-      (seq ((n (many (acc 0 (1+ acc) :max three :commitp t) #\a #\,)) #\,)
-	n))))
-
-(def-test-method test-alternate ((test test-parser) :run nil)
-
-  ;; An `or' matches the first parser that either succeeds or fails having
-  ;; consumed input.
-  (assert-parse ("abcd" #\a t t) (or #\a #\b))
-  (assert-parse ("abcd" #\a t t) (or #\b #\a))
-  (assert-parse ("abcd" '(#\b #\c) nil nil) (or #\b #\c))
-
-  ;; Strings don't consume if they fail.
-  (assert-parse ("abcd" "ab" t t) (or "cd" "ab"))
-  (assert-parse ("abcd" "ab" t t) (or "ad" "ab"))
-  (assert-parse ("abcd" '("ad" "ac") nil nil) (or "ad" "ac"))
-
-  ;; But `seq' will if some component consumes.
-  (assert-parse ("abcd" '(#\d) nil t) (or (and #\a #\d) "ab"))
-  (assert-parse ("abcd" "ab" t t) (or (and #\c #\d) "ab"))
-
-  ;; We can tame this using `peek' which rewinds the source if its argument
-  ;; fails, so as to hide consumption of input.
-  (assert-parse ("abcd" "ab" t t) (or (peek (and #\a #\d)) "ab"))
-  (assert-parse ("abcd" '(#\a #\b "cd") t t)
-    (seq ((foo (peek (seq ((foo #\a) (bar #\b)) (list foo bar))))
-	  (bar "cd"))
-      (append foo (list bar))))
-
-  ;; Failure indicators are union'd if they all fail.
-  (assert-parse ("abcd" '(#\q #\x #\z) nil nil)
-    (or #\q (peek (and #\a (or #\x #\q))) #\z))
-
-  ;; But if any of them consumed input then you only get the indicators from
-  ;; the consuming branch, because we committed to it when we consumed the
-  ;; input.
-  (assert-parse ("abcd" '(#\x #\q) nil t)
-    (or #\q #\z (and #\a (or #\q #\x)))))
-
-;;;--------------------------------------------------------------------------
-;;; Some tests with a simple recursive parser.
-
-(defstruct (node
-	     (:predicate nodep)
-	     (:constructor make-node (left data right)))
-  "Structure type for a simple binary tree."
-  left data right)
-
-(defun parse-tree (scanner)
-  "Parse a textual representation into a simple binary tree.
-
-   The syntax is simple:
-
-	TREE ::= EMPTY | `(' TREE CHAR TREE `)'
-
-   There's an ambiguity in this syntax, at least if you have limited
-   lookahead: suppose you've just parsed the opening `(' of a TREE, and you
-   see another `(' -- is it the start of the non-empty left sub-TREE, or is
-   it the CHAR following an empty left sub-TREE?  We opt for the first choice
-   always."
-
-  ;; This came from another project, although it isn't actually used there.
-  ;; It exposed the weakness in an earlier design which prompted the addition
-  ;; of the CONSUMEDP flags to the parser protocol.
-
-  (with-parser-context (character-scanner-context :scanner scanner)
-    (labels ((tree ()
-	       (parse (or (seq (#\(
-				(left (tree))
-				(data :any)
-				(right (tree))
-				#\))
-			    (make-node left data right))
-			  (values nil t nil)))))
-      (parse (seq ((tree (tree)) :eof)
-	       tree)))))
-
-(defun parse-tree-lookahead (scanner)
-  "Parse a textual representation into a simple binary tree.
-
-   The syntax is simple, and, indeed, the grammar's the same as for
-   `sod-parse-tree':
-
-	TREE ::= EMPTY | `(' TREE CHAR TREE `)'
-
-   But the rules are different.  Instead of resolving the `ambiguity' between
-   TREE and CHAR when we find another `(' after the opening `(' of a TREE
-   deterministically in favour of TREE as `parse-tree' does, we try that
-   first, and backtrack if necessary."
-
-  ;; Bison can do this, but you have to persuade it to use the scary GLR
-  ;; parser algorithm
-
-  (with-parser-context (character-scanner-context :scanner scanner)
-    (labels ((tree ()
-	       (parse (or (peek (seq (#\(
-				      (left (tree))
-				      (data :any)
-				      (right (tree))
-				      #\))
-				  (make-node left data right)))
-			  (values nil t nil)))))
-      (parse (seq ((tree (tree)) :eof)
-	       tree)))))
-
-(def-test-method test-simple-tree-parser ((test test-parser) :run nil)
-  (assert-parse ("" nil t nil :scanner sc) (parse-tree sc))
-  (assert-parse ("((a)b((c)d(e)))" t t t :scanner sc) (parse-tree sc))
-  (assert-parse ("((a)b((c)d(e)))z" '(:eof) nil t :scanner sc)
-    (parse-tree sc))
-  (assert-parse ("((a)b((c)d(e))" '(#\)) nil t :scanner sc) (parse-tree sc))
-  (assert-parse ("(([)*(]))" t t t :scanner sc) (parse-tree sc))
-  (assert-parse ("((()-()))" '(#\)) nil t :scanner sc) (parse-tree sc))
-  (assert-parse ("((()-()))" t t t :scanner sc) (parse-tree-lookahead sc)))
-
-;;;--------------------------------------------------------------------------
-;;; Test expression parser.
-
-(defparse token (:context (context character-parser-context) parser)
-  (with-gensyms (value)
-    (expand-parser-spec context
-			`(seq ((,value ,parser) :whitespace) ,value))))
-
-(let ((add (binop "+" (x y 5) `(+ ,x ,y)))
-      (sub (binop "-" (x y 5) `(- ,x ,y)))
-      (mul (binop "*" (x y 7) `(* ,x ,y)))
-      (div (binop "/" (x y 7) `(/ ,x ,y)))
-      (eq (binop "=" (x y 3 :assoc nil) `(= ,x ,y)))
-      (ne (binop "/=" (x y 3 :assoc nil) `(/= ,x ,y)))
-      (lt (binop "<" (x y 3 :assoc nil) `(< ,x ,y)))
-      (gt (binop ">" (x y 3 :assoc nil) `(> ,x ,y)))
-      (and (binop "&" (x y 2) `(and ,x ,y)))
-      (or (binop "|" (x y 1) `(or ,x ,y)))
-      (expt (binop "**" (x y 8 :assoc :right) `(** ,x ,y)))
-      (neg (preop "-" (x 9) `(- ,x)))
-      (not (preop "!" (x 2) `(not ,x)))
-      (fact (postop "!" (x 10) `(! ,x)))
-      (lp (lparen #\))) (rp (rparen #\)))
-      (lb (lparen #\])) (rb (rparen #\])))
-  (defun test-parse-expr (string)
-    (with-parser-context (string-parser :string string)
-      (parse (seq (:whitespace
-		   (value (expr (:nestedp nestedp)
-			    (token (many (a 0 (+ (* a 10) it) :min 1)
-				     (filter digit-char-p)))
-			    (token (or (seq ("**") expt)
-				       (seq ("/=") ne)
-				       (seq (#\+) add)
-				       (seq (#\-) sub)
-				       (seq (#\*) mul)
-				       (seq (#\/) div)
-				       (seq (#\=) eq)
-				       (seq (#\<) lt)
-				       (seq (#\>) gt)
-				       (seq (#\&) and)
-				       (seq (#\|) or)))
-			    (token (or (seq (#\() lp)
-				       (seq (#\-) neg)
-				       (seq (#\!) not)))
-			    (token (or (seq (#\!) fact)
-				       (when nestedp (seq (#\)) rp))))))
-		   (next (or :any (t :eof))))
-	       (cons value next))))))
-
-(defun assert-expr-parse (string value winp consumedp)
-  (multiple-value-bind (v w c) (test-parse-expr string)
-    (flet ((message (what)
-	     (format nil "expression ~S; ~A" string what)))
-      (cond (winp (assert-true w (message "winp"))
-		  (assert-equal v value (message "value")))
-	    (t (assert-false w (message "winp"))
-	       (assert-equal v value (message "expected"))))
-      (assert-eql c consumedp (message "consumedp")))))
-
-(def-test-method test-expression-parser ((test test-parser) :run nil)
-  (assert-expr-parse "1 + 2 + 3" '((+ (+ 1 2) 3) . :eof) t t)
-  (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
-  (assert-expr-parse "1 * 2 + 3" '((+ (* 1 2) 3) . :eof) t t)
-  (assert-expr-parse "(1 + 2) * 3" '((* (+ 1 2) 3) . :eof) t t)
-  (assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t)
-  (assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t)
-  (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
-
-  (assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6"
-		     '((or (not (= (+ 1 2) 3))
-			   (/= (- 6 3) (/ 12 6)))
-		       . :eof)
-		     t t)
-  (assert-expr-parse "! 1 > 2 & ! 4 < 6 | 3 < 4 & 9 > 10"
-		     '((or (and (not (> 1 2)) (not (< 4 6)))
-			   (and (< 3 4) (> 9 10)))
-		       . :eof)
-		     t t)
-
-  (assert-condition 'simple-error (test-parse-expr "(1 + 2"))
-  (assert-condition 'simple-error (test-parse-expr "(1 + 2]"))
-  (assert-condition 'simple-error (test-parse-expr "1 < 2 < 3")))
-
-;;;----- That's all, folks --------------------------------------------------