[sod] / src / parser / parser-test.lisp

;;; -*-lisp-*-
;;;
;;; Test parser infrastructure
;;;
;;; (c) 2009 Straylight/Edgeware
;;;

;;;----- Licensing notice ---------------------------------------------------
;;;
;;; This file is part of the Sensible Object Design, an object system for C.
;;;
;;; SOD is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 2 of the License, or
;;; (at your option) any later version.
;;;
;;; SOD is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with SOD; if not, write to the Free Software Foundation,
;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

(cl:in-package #:sod-test)

(defclass test-parser (test-case)
  ())
(add-test *sod-test-suite* (get-suite test-parser))

;;;--------------------------------------------------------------------------
;;; Utilities.

(defmacro assert-parse
    ((string value winp consumedp &key (scanner (gensym "SCANNER-")))
     &body parser)
  (once-only (string value winp consumedp)
    (with-gensyms (my-value my-winp my-consumedp label what)
      `(let ((,scanner (make-string-scanner ,string)))
	 (declare (ignorable ,scanner))
	 (multiple-value-bind (,my-value ,my-winp ,my-consumedp)
	     (with-parser-context
		 (character-scanner-context :scanner ,scanner)
	       (parse ,@parser))
	   (flet ((,label (,what)
		    (format nil "~A; parsing ~S with ~S"
			    ,what ,string ',@parser)))
	     (cond (,winp
		    (assert-true ,my-winp (,label "winp"))
		    (if (eq ,value t)
			(assert-not-eql ,my-value nil
					(,label "parser result"))
			(assert-equal ,my-value ,value
				      (,label "parser result"))))
		   (t
		    (assert-false ,my-winp (,label "winp"))
		    (assert-true (and (null (set-difference ,my-value ,value
							    :test #'equal))
				      (null (set-difference ,value ,my-value
							    :test #'equal)))
				 (,label "failure indicator"))))
	     (if ,consumedp
		 (assert-true ,my-consumedp (,label "consumedp"))
		 (assert-false ,my-consumedp (,label "consumedp")))))))))

;;;--------------------------------------------------------------------------
;;; Simple parser tests.
;;;
;;; This lot causes SBCL to warn like a mad thing.  It's too clever for us,
;;; and does half of the work at compile time!

(def-test-method test-simple ((test test-parser) :run nil)
  "Test simple atomic parsers, because we rely on them later."

  ;; Characters match themselves.  For a character known only at run-time,
  ;; use (char CH).
  (assert-parse ("abcd" #\a t t) #\a)
  (let ((ch #\b))
    (assert-parse ("abcd" '(#\b) nil nil) (char ch)))

  ;; A character can't match at EOF.
  (assert-parse ("" '(#\z) nil nil) #\z)

  ;; All characters match :any; but EOF isn't a character.
  (assert-parse ("z" #\z t t) :any)
  (assert-parse ("" '(:any) nil nil) :any)

  ;; The parser (satisfies PREDICATE) succeeds if the PREDICATE returns
  ;; true when applied to the current character.
  (assert-parse ("a" #\a t t) (satisfies alpha-char-p))
  (assert-parse ("0" '(alpha-char-p) nil nil) (satisfies alpha-char-p))

  ;; The parser (not CHAR) matches a character other than CHAR; but it won't
  ;; match EOF.
  (assert-parse ("a" #\a t t) (not #\b))
  (assert-parse ("b" '((not #\b)) nil nil) (not #\b))
  (assert-parse ("" '((not #\b)) nil nil) (not #\b))

  ;; But :eof matches only at EOF.
  (assert-parse ("" :eof t nil) :eof)
  (assert-parse ("abcd" '(:eof) nil nil) :eof)

  ;; Strings match themselves without consuming if they fail.
  (assert-parse ("abcd" "ab" t t) "ab")
  (assert-parse ("abcd" '("cd") nil nil) "cd"))

(def-test-method test-sequence ((test test-parser) :run nil)

  ;; An empty sequence always succeeds and never consumes.  And provokes
  ;; warnings: don't do this.
  (assert-parse ("" :win t nil) (seq () :win))
  (assert-parse ("abcd" :win t nil) (seq () :win))

  ;; A `seq' matches the individual parsers in order, and binds their results
  ;; to variables -- if given.  The result is the value of the body.  If any
  ;; parser fails having consumed input, then input stays consumed.  There's
  ;; no backtracking.
  (assert-parse ("abcd" '(#\a . #\c) t t)
    (seq ((foo #\a) #\b (bar #\c)) (cons foo bar)))
  (assert-parse ("abcd" '(#\c) nil t)
    (seq ((foo #\a) (bar #\c)) (cons foo bar)))
  (assert-parse ("abcd" '(#\c) nil nil)
    (seq ((bar #\c) (foo #\a)) (cons foo bar))))

(def-test-method test-repeat ((test test-parser) :run nil)

  ;; A `many' matches a bunch of similar things in a row.  You can compute a
  ;; result using `do'-like accumulation.
  (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc)) #\a))

  ;; The default minimum is zero; so the parser always succeeds.
  (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc)) #\b))

  ;; You can provide an explicit minimum.  Then the match might fail.
  (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min 2) #\a))
  (assert-parse ("aabb" '(#\a) nil t) (many (acc 0 (1+ acc) :min 3) #\a))

  ;; You can also provide an explicit maximum.  This will cause the parser to
  ;; stop searching, but it can't make it fail.
  (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max 3) #\a))

  ;; You can provide both a maximum and a minimum at the same time.  If
  ;; they're consistent, you won't be surprised.  If they aren't, then the
  ;; maximum wins and the minimum is simply ignored (currently).
  (assert-parse ("aaaaab" 4 t t)
    (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
  (assert-parse ("aabbbb" '(#\a) nil t)
    (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
  (assert-parse ("aaabbb" 3 t t)
    (many (acc 0 (1+ acc) :min 3 :max 3) #\a))
  (assert-parse ("aaabbb" 3 t t)
    (many (acc 0 (1+ acc) :min 17 :max 3) #\a))

  ;; You can provide a separator.  The `many' parser will look for the
  ;; separator between each of the main items, but will ignore the results.
  (assert-parse ("a,a,abc" 3 t t) (many (acc 0 (1+ acc)) #\a #\,))
  (assert-parse ("a,a,abc" 2 t t) (many (acc 0 (1+ acc) :max 2) #\a #\,))

  ;; If `many' sees a separator then by default it commits to finding another
  ;; item; so this can cause a parse to fail.
  (assert-parse ("a,a,bc" '(#\a) nil t) (many (acc 0 (1+ acc)) #\a #\,))
  (assert-parse ("abc" 1 t t) (many (acc 0 (1+ acc)) #\a #\,))

  ;; If you specify a separator then the default minimum number of
  ;; repetitions becomes 1 rather than 0.  But you can override this
  ;; explicitly.
  (assert-parse ("bc" '(#\a) nil nil) (many (acc 0 (1+ acc)) #\a #\,))
  (assert-parse ("bc" 0 t nil) (many (acc 0 (1+ acc) :min 0) #\a #\,))

  ;; The parser will fail looking for a separator if there aren't enough
  ;; items.
  (assert-parse ("a,abc" '(#\,) nil t)
    (many (acc 0 (1+ acc) :min 3) #\a #\,))

  ;; You can override the commit-on-separator behaviour by using :commit.
  ;; This makes a trailing separator legal (but optional), so it also affects
  ;; the behaviour regarding maximum and minimum repetitions.  (Commitment is
  ;; irrelevant if you don't have a separator.)
  (assert-parse ("a,a,bc" 2 t t)
    (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
  (assert-parse ("a,a,abc" 3 t t)
    (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
  (assert-parse ("a,a,a,bc" 3 t t)
    (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp t) #\a #\,))
	  #\,)
      n))
  (assert-parse ("a,a,a,bc" 3 t t)
    (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp nil) #\a #\,))
	  #\b)
      n))
  (assert-parse ("a,a,bc" '(#\a) nil t)
    (many (acc 0 (1+ acc) :min 3 :commitp nil) #\a #\,))

  ;; The `many' parser won't backtrack.  The `many' eats as many `a's as
  ;; possible; asking for another one is sure to fail.
  (assert-parse ("aaaabc" '(#\a) nil t) (and (skip-many () #\a) #\a)))

(def-test-method test-repeat-hairy ((test test-parser) :run nil)
  ;; The `many' expander is very hairy and does magical things if it notices
  ;; that some of its arguments are constants.  So here we test a number of
  ;; the above things again, using variables so that it has to produce code
  ;; which makes decisions at run-time.  (I've no doubt that SBCL will issue
  ;; an infinite number of notes explaining how clever it is and how it can
  ;; do it all at compile-time anyway.  Of course, suppressing these notes is
  ;; the main reason `many' is so hairy anyway.)

  (let ((zero 0) (two 2) (three 3) (yes t) (no nil))

    ;; Minima.
    (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :min zero) #\a))
    (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc) :min zero) #\b))
    (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min two) #\a))
    (assert-parse ("aabb" '(#\a) nil t)
      (many (acc 0 (1+ acc) :min three) #\a))

    ;; Maxima.
    (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :max no) #\a))
    (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max three) #\a))

    ;; And now together with separators and commitment.  Oh, my.
    (assert-parse ("a,a,a,bc" 3 t t)
      (many (acc 0 (1+ acc) :commitp no) #\a #\,))
    (assert-parse ("a,a,a,bc" '(#\a) nil t)
      (many (acc 0 (1+ acc) :commitp yes) #\a #\,))
    (assert-parse ("a,a,bc" '(#\a) nil t)
      (many (acc 0 (1+ acc) :min three :commitp yes) #\a #\,))
    (assert-parse ("a,a,bc" '(#\a) nil t)
      (many (acc 0 (1+ acc) :min 3 :commitp yes) #\a #\,))
    (assert-parse ("a,a,bc" '(#\a) nil t)
      (many (acc 0 (1+ acc) :min three :commitp t) #\a #\,))
    (assert-parse ("a,a,a,bc" 3 t t)
      (seq ((n (many (acc 0 (1+ acc) :max three :commitp no) #\a #\,)) #\b)
	n))
    (assert-parse ("a,a,a,bc" 3 t t)
      (seq ((n (many (acc 0 (1+ acc) :max three :commitp yes) #\a #\,)) #\,)
	n))
    (assert-parse ("a,a,a,bc" 3 t t)
      (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp no) #\a #\,)) #\b)
	n))
    (assert-parse ("a,a,a,bc" 3 t t)
      (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp yes) #\a #\,)) #\,)
	n))
    (assert-parse ("a,a,a,bc" 3 t t)
      (seq ((n (many (acc 0 (1+ acc) :max three :commitp nil) #\a #\,)) #\b)
	n))
    (assert-parse ("a,a,a,bc" 3 t t)
      (seq ((n (many (acc 0 (1+ acc) :max three :commitp t) #\a #\,)) #\,)
	n))))

(def-test-method test-alternate ((test test-parser) :run nil)

  ;; An `or' matches the first parser that either succeeds or fails having
  ;; consumed input.
  (assert-parse ("abcd" #\a t t) (or #\a #\b))
  (assert-parse ("abcd" #\a t t) (or #\b #\a))
  (assert-parse ("abcd" '(#\b #\c) nil nil) (or #\b #\c))

  ;; Strings don't consume if they fail.
  (assert-parse ("abcd" "ab" t t) (or "cd" "ab"))
  (assert-parse ("abcd" "ab" t t) (or "ad" "ab"))
  (assert-parse ("abcd" '("ad" "ac") nil nil) (or "ad" "ac"))

  ;; But `seq' will if some component consumes.
  (assert-parse ("abcd" '(#\d) nil t) (or (and #\a #\d) "ab"))
  (assert-parse ("abcd" "ab" t t) (or (and #\c #\d) "ab"))

  ;; We can tame this using `peek' which rewinds the source if its argument
  ;; fails, so as to hide consumption of input.
  (assert-parse ("abcd" "ab" t t) (or (peek (and #\a #\d)) "ab"))
  (assert-parse ("abcd" '(#\a #\b "cd") t t)
    (seq ((foo (peek (seq ((foo #\a) (bar #\b)) (list foo bar))))
	  (bar "cd"))
      (append foo (list bar))))

  ;; Failure indicators are union'd if they all fail.
  (assert-parse ("abcd" '(#\q #\x #\z) nil nil)
    (or #\q (peek (and #\a (or #\x #\q))) #\z))

  ;; But if any of them consumed input then you only get the indicators from
  ;; the consuming branch, because we committed to it when we consumed the
  ;; input.
  (assert-parse ("abcd" '(#\x #\q) nil t)
    (or #\q #\z (and #\a (or #\q #\x)))))

;;;--------------------------------------------------------------------------
;;; Some tests with a simple recursive parser.

(defstruct (node
	     (:predicate nodep)
	     (:constructor make-node (left data right)))
  "Structure type for a simple binary tree."
  left data right)

(defun parse-tree (scanner)
  "Parse a textual representation into a simple binary tree.

   The syntax is simple:

	TREE ::= EMPTY | `(' TREE CHAR TREE `)'

   There's an ambiguity in this syntax, at least if you have limited
   lookahead: suppose you've just parsed the opening `(' of a TREE, and you
   see another `(' -- is it the start of the non-empty left sub-TREE, or is
   it the CHAR following an empty left sub-TREE?  We opt for the first choice
   always."

  ;; This came from another project, although it isn't actually used there.
  ;; It exposed the weakness in an earlier design which prompted the addition
  ;; of the CONSUMEDP flags to the parser protocol.

  (with-parser-context (character-scanner-context :scanner scanner)
    (labels ((tree ()
	       (parse (or (seq (#\(
				(left (tree))
				(data :any)
				(right (tree))
				#\))
			    (make-node left data right))
			  (values nil t nil)))))
      (parse (seq ((tree (tree)) :eof)
	       tree)))))

(defun parse-tree-lookahead (scanner)
  "Parse a textual representation into a simple binary tree.

   The syntax is simple, and, indeed, the grammar's the same as for
   `sod-parse-tree':

	TREE ::= EMPTY | `(' TREE CHAR TREE `)'

   But the rules are different.  Instead of resolving the `ambiguity' between
   TREE and CHAR when we find another `(' after the opening `(' of a TREE
   deterministically in favour of TREE as `parse-tree' does, we try that
   first, and backtrack if necessary."

  ;; Bison can do this, but you have to persuade it to use the scary GLR
  ;; parser algorithm

  (with-parser-context (character-scanner-context :scanner scanner)
    (labels ((tree ()
	       (parse (or (peek (seq (#\(
				      (left (tree))
				      (data :any)
				      (right (tree))
				      #\))
				  (make-node left data right)))
			  (values nil t nil)))))
      (parse (seq ((tree (tree)) :eof)
	       tree)))))

(def-test-method test-simple-tree-parser ((test test-parser) :run nil)
  (assert-parse ("" nil t nil :scanner sc) (parse-tree sc))
  (assert-parse ("((a)b((c)d(e)))" t t t :scanner sc) (parse-tree sc))
  (assert-parse ("((a)b((c)d(e)))z" '(:eof) nil t :scanner sc)
    (parse-tree sc))
  (assert-parse ("((a)b((c)d(e))" '(#\)) nil t :scanner sc) (parse-tree sc))
  (assert-parse ("(([)*(]))" t t t :scanner sc) (parse-tree sc))
  (assert-parse ("((()-()))" '(#\)) nil t :scanner sc) (parse-tree sc))
  (assert-parse ("((()-()))" t t t :scanner sc) (parse-tree-lookahead sc)))

;;;--------------------------------------------------------------------------
;;; Test expression parser.

(eval-when (:compile-toplevel :load-toplevel :execute)
  (defparse token (:context (context character-parser-context) parser)
    (with-gensyms (value)
      (expand-parser-spec context
			  `(seq ((,value ,parser) :whitespace) ,value)))))

(let ((add (binop "+" (x y 5) `(+ ,x ,y)))
      (sub (binop "-" (x y 5) `(- ,x ,y)))
      (mul (binop "*" (x y 7) `(* ,x ,y)))
      (div (binop "/" (x y 7) `(/ ,x ,y)))
      (eq (binop "=" (x y 3 :assoc nil) `(= ,x ,y)))
      (ne (binop "/=" (x y 3 :assoc nil) `(/= ,x ,y)))
      (lt (binop "<" (x y 3 :assoc nil) `(< ,x ,y)))
      (gt (binop ">" (x y 3 :assoc nil) `(> ,x ,y)))
      (and (binop "&" (x y 2) `(and ,x ,y)))
      (or (binop "|" (x y 1) `(or ,x ,y)))
      (expt (binop "**" (x y 8 :assoc :right) `(** ,x ,y)))
      (neg (preop "-" (x 9) `(- ,x)))
      (not (preop "!" (x 2) `(not ,x)))
      (fact (postop "!" (x 10) `(! ,x)))
      (lp (lparen #\))) (rp (rparen #\)))
      (lb (lparen #\])) (rb (rparen #\])))
  (defun test-parse-expr (string)
    (with-parser-context (string-parser :string string)
      (parse (seq (:whitespace
		   (value (expr (:nestedp nestedp)
			    (token (many (a 0 (+ (* a 10) it) :min 1)
				     (filter digit-char-p)))
			    (token (or (seq ("**") expt)
				       (seq ("/=") ne)
				       (seq (#\+) add)
				       (seq (#\-) sub)
				       (seq (#\*) mul)
				       (seq (#\/) div)
				       (seq (#\=) eq)
				       (seq (#\<) lt)
				       (seq (#\>) gt)
				       (seq (#\&) and)
				       (seq (#\|) or)))
			    (token (or (seq (#\() lp)
				       (seq (#\[) lb)
				       (seq (#\-) neg)
				       (seq (#\!) not)))
			    (token (or (seq (#\!) fact)
				       (when nestedp
					 (or (seq (#\)) rp)
					     (seq (#\]) rb)))))))
		   (next (or :any (t :eof))))
	       (cons value next))))))

(defun assert-expr-parse (string value winp consumedp)
  (multiple-value-bind (v w c) (test-parse-expr string)
    (flet ((message (what)
	     (format nil "expression ~S; ~A" string what)))
      (cond (winp (assert-true w (message "winp"))
		  (assert-equal v value (message "value")))
	    (t (assert-false w (message "winp"))
	       (assert-equal v value (message "expected"))))
      (assert-eql c consumedp (message "consumedp")))))

(def-test-method test-expression-parser ((test test-parser) :run nil)
  (assert-expr-parse "1 + 2 + 3" '((+ (+ 1 2) 3) . :eof) t t)
  (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
  (assert-expr-parse "1 * 2 + 3" '((+ (* 1 2) 3) . :eof) t t)
  (assert-expr-parse "(1 + 2) * 3" '((* (+ 1 2) 3) . :eof) t t)
  (assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t)
  (assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t)
  (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
  (assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6"
		     '((or (not (= (+ 1 2) 3))
			   (/= (- 6 3) (/ 12 6)))
		       . :eof)
		     t t)
  (assert-expr-parse "! 1 > 2 & ! 4 < 6 | 3 < 4 & 9 > 10"
		     '((or (and (not (> 1 2)) (not (< 4 6)))
			   (and (< 3 4) (> 9 10)))
		       . :eof)
		     t t)

  (assert-condition 'simple-error (test-parse-expr "(1 + 2"))
  (assert-condition 'simple-error (test-parse-expr "(1 + 2]"))
  (assert-condition 'simple-error (test-parse-expr "1 < 2 < 3")))

;;;----- That's all, folks --------------------------------------------------
Commit	Line	Data
dea4d055 MW	1	;;; --lisp--
	2	;;;
	3	;;; Test parser infrastructure
	4	;;;
	5	;;; (c) 2009 Straylight/Edgeware
	6	;;;
	7
	8	;;;----- Licensing notice ---------------------------------------------------
	9	;;;
e0808c47	10	;;; This file is part of the Sensible Object Design, an object system for C.
dea4d055 MW	11	;;;
	12	;;; SOD is free software; you can redistribute it and/or modify
	13	;;; it under the terms of the GNU General Public License as published by
	14	;;; the Free Software Foundation; either version 2 of the License, or
	15	;;; (at your option) any later version.
	16	;;;
	17	;;; SOD is distributed in the hope that it will be useful,
	18	;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	;;; GNU General Public License for more details.
	21	;;;
	22	;;; You should have received a copy of the GNU General Public License
	23	;;; along with SOD; if not, write to the Free Software Foundation,
	24	;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	25
	26	(cl:in-package #:sod-test)
	27
	28	(defclass test-parser (test-case)
	29	())
	30	(add-test sod-test-suite (get-suite test-parser))
	31
	32	;;;--------------------------------------------------------------------------
	33	;;; Utilities.
	34
	35	(defmacro assert-parse
	36	((string value winp consumedp &key (scanner (gensym "SCANNER-")))
	37	&body parser)
	38	(once-only (string value winp consumedp)
	39	(with-gensyms (my-value my-winp my-consumedp label what)
	40	`(let ((,scanner (make-string-scanner ,string)))
2b6a1356	41	(declare (ignorable ,scanner))
dea4d055 MW	42	(multiple-value-bind (,my-value ,my-winp ,my-consumedp)
	43	(with-parser-context
	44	(character-scanner-context :scanner ,scanner)
	45	(parse ,@parser))
	46	(flet ((,label (,what)
	47	(format nil "~A; parsing ~S with ~S"
	48	,what ,string ',@parser)))
	49	(cond (,winp
	50	(assert-true ,my-winp (,label "winp"))
	51	(if (eq ,value t)
	52	(assert-not-eql ,my-value nil
	53	(,label "parser result"))
	54	(assert-equal ,my-value ,value
	55	(,label "parser result"))))
	56	(t
	57	(assert-false ,my-winp (,label "winp"))
	58	(assert-true (and (null (set-difference ,my-value ,value
	59	:test #'equal))
	60	(null (set-difference ,value ,my-value
	61	:test #'equal)))
	62	(,label "failure indicator"))))
	63	(if ,consumedp
	64	(assert-true ,my-consumedp (,label "consumedp"))
	65	(assert-false ,my-consumedp (,label "consumedp")))))))))
	66
	67	;;;--------------------------------------------------------------------------
	68	;;; Simple parser tests.
	69	;;;
	70	;;; This lot causes SBCL to warn like a mad thing. It's too clever for us,
	71	;;; and does half of the work at compile time!
	72
	73	(def-test-method test-simple ((test test-parser) :run nil)
	74	"Test simple atomic parsers, because we rely on them later."
	75
	76	;; Characters match themselves. For a character known only at run-time,
	77	;; use (char CH).
	78	(assert-parse ("abcd" #\a t t) #\a)
	79	(let ((ch #\b))
	80	(assert-parse ("abcd" '(#\b) nil nil) (char ch)))
	81
	82	;; A character can't match at EOF.
	83	(assert-parse ("" '(#\z) nil nil) #\z)
	84
	85	;; All characters match :any; but EOF isn't a character.
	86	(assert-parse ("z" #\z t t) :any)
	87	(assert-parse ("" '(:any) nil nil) :any)
	88
	89	;; The parser (satisfies PREDICATE) succeeds if the PREDICATE returns
	90	;; true when applied to the current character.
	91	(assert-parse ("a" #\a t t) (satisfies alpha-char-p))
	92	(assert-parse ("0" '(alpha-char-p) nil nil) (satisfies alpha-char-p))
	93
	94	;; The parser (not CHAR) matches a character other than CHAR; but it won't
	95	;; match EOF.
	96	(assert-parse ("a" #\a t t) (not #\b))
	97	(assert-parse ("b" '((not #\b)) nil nil) (not #\b))
	98	(assert-parse ("" '((not #\b)) nil nil) (not #\b))
	99
	100	;; But :eof matches only at EOF.
	101	(assert-parse ("" :eof t nil) :eof)
	102	(assert-parse ("abcd" '(:eof) nil nil) :eof)
	103
	104	;; Strings match themselves without consuming if they fail.
	105	(assert-parse ("abcd" "ab" t t) "ab")
106	(assert-parse ("abcd" '("cd") nil nil) "cd"))
107
108	(def-test-method test-sequence ((test test-parser) :run nil)
109
110	;; An empty sequence always succeeds and never consumes. And provokes
111	;; warnings: don't do this.
112	(assert-parse ("" :win t nil) (seq () :win))
113	(assert-parse ("abcd" :win t nil) (seq () :win))
114
115	;; A `seq' matches the individual parsers in order, and binds their results
116	;; to variables -- if given. The result is the value of the body. If any
117	;; parser fails having consumed input, then input stays consumed. There's
118	;; no backtracking.
119	(assert-parse ("abcd" '(#\a . #\c) t t)
120	(seq ((foo #\a) #\b (bar #\c)) (cons foo bar)))
121	(assert-parse ("abcd" '(#\c) nil t)
122	(seq ((foo #\a) (bar #\c)) (cons foo bar)))
123	(assert-parse ("abcd" '(#\c) nil nil)
124	(seq ((bar #\c) (foo #\a)) (cons foo bar))))
125
126	(def-test-method test-repeat ((test test-parser) :run nil)
127
128	;; A `many' matches a bunch of similar things in a row. You can compute a
129	;; result using `do'-like accumulation.
130	(assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc)) #\a))
131
132	;; The default minimum is zero; so the parser always succeeds.
133	(assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc)) #\b))
134
135	;; You can provide an explicit minimum. Then the match might fail.
136	(assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min 2) #\a))
137	(assert-parse ("aabb" '(#\a) nil t) (many (acc 0 (1+ acc) :min 3) #\a))
138
139	;; You can also provide an explicit maximum. This will cause the parser to
140	;; stop searching, but it can't make it fail.
141	(assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max 3) #\a))
142
143	;; You can provide both a maximum and a minimum at the same time. If
144	;; they're consistent, you won't be surprised. If they aren't, then the
145	;; maximum wins and the minimum is simply ignored (currently).
146	(assert-parse ("aaaaab" 4 t t)
147	(many (acc 0 (1+ acc) :min 3 :max 4) #\a))
148	(assert-parse ("aabbbb" '(#\a) nil t)
149	(many (acc 0 (1+ acc) :min 3 :max 4) #\a))
150	(assert-parse ("aaabbb" 3 t t)
151	(many (acc 0 (1+ acc) :min 3 :max 3) #\a))
152	(assert-parse ("aaabbb" 3 t t)
153	(many (acc 0 (1+ acc) :min 17 :max 3) #\a))
154
155	;; You can provide a separator. The `many' parser will look for the
156	;; separator between each of the main items, but will ignore the results.
157	(assert-parse ("a,a,abc" 3 t t) (many (acc 0 (1+ acc)) #\a #\,))
158	(assert-parse ("a,a,abc" 2 t t) (many (acc 0 (1+ acc) :max 2) #\a #\,))
159
160	;; If `many' sees a separator then by default it commits to finding another
161	;; item; so this can cause a parse to fail.
162	(assert-parse ("a,a,bc" '(#\a) nil t) (many (acc 0 (1+ acc)) #\a #\,))
163	(assert-parse ("abc" 1 t t) (many (acc 0 (1+ acc)) #\a #\,))
164
165	;; If you specify a separator then the default minimum number of
166	;; repetitions becomes 1 rather than 0. But you can override this
167	;; explicitly.
168	(assert-parse ("bc" '(#\a) nil nil) (many (acc 0 (1+ acc)) #\a #\,))
169	(assert-parse ("bc" 0 t nil) (many (acc 0 (1+ acc) :min 0) #\a #\,))
170
171	;; The parser will fail looking for a separator if there aren't enough
172	;; items.
173	(assert-parse ("a,abc" '(#\,) nil t)
174	(many (acc 0 (1+ acc) :min 3) #\a #\,))
175
176	;; You can override the commit-on-separator behaviour by using :commit.
177	;; This makes a trailing separator legal (but optional), so it also affects
178	;; the behaviour regarding maximum and minimum repetitions. (Commitment is
179	;; irrelevant if you don't have a separator.)
180	(assert-parse ("a,a,bc" 2 t t)
181	(many (acc 0 (1+ acc) :commitp nil) #\a #\,))
182	(assert-parse ("a,a,abc" 3 t t)
183	(many (acc 0 (1+ acc) :commitp nil) #\a #\,))
184	(assert-parse ("a,a,a,bc" 3 t t)
185	(seq ((n (many (acc 0 (1+ acc) :max 3 :commitp t) #\a #\,))
186	#\,)
187	n))
188	(assert-parse ("a,a,a,bc" 3 t t)
189	(seq ((n (many (acc 0 (1+ acc) :max 3 :commitp nil) #\a #\,))
190	#\b)
191	n))
192	(assert-parse ("a,a,bc" '(#\a) nil t)
193	(many (acc 0 (1+ acc) :min 3 :commitp nil) #\a #\,))
194
195	;; The `many' parser won't backtrack. The `many' eats as many `a's as
196	;; possible; asking for another one is sure to fail.
197	(assert-parse ("aaaabc" '(#\a) nil t) (and (skip-many () #\a) #\a)))
198
199	(def-test-method test-repeat-hairy ((test test-parser) :run nil)
200	;; The `many' expander is very hairy and does magical things if it notices
201	;; that some of its arguments are constants. So here we test a number of
202	;; the above things again, using variables so that it has to produce code
203	;; which makes decisions at run-time. (I've no doubt that SBCL will issue
204	;; an infinite number of notes explaining how clever it is and how it can
205	;; do it all at compile-time anyway. Of course, suppressing these notes is
206	;; the main reason `many' is so hairy anyway.)
207
208	(let ((zero 0) (two 2) (three 3) (yes t) (no nil))
209
210	;; Minima.
211	(assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :min zero) #\a))
212	(assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc) :min zero) #\b))
213	(assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min two) #\a))
214	(assert-parse ("aabb" '(#\a) nil t)
215	(many (acc 0 (1+ acc) :min three) #\a))
216
217	;; Maxima.
218	(assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :max no) #\a))
219	(assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max three) #\a))
220
221	;; And now together with separators and commitment. Oh, my.
222	(assert-parse ("a,a,a,bc" 3 t t)
223	(many (acc 0 (1+ acc) :commitp no) #\a #\,))
224	(assert-parse ("a,a,a,bc" '(#\a) nil t)
225	(many (acc 0 (1+ acc) :commitp yes) #\a #\,))
226	(assert-parse ("a,a,bc" '(#\a) nil t)
227	(many (acc 0 (1+ acc) :min three :commitp yes) #\a #\,))
228	(assert-parse ("a,a,bc" '(#\a) nil t)
229	(many (acc 0 (1+ acc) :min 3 :commitp yes) #\a #\,))
230	(assert-parse ("a,a,bc" '(#\a) nil t)
231	(many (acc 0 (1+ acc) :min three :commitp t) #\a #\,))
232	(assert-parse ("a,a,a,bc" 3 t t)
233	(seq ((n (many (acc 0 (1+ acc) :max three :commitp no) #\a #\,)) #\b)
234	n))
235	(assert-parse ("a,a,a,bc" 3 t t)
236	(seq ((n (many (acc 0 (1+ acc) :max three :commitp yes) #\a #\,)) #\,)
237	n))
238	(assert-parse ("a,a,a,bc" 3 t t)
239	(seq ((n (many (acc 0 (1+ acc) :max 3 :commitp no) #\a #\,)) #\b)
240	n))
241	(assert-parse ("a,a,a,bc" 3 t t)
242	(seq ((n (many (acc 0 (1+ acc) :max 3 :commitp yes) #\a #\,)) #\,)
243	n))
244	(assert-parse ("a,a,a,bc" 3 t t)
245	(seq ((n (many (acc 0 (1+ acc) :max three :commitp nil) #\a #\,)) #\b)
246	n))
247	(assert-parse ("a,a,a,bc" 3 t t)
248	(seq ((n (many (acc 0 (1+ acc) :max three :commitp t) #\a #\,)) #\,)
249	n))))
250
251	(def-test-method test-alternate ((test test-parser) :run nil)
252
253	;; An `or' matches the first parser that either succeeds or fails having
254	;; consumed input.
255	(assert-parse ("abcd" #\a t t) (or #\a #\b))
256	(assert-parse ("abcd" #\a t t) (or #\b #\a))
257	(assert-parse ("abcd" '(#\b #\c) nil nil) (or #\b #\c))
258
259	;; Strings don't consume if they fail.
260	(assert-parse ("abcd" "ab" t t) (or "cd" "ab"))
261	(assert-parse ("abcd" "ab" t t) (or "ad" "ab"))
262	(assert-parse ("abcd" '("ad" "ac") nil nil) (or "ad" "ac"))
263
264	;; But `seq' will if some component consumes.
265	(assert-parse ("abcd" '(#\d) nil t) (or (and #\a #\d) "ab"))
266	(assert-parse ("abcd" "ab" t t) (or (and #\c #\d) "ab"))
267
268	;; We can tame this using `peek' which rewinds the source if its argument
269	;; fails, so as to hide consumption of input.
270	(assert-parse ("abcd" "ab" t t) (or (peek (and #\a #\d)) "ab"))
271	(assert-parse ("abcd" '(#\a #\b "cd") t t)
272	(seq ((foo (peek (seq ((foo #\a) (bar #\b)) (list foo bar))))
273	(bar "cd"))
274	(append foo (list bar))))
275
276	;; Failure indicators are union'd if they all fail.
277	(assert-parse ("abcd" '(#\q #\x #\z) nil nil)
278	(or #\q (peek (and #\a (or #\x #\q))) #\z))
279
280	;; But if any of them consumed input then you only get the indicators from
281	;; the consuming branch, because we committed to it when we consumed the
282	;; input.
283	(assert-parse ("abcd" '(#\x #\q) nil t)
284	(or #\q #\z (and #\a (or #\q #\x)))))
285
286	;;;--------------------------------------------------------------------------
287	;;; Some tests with a simple recursive parser.
288
289	(defstruct (node
290	(:predicate nodep)
291	(:constructor make-node (left data right)))
292	"Structure type for a simple binary tree."
293	left data right)
294
295	(defun parse-tree (scanner)
296	"Parse a textual representation into a simple binary tree.
297
298	The syntax is simple:
299
300	TREE ::= EMPTY \| `(' TREE CHAR TREE `)'
301
302	There's an ambiguity in this syntax, at least if you have limited
303	lookahead: suppose you've just parsed the opening `(' of a TREE, and you
304	see another `(' -- is it the start of the non-empty left sub-TREE, or is
305	it the CHAR following an empty left sub-TREE? We opt for the first choice
306	always."
307
308	;; This came from another project, although it isn't actually used there.
309	;; It exposed the weakness in an earlier design which prompted the addition
310	;; of the CONSUMEDP flags to the parser protocol.
311
312	(with-parser-context (character-scanner-context :scanner scanner)
313	(labels ((tree ()
314	(parse (or (seq (#\(
315	(left (tree))
316	(data :any)
317	(right (tree))
318	#\))
319	(make-node left data right))
320	(values nil t nil)))))
321	(parse (seq ((tree (tree)) :eof)
322	tree)))))
323
324	(defun parse-tree-lookahead (scanner)
325	"Parse a textual representation into a simple binary tree.
326
327	The syntax is simple, and, indeed, the grammar's the same as for
328	`sod-parse-tree':
329
330	TREE ::= EMPTY \| `(' TREE CHAR TREE `)'
331
332	But the rules are different. Instead of resolving the `ambiguity' between
333	TREE and CHAR when we find another `(' after the opening `(' of a TREE
334	deterministically in favour of TREE as `parse-tree' does, we try that
335	first, and backtrack if necessary."
336
337	;; Bison can do this, but you have to persuade it to use the scary GLR
338	;; parser algorithm
339
340	(with-parser-context (character-scanner-context :scanner scanner)
341	(labels ((tree ()
342	(parse (or (peek (seq (#\(
343	(left (tree))
344	(data :any)
345	(right (tree))
346	#\))
347	(make-node left data right)))
348	(values nil t nil)))))
349	(parse (seq ((tree (tree)) :eof)
350	tree)))))
351
352	(def-test-method test-simple-tree-parser ((test test-parser) :run nil)
353	(assert-parse ("" nil t nil :scanner sc) (parse-tree sc))
354	(assert-parse ("((a)b((c)d(e)))" t t t :scanner sc) (parse-tree sc))
355	(assert-parse ("((a)b((c)d(e)))z" '(:eof) nil t :scanner sc)
356	(parse-tree sc))
357	(assert-parse ("((a)b((c)d(e))" '(#\)) nil t :scanner sc) (parse-tree sc))
358	(assert-parse ("(([)*(]))" t t t :scanner sc) (parse-tree sc))
359	(assert-parse ("((()-()))" '(#\)) nil t :scanner sc) (parse-tree sc))
360	(assert-parse ("((()-()))" t t t :scanner sc) (parse-tree-lookahead sc)))
361
362	;;;--------------------------------------------------------------------------
363	;;; Test expression parser.
364
8d1d7d3e MW	365	(eval-when (:compile-toplevel :load-toplevel :execute)
	366	(defparse token (:context (context character-parser-context) parser)
	367	(with-gensyms (value)
	368	(expand-parser-spec context
	369	`(seq ((,value ,parser) :whitespace) ,value)))))
dea4d055 MW	370
	371	(let ((add (binop "+" (x y 5) `(+ ,x ,y)))
	372	(sub (binop "-" (x y 5) `(- ,x ,y)))
	373	(mul (binop "" (x y 7) `( ,x ,y)))
	374	(div (binop "/" (x y 7) `(/ ,x ,y)))
	375	(eq (binop "=" (x y 3 :assoc nil) `(= ,x ,y)))
	376	(ne (binop "/=" (x y 3 :assoc nil) `(/= ,x ,y)))
	377	(lt (binop "<" (x y 3 :assoc nil) `(< ,x ,y)))
	378	(gt (binop ">" (x y 3 :assoc nil) `(> ,x ,y)))
	379	(and (binop "&" (x y 2) `(and ,x ,y)))
	380	(or (binop "\|" (x y 1) `(or ,x ,y)))
	381	(expt (binop "" (x y 8 :assoc :right) `( ,x ,y)))
	382	(neg (preop "-" (x 9) `(- ,x)))
	383	(not (preop "!" (x 2) `(not ,x)))
	384	(fact (postop "!" (x 10) `(! ,x)))
	385	(lp (lparen #\))) (rp (rparen #\)))
	386	(lb (lparen #\])) (rb (rparen #\])))
	387	(defun test-parse-expr (string)
	388	(with-parser-context (string-parser :string string)
	389	(parse (seq (:whitespace
	390	(value (expr (:nestedp nestedp)
	391	(token (many (a 0 (+ (* a 10) it) :min 1)
	392	(filter digit-char-p)))
	393	(token (or (seq ("**") expt)
	394	(seq ("/=") ne)
	395	(seq (#\+) add)
	396	(seq (#\-) sub)
	397	(seq (#\*) mul)
	398	(seq (#\/) div)
	399	(seq (#\=) eq)
	400	(seq (#\<) lt)
	401	(seq (#\>) gt)
	402	(seq (#\&) and)
	403	(seq (#\\|) or)))
	404	(token (or (seq (#\() lp)
239fa5bd	405	(seq (#\[) lb)
dea4d055 MW	406	(seq (#\-) neg)
	407	(seq (#\!) not)))
	408	(token (or (seq (#\!) fact)
239fa5bd MW	409	(when nestedp
	410	(or (seq (#\)) rp)
	411	(seq (#\]) rb)))))))
dea4d055 MW	412	(next (or :any (t :eof))))
	413	(cons value next))))))
	414
	415	(defun assert-expr-parse (string value winp consumedp)
	416	(multiple-value-bind (v w c) (test-parse-expr string)
	417	(flet ((message (what)
	418	(format nil "expression ~S; ~A" string what)))
	419	(cond (winp (assert-true w (message "winp"))
	420	(assert-equal v value (message "value")))
	421	(t (assert-false w (message "winp"))
	422	(assert-equal v value (message "expected"))))
	423	(assert-eql c consumedp (message "consumedp")))))
	424
	425	(def-test-method test-expression-parser ((test test-parser) :run nil)
	426	(assert-expr-parse "1 + 2 + 3" '((+ (+ 1 2) 3) . :eof) t t)
	427	(assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
	428	(assert-expr-parse "1 * 2 + 3" '((+ (* 1 2) 3) . :eof) t t)
	429	(assert-expr-parse "(1 + 2) * 3" '((* (+ 1 2) 3) . :eof) t t)
	430	(assert-expr-parse "1 2 3" '(( 1 ( 2 3)) . :eof) t t)
	431	(assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t)
	432	(assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
dea4d055 MW	433	(assert-expr-parse "! 1 + 2 = 3 \| 6 - 3 /= 12/6"
	434	'((or (not (= (+ 1 2) 3))
	435	(/= (- 6 3) (/ 12 6)))
	436	. :eof)
	437	t t)
	438	(assert-expr-parse "! 1 > 2 & ! 4 < 6 \| 3 < 4 & 9 > 10"
	439	'((or (and (not (> 1 2)) (not (< 4 6)))
	440	(and (< 3 4) (> 9 10)))
	441	. :eof)
	442	t t)
	443
	444	(assert-condition 'simple-error (test-parse-expr "(1 + 2"))
	445	(assert-condition 'simple-error (test-parse-expr "(1 + 2]"))
	446	(assert-condition 'simple-error (test-parse-expr "1 < 2 < 3")))
	447
	448	;;;----- That's all, folks --------------------------------------------------