Fix spelling of `Sensible' in all of the header comments.
[sod] / src / parser / parser-test.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Test parser infrastructure
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
e0808c47 10;;; This file is part of the Sensible Object Design, an object system for C.
dea4d055
MW
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod-test)
27
28(defclass test-parser (test-case)
29 ())
30(add-test *sod-test-suite* (get-suite test-parser))
31
32;;;--------------------------------------------------------------------------
33;;; Utilities.
34
35(defmacro assert-parse
36 ((string value winp consumedp &key (scanner (gensym "SCANNER-")))
37 &body parser)
38 (once-only (string value winp consumedp)
39 (with-gensyms (my-value my-winp my-consumedp label what)
40 `(let ((,scanner (make-string-scanner ,string)))
2b6a1356 41 (declare (ignorable ,scanner))
dea4d055
MW
42 (multiple-value-bind (,my-value ,my-winp ,my-consumedp)
43 (with-parser-context
44 (character-scanner-context :scanner ,scanner)
45 (parse ,@parser))
46 (flet ((,label (,what)
47 (format nil "~A; parsing ~S with ~S"
48 ,what ,string ',@parser)))
49 (cond (,winp
50 (assert-true ,my-winp (,label "winp"))
51 (if (eq ,value t)
52 (assert-not-eql ,my-value nil
53 (,label "parser result"))
54 (assert-equal ,my-value ,value
55 (,label "parser result"))))
56 (t
57 (assert-false ,my-winp (,label "winp"))
58 (assert-true (and (null (set-difference ,my-value ,value
59 :test #'equal))
60 (null (set-difference ,value ,my-value
61 :test #'equal)))
62 (,label "failure indicator"))))
63 (if ,consumedp
64 (assert-true ,my-consumedp (,label "consumedp"))
65 (assert-false ,my-consumedp (,label "consumedp")))))))))
66
67;;;--------------------------------------------------------------------------
68;;; Simple parser tests.
69;;;
70;;; This lot causes SBCL to warn like a mad thing. It's too clever for us,
71;;; and does half of the work at compile time!
72
73(def-test-method test-simple ((test test-parser) :run nil)
74 "Test simple atomic parsers, because we rely on them later."
75
76 ;; Characters match themselves. For a character known only at run-time,
77 ;; use (char CH).
78 (assert-parse ("abcd" #\a t t) #\a)
79 (let ((ch #\b))
80 (assert-parse ("abcd" '(#\b) nil nil) (char ch)))
81
82 ;; A character can't match at EOF.
83 (assert-parse ("" '(#\z) nil nil) #\z)
84
85 ;; All characters match :any; but EOF isn't a character.
86 (assert-parse ("z" #\z t t) :any)
87 (assert-parse ("" '(:any) nil nil) :any)
88
89 ;; The parser (satisfies PREDICATE) succeeds if the PREDICATE returns
90 ;; true when applied to the current character.
91 (assert-parse ("a" #\a t t) (satisfies alpha-char-p))
92 (assert-parse ("0" '(alpha-char-p) nil nil) (satisfies alpha-char-p))
93
94 ;; The parser (not CHAR) matches a character other than CHAR; but it won't
95 ;; match EOF.
96 (assert-parse ("a" #\a t t) (not #\b))
97 (assert-parse ("b" '((not #\b)) nil nil) (not #\b))
98 (assert-parse ("" '((not #\b)) nil nil) (not #\b))
99
100 ;; But :eof matches only at EOF.
101 (assert-parse ("" :eof t nil) :eof)
102 (assert-parse ("abcd" '(:eof) nil nil) :eof)
103
104 ;; Strings match themselves without consuming if they fail.
105 (assert-parse ("abcd" "ab" t t) "ab")
106 (assert-parse ("abcd" '("cd") nil nil) "cd"))
107
108(def-test-method test-sequence ((test test-parser) :run nil)
109
110 ;; An empty sequence always succeeds and never consumes. And provokes
111 ;; warnings: don't do this.
112 (assert-parse ("" :win t nil) (seq () :win))
113 (assert-parse ("abcd" :win t nil) (seq () :win))
114
115 ;; A `seq' matches the individual parsers in order, and binds their results
116 ;; to variables -- if given. The result is the value of the body. If any
117 ;; parser fails having consumed input, then input stays consumed. There's
118 ;; no backtracking.
119 (assert-parse ("abcd" '(#\a . #\c) t t)
120 (seq ((foo #\a) #\b (bar #\c)) (cons foo bar)))
121 (assert-parse ("abcd" '(#\c) nil t)
122 (seq ((foo #\a) (bar #\c)) (cons foo bar)))
123 (assert-parse ("abcd" '(#\c) nil nil)
124 (seq ((bar #\c) (foo #\a)) (cons foo bar))))
125
126(def-test-method test-repeat ((test test-parser) :run nil)
127
128 ;; A `many' matches a bunch of similar things in a row. You can compute a
129 ;; result using `do'-like accumulation.
130 (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc)) #\a))
131
132 ;; The default minimum is zero; so the parser always succeeds.
133 (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc)) #\b))
134
135 ;; You can provide an explicit minimum. Then the match might fail.
136 (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min 2) #\a))
137 (assert-parse ("aabb" '(#\a) nil t) (many (acc 0 (1+ acc) :min 3) #\a))
138
139 ;; You can also provide an explicit maximum. This will cause the parser to
140 ;; stop searching, but it can't make it fail.
141 (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max 3) #\a))
142
143 ;; You can provide both a maximum and a minimum at the same time. If
144 ;; they're consistent, you won't be surprised. If they aren't, then the
145 ;; maximum wins and the minimum is simply ignored (currently).
146 (assert-parse ("aaaaab" 4 t t)
147 (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
148 (assert-parse ("aabbbb" '(#\a) nil t)
149 (many (acc 0 (1+ acc) :min 3 :max 4) #\a))
150 (assert-parse ("aaabbb" 3 t t)
151 (many (acc 0 (1+ acc) :min 3 :max 3) #\a))
152 (assert-parse ("aaabbb" 3 t t)
153 (many (acc 0 (1+ acc) :min 17 :max 3) #\a))
154
155 ;; You can provide a separator. The `many' parser will look for the
156 ;; separator between each of the main items, but will ignore the results.
157 (assert-parse ("a,a,abc" 3 t t) (many (acc 0 (1+ acc)) #\a #\,))
158 (assert-parse ("a,a,abc" 2 t t) (many (acc 0 (1+ acc) :max 2) #\a #\,))
159
160 ;; If `many' sees a separator then by default it commits to finding another
161 ;; item; so this can cause a parse to fail.
162 (assert-parse ("a,a,bc" '(#\a) nil t) (many (acc 0 (1+ acc)) #\a #\,))
163 (assert-parse ("abc" 1 t t) (many (acc 0 (1+ acc)) #\a #\,))
164
165 ;; If you specify a separator then the default minimum number of
166 ;; repetitions becomes 1 rather than 0. But you can override this
167 ;; explicitly.
168 (assert-parse ("bc" '(#\a) nil nil) (many (acc 0 (1+ acc)) #\a #\,))
169 (assert-parse ("bc" 0 t nil) (many (acc 0 (1+ acc) :min 0) #\a #\,))
170
171 ;; The parser will fail looking for a separator if there aren't enough
172 ;; items.
173 (assert-parse ("a,abc" '(#\,) nil t)
174 (many (acc 0 (1+ acc) :min 3) #\a #\,))
175
176 ;; You can override the commit-on-separator behaviour by using :commit.
177 ;; This makes a trailing separator legal (but optional), so it also affects
178 ;; the behaviour regarding maximum and minimum repetitions. (Commitment is
179 ;; irrelevant if you don't have a separator.)
180 (assert-parse ("a,a,bc" 2 t t)
181 (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
182 (assert-parse ("a,a,abc" 3 t t)
183 (many (acc 0 (1+ acc) :commitp nil) #\a #\,))
184 (assert-parse ("a,a,a,bc" 3 t t)
185 (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp t) #\a #\,))
186 #\,)
187 n))
188 (assert-parse ("a,a,a,bc" 3 t t)
189 (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp nil) #\a #\,))
190 #\b)
191 n))
192 (assert-parse ("a,a,bc" '(#\a) nil t)
193 (many (acc 0 (1+ acc) :min 3 :commitp nil) #\a #\,))
194
195 ;; The `many' parser won't backtrack. The `many' eats as many `a's as
196 ;; possible; asking for another one is sure to fail.
197 (assert-parse ("aaaabc" '(#\a) nil t) (and (skip-many () #\a) #\a)))
198
199(def-test-method test-repeat-hairy ((test test-parser) :run nil)
200 ;; The `many' expander is very hairy and does magical things if it notices
201 ;; that some of its arguments are constants. So here we test a number of
202 ;; the above things again, using variables so that it has to produce code
203 ;; which makes decisions at run-time. (I've no doubt that SBCL will issue
204 ;; an infinite number of notes explaining how clever it is and how it can
205 ;; do it all at compile-time anyway. Of course, suppressing these notes is
206 ;; the main reason `many' is so hairy anyway.)
207
208 (let ((zero 0) (two 2) (three 3) (yes t) (no nil))
209
210 ;; Minima.
211 (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :min zero) #\a))
212 (assert-parse ("aaaab" 0 t nil) (many (acc 0 (1+ acc) :min zero) #\b))
213 (assert-parse ("aabb" 2 t t) (many (acc 0 (1+ acc) :min two) #\a))
214 (assert-parse ("aabb" '(#\a) nil t)
215 (many (acc 0 (1+ acc) :min three) #\a))
216
217 ;; Maxima.
218 (assert-parse ("aaaab" 4 t t) (many (acc 0 (1+ acc) :max no) #\a))
219 (assert-parse ("aaaab" 3 t t) (many (acc 0 (1+ acc) :max three) #\a))
220
221 ;; And now together with separators and commitment. Oh, my.
222 (assert-parse ("a,a,a,bc" 3 t t)
223 (many (acc 0 (1+ acc) :commitp no) #\a #\,))
224 (assert-parse ("a,a,a,bc" '(#\a) nil t)
225 (many (acc 0 (1+ acc) :commitp yes) #\a #\,))
226 (assert-parse ("a,a,bc" '(#\a) nil t)
227 (many (acc 0 (1+ acc) :min three :commitp yes) #\a #\,))
228 (assert-parse ("a,a,bc" '(#\a) nil t)
229 (many (acc 0 (1+ acc) :min 3 :commitp yes) #\a #\,))
230 (assert-parse ("a,a,bc" '(#\a) nil t)
231 (many (acc 0 (1+ acc) :min three :commitp t) #\a #\,))
232 (assert-parse ("a,a,a,bc" 3 t t)
233 (seq ((n (many (acc 0 (1+ acc) :max three :commitp no) #\a #\,)) #\b)
234 n))
235 (assert-parse ("a,a,a,bc" 3 t t)
236 (seq ((n (many (acc 0 (1+ acc) :max three :commitp yes) #\a #\,)) #\,)
237 n))
238 (assert-parse ("a,a,a,bc" 3 t t)
239 (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp no) #\a #\,)) #\b)
240 n))
241 (assert-parse ("a,a,a,bc" 3 t t)
242 (seq ((n (many (acc 0 (1+ acc) :max 3 :commitp yes) #\a #\,)) #\,)
243 n))
244 (assert-parse ("a,a,a,bc" 3 t t)
245 (seq ((n (many (acc 0 (1+ acc) :max three :commitp nil) #\a #\,)) #\b)
246 n))
247 (assert-parse ("a,a,a,bc" 3 t t)
248 (seq ((n (many (acc 0 (1+ acc) :max three :commitp t) #\a #\,)) #\,)
249 n))))
250
251(def-test-method test-alternate ((test test-parser) :run nil)
252
253 ;; An `or' matches the first parser that either succeeds or fails having
254 ;; consumed input.
255 (assert-parse ("abcd" #\a t t) (or #\a #\b))
256 (assert-parse ("abcd" #\a t t) (or #\b #\a))
257 (assert-parse ("abcd" '(#\b #\c) nil nil) (or #\b #\c))
258
259 ;; Strings don't consume if they fail.
260 (assert-parse ("abcd" "ab" t t) (or "cd" "ab"))
261 (assert-parse ("abcd" "ab" t t) (or "ad" "ab"))
262 (assert-parse ("abcd" '("ad" "ac") nil nil) (or "ad" "ac"))
263
264 ;; But `seq' will if some component consumes.
265 (assert-parse ("abcd" '(#\d) nil t) (or (and #\a #\d) "ab"))
266 (assert-parse ("abcd" "ab" t t) (or (and #\c #\d) "ab"))
267
268 ;; We can tame this using `peek' which rewinds the source if its argument
269 ;; fails, so as to hide consumption of input.
270 (assert-parse ("abcd" "ab" t t) (or (peek (and #\a #\d)) "ab"))
271 (assert-parse ("abcd" '(#\a #\b "cd") t t)
272 (seq ((foo (peek (seq ((foo #\a) (bar #\b)) (list foo bar))))
273 (bar "cd"))
274 (append foo (list bar))))
275
276 ;; Failure indicators are union'd if they all fail.
277 (assert-parse ("abcd" '(#\q #\x #\z) nil nil)
278 (or #\q (peek (and #\a (or #\x #\q))) #\z))
279
280 ;; But if any of them consumed input then you only get the indicators from
281 ;; the consuming branch, because we committed to it when we consumed the
282 ;; input.
283 (assert-parse ("abcd" '(#\x #\q) nil t)
284 (or #\q #\z (and #\a (or #\q #\x)))))
285
286;;;--------------------------------------------------------------------------
287;;; Some tests with a simple recursive parser.
288
289(defstruct (node
290 (:predicate nodep)
291 (:constructor make-node (left data right)))
292 "Structure type for a simple binary tree."
293 left data right)
294
295(defun parse-tree (scanner)
296 "Parse a textual representation into a simple binary tree.
297
298 The syntax is simple:
299
300 TREE ::= EMPTY | `(' TREE CHAR TREE `)'
301
302 There's an ambiguity in this syntax, at least if you have limited
303 lookahead: suppose you've just parsed the opening `(' of a TREE, and you
304 see another `(' -- is it the start of the non-empty left sub-TREE, or is
305 it the CHAR following an empty left sub-TREE? We opt for the first choice
306 always."
307
308 ;; This came from another project, although it isn't actually used there.
309 ;; It exposed the weakness in an earlier design which prompted the addition
310 ;; of the CONSUMEDP flags to the parser protocol.
311
312 (with-parser-context (character-scanner-context :scanner scanner)
313 (labels ((tree ()
314 (parse (or (seq (#\(
315 (left (tree))
316 (data :any)
317 (right (tree))
318 #\))
319 (make-node left data right))
320 (values nil t nil)))))
321 (parse (seq ((tree (tree)) :eof)
322 tree)))))
323
324(defun parse-tree-lookahead (scanner)
325 "Parse a textual representation into a simple binary tree.
326
327 The syntax is simple, and, indeed, the grammar's the same as for
328 `sod-parse-tree':
329
330 TREE ::= EMPTY | `(' TREE CHAR TREE `)'
331
332 But the rules are different. Instead of resolving the `ambiguity' between
333 TREE and CHAR when we find another `(' after the opening `(' of a TREE
334 deterministically in favour of TREE as `parse-tree' does, we try that
335 first, and backtrack if necessary."
336
337 ;; Bison can do this, but you have to persuade it to use the scary GLR
338 ;; parser algorithm
339
340 (with-parser-context (character-scanner-context :scanner scanner)
341 (labels ((tree ()
342 (parse (or (peek (seq (#\(
343 (left (tree))
344 (data :any)
345 (right (tree))
346 #\))
347 (make-node left data right)))
348 (values nil t nil)))))
349 (parse (seq ((tree (tree)) :eof)
350 tree)))))
351
352(def-test-method test-simple-tree-parser ((test test-parser) :run nil)
353 (assert-parse ("" nil t nil :scanner sc) (parse-tree sc))
354 (assert-parse ("((a)b((c)d(e)))" t t t :scanner sc) (parse-tree sc))
355 (assert-parse ("((a)b((c)d(e)))z" '(:eof) nil t :scanner sc)
356 (parse-tree sc))
357 (assert-parse ("((a)b((c)d(e))" '(#\)) nil t :scanner sc) (parse-tree sc))
358 (assert-parse ("(([)*(]))" t t t :scanner sc) (parse-tree sc))
359 (assert-parse ("((()-()))" '(#\)) nil t :scanner sc) (parse-tree sc))
360 (assert-parse ("((()-()))" t t t :scanner sc) (parse-tree-lookahead sc)))
361
362;;;--------------------------------------------------------------------------
363;;; Test expression parser.
364
8d1d7d3e
MW
365(eval-when (:compile-toplevel :load-toplevel :execute)
366 (defparse token (:context (context character-parser-context) parser)
367 (with-gensyms (value)
368 (expand-parser-spec context
369 `(seq ((,value ,parser) :whitespace) ,value)))))
dea4d055
MW
370
371(let ((add (binop "+" (x y 5) `(+ ,x ,y)))
372 (sub (binop "-" (x y 5) `(- ,x ,y)))
373 (mul (binop "*" (x y 7) `(* ,x ,y)))
374 (div (binop "/" (x y 7) `(/ ,x ,y)))
375 (eq (binop "=" (x y 3 :assoc nil) `(= ,x ,y)))
376 (ne (binop "/=" (x y 3 :assoc nil) `(/= ,x ,y)))
377 (lt (binop "<" (x y 3 :assoc nil) `(< ,x ,y)))
378 (gt (binop ">" (x y 3 :assoc nil) `(> ,x ,y)))
379 (and (binop "&" (x y 2) `(and ,x ,y)))
380 (or (binop "|" (x y 1) `(or ,x ,y)))
381 (expt (binop "**" (x y 8 :assoc :right) `(** ,x ,y)))
382 (neg (preop "-" (x 9) `(- ,x)))
383 (not (preop "!" (x 2) `(not ,x)))
384 (fact (postop "!" (x 10) `(! ,x)))
385 (lp (lparen #\))) (rp (rparen #\)))
386 (lb (lparen #\])) (rb (rparen #\])))
387 (defun test-parse-expr (string)
388 (with-parser-context (string-parser :string string)
389 (parse (seq (:whitespace
390 (value (expr (:nestedp nestedp)
391 (token (many (a 0 (+ (* a 10) it) :min 1)
392 (filter digit-char-p)))
393 (token (or (seq ("**") expt)
394 (seq ("/=") ne)
395 (seq (#\+) add)
396 (seq (#\-) sub)
397 (seq (#\*) mul)
398 (seq (#\/) div)
399 (seq (#\=) eq)
400 (seq (#\<) lt)
401 (seq (#\>) gt)
402 (seq (#\&) and)
403 (seq (#\|) or)))
404 (token (or (seq (#\() lp)
239fa5bd 405 (seq (#\[) lb)
dea4d055
MW
406 (seq (#\-) neg)
407 (seq (#\!) not)))
408 (token (or (seq (#\!) fact)
239fa5bd
MW
409 (when nestedp
410 (or (seq (#\)) rp)
411 (seq (#\]) rb)))))))
dea4d055
MW
412 (next (or :any (t :eof))))
413 (cons value next))))))
414
415(defun assert-expr-parse (string value winp consumedp)
416 (multiple-value-bind (v w c) (test-parse-expr string)
417 (flet ((message (what)
418 (format nil "expression ~S; ~A" string what)))
419 (cond (winp (assert-true w (message "winp"))
420 (assert-equal v value (message "value")))
421 (t (assert-false w (message "winp"))
422 (assert-equal v value (message "expected"))))
423 (assert-eql c consumedp (message "consumedp")))))
424
425(def-test-method test-expression-parser ((test test-parser) :run nil)
426 (assert-expr-parse "1 + 2 + 3" '((+ (+ 1 2) 3) . :eof) t t)
427 (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
428 (assert-expr-parse "1 * 2 + 3" '((+ (* 1 2) 3) . :eof) t t)
429 (assert-expr-parse "(1 + 2) * 3" '((* (+ 1 2) 3) . :eof) t t)
430 (assert-expr-parse "1 ** 2 ** 3" '((** 1 (** 2 3)) . :eof) t t)
431 (assert-expr-parse "1 + 2) * 3" '((+ 1 2) . #\)) t t)
432 (assert-expr-parse "1 + 2 * 3" '((+ 1 (* 2 3)) . :eof) t t)
dea4d055
MW
433 (assert-expr-parse "! 1 + 2 = 3 | 6 - 3 /= 12/6"
434 '((or (not (= (+ 1 2) 3))
435 (/= (- 6 3) (/ 12 6)))
436 . :eof)
437 t t)
438 (assert-expr-parse "! 1 > 2 & ! 4 < 6 | 3 < 4 & 9 > 10"
439 '((or (and (not (> 1 2)) (not (< 4 6)))
440 (and (< 3 4) (> 9 10)))
441 . :eof)
442 t t)
443
444 (assert-condition 'simple-error (test-parse-expr "(1 + 2"))
445 (assert-condition 'simple-error (test-parse-expr "(1 + 2]"))
446 (assert-condition 'simple-error (test-parse-expr "1 < 2 < 3")))
447
448;;;----- That's all, folks --------------------------------------------------