Change naming convention around.
[sod] / src / parser / scanner-charbuf-test.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Test for the charbuf scanner
4 ;;;
5 ;;; (c) 2009 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Sensble Object Design, an object system for C.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 ;;; The charbuf scanner is a hairy beast and in need of a thorough going
27 ;;; over.
28
29 (cl:in-package #:sod-test)
30
31 ;;;--------------------------------------------------------------------------
32 ;;; Tests of the low-level seeking and fetching machinery.
33
34 (defclass charbuf-test (test-case) (scanner))
35 (add-test *sod-test-suite* (get-suite charbuf-test))
36
37 (defparameter *background-pattern*
38 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789."
39 "Basic pattern underlying our initial buffer contents.
40
41 The pattern is one character short of the base-64 sequence `A-Za-z0-9./',
42 with the aim of making its length be prime to the actual buffer length --
43 so that the pattern doesn't repeat exactly for many buffers.")
44 (assert (= (gcd (length *background-pattern*) charbuf-size) 1))
45
46 (defun make-pattern-string (start end)
47 "Return a string containing the buffer pattern between START and END.
48
49 The most interesting cases occur at the boundaries between buffers; so we
50 shall want to put recognizable patterns there. The buffers are quite big
51 (we import `charbuf-size' off the books so that we don't actually have to
52 know exactly) but we should still fill with a repeating pattern so that we
53 can detect synchronization failures.
54
55 We fill most of the buffer with the `*background-pattern*', which has been
56 chosen so as not to align nicely with the buffer size. Across the joins,
57 we write a string `<<N][N+1>>', where the boundary is between `]' and `[',
58 and the numbers N and N + 1 are the numbers, in words, of the respective
59 buffers."
60
61 (with-output-to-string (out)
62 (multiple-value-bind (n0 i0) (floor start charbuf-size)
63 (multiple-value-bind (n1 i1) (floor end charbuf-size)
64
65 (do ((n n0 (1+ n))) ((> n n1))
66 (let* ((l (format nil "[~R>>" n))
67 (r (format nil "<<~R]" n))
68 (m (length l))
69 (q (length *background-pattern*))
70 (i (if (= n n0) i0 0))
71 (e (if (= n n1) i1 charbuf-size))
72 (k (min (- charbuf-size m) e)))
73 (when (< i (length l))
74 (write-string l out :start i :end (min e m))
75 (setf i m))
76 (do ((o (mod (+ (* charbuf-size n) i) q) 0))
77 ((>= i k))
78 (let ((p (min (- k i) (- q o))))
79 (write-string *background-pattern* out :start o :end (+ o p))
80 (incf i p)))
81 (when (< k e)
82 (write-string r out :start (- i k) :end (- e k)))))))))
83
84 (defparameter *test-pattern* (make-pattern-string 0 10000)
85 "The pattern that our test scanner is reading.")
86
87 (defmethod set-up ((test charbuf-test))
88 (with-slots (scanner) test
89 (let ((stream (make-string-input-stream *test-pattern*)))
90 (setf scanner (make-instance 'charbuf-scanner
91 :stream stream
92 :filename "<magic test>")))))
93
94 (defun skip (scanner n)
95 (assert (>= n 0))
96 (charbuf-scanner-map scanner
97 (lambda (buf start end)
98 (declare (ignore buf))
99 (let ((d (- end start)))
100 (if (>= d n)
101 (values t (+ start n))
102 (progn (decf n d) (values nil 0)))))))
103
104 (defun assert-string-next (scanner pos len)
105 "Assert that the next LEN characters from SCANNER are correct.
106
107 That is, that they match the corresponding LEN characters starting at
108 position POS as returned by `make-pattern-string'."
109 (let ((want (make-pattern-string pos (+ pos len))))
110 (dotimes (i len)
111 (assert-false (scanner-at-eof-p scanner))
112 (assert-eql (scanner-current-char scanner) (char want i))
113 (scanner-step scanner))))
114
115 (def-test-method test-pattern ((test charbuf-test) :run nil)
116 ;; Make sure the pattern is what we expect. This is a completely different
117 ;; (and considerably more stupid) way of generating the basic pattern up to
118 ;; a particular length.
119 (let* ((len (length *background-pattern*))
120 (string ""))
121 (loop while (< (length string) len)
122 do (setf string (concatenate 'string string *test-pattern*)))
123 (loop for n from 0
124 for i from 0 by charbuf-size below len
125 for l = (format nil "[~R>>" n) and r = (format nil "<<~R]" n)
126 for e = (- (+ i charbuf-size) (length r))
127 do (setf (subseq string i) l)
128 when (< e len) do (setf (subseq string e) r))
129 (assert-equal (subseq string 0 len)
130 (make-pattern-string 0 len))))
131
132 (def-test-method test-read ((test charbuf-test) :run nil)
133 ;; Test reading from various places.
134 (with-slots (scanner) test
135 (loop for prev = 0 then (+ pos len)
136 for (pos len) in '((0 10) (50 250) (4086 20)
137 (5000 3192) (9800 200)) do
138 (assert (>= pos prev))
139 (skip scanner (- pos prev))
140 (assert-string-next scanner pos len))
141 (assert-true (scanner-at-eof-p scanner))))
142
143 (def-test-method test-unread ((test charbuf-test) :run nil)
144 ;; Torture test for `scanner-unread', which is distressingly hairy.
145 (with-slots (scanner) test
146
147 (flet ((test (here next skip there note)
148 (assert-eql (scanner-current-char scanner) here
149 (format nil "Here (~A)." note))
150 (scanner-step scanner)
151 (assert-eql (scanner-current-char scanner) next
152 (format nil "Next (~A)." note))
153 (scanner-unread scanner here)
154 (with-scanner-place (place scanner)
155 (assert-eql (scanner-current-char scanner) here
156 (format nil "Here again (~A)." note))
157 (scanner-step scanner)
158 (assert-eql (scanner-current-char scanner) next
159 (format nil "Next again (~A)." note))
160 (skip scanner skip)
161 (assert-eql (scanner-current-char scanner) there
162 (format nil "There (~A)." note))
163 (scanner-unread scanner there)
164 (with-scanner-place (another-place scanner)
165 (scanner-restore-place scanner place)
166 (assert-eql (scanner-current-char scanner) here
167 (format nil "Here restored (~A)." note))))))
168
169 (test #\[ #\z 51 #\0 "start")
170 (skip scanner 4095)
171 (test #\] #\[ 4096 #\[ "edge")
172
173 ;; Check behaviour at EOF. Ought to test behaviour when EOF is on a
174 ;; buffer boundary too.
175 (skip scanner 5904)
176 (assert-false (scanner-at-eof-p scanner))
177 (assert-eql (scanner-current-char scanner) #\t "EOF.")
178 (scanner-step scanner)
179 (assert-true (scanner-at-eof-p scanner))
180 (scanner-unread scanner #\t)
181 (assert-false (scanner-at-eof-p scanner))
182 (assert-eql (scanner-current-char scanner) #\t "EOF again."))))
183
184 (def-test-method test-rewind ((test charbuf-test) :run nil)
185 ;; Test reading, like before, but this time with rewinding.
186 (with-slots (scanner) test
187 (let* ((list '((0 10) (0 10000) (50 250) (4086 20)
188 (4095 4097) (5000 3192) (9999 1)))
189 (places (loop for prev = 0 then pos
190 for (pos) in list
191 do (skip scanner (- pos prev))
192 collect (scanner-capture-place scanner))))
193 (loop for (pos len) in list
194 for place in places do
195 (scanner-restore-place scanner place)
196 (assert-string-next scanner pos len))
197 (assert-true (scanner-at-eof-p scanner)))))
198
199 (def-test-method test-interval ((test charbuf-test) :run nil)
200 ;; Test fetching intervals of text.
201 (with-slots (scanner) test
202 (let* ((posns '(0 12 4080 4110 5000 9000 10000))
203 (places (loop for prev = 0 then pos
204 for pos in posns
205 do (skip scanner (- pos prev))
206 collect (scanner-capture-place scanner))))
207 (loop for p0 in places
208 for i0 in posns do
209 (loop for p1 in places
210 for i1 in posns do
211 (if (< i1 i0)
212 (assert-condition 'error (scanner-interval p0 p1))
213 (assert-equal (scanner-interval scanner p0 p1)
214 (make-pattern-string i0 i1)
215 (format nil "Mismatch interval ~A .. ~A."
216 i0 i1)))
217 (assert-true (scanner-at-eof-p scanner)))))))
218
219 ;;;--------------------------------------------------------------------------
220 ;;; Tests of the position tracking machinery.
221
222 (defparameter *position-test-text*
223 ;; Use a roundabout method of getting tabs in there, so that they don't get
224 ;; screwed by strange editors and suchlike.
225 (substitute #\tab #\@ "Line one
226 Line two is rather longer, but not noticeably more interesting.
227 Line three explains that line four contains column numbers mod 10.
228 012345678@6789@@2345678@012
229 @@Line five is indented somewhat.")
230 "Text for the position-tracking test.
231
232 The text should /look/ like the following. Note that this text here may
233 get trashed by tab/space conversions and whatever, and I've indented it so
234 that it doesn't look daft in the source; but the columns should remain
235 where they are.
236
237 0 1 2 3 4 5 6 7
238 0123456789012345678901234567890123456789012345678901234567890123456789012
239 Line one
240 Line two is rather longer, but not noticeably more interesting.
241 Line three explains that line four contains column numbers mod 10.
242 012345678 6789 2345678 012
243 Line five is indented somewhat.
244
245 It would be nice at some point to add additional tests for edge cases
246 around buffer boundaries. This isn't completely essential, though: the
247 current implementation manages positions fairly independently of the
248 buffering.")
249
250 (defparameter *known-positions*
251 '(
252 ;; The first few line aren't actually very interesting. We'll
253 ;; check the start and end positions, and maybe a few in the
254 ;; middle. Note that a newline character is logically a part of
255 ;; the preceding line.
256 (0 #\L 1 0 #\i 1 1 0) (5 #\o 1 5 #\n 1 6 5) (8 #\newline 1 8 #\L 2 0 0)
257 (9 #\L 2 0 #\i 2 1 0) (72 #\newline 2 63 #\L 3 0 0)
258 (73 #\L 3 0 #\i 3 1 0) (139 #\newline 3 66 #\0 4 0 0)
259
260 ;; Now for the line with the fancy tabbings.
261 (140 #\0 4 0 #\1 4 1 0)
262 (148 #\8 4 8 #\tab 4 9 8) ; nothing so far
263 (149 #\tab 4 9 #\6 4 16 15) ; the tab itself just follows on
264 (150 #\6 4 16 #\7 4 17 16) ; but the char after is tabbed
265 (154 #\tab 4 20 #\tab 4 24 23) ; next tab position
266 (155 #\tab 4 24 #\2 4 32 31) ; two in a row
267 (156 #\2 4 32 #\3 4 33 32) ; should be here now
268 (162 #\8 4 38 #\tab 4 39 38) ; skip to the next bit
269 (163 #\tab 4 39 #\0 4 40 39) ; tab is here
270 (164 #\0 4 40 #\1 4 41 40) ; and doesn't move us much
271 (166 #\2 4 42 #\newline 4 43 42) ; last actual character on the line
272 (167 #\newline 4 43 #\tab 5 0 0) ; and the ending newline
273
274 ;; And the final line.
275 (168 #\tab 5 0 #\tab 5 8 7) ; first tab on next line
276 (169 #\tab 5 8 #\L 5 16 15) ; and the second
277 (170 #\L 5 16 #\i 5 17 16) ; beginning of the text
278 (200 #\. 5 46 :eof 5 47 46) ; last character in the stream
279 (201 :eof 5 47)) ; but eof has a position too
280 "List of character positions, characters and line/column numbers.
281
282 The characters are there for sanity-checking purposes. The format is
283
284 (INDEX CHAR LINE COLUMN NEXT-CHAR
285 NEXT-LINE NEXT-COLUMN REWIND-COLUMN)
286
287 which asserts that the character at INDEX is CHAR, found at the given LINE
288 and COLUMN, that the next character is NEXT-CHAR, at the NEXT-LINE and
289 NEXT-COLUMN, and if one unreads from there, it will be (possibly
290 erroneously) claimed that the character at INDEX is at REWIND-COLUMN.
291 (Restoring a captured place shouldn't get the column wrong -- only
292 unreading.)
293
294 The symbol `:eof' means that there is no character at the given INDEX,
295 because the file has already ended. However, EOF has a position which
296 should be correct, and it should be possible to unread from EOF.")
297
298 (defclass charbuf-position-test (test-case) (scanner))
299 (add-test *sod-test-suite* (get-suite charbuf-position-test))
300
301 (defmethod set-up ((test charbuf-position-test))
302 (with-slots (scanner) test
303 (let ((stream (make-string-input-stream *position-test-text*)))
304 (setf scanner (make-instance 'charbuf-scanner
305 :stream stream
306 :filename "<position test>")))))
307
308 (defun check-position (scanner pos char line column note)
309 (if (eq char :eof)
310 (assert-true (scanner-at-eof-p scanner)
311 (format nil "EOF, position ~A (~A)." pos note))
312 (assert-eql char (scanner-current-char scanner)
313 (format nil "Character, position ~A (~A)." pos note)))
314 (assert-eql line (scanner-line scanner)
315 (format nil "Line number, position ~A (~A)." pos note))
316 (assert-eql column (scanner-column scanner)
317 (format nil "Column number, position ~A (~A)." pos note)))
318
319 (def-test-method test-simple-positions
320 ((test charbuf-position-test) :run nil)
321 (with-slots (scanner) test
322 (loop for prev = 0 then pos
323 for (pos char line column) in *known-positions* do
324 (loop repeat (- pos prev) do (scanner-step scanner))
325 (check-position scanner pos char line column "simple"))))
326
327 (def-test-method test-rewind-positions
328 ((test charbuf-position-test) :run nil)
329 (with-slots (scanner) test
330 (let ((places (loop for prev = 0 then pos
331 for (pos char line column) in *known-positions* do
332 (skip scanner (- pos prev))
333 (check-position scanner pos char line column "skip")
334 collect (scanner-capture-place scanner))))
335 (loop for place in places
336 for (pos char line column
337 next-char next-line next-column
338 rewind-column)
339 in *known-positions* do
340 (scanner-restore-place scanner place)
341 (check-position scanner pos char line column "rewind")
342 (unless (eq char :eof)
343 (scanner-step scanner)
344 (check-position scanner (1+ pos) next-char
345 next-line next-column "step")
346 (scanner-unread scanner char)
347 (check-position scanner pos char line rewind-column
348 "unread")
349 (scanner-step scanner)
350 (check-position scanner (1+ pos) next-char
351 next-line next-column "restep"))))))
352
353 ;;;----- That's all, folks --------------------------------------------------