An actual running implementation, which makes code that compiles.
[sod] / src / parser / scanner-charbuf-impl.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Efficient buffering character scanner
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensble Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod-parser)
27
28;;;--------------------------------------------------------------------------
29;;; Infrastructure types.
30
1d8cc67a
MW
31(eval-when (:compile-toplevel :load-toplevel :execute)
32 (defconstant charbuf-size 4096
33 "Number of characters in a character buffer."))
dea4d055
MW
34
35(deftype charbuf ()
36 "Type of character buffers."
37 `(simple-string ,charbuf-size))
38
39(deftype charbuf-index ()
40 "Type of indices into character buffers."
41 `(integer 0 ,charbuf-size))
42
43(declaim (inline make-charbuf))
44(defun make-charbuf ()
45 "Return a fresh uninitialized character buffer."
46 (make-array charbuf-size :element-type 'character))
47
48(defstruct charbuf-chain-link
49 "A link in the charbuf scanner's buffer chain.
50
51 Usually the scanner doesn't bother maintaining a buffer chain; but if
52 we've rewound to a captured place then we need to be able to retrace our
53 steps on to later buffers.
54
55 It turns out to be easier to have an explicit link to the next structure
56 in the chain than to maintain a spine of cons cells, so we do that; the
57 only other things we need are the buffer itself and its length, which
58 might be shorter than `charbuf-size', e.g., if we hit end-of-file."
59 (next nil :type (or charbuf-chain-link null))
60 (buf nil :type (or charbuf (member nil :eof)) :read-only t)
61 (size 0 :type charbuf-index :read-only t))
62
63(export 'charbuf-scanner-place-p)
64(defstruct charbuf-scanner-place
65 "A captured place we can return to later.
66
67 We remember the buffer-chain link, so that we can retrace our steps up to
68 the present. We also need the index at which we continue reading
69 characters; and the line and column numbers to resume from."
9ec578d9 70 (scanner nil :type charbuf-scanner :read-only t)
dea4d055
MW
71 (link nil :type charbuf-chain-link :read-only t)
72 (index 0 :type charbuf-index :read-only t)
73 (line 0 :type fixnum :read-only t)
74 (column 0 :type fixnum :read-only t))
75
9ec578d9
MW
76(defmethod file-location ((place charbuf-scanner-place))
77 (make-file-location (scanner-filename
78 (charbuf-scanner-place-scanner place))
79 (charbuf-scanner-place-line place)
80 (charbuf-scanner-place-column place)))
81
dea4d055
MW
82;;;--------------------------------------------------------------------------
83;;; Main class.
84
85(export 'charbuf-scanner)
86(defclass charbuf-scanner (character-scanner)
87 ((stream :initarg :stream :type stream)
88 (buf :initform nil :type (or charbuf (member nil :eof)))
89 (size :initform 0 :type (integer 0 #.charbuf-size))
90 (index :initform 0 :type (integer 0 #.charbuf-size))
91 (captures :initform 0 :type (and fixnum unsigned-byte))
92 (tail :initform nil :type (or charbuf-chain-link null))
93 (unread :initform nil :type (or charbuf-chain-link nil))
94 (filename :initarg :filename :type (or string null)
95 :reader scanner-filename)
bf090e02 96 (line :initarg :line :initform 1 :type fixnum :reader scanner-line)
dea4d055
MW
97 (column :initarg :column :initform 0 :type fixnum :reader scanner-column))
98 (:documentation
99 "An efficient rewindable scanner for character streams.
100
101 The scanner should be used via the parser protocol. The following notes
102 describe the class's slots and the invariants maintained by the class.
103
104 The scanner reads characters from STREAM. It reads in chunks,
105 `charbuf-size' characters at a time, into freshly allocated arrays. At
106 the beginning of time, BUF is nil; and SIZE is 0, indicating that a new
107 buffer needs to be read in; this anomalous situation is remedied during
108 instance initialization. At all times thereafter:
109
110 * If SIZE > 0 then BUF is a `charbuf' containing characters.
111
112 * (<= 0 INDEX SIZE charbuf-size).
113
114 When the current buffer is finished with, another one is fetched. If
115 we've rewound the scanner to a captured place, then there'll be a chain of
116 buffers starting at TAIL (which corresponds to the current buffer); and we
117 should use its NEXT buffer when we've finished this one.
118
119 If there is no next buffer then we should acquire a new one and fill it
120 from the input stream. If there is an outstanding captured place then we
121 must also create a buffer chain entry for this new buffer and link it onto
122 the chain. If there aren't outstanding captures then we don't need to
123 bother with any of that -- earlier places certainly can't be captured and
124 a capture of the current position can allocate its own buffer chain
125 entry.
126
127 Which leaves us with the need to determine whether there are outstanding
128 captures. We simply maintain a counter, and rely on the client releasing
129 captured places properly when he's finished. In practice, this is usually
130 done using the `peek' parser macro so there isn't a problem."))
131
132;;;--------------------------------------------------------------------------
133;;; Utilities.
134
135(defgeneric charbuf-scanner-fetch (scanner)
136 (:documentation
137 "Refill the scanner buffer.
138
139 This is an internal method, which is really only a method so that the
140 compiler will optimize slot references.
141
142 Replace the current buffer with the next one, either from the buffer chain
143 (if we're currently rewound) or with a new buffer from the stream."))
144
145(defmethod charbuf-scanner-fetch ((scanner charbuf-scanner))
146 (with-slots (stream buf size index tail captures) scanner
147 (loop
148 (acond
149
150 ;; If we've hit the end of the line, stop.
151 ((eq buf :eof)
152 (return nil))
153
154 ;; If there's another buffer, we should check it out.
155 ((and tail (charbuf-chain-link-next tail))
156 (setf tail it
157 buf (charbuf-chain-link-buf it)
158 size (charbuf-chain-link-size it)
159 index 0))
160
161 ;; No joy: try reading more stuff from the input stream.
162 (t
163 (let* ((new (make-charbuf))
164 (n (read-sequence new stream :start 0 :end charbuf-size)))
165
166 ;; If there's nothing coming in then store a magical marker.
167 (when (zerop n) (setf new :eof))
168
169 ;; If there's someone watching, link a new entry onto the chain.
170 ;; There must, under these circumstances, be a `tail'.
171 (if (plusp captures)
172 (let ((next (make-charbuf-chain-link :buf new :size n)))
173 (setf (charbuf-chain-link-next tail) next
174 tail next))
175 (setf tail nil))
176
177 ;; Store the new state.
178 (setf buf new
179 size n
180 index 0))))
181
182 ;; If there's stuff in the current buffer, we're done.
183 (when (< index size)
184 (return t)))))
185
186(export 'charbuf-scanner-map)
187(defgeneric charbuf-scanner-map (scanner func &optional fail)
188 (:documentation
189 "Read characters from the SCANNER's raw buffers.
190
191 This is intended to be an efficient and versatile interface for reading
192 characters from a scanner in bulk. The FUNC is invoked repeatedly with
193 three arguments: a simple string BUF and two nonnegative fixnums START and
194 END, indicating that the subsequence of BUF between START (inclusive) and
195 END (exclusive) should be processed. The FUNC returns two values: a
196 generalized boolean DONEP and a nonnegative fixnum USED. If DONEP is
197 false then USED is ignored: the function has consumed the entire buffer
198 and wishes to read more. If DONEP is true then the condition (<= START
199 USED END) must hold; the FUNC has consumed the buffer as far as USED
200 (exclusive) and has completed successfully; the values DONEP and `t' are
3109662a 201 returned as the result of `charbuf-scanner-map'.
dea4d055
MW
202
203 If end-of-file is encountered before FUNC completes successfully then FAIL
3109662a 204 is called with no arguments, and `charbuf-scanner-map' returns whatever
dea4d055
MW
205 FAIL returns.
206
207 Observe that, if FAIL returns a second value of nil, then
208 `charbuf-scanner-map' is usable as a parser expression."))
209
210(defmethod charbuf-scanner-map
211 ((scanner charbuf-scanner) func &optional fail)
212 (with-slots (buf index size) scanner
213 (flet ((offer (buf start end)
214
215 ;; Pass the buffer to the function, and see what it thought.
216 (multiple-value-bind (donep used) (funcall func buf start end)
217
218 ;; Update the position as far as the function read.
219 (with-slots (line column) scanner
220 (let ((l line) (c column) (limit (if donep used end)))
221 (do ((i start (1+ i)))
222 ((>= i limit))
223 (setf (values l c)
224 (update-position (char buf i) l c)))
225 (setf line l column c)))
226
227 ;; If the function is finished then update our state and
228 ;; return.
229 (when donep
230 (setf index used)
231 (when (>= index size)
232 (charbuf-scanner-fetch scanner))
233 (return-from charbuf-scanner-map (values donep t))))))
234
235 ;; If there's anything in the current buffer, offer it to the function.
236 (when (< index size)
237 (offer buf index size))
238
239 ;; Repeatedly fetch new buffers and offer them to the function.
240 ;; Because the buffers are fresh, we know that we must process them
241 ;; from the beginning. Note that `offer' will exit if FUNC has
242 ;; finished, so we don't need to worry about that.
243 (loop
244 (unless (charbuf-scanner-fetch scanner)
245 (return (if fail (funcall fail) (values nil nil))))
246 (offer buf 0 size)))))
247
248;;;--------------------------------------------------------------------------
249;;; Initialization.
250
251(defmethod shared-initialize :after
252 ((scanner charbuf-scanner) slot-names &key)
253
254 ;; Grab the filename from the underlying stream if we don't have a better
255 ;; guess.
256 (default-slot (scanner 'filename slot-names)
257 (with-slots (stream) scanner
258 (aif (stream-pathname stream) (namestring it) nil)))
259
260 ;; Get ready with the first character.
261 (charbuf-scanner-fetch scanner))
262
263;;;--------------------------------------------------------------------------
264;;; Scanner protocol implementation.
265
266(defmethod scanner-at-eof-p ((scanner charbuf-scanner))
267 (with-slots (buf) scanner
268 (eq buf :eof)))
269
270(defmethod scanner-current-char ((scanner charbuf-scanner))
271 (with-slots (buf index) scanner
272 (schar buf index)))
273
274(defmethod scanner-step ((scanner charbuf-scanner))
275 (with-slots (buf size index line column) scanner
276
277 ;; If there's a current character then update the position from it. When
278 ;; is there a current character? When the index is valid.
279 (when (< index size)
280 (setf (values line column)
281 (update-position (schar buf index) line column)))
282
283 ;; Now move the position on. If there's still a character left then we
284 ;; win; otherwise fetch another buffer.
285 (or (< (incf index) size)
286 (charbuf-scanner-fetch scanner))))
287
288(defmethod scanner-unread ((scanner charbuf-scanner) char)
289 (with-slots (buf index size unread tail line column) scanner
290 (cond
291
292 ;; First, let's rewind the buffer index. This isn't going to work if
293 ;; the index is already zero. (Note that this implies that INDEX is
294 ;; zero in the remaining cases.)
295 ((plusp index)
296 (decf index))
297
298 ;; Plan B. Maybe we've been here before, in which case we'll have left
299 ;; the appropriate state kicking about already. Note that, according
300 ;; to the `unread' rules, the character must be the same as last time,
301 ;; so we can just reuse the whole thing unchanged. Also, note that
302 ;; the NEXT field in UNREAD is not nil due to the way that we construct
303 ;; the link below.
304 ((and unread (eql (charbuf-chain-link-next unread) tail))
305 (setf tail unread size 1
306 buf (charbuf-chain-link-buf unread)))
307
308 ;; Nope, we've not been here, at least not recently. We'll concoct a
309 ;; new buffer and put the necessary stuff in it. Store it away for
310 ;; later so that repeated read/unread oscillations at this position
311 ;; don't end up consing enormous arrays too much.
312 (t
313 (let* ((next (or tail (make-charbuf-chain-link :buf buf :size size)))
314 (fake (make-charbuf))
315 (this (make-charbuf-chain-link :buf fake :size 1 :next next)))
316 (setf (schar fake 0) char buf fake size 1
317 tail this unread this))))
318
319 ;; That's that sorted; now we have to fiddle the position.
320 (setf (values line column) (backtrack-position char line column))))
321
322(defmethod scanner-capture-place ((scanner charbuf-scanner))
323 (with-slots (buf size index captures tail line column) scanner
324 (incf captures)
325 (unless tail
326 (setf tail (make-charbuf-chain-link :buf buf :size size)))
9ec578d9 327 (make-charbuf-scanner-place :scanner scanner :link tail :index index
dea4d055
MW
328 :line line :column column)))
329
330(defmethod scanner-restore-place ((scanner charbuf-scanner) place)
331 (with-slots (buf size index tail line column) scanner
332 (let ((link (charbuf-scanner-place-link place)))
333 (setf buf (charbuf-chain-link-buf link)
334 size (charbuf-chain-link-size link)
335 index (charbuf-scanner-place-index place)
336 line (charbuf-scanner-place-line place)
337 column (charbuf-scanner-place-column place)
338 tail link))))
339
340(defmethod scanner-release-place ((scanner charbuf-scanner) place)
1d8cc67a 341 (declare (ignore place))
dea4d055
MW
342 (with-slots (captures) scanner
343 (decf captures)))
344
345(defstruct (charbuf-slice
346 (:constructor make-charbuf-slice
347 (buf &optional (start 0) %end
348 &aux (end (or %end (length buf))))))
349 (buf nil :type (or charbuf (eql :eof)) :read-only t)
350 (start 0 :type (and fixnum unsigned-byte) :read-only t)
351 (end 0 :type (and fixnum unsigned-byte) :read-only t))
352
353(declaim (inline charbuf-slice-length))
354(defun charbuf-slice-length (slice)
355 (- (charbuf-slice-end slice) (charbuf-slice-start slice)))
356
357(defun concatenate-charbuf-slices (slices)
358 (let* ((len (reduce #'+ slices
359 :key #'charbuf-slice-length
360 :initial-value 0))
361 (string (make-array len :element-type 'character))
362 (i 0))
363 (dolist (slice slices)
364 (let ((buf (charbuf-slice-buf slice))
365 (end (charbuf-slice-end slice)))
366 (do ((j (charbuf-slice-start slice) (1+ j)))
367 ((>= j end))
368 (setf (schar string i) (schar buf j))
369 (incf i))))
370 string))
371
372(defmethod scanner-interval
373 ((scanner charbuf-scanner) place-a &optional place-b)
374 (let* ((slices nil)
375 (place-b (or place-b
376 (with-slots (index tail) scanner
9ec578d9
MW
377 (make-charbuf-scanner-place :scanner scanner
378 :link tail
dea4d055
MW
379 :index index))))
380 (last-link (charbuf-scanner-place-link place-b)))
381 (flet ((bad ()
3109662a 382 (error "Incorrect places ~S and ~S to `scanner-interval'."
dea4d055
MW
383 place-a place-b)))
384 (do ((link (charbuf-scanner-place-link place-a)
385 (charbuf-chain-link-next link))
386 (start (charbuf-scanner-place-index place-a) 0))
387 ((eq link last-link)
388 (let ((end (charbuf-scanner-place-index place-b)))
389 (when (< end start)
390 (bad))
391 (push (make-charbuf-slice (charbuf-chain-link-buf link)
392 start end)
393 slices)
394 (concatenate-charbuf-slices (nreverse slices))))
395 (when (null link) (bad))
396 (push (make-charbuf-slice (charbuf-chain-link-buf link)
397 start
398 (charbuf-chain-link-size link))
399 slices)))))
400
401;;;--------------------------------------------------------------------------
402;;; Specialized streams.
403
404(export 'charbuf-scanner-stream)
405(defclass charbuf-scanner-stream (character-scanner-stream)
406 ((scanner :initarg :scanner :type charbuf-scanner)))
407
bf090e02
MW
408(defmethod make-scanner-stream ((scanner charbuf-scanner))
409 (make-instance 'charbuf-scanner-stream :scanner scanner))
410
dea4d055 411(defmethod stream-read-sequence
1d8cc67a
MW
412 ((stream charbuf-scanner-stream) (seq string)
413 #+clisp &key #-clisp &optional (start 0) end)
dea4d055
MW
414 (with-slots (scanner) stream
415 (unless end (setf end (length seq)))
416 (let ((i start) (n (- end start)))
417 (labels ((copy (i buf start end)
418 (do ((j i (1+ j))
419 (k start (1+ k)))
420 ((>= k end))
421 (setf (char seq j) (schar buf k))))
422 (snarf (buf start end)
423 (let ((m (- end start)))
424 (cond ((< m n)
425 (copy i buf start end) (decf n m) (incf i m)
426 (values nil 0))
427 (t
428 (copy i buf start (+ start n)) (incf i n)
429 (values t n))))))
430 (charbuf-scanner-map scanner #'snarf)
431 i))))
432
433(defmethod stream-read-line ((stream charbuf-scanner-stream))
434 (with-slots (scanner) stream
435 (let ((slices nil))
436 (flet ((snarf (buf start end)
437 (let ((pos (position #\newline buf :start start :end end)))
438 (push (make-charbuf-slice buf start (or pos end)) slices)
439 (if pos
440 (values (concatenate-charbuf-slices (nreverse slices))
441 (1+ pos))
442 (values nil 0))))
443 (fail ()
444 (values (concatenate-charbuf-slices (nreverse slices)) t)))
445 (charbuf-scanner-map scanner #'snarf #'fail)))))
446
447;;;----- That's all, folks --------------------------------------------------