Today's wip.
[sod] / src / lexer-impl.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Implementation of lexical analysis protocol.
4 ;;;
5 ;;; (c) 2009 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Sensible Object Design, an object system for C.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 (cl:in-package #:sod)
27
28 ;;;--------------------------------------------------------------------------
29 ;;; Class implementation.
30
(defmethod shared-initialize :after
    ((lexer sod-token-scanner) slot-names &key)
  ;; Offer a default for the `filename' slot: the filename reported by the
  ;; character scanner underneath this token scanner.  Whether the default
  ;; is actually stored is decided by `default-slot', which is handed
  ;; SLOT-NAMES for that purpose.
  ;; NOTE(review): presumably `default-slot' only fills the slot when it is
  ;; still unbound and covered by SLOT-NAMES -- confirm against the macro.
  (default-slot (lexer 'sod-parser::filename slot-names)
    (let ((source (token-scanner-char-scanner lexer)))
      (scanner-filename source))))
35
(defmethod make-scanner-stream ((lexer sod-token-scanner))
  "Delegate to `make-scanner-stream' on LEXER's underlying character scanner.

   Whatever that call returns is returned unchanged."
  (let ((source (token-scanner-char-scanner lexer)))
    (make-scanner-stream source)))
38
39 ;;;--------------------------------------------------------------------------
40 ;;; Indicators and error messages.
41
(defvar *indicator-map*
  (make-hash-table :test 'eql)
  "Table associating indicator objects with human-readable descriptions.

   Keys are compared using `eql'.")
44
(defun show-char (stream char &optional colonp atsignp)
  "Write a human-readable rendering of CHAR to STREAM.

   CHAR may be a character, or `nil' to stand for end-of-file:

     * `nil' is written as `<eof>';
     * a graphic character other than space is written between a backquote
       and an apostrophe;
     * anything else is written as its lower-cased character name between
       angle brackets.

   The lambda list matches what `format''s ~/.../ directive expects; the
   COLONP and ATSIGNP flags are accepted and ignored."
  (declare (ignore colonp atsignp))
  (if (null char)
      (write-string "<eof>" stream)
      (let ((printablep (and (graphic-char-p char)
                             (char/= char #\space))))
        ;; Space is graphic, but quoting it literally would be unreadable,
        ;; so it goes down the spelled-out-name route instead.
        (format stream
                (if printablep "`~C'" "<~(~:C~)>")
                char))))
54
(defun skip-until (scanner token-types &key keep-end)
  "This is the implementation of the `skip-until' parser.

   Discard tokens from SCANNER until the current token's type is one of
   TOKEN-TYPES (tested with `member') or the scanner reports end-of-file.

   If a matching token is found, it is discarded as well unless KEEP-END is
   true, and the function returns (values nil t CONSUMEDP).  If end-of-file
   is reached first, it returns (values TOKEN-TYPES nil CONSUMEDP).

   In both cases CONSUMEDP is true exactly when at least one token was
   stepped over, including the end token itself when it is discarded."
  (do ((consumedp nil t))
      ((member (token-type scanner) token-types)
       ;; Found an end token.  Stepping past it consumes input; leaving it
       ;; in place (KEEP-END) does not, so only the tokens skipped on the
       ;; way here count in that case.
       (unless keep-end (scanner-step scanner))
       (values nil t (or consumedp (not keep-end))))
    ;; The match test above runs first, so an end-of-file token type listed
    ;; in TOKEN-TYPES is still recognized before we give up here.
    (when (scanner-at-eof-p scanner)
      (return (values token-types nil consumedp)))
    (scanner-step scanner)))
64
(defun parse-error-recover (scanner parser recover)
  "This is the implementation of the `error' parser.

   PARSER and RECOVER are functions of no arguments, each returning the
   three parser values (RESULT WIN CONSUMEDP).

   PARSER is called first.  If it succeeds, or if it fails without consuming
   anything while SCANNER is at end-of-file, its values are returned
   unchanged.  Otherwise a continuable syntax error is reported through
   `syntax-error', the current token is discarded if PARSER consumed
   nothing, and RECOVER is called.  The result and success flag of RECOVER
   are returned, with a consumed flag that is always true, because the
   recovery path is only taken after input has been consumed."
  (multiple-value-bind (result win consumedp) (funcall parser)
    (cond ((or win (and (not consumedp) (scanner-at-eof-p scanner)))
           ;; If we succeeded then there's nothing for us to do here.  On
           ;; the other hand, if we failed, didn't consume any tokens, and
           ;; we're at end-of-file, then there's not much hope of making
           ;; onward progress, so in this case we propagate the failure
           ;; rather than trying to recover.  And we assume that the
           ;; continuation will somehow arrange to report the problem, and
           ;; avoid inundating the user with error reports.
           (values result win consumedp))
          (t
           ;; Now we have to do some kind of sensible error recovery.  The
           ;; important thing to do here is to make sure that we make some
           ;; progress.  If we consumed any tokens then we're fine, and
           ;; we'll just try the provided recovery strategy.  Otherwise, if
           ;; we're not at EOF, then we can ensure progress by discarding
           ;; the current token.  Finally, if we are at EOF then our best
           ;; bet is simply to propagate the current failure back to the
           ;; caller, but we handled that case above.
           (syntax-error scanner result :continuep t)
           (unless consumedp (scanner-step scanner))
           ;; Whatever RECOVER says about its own consumption, input has
           ;; definitely been consumed by this point -- either by PARSER or
           ;; by the step just above -- so report that to the caller.
           (multiple-value-bind (value recoveredp) (funcall recover)
             (values value recoveredp t))))))
89
90 ;;;--------------------------------------------------------------------------
91 ;;; Token scanning.
92
(defmethod scanner-token ((scanner sod-token-scanner))
  "Read the next token from SCANNER's character scanner.

   Leading whitespace and comments are skipped first; the character
   scanner's line and column at the point where skipping stops are stored
   in SCANNER.  Two values are returned, a token type and a token value:

     * `:id' and the identifier's text;
     * `:string' and the literal's contents, or `:char' and a character;
     * `:int' and the integer's value;
     * `:ellipsis' and `nil' for `...';
     * the character itself and `nil' for any other single character;
     * `:eof' and `nil' at end of input.

   If nothing matches, a lexer error is reported and scanning is retried."
  (with-slots (char-scanner line column) scanner
    (with-parser-context (character-scanner-context :scanner char-scanner)

      (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
               ;; Fold digits of the given RADIX onto INIT, most significant
               ;; first, asking `many' for at least MIN of them.
               (parse (many (total init (+ it (* radix total)) :min min)
                        (label (list :digit radix)
                               (filter (lambda (c)
                                         (digit-char-p c radix))))))))

        ;; Discard whitespace and comments.  The position is recorded
        ;; afresh on each pass, so what is finally left in SCANNER is the
        ;; position at which the skipping stopped.
        (loop
          (setf (scanner-line scanner) (scanner-line char-scanner))
          (setf (scanner-column scanner) (scanner-column char-scanner))
          (cond-parse (:consumedp cp :expected exp)
            ((satisfies whitespace-char-p) (parse :whitespace))
            ((scan-comment char-scanner))
            (t (unless cp (return))
               (lexer-error char-scanner exp cp))))

        ;; Junk is out of the way: work out what kind of token this is.
        (cond-parse (:consumedp cp :expected exp)

          ;; Identifier: a letter or underscore, followed by any number of
          ;; alphanumerics and underscores.
          ((or #\_ (satisfies alpha-char-p))
           (values :id (with-output-to-string (name)
                         (write-char it name)
                         (parse (many (nil nil (write-char it name))
                                  (or #\_ (satisfies alphanumericp)))))))

          ;; String or character literal, depending on the opening quote.
          ;; Inside the literal, a backslash followed by any character
          ;; contributes that character to the contents.
          ((seq ((delim (or #\" #\'))
                 (text (many (buf (make-string-output-stream)
                                  (progn (write-char it buf) buf)
                                  :final (get-output-stream-string buf))
                         (or (and #\\ :any) (not delim))))
                 (nil (char delim)))
             (ecase delim
               (#\' (let ((len (length text)))
                      (cond ((= len 1)
                             (char text 0))
                            ((zerop len)
                             (cerror* "Empty character literal")
                             #\?)
                            (t
                             (cerror* "Too many characters in literal")
                             (char text 0)))))
               (#\" text)))
           (values (etypecase it
                     (string :string)
                     (character :char))
                   it))

          ;; A leading zero selects the radix: `b', `o' or `x' (in either
          ;; case) for binary, octal or hexadecimal; otherwise octal, with
          ;; a lone `0' allowed.
          ((and #\0
                (or (and (or #\b #\B) (scan-digits :radix 2))
                    (and (or #\o #\O) (scan-digits :radix 8))
                    (and (or #\x #\X) (scan-digits :radix 16))
                    (scan-digits :radix 8 :min 0)))
           (values :int it))

          ;; Any other leading digit starts a decimal integer.  The first
          ;; digit's weight seeds the accumulator for the rest.
          ((seq ((lead (filter digit-char-p))
                 (total (scan-digits :radix 10 :min 0 :init lead)))
             total)
           (values :int it))

          ;; Multi-character punctuation which forms a single token.
          ("..." (values :ellipsis nil))

          ;; Every other character is a punctuation token in its own right.
          (:any (values it nil))

          ;; Nothing left to read.
          (:eof (values :eof nil))

          ;; Every clause failed.  Since `:any' and `:eof' were both tried,
          ;; getting here means an earlier clause consumed input before
          ;; failing -- the assertion checks exactly that -- so reporting
          ;; the error and trying again is guaranteed to make progress.
          (t
           (assert cp)
           (lexer-error char-scanner exp cp)
           (scanner-token scanner)))))))
172
173 ;;;----- That's all, folks --------------------------------------------------