Change naming convention around.
[sod] / src / lexical-parse.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Lexical analysis for input parser
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensble Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Class definition.
30
31(export 'sod-token-scanner)
32(defclass sod-token-scanner (token-scanner)
33 ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner))
34 (:documentation
35 "A token scanner for SOD input files.
36
37 Not a lot here, apart from a character scanner to read from and the
38 standard token scanner infrastructure."))
39
bf090e02
MW
40(defmethod shared-initialize :after
41 ((scanner sod-token-scanner) slot-names &key)
42 (default-slot (scanner 'sod-parser::filename slot-names)
43 (scanner-filename (token-scanner-char-scanner scanner))))
44
dea4d055
MW
45;;;--------------------------------------------------------------------------
46;;; Utilities.
47
48(defun show-char (stream char &optional colonp atsignp)
49 "Format CHAR to STREAM in a readable way.
50
51 Usable in `format''s ~/.../ command."
52 (declare (ignore colonp atsignp))
53 (cond ((null char) (write-string "<eof>" stream))
54 ((and (graphic-char-p char) (char/= char #\space))
55 (format stream "`~C'" char))
56 (t (format stream "<~(~:C~)>" char))))
57
58(defun scan-comment (scanner)
59 "Scan a comment (either `/* ... */' or `// ...') from SCANNER.
60
61 The result isn't interesting."
62 (with-parser-context (character-scanner-context :scanner scanner)
63 (parse (or (and "/*"
64 (and (skip-many ()
65 (and (skip-many () (not #\*))
66 (label "*/" (skip-many (:min 1) #\*)))
67 (not #\/))
68 #\/))
69 (and "//"
70 (skip-many () (not #\newline))
71 (? #\newline))))))
72
bf090e02
MW
73(defmethod make-scanner-stream ((scanner sod-token-scanner))
74 (make-scanner-stream (token-scanner-char-scanner scanner)))
75
dea4d055
MW
76;;;--------------------------------------------------------------------------
77;;; Error reporting.
78
bf090e02
MW
79(defvar *indicator-map* (make-hash-table)
80 "Hash table mapping indicator objects to human-readable descriptions.")
81
82(defun define-indicator (indicator description)
83 (setf (gethash indicator *indicator-map*) description)
84 indicator)
85
dea4d055
MW
86(export 'syntax-error)
87(defun syntax-error (scanner expected &key (continuep t))
88 "Signal a (maybe) continuable syntax error."
89 (labels ((show-token (type value)
90 (if (characterp type)
91 (format nil "~/sod::show-char/" type)
92 (case type
93 (:id (format nil "<identifier~@[ `~A'~]>" value))
94 (:string "<string-literal>")
95 (:char "<character-literal>")
96 (:eof "<end-of-file>")
97 (:ellipsis "`...'")
98 (t (format nil "<? ~S~@[ ~S~]>" type value)))))
99 (show-expected (thing)
bf090e02
MW
100 (acond ((gethash thing *indicator-map*) it)
101 ((atom thing) (show-token thing nil))
102 ((eq (car thing) :id)
103 (format nil "`~A'" (cadr thing)))
104 (t (format nil "<? ~S>" thing)))))
dea4d055
MW
105 (funcall (if continuep #'cerror* #'error)
106 "Syntax error: ~
bf090e02 107 expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~
dea4d055
MW
108 but found ~A"
109 (mapcar #'show-expected expected)
110 (show-token (token-type scanner) (token-value scanner)))))
111
bf090e02
MW
112(export 'lexer-error)
113(defun lexer-error (char-scanner expected consumedp)
114 "Signal a continuable lexical error."
115 (cerror* "Lexical error: ~
116 expected ~{~#[<bug>~;~A~;~A or ~A~;:~A, ~]~} ~
117 but found ~/sod::show-char/~
118 ~@[ at ~A~]"
119 (mapcar (lambda (exp)
120 (typecase exp
121 (character (format nil "~/sod::show-char/" exp))
122 (string (format nil "`~A'" exp))
123 ((cons (eql :digit) *) (format nil "<radix-~A digit>"
124 (cadr exp)))
125 ((eql :eof) "<end-of-file>")
126 ((eql :any) "<character>")
127 (t (format nil "<? ~S>" exp))))
128 expected)
129 (and (not (scanner-at-eof-p char-scanner))
130 (scanner-current-char char-scanner))
131 (and consumedp (file-location char-scanner))))
132
dea4d055
MW
133;;;--------------------------------------------------------------------------
134;;; Token scanner protocol implementation.
135
136(defmethod scanner-token ((scanner sod-token-scanner))
137 (with-slots (char-scanner line column) scanner
138 (with-parser-context (character-scanner-context :scanner char-scanner)
139
140 (flet ((scan-digits (&key (radix 10) (min 1) (init 0))
141 ;; Scan an return a sequence of digits.
142 (parse (many (acc init (+ (* acc radix) it) :min min)
143 (label (list :digit radix)
144 (filter (lambda (ch)
bf090e02 145 (digit-char-p ch radix))))))))
dea4d055
MW
146
147 ;; Skip initial junk, and remember the place.
148 (loop
149 (setf (scanner-line scanner) (scanner-line char-scanner)
150 (scanner-column scanner) (scanner-column char-scanner))
151 (cond-parse (:consumedp cp :expected exp)
152 ((satisfies whitespace-char-p) (parse :whitespace))
153 ((scan-comment char-scanner))
bf090e02 154 (t (if cp (lexer-error char-scanner exp cp) (return)))))
dea4d055
MW
155
156 ;; Now parse something.
157 (cond-parse (:consumedp cp :expected exp)
158
159 ;; Alphanumerics mean we read an identifier.
160 ((or #\_ (satisfies alpha-char-p))
161 (values :id (with-output-to-string (out)
162 (write-char it out)
163 (parse (many (nil nil (write-char it out))
164 (or #\_ (satisfies alphanumericp)))))))
165
166 ;; Quotes introduce a literal.
167 ((seq ((quote (or #\" #\'))
168 (contents (many (out (make-string-output-stream)
169 (progn (write-char it out) out)
170 :final (get-output-stream-string out))
171 (or (and #\\ :any) (not quote))))
172 (nil (char quote)))
173 (ecase quote
174 (#\" contents)
175 (#\' (case (length contents)
176 (1 (char contents 0))
177 (0 (cerror* "Empty character literal") #\?)
178 (t (cerror* "Too many characters in literal")
179 (char contents 0))))))
180 (values (etypecase it
181 (character :char)
182 (string :string))
183 it))
184
185 ;; Zero introduces a chosen-radix integer.
186 ((and #\0
187 (or (and (or #\b #\B) (scan-digits :radix 2))
188 (and (or #\o #\O) (scan-digits :radix 8))
189 (and (or #\x #\X) (scan-digits :radix 16))
190 (scan-digits :radix 8 :min 0)))
191 (values :int it))
192
193 ;; Any other digit forces radix-10.
194 ((seq ((d (filter digit-char-p))
195 (i (scan-digits :radix 10 :min 0 :init d)))
196 i)
197 (values :int it))
198
199 ;; Some special punctuation sequences are single tokens.
200 ("..." (values :ellipsis nil))
201
202 ;; Any other character is punctuation.
203 (:any (values it nil))
204
205 ;; End of file means precisely that.
206 (:eof (values :eof nil))
207
208 ;; Report errors and try again. Because we must have consumed some
209 ;; input in order to get here (we've matched both :any and :eof) we
210 ;; must make progress on every call.
bf090e02
MW
211 (t
212 (assert cp)
213 (lexer-error char-scanner exp cp)
214 (scanner-token scanner)))))))
dea4d055
MW
215
216;;;----- That's all, folks --------------------------------------------------