Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Lexical analysis for input parser | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensble Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (cl:in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Class definition. | |
30 | ||
31 | (export 'sod-token-scanner) | |
32 | (defclass sod-token-scanner (token-scanner) | |
33 | ((char-scanner :initarg :char-scanner :reader token-scanner-char-scanner)) | |
34 | (:documentation | |
35 | "A token scanner for SOD input files. | |
36 | ||
37 | Not a lot here, apart from a character scanner to read from and the | |
38 | standard token scanner infrastructure.")) | |
39 | ||
bf090e02 MW |
40 | (defmethod shared-initialize :after |
41 | ((scanner sod-token-scanner) slot-names &key) | |
42 | (default-slot (scanner 'sod-parser::filename slot-names) | |
43 | (scanner-filename (token-scanner-char-scanner scanner)))) | |
44 | ||
dea4d055 MW |
45 | ;;;-------------------------------------------------------------------------- |
46 | ;;; Utilities. | |
47 | ||
48 | (defun show-char (stream char &optional colonp atsignp) | |
49 | "Format CHAR to STREAM in a readable way. | |
50 | ||
51 | Usable in `format''s ~/.../ command." | |
52 | (declare (ignore colonp atsignp)) | |
53 | (cond ((null char) (write-string "<eof>" stream)) | |
54 | ((and (graphic-char-p char) (char/= char #\space)) | |
55 | (format stream "`~C'" char)) | |
56 | (t (format stream "<~(~:C~)>" char)))) | |
57 | ||
58 | (defun scan-comment (scanner) | |
59 | "Scan a comment (either `/* ... */' or `// ...') from SCANNER. | |
60 | ||
61 | The result isn't interesting." | |
62 | (with-parser-context (character-scanner-context :scanner scanner) | |
63 | (parse (or (and "/*" | |
64 | (and (skip-many () | |
65 | (and (skip-many () (not #\*)) | |
66 | (label "*/" (skip-many (:min 1) #\*))) | |
67 | (not #\/)) | |
68 | #\/)) | |
69 | (and "//" | |
70 | (skip-many () (not #\newline)) | |
71 | (? #\newline)))))) | |
72 | ||
bf090e02 MW |
73 | (defmethod make-scanner-stream ((scanner sod-token-scanner)) |
74 | (make-scanner-stream (token-scanner-char-scanner scanner))) | |
75 | ||
dea4d055 MW |
76 | ;;;-------------------------------------------------------------------------- |
77 | ;;; Error reporting. | |
78 | ||
bf090e02 MW |
79 | (defvar *indicator-map* (make-hash-table) |
80 | "Hash table mapping indicator objects to human-readable descriptions.") | |
81 | ||
82 | (defun define-indicator (indicator description) | |
83 | (setf (gethash indicator *indicator-map*) description) | |
84 | indicator) | |
85 | ||
dea4d055 MW |
86 | (export 'syntax-error) |
87 | (defun syntax-error (scanner expected &key (continuep t)) | |
88 | "Signal a (maybe) continuable syntax error." | |
89 | (labels ((show-token (type value) | |
90 | (if (characterp type) | |
91 | (format nil "~/sod::show-char/" type) | |
92 | (case type | |
93 | (:id (format nil "<identifier~@[ `~A'~]>" value)) | |
94 | (:string "<string-literal>") | |
95 | (:char "<character-literal>") | |
96 | (:eof "<end-of-file>") | |
97 | (:ellipsis "`...'") | |
98 | (t (format nil "<? ~S~@[ ~S~]>" type value))))) | |
99 | (show-expected (thing) | |
bf090e02 MW |
100 | (acond ((gethash thing *indicator-map*) it) |
101 | ((atom thing) (show-token thing nil)) | |
102 | ((eq (car thing) :id) | |
103 | (format nil "`~A'" (cadr thing))) | |
104 | (t (format nil "<? ~S>" thing))))) | |
dea4d055 MW |
105 | (funcall (if continuep #'cerror* #'error) |
106 | "Syntax error: ~ | |
bf090e02 | 107 | expected ~{~#[<bug>~;~A~;~A or ~A~:;~A, ~]~} ~ |
dea4d055 MW |
108 | but found ~A" |
109 | (mapcar #'show-expected expected) | |
110 | (show-token (token-type scanner) (token-value scanner))))) | |
111 | ||
bf090e02 MW |
112 | (export 'lexer-error) |
113 | (defun lexer-error (char-scanner expected consumedp) | |
114 | "Signal a continuable lexical error." | |
115 | (cerror* "Lexical error: ~ | |
116 | expected ~{~#[<bug>~;~A~;~A or ~A~;:~A, ~]~} ~ | |
117 | but found ~/sod::show-char/~ | |
118 | ~@[ at ~A~]" | |
119 | (mapcar (lambda (exp) | |
120 | (typecase exp | |
121 | (character (format nil "~/sod::show-char/" exp)) | |
122 | (string (format nil "`~A'" exp)) | |
123 | ((cons (eql :digit) *) (format nil "<radix-~A digit>" | |
124 | (cadr exp))) | |
125 | ((eql :eof) "<end-of-file>") | |
126 | ((eql :any) "<character>") | |
127 | (t (format nil "<? ~S>" exp)))) | |
128 | expected) | |
129 | (and (not (scanner-at-eof-p char-scanner)) | |
130 | (scanner-current-char char-scanner)) | |
131 | (and consumedp (file-location char-scanner)))) | |
132 | ||
dea4d055 MW |
133 | ;;;-------------------------------------------------------------------------- |
134 | ;;; Token scanner protocol implementation. | |
135 | ||
136 | (defmethod scanner-token ((scanner sod-token-scanner)) | |
137 | (with-slots (char-scanner line column) scanner | |
138 | (with-parser-context (character-scanner-context :scanner char-scanner) | |
139 | ||
140 | (flet ((scan-digits (&key (radix 10) (min 1) (init 0)) | |
141 | ;; Scan an return a sequence of digits. | |
142 | (parse (many (acc init (+ (* acc radix) it) :min min) | |
143 | (label (list :digit radix) | |
144 | (filter (lambda (ch) | |
bf090e02 | 145 | (digit-char-p ch radix)))))))) |
dea4d055 MW |
146 | |
147 | ;; Skip initial junk, and remember the place. | |
148 | (loop | |
149 | (setf (scanner-line scanner) (scanner-line char-scanner) | |
150 | (scanner-column scanner) (scanner-column char-scanner)) | |
151 | (cond-parse (:consumedp cp :expected exp) | |
152 | ((satisfies whitespace-char-p) (parse :whitespace)) | |
153 | ((scan-comment char-scanner)) | |
bf090e02 | 154 | (t (if cp (lexer-error char-scanner exp cp) (return))))) |
dea4d055 MW |
155 | |
156 | ;; Now parse something. | |
157 | (cond-parse (:consumedp cp :expected exp) | |
158 | ||
159 | ;; Alphanumerics mean we read an identifier. | |
160 | ((or #\_ (satisfies alpha-char-p)) | |
161 | (values :id (with-output-to-string (out) | |
162 | (write-char it out) | |
163 | (parse (many (nil nil (write-char it out)) | |
164 | (or #\_ (satisfies alphanumericp))))))) | |
165 | ||
166 | ;; Quotes introduce a literal. | |
167 | ((seq ((quote (or #\" #\')) | |
168 | (contents (many (out (make-string-output-stream) | |
169 | (progn (write-char it out) out) | |
170 | :final (get-output-stream-string out)) | |
171 | (or (and #\\ :any) (not quote)))) | |
172 | (nil (char quote))) | |
173 | (ecase quote | |
174 | (#\" contents) | |
175 | (#\' (case (length contents) | |
176 | (1 (char contents 0)) | |
177 | (0 (cerror* "Empty character literal") #\?) | |
178 | (t (cerror* "Too many characters in literal") | |
179 | (char contents 0)))))) | |
180 | (values (etypecase it | |
181 | (character :char) | |
182 | (string :string)) | |
183 | it)) | |
184 | ||
185 | ;; Zero introduces a chosen-radix integer. | |
186 | ((and #\0 | |
187 | (or (and (or #\b #\B) (scan-digits :radix 2)) | |
188 | (and (or #\o #\O) (scan-digits :radix 8)) | |
189 | (and (or #\x #\X) (scan-digits :radix 16)) | |
190 | (scan-digits :radix 8 :min 0))) | |
191 | (values :int it)) | |
192 | ||
193 | ;; Any other digit forces radix-10. | |
194 | ((seq ((d (filter digit-char-p)) | |
195 | (i (scan-digits :radix 10 :min 0 :init d))) | |
196 | i) | |
197 | (values :int it)) | |
198 | ||
199 | ;; Some special punctuation sequences are single tokens. | |
200 | ("..." (values :ellipsis nil)) | |
201 | ||
202 | ;; Any other character is punctuation. | |
203 | (:any (values it nil)) | |
204 | ||
205 | ;; End of file means precisely that. | |
206 | (:eof (values :eof nil)) | |
207 | ||
208 | ;; Report errors and try again. Because we must have consumed some | |
209 | ;; input in order to get here (we've matched both :any and :eof) we | |
210 | ;; must make progress on every call. | |
bf090e02 MW |
211 | (t |
212 | (assert cp) | |
213 | (lexer-error char-scanner exp cp) | |
214 | (scanner-token scanner))))))) | |
dea4d055 MW |
215 | |
216 | ;;;----- That's all, folks -------------------------------------------------- |