Commit | Line | Data |
---|---|---|
dea4d055 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Parser for C types | |
4 | ;;; | |
5 | ;;; (c) 2009 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
e0808c47 | 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
dea4d055 MW |
11 | ;;; |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (cl:in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Declaration specifiers. | |
bf090e02 MW |
30 | ;;; |
31 | ;;; This stuff is distressingly complicated. | |
32 | ;;; | |
33 | ;;; Parsing a (single) declaration specifier is quite easy, and a declaration | |
34 | ;;; is just a sequence of these things. Except that there are a stack of | |
35 | ;;; rules about which ones are allowed to go together, and the language | |
36 | ;;; doesn't require them to appear in any particular order. | |
37 | ;;; | |
38 | ;;; A collection of declaration specifiers is carried about in a purpose-made | |
39 | ;;; object with a number of handy operations defined on it, and then I build | |
40 | ;;; some parsers in terms of them. The basic strategy is to parse | |
41 | ;;; declaration specifiers while they're valid, and keep track of what we've | |
42 | ;;; read. When I've reached the end, we'll convert what we've got into a | |
43 | ;;; `canonical form', and then convert that into a C type object of the | |
44 | ;;; appropriate kind. The whole business is rather more complicated than it | |
45 | ;;; really ought to be. | |
46 | ||
47 | ;; Firstly, a table of interesting things about the various declaration | |
48 | ;; specifiers that I might encounter. I categorize declaration specifiers | |
49 | ;; into four kinds. | |
50 | ;; | |
51 | ;; * `Type specifiers' describe the actual type, whether that's integer, | |
52 | ;; character, floating point, or some tagged or user-named type. | |
53 | ;; | |
54 | ;; * `Size specifiers' distinguish different sizes of the same basic type. | |
55 | ;; This is how we tell the difference between `int' and `long'. | |
56 | ;; | |
57 | ;; * `Sign specifiers' distinguish different signednesses. This is how we | |
58 | ;; tell the difference between `int' and `unsigned'. | |
59 | ;; | |
60 | ;; * `Qualifiers' are our old friends `const', `restrict' and `volatile'. | |
61 | ;; | |
62 | ;; These groupings are for my benefit here, in determining whether a | |
63 | ;; particular declaration specifier is valid in the current context. I don't | |
64 | ;; accept `function specifiers' (of which the only current example is | |
65 | ;; `inline') since it's meaningless to me. | |
dea4d055 MW |
66 | |
67 | (defclass declspec () | |
239fa5bd MW |
68 | ;; Despite the fact that it looks pretty trivial, this can't be done with |
69 | ;; `defstruct' for the simple reason that we add more methods to the | |
70 | ;; accessor functions later. | |
dea4d055 MW |
71 | ((label :type keyword :initarg :label :reader ds-label) |
72 | (name :type string :initarg :name :reader ds-name) | |
bf090e02 MW |
73 | (kind :type (member type sign size qualifier) |
74 | :initarg :kind :reader ds-kind) | |
75 | (taggedp :type boolean :initarg :taggedp | |
76 | :initform nil :reader ds-taggedp)) | |
77 | (:documentation | |
78 | "Represents the important components of a declaration specifier. | |
79 | ||
80 | The only interesting instances of this class are in the table | |
81 | `*declspec-map*'.")) | |
dea4d055 MW |
82 | |
83 | (defmethod shared-initialize :after ((ds declspec) slot-names &key) | |
bf090e02 MW |
84 | "If no name is provided then derive one from the label. |
85 | ||
86 | Most declaration specifiers have simple names for which this works well." | |
dea4d055 MW |
87 | (default-slot (ds 'name slot-names) |
88 | (string-downcase (ds-label ds)))) | |
89 | ||
dea4d055 MW |
90 | (defparameter *declspec-map* |
91 | (let ((map (make-hash-table :test #'equal))) | |
0e7cdea0 MW |
92 | (dolist (item '((type :void :char :int :float :double |
93 | (:bool :name "_Bool")) | |
94 | (complexity (:complex :name "_Complex") | |
95 | (:imaginary :name "_Imaginary")) | |
bf090e02 MW |
96 | ((type :taggedp t) :enum :struct :union) |
97 | (size :short :long (:long-long :name "long long")) | |
dea4d055 | 98 | (sign :signed :unsigned) |
bf090e02 MW |
99 | (qualifier :const :restrict :volatile))) |
100 | (destructuring-bind (kind &key (taggedp nil)) | |
101 | (let ((spec (car item))) | |
102 | (if (consp spec) spec (list spec))) | |
dea4d055 | 103 | (dolist (spec (cdr item)) |
bf090e02 MW |
104 | (destructuring-bind (label |
105 | &key | |
106 | (name (string-downcase label)) | |
107 | (taggedp taggedp)) | |
108 | (if (consp spec) spec (list spec)) | |
dea4d055 | 109 | (let ((ds (make-instance 'declspec |
bf090e02 MW |
110 | :label label |
111 | :name name | |
112 | :kind kind | |
113 | :taggedp taggedp))) | |
dea4d055 MW |
114 | (setf (gethash name map) ds |
115 | (gethash label map) ds)))))) | |
0e7cdea0 MW |
116 | (dolist (label '(:complex :imaginary :bool)) |
117 | (setf (gethash (string-downcase label) map) (gethash label map))) | |
bf090e02 | 118 | map) |
3109662a | 119 | "Maps symbolic labels and textual names to `declspec' instances.") |
bf090e02 MW |
120 | |
121 | ;; A collection of declaration specifiers, and how to merge them together. | |
122 | ||
123 | (defclass declspecs () | |
239fa5bd MW |
124 | ;; This could have been done with `defstruct' just as well, but a |
125 | ;; `defclass' can be tweaked interactively, which is a win at the moment. | |
bf090e02 | 126 | ((type :initform nil :initarg :type :reader ds-type) |
0e7cdea0 | 127 | (complexity :initform nil :initarg :complexity :reader ds-complexity) |
bf090e02 MW |
128 | (sign :initform nil :initarg :sign :reader ds-sign) |
129 | (size :initform nil :initarg :size :reader ds-size) | |
130 | (qualifier :initform nil :initarg :qualifiers :reader ds-qualifiers)) | |
131 | (:documentation | |
132 | "Represents a collection of declaration specifiers. | |
133 | ||
134 | This is used during type parsing to represent the type under | |
135 | construction. Instances are immutable: we build new ones rather than | |
136 | modifying existing ones. This leads to a certain amount of churn, but | |
137 | we'll just have to live with that. | |
138 | ||
139 | (Why are instances immutable? Because it's much easier to merge a new | |
3109662a MW |
140 | specifier into an existing collection and then check that the resulting |
141 | thing is valid, rather than having to deal with all of the possible | |
bf090e02 MW |
142 | special cases of what the new thing might be. And if the merged |
143 | collection isn't good, I must roll back to the previous version. So I | |
144 | don't get to take advantage of a mutable structure.)")) | |
dea4d055 MW |
145 | |
146 | (defmethod ds-label ((ty c-type)) :c-type) | |
147 | (defmethod ds-name ((ty c-type)) (princ-to-string ty)) | |
148 | (defmethod ds-kind ((ty c-type)) 'type) | |
149 | ||
150 | (defparameter *good-declspecs* | |
0e7cdea0 MW |
151 | '(((:int) (:signed :unsigned) (:short :long :long-long) ()) |
152 | ((:char) (:signed :unsigned) () ()) | |
153 | ((:double) () (:long) (:complex :imaginary)) | |
154 | (t () () ())) | |
dea4d055 MW |
155 | "List of good collections of declaration specifiers. |
156 | ||
0e7cdea0 MW |
157 | Each item is a list of the form (TYPES SIGNS SIZES COMPLEXITIES). Each of |
158 | TYPES, SIGNS, SIZES, and COMPLEXITIES, is either a list of acceptable | |
159 | specifiers of the appropriate kind, or T, which matches any specifier.") | |
dea4d055 | 160 | |
dea4d055 MW |
161 | (defun good-declspecs-p (specs) |
162 | "Are SPECS a good collection of declaration specifiers?" | |
0e7cdea0 MW |
163 | (let ((speclist (list (ds-type specs) |
164 | (ds-sign specs) | |
165 | (ds-size specs) | |
166 | (ds-complexity specs)))) | |
dea4d055 MW |
167 | (some (lambda (it) |
168 | (every (lambda (spec pat) | |
169 | (or (eq pat t) (null spec) | |
170 | (member (ds-label spec) pat))) | |
171 | speclist it)) | |
172 | *good-declspecs*))) | |
173 | ||
174 | (defun combine-declspec (specs ds) | |
175 | "Combine the declspec DS with the existing SPECS. | |
176 | ||
177 | Returns new DECLSPECS if they're OK, or `nil' if not. The old SPECS are | |
178 | not modified." | |
bf090e02 | 179 | |
dea4d055 MW |
180 | (let* ((kind (ds-kind ds)) |
181 | (old (slot-value specs kind))) | |
182 | (multiple-value-bind (ok new) | |
183 | (case kind | |
184 | (qualifier (values t (adjoin ds old))) | |
185 | (size (cond ((not old) (values t ds)) | |
186 | ((and (eq (ds-label old) :long) (eq ds old)) | |
187 | (values t (gethash :long-long *declspec-map*))) | |
188 | (t (values nil nil)))) | |
189 | (t (values (not old) ds))) | |
190 | (if ok | |
191 | (let ((copy (copy-instance specs))) | |
192 | (setf (slot-value copy kind) new) | |
193 | (and (good-declspecs-p copy) copy)) | |
194 | nil)))) | |
195 | ||
dea4d055 | 196 | (defun declspecs-type (specs) |
bf090e02 | 197 | "Convert `declspecs' SPECS into a standalone C type object." |
dea4d055 MW |
198 | (let ((type (ds-type specs)) |
199 | (size (ds-size specs)) | |
bf090e02 | 200 | (sign (ds-sign specs)) |
5ce911a0 | 201 | (cplx (ds-complexity specs)) |
bf090e02 MW |
202 | (quals (mapcar #'ds-label (ds-qualifiers specs)))) |
203 | (cond ((typep type 'c-type) | |
204 | (qualify-c-type type quals)) | |
5ce911a0 | 205 | ((or type size sign cplx) |
bf090e02 | 206 | (when (and sign (eq (ds-label sign) :signed) |
dea4d055 MW |
207 | (eq (ds-label type) :int)) |
208 | (setf sign nil)) | |
209 | (cond ((and (or (null type) (eq (ds-label type) :int)) | |
210 | (or size sign)) | |
211 | (setf type nil)) | |
212 | ((null type) | |
213 | (setf type (gethash :int *declspec-map*)))) | |
214 | (make-simple-type (format nil "~{~@[~A~^ ~]~}" | |
239fa5bd | 215 | (mapcar #'ds-name |
dea4d055 | 216 | (remove nil |
5ce911a0 MW |
217 | (list sign cplx |
218 | size type)))) | |
bf090e02 | 219 | quals)) |
dea4d055 MW |
220 | (t |
221 | nil)))) | |
222 | ||
bf090e02 | 223 | ;; Parsing declaration specifiers. |
dea4d055 | 224 | |
bf090e02 | 225 | (define-indicator :declspec "<declaration-specifier>") |
dea4d055 | 226 | |
bf090e02 MW |
227 | (defun scan-declspec |
228 | (scanner &key (predicate (constantly t)) (indicator :declspec)) | |
3109662a | 229 | "Scan a `declspec' from SCANNER. |
dea4d055 | 230 | |
bf090e02 MW |
231 | If PREDICATE is provided then only succeed if (funcall PREDICATE DECLSPEC) |
232 | is true, where DECLSPEC is the raw declaration specifier or C-type object, | |
233 | so we won't have fetched the tag for a tagged type yet. If the PREDICATE | |
234 | returns false then the scan fails without consuming input. | |
dea4d055 | 235 | |
bf090e02 MW |
236 | If we couldn't find an acceptable declaration specifier then issue |
237 | INDICATOR as the failure indicator. Value on success is either a | |
238 | `declspec' object or a `c-type' object." | |
dea4d055 | 239 | |
bf090e02 MW |
240 | ;; Turns out to be easier to do this by hand. |
241 | (let ((ds (and (eq (token-type scanner) :id) | |
242 | (let ((kw (token-value scanner))) | |
ec5cb3ca MW |
243 | (or (and (boundp '*module-type-map*) |
244 | (gethash kw *module-type-map*)) | |
bf090e02 MW |
245 | (gethash kw *declspec-map*)))))) |
246 | (cond ((or (not ds) (and predicate (not (funcall predicate ds)))) | |
247 | (values (list indicator) nil nil)) | |
8293b90a | 248 | ((and (typep ds 'declspec) (ds-taggedp ds)) |
bf090e02 MW |
249 | (scanner-step scanner) |
250 | (if (eq (token-type scanner) :id) | |
251 | (let ((ty (make-c-tagged-type (ds-label ds) | |
252 | (token-value scanner)))) | |
253 | (scanner-step scanner) | |
254 | (values ty t t)) | |
255 | (values :tag nil t))) | |
256 | (t | |
257 | (scanner-step scanner) | |
258 | (values ds t t))))) | |
dea4d055 | 259 | |
bf090e02 MW |
260 | (defun scan-and-merge-declspec (scanner specs) |
261 | "Scan a declaration specifier and merge it with SPECS. | |
262 | ||
263 | This is a parser function. If it succeeds, it returns the merged | |
264 | `declspecs' object. It can fail either if no valid declaration specifier | |
265 | is found or it cannot merge the declaration specifier with the existing | |
266 | SPECS." | |
267 | ||
268 | (with-parser-context (token-scanner-context :scanner scanner) | |
269 | (if-parse (:consumedp consumedp) (scan-declspec scanner) | |
270 | (aif (combine-declspec specs it) | |
271 | (values it t consumedp) | |
272 | (values (list :declspec) nil consumedp))))) | |
273 | ||
239fa5bd | 274 | (export 'parse-c-type) |
bf090e02 MW |
275 | (defun parse-c-type (scanner) |
276 | "Parse a C type from declaration specifiers. | |
dea4d055 | 277 | |
bf090e02 MW |
278 | This is a parser function. If it succeeds then the result is a `c-type' |
279 | object representing the type it found. Note that this function won't try | |
280 | to parse a C declarator." | |
dea4d055 | 281 | |
bf090e02 MW |
282 | (with-parser-context (token-scanner-context :scanner scanner) |
283 | (if-parse (:result specs :consumedp cp) | |
284 | (many (specs (make-instance 'declspecs) it :min 1) | |
285 | (peek (scan-and-merge-declspec scanner specs))) | |
286 | (let ((type (declspecs-type specs))) | |
287 | (if type (values type t cp) | |
288 | (values (list :declspec) nil cp)))))) | |
dea4d055 | 289 | |
bf090e02 MW |
290 | ;;;-------------------------------------------------------------------------- |
291 | ;;; Parsing declarators. | |
292 | ;;; | |
293 | ;;; The syntax of declaration specifiers was horrific. Declarators are a | |
294 | ;;; very simple expression syntax, but this time the semantics are awful. In | |
295 | ;;; particular, they're inside-out. If <> denotes mumble of foo, then op <> | |
296 | ;;; is something like mumble of op of foo. Unfortunately, the expression | |
297 | ;;; parser engine wants to apply op of mumble of foo, so I'll have to do some | |
298 | ;;; work to fix the impedance mismatch. | |
299 | ;;; | |
300 | ;;; The currency we'll use is a pair (FUNC . NAME), with the semantics that | |
301 | ;;; (funcall FUNC TYPE) returns the derived type. The result of | |
302 | ;;; `parse-declarator' will be of this form. | |
dea4d055 | 303 | |
239fa5bd | 304 | (export 'parse-declarator) |
ea578bb4 | 305 | (defun parse-declarator (scanner base-type &key kernel abstractp) |
239fa5bd | 306 | "Parse a C declarator, returning a pair (C-TYPE . NAME). |
dea4d055 | 307 | |
239fa5bd MW |
308 | The SCANNER is a token scanner to read from. The BASE-TYPE is the type |
309 | extracted from the preceding declaration specifiers, as parsed by | |
310 | `parse-c-type'. | |
311 | ||
312 | The result contains both the resulting constructed C-TYPE (with any | |
313 | qualifiers etc. as necessary), and the name from the middle of the | |
ea578bb4 | 314 | declarator. The name is parsed using the KERNEL parser provided, and |
239fa5bd MW |
315 | defaults to matching a simple identifier `:id'. This might, e.g., be |
316 | (? :id) to parse an `abstract declarator' which has optional names. | |
317 | ||
ea578bb4 | 318 | There's an annoying ambiguity in the syntax, if an empty KERNEL is |
239fa5bd MW |
319 | permitted. In this case, you must ensure that ABSTRACTP is true so that |
320 | the appropriate heuristic can be applied. As a convenience, if ABSTRACTP | |
ea578bb4 | 321 | is true then `(? :id)' is used as the default KERNEL." |
239fa5bd | 322 | (with-parser-context (token-scanner-context :scanner scanner) |
ea578bb4 | 323 | (let ((kernel-parser (cond (kernel kernel) |
239fa5bd MW |
324 | (abstractp (parser () (? :id))) |
325 | (t (parser () :id))))) | |
326 | ||
327 | (labels ((qualifiers () | |
328 | ;; qualifier* | |
329 | ||
330 | (parse | |
331 | (seq ((quals (list () | |
332 | (scan-declspec | |
333 | scanner | |
334 | :indicator :qualifier | |
335 | :predicate (lambda (ds) | |
336 | (and (typep ds 'declspec) | |
337 | (eq (ds-kind ds) | |
338 | 'qualifier))))))) | |
339 | (mapcar #'ds-label quals)))) | |
340 | ||
341 | (star () | |
342 | ;; Prefix: `*' qualifiers | |
343 | ||
344 | (parse (seq (#\* (quals (qualifiers))) | |
345 | (preop "*" (state 9) | |
346 | (cons (lambda (type) | |
347 | (funcall (car state) | |
348 | (make-pointer-type type quals))) | |
349 | (cdr state)))))) | |
350 | ||
c28f6ae9 MW |
351 | (predict-argument-list-p () |
352 | ;; See `prefix-lparen'. Predict an argument list rather | |
353 | ;; than a nested declarator if (a) abstract declarators are | |
354 | ;; permitted and (b) the next token is a declaration | |
355 | ;; specifier or ellipsis. | |
356 | (let ((type (token-type scanner)) | |
357 | (value (token-value scanner))) | |
358 | (and abstractp | |
359 | (or (eq type :ellipsis) | |
360 | (and (eq type :id) | |
361 | (or (gethash value *module-type-map*) | |
362 | (gethash value *declspec-map*))))))) | |
239fa5bd MW |
363 | |
364 | (prefix-lparen () | |
365 | ;; Prefix: `(' | |
366 | ;; | |
367 | ;; Opening parentheses are treated as prefix operators by | |
368 | ;; the expression parsing engine. There's an annoying | |
369 | ;; ambiguity in the syntax if abstract declarators are | |
370 | ;; permitted: a `(' might be either the start of a nested | |
371 | ;; subdeclarator or the start of a postfix function argument | |
372 | ;; list. The two are disambiguated by stating that if the | |
373 | ;; token following the `(' is a `)' or a declaration | |
374 | ;; specifier, then we have a postfix argument list. | |
375 | (parse | |
376 | (peek (seq (#\( | |
c28f6ae9 | 377 | (nil (if (predict-argument-list-p) |
239fa5bd MW |
378 | (values nil nil nil) |
379 | (values t t nil)))) | |
380 | (lparen #\)))))) | |
381 | ||
ea578bb4 MW |
382 | (kernel () |
383 | (parse (seq ((name (funcall kernel-parser))) | |
239fa5bd MW |
384 | (cons #'identity name)))) |
385 | ||
386 | (argument-list () | |
63a86f42 | 387 | ;; [argument [`,' argument]* [`,' `...']] | `...' |
b0ff693c MW |
388 | ;; |
389 | ;; The possibility of a trailing `,' `...' means that we | |
390 | ;; can't use the standard `list' parser. Note that, unlike | |
391 | ;; `real' C, we allow an ellipsis even if there are no | |
392 | ;; explicit arguments. | |
393 | ||
394 | (let ((args nil)) | |
395 | (loop | |
396 | (when (eq (token-type scanner) :ellipsis) | |
397 | (push :ellipsis args) | |
398 | (scanner-step scanner) | |
399 | (return)) | |
400 | (multiple-value-bind (arg winp consumedp) | |
401 | (parse (seq ((base-type (parse-c-type scanner)) | |
402 | (dtor (parse-declarator scanner | |
403 | base-type | |
404 | :abstractp t))) | |
405 | (make-argument (cdr dtor) (car dtor)))) | |
406 | (unless winp | |
407 | (if (or consumedp args) | |
408 | (return-from argument-list (values arg nil t)) | |
409 | (return))) | |
410 | (push arg args)) | |
411 | (unless (eq (token-type scanner) #\,) | |
412 | (return)) | |
413 | (scanner-step scanner)) | |
414 | (values (nreverse args) t args))) | |
239fa5bd MW |
415 | |
416 | (postfix-lparen () | |
417 | ;; Postfix: `(' argument-list `)' | |
418 | ||
419 | (parse (seq (#\( (args (argument-list)) #\)) | |
420 | (postop "()" (state 10) | |
421 | (cons (lambda (type) | |
422 | (funcall (car state) | |
423 | (make-function-type type args))) | |
424 | (cdr state)))))) | |
425 | ||
426 | (dimension () | |
427 | ;; `[' c-fragment ']' | |
428 | ||
429 | (parse (seq ((frag (parse-delimited-fragment | |
430 | scanner #\[ #\]))) | |
431 | (c-fragment-text frag)))) | |
432 | ||
433 | (lbracket () | |
434 | ;; Postfix: dimension+ | |
435 | ||
436 | (parse (seq ((dims (list (:min 1) (dimension)))) | |
437 | (postop "[]" (state 10) | |
438 | (cons (lambda (type) | |
439 | (funcall (car state) | |
440 | (make-array-type type dims))) | |
441 | (cdr state))))))) | |
442 | ||
443 | ;; And now we actually do the declarator parsing. | |
444 | (parse (seq ((value (expr (:nestedp nestedp) | |
445 | ||
446 | ;; An actual operand. | |
ea578bb4 | 447 | (kernel) |
239fa5bd MW |
448 | |
449 | ;; Binary operators. There aren't any. | |
450 | nil | |
451 | ||
452 | ;; Prefix operators. | |
453 | (or (star) | |
454 | (prefix-lparen)) | |
455 | ||
456 | ;; Postfix operators. | |
457 | (or (postfix-lparen) | |
458 | (lbracket) | |
459 | (when nestedp (seq (#\)) (rparen #\)))))))) | |
460 | (cons (funcall (car value) base-type) (cdr value)))))))) | |
dea4d055 MW |
461 | |
462 | ;;;----- That's all, folks -------------------------------------------------- |