Massive reorganization in progress.
[sod] / pre-reorg / parse-c-types.lisp
CommitLineData
abdf50aa
MW
1;;; -*-lisp-*-
2;;;
3;;; Parser for C types
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Simple Object Definition system.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Declaration specifiers.
30;;;
31;;; This is a little messy. The C rules, which we're largely following,
32;;; allow declaration specifiers to be written in any oreder, and allows an
33;;; arbitrary number of the things. This is mainly an exercise in
34;;; book-keeping, but we make an effort to categorize the various kinds of
35;;; specifiers rather better than the C standard.
36;;;
37;;; We consider four kinds of declaration specifiers:
38;;;
39;;; * Type qualifiers: `const', `restrict', and `volatile'.
40;;; * Sign specifiers: `signed' and `unsigned'.
41;;; * Size specifiers: `short' and `long'.
42;;; * Type specifiers: `void', `char', `int', `float', and `double',
43;;;
44;;; The C standard acknowledges the category of type qualifiers (6.7.3), but
45;;; groups the other three kinds together and calls them all `type
46;;; specifiers' (6.7.2).
47
ddee4bb1
MW
48;; Let's not repeat ourselves.
49(macrolet ((define-declaration-specifiers (&rest defs)
50 (let ((mappings nil)
51 (deftypes nil)
52 (hashvar (gensym "HASH"))
53 (keyvar (gensym "KEY"))
54 (valvar (gensym "VAL")))
55 (dolist (def defs)
56 (destructuring-bind (kind &rest clauses) def
57 (let ((maps (mapcar (lambda (clause)
58 (if (consp clause)
59 clause
60 (cons (string-downcase clause)
61 clause)))
62 clauses)))
63 (push `(deftype ,(symbolicate 'decl- kind) ()
64 '(member ,@(mapcar #'cdr maps)))
65 deftypes)
66 (setf mappings (nconc (remove-if-not #'car maps)
67 mappings)))))
68 `(progn
69 ,@(nreverse deftypes)
70 (defparameter *declspec-map*
71 (let ((,hashvar (make-hash-table :test #'equal)))
72 (mapc (lambda (,keyvar ,valvar)
73 (setf (gethash ,keyvar ,hashvar) ,valvar))
74 ',(mapcar #'car mappings)
75 ',(mapcar #'cdr mappings))
76 ,hashvar))))))
77 (define-declaration-specifiers
78 (type :char :int :float :double :void)
79 (size :short :long (nil . :long-long))
80 (sign :signed :unsigned)
81 (qualifier :const :restrict :volatile)
82 (tagged :enum :struct :union)))
83
abdf50aa
MW
84(defstruct (declspec
85 (:predicate declspecp))
86 "Represents a declaration specifier being built."
87 (qualifiers nil :type list)
ddee4bb1
MW
88 (sign nil :type (or decl-sign null))
89 (size nil :type (or decl-size null))
90 (type nil :type (or decl-type c-type null)))
abdf50aa
MW
91
92(defun check-declspec (spec)
93 "Check that the declaration specifiers in SPEC are a valid combination.
94
95 This is surprisingly hairy.
96
97 It could be even worse: at least validity is monotonic. Consider an
98 alternate language where `double' is a size specifier like `long' rather
99 than being a primary type specifier like `float' (so you'd be able to say
100 things like `long double float'). Then `long float' would be invalid, but
101 `long float double' would be OK. We'd therefore need an additional
102 argument to know whether we were preparing a final set of specifiers (in
103 which case we'd have to reject `long float') or whether this is an
104 intermediate step (in which case we'd have to tentatively allow it in the
105 hope that the user added the necessary `double' later)."
106
107 (let ((sign (declspec-sign spec))
108 (size (declspec-size spec))
109 (type (declspec-type spec)))
110
111 (and (loop for (good-type good-signs good-sizes) in
112
113 ;; The entries in this table have the form (GOOD-TYPE
114 ;; GOOD-SIGNS GOOD-SIZES). The GOOD-TYPE is either a keyword
115 ;; or T (matches anything); the GOOD-SIZES and GOOD-SIGNS are
116 ;; lists. The SPEC must match at least one entry, as follows:
117 ;; the type must be NIL or match GOOD-TYPE; and the size and
118 ;; sign must match one of the elements of the corresponding
119 ;; GOOD list.
120 '((:int (nil :signed :unsigned) (nil :short :long :long-long))
121 (:char (nil :signed :unsigned) (nil))
122 (:double (nil) (nil :long))
123 (t (nil) (nil)))
124
125 thereis (and (or (eq type nil)
126 (eq good-type t)
127 (eq type good-type))
128 (member sign good-signs)
129 (member size good-sizes)))
130 spec)))
131
132(defun update-declspec-qualifiers (spec qual)
133 "Update the qualifiers in SPEC by adding QUAL.
134
135 The new declspec is returned if it's valid; otherwise NIL. SPEC is not
136 modified."
137
138 (let ((new (copy-declspec spec)))
139 (pushnew qual (declspec-qualifiers new))
140 (check-declspec new)))
141
142(defun update-declspec-sign (spec sign)
143 "Update the signedness in SPEC to be SIGN.
144
145 The new declspec is returned if it's valid; otherwise NIL. SPEC is not
146 modified."
147
148 (and (null (declspec-sign spec))
149 (let ((new (copy-declspec spec)))
150 (setf (declspec-sign new) sign)
151 (check-declspec new))))
152
153(defun update-declspec-size (spec size)
154 "Update the size in SPEC according to SIZE.
155
156 The new declspec is returned if it's valid; otherwise NIL. (This is a
157 little subtle because :LONG in particular can modify an existing size
158 entry.) SPEC is not modified."
159
160 (let ((new-size (case (declspec-size spec)
161 ((nil) size)
162 (:long (if (eq size :long) :long-long nil)))))
163 (and new-size
164 (let ((new (copy-declspec spec)))
165 (setf (declspec-size new) new-size)
166 (check-declspec new)))))
167
168(defun update-declspec-type (spec type)
169 "Update the type in SPEC to be TYPE.
170
171 The new declspec is returned if it's valid; otherwise NIL. SPEC is not
172 modified."
173
174 (and (null (declspec-type spec))
175 (let ((new (copy-declspec spec)))
176 (setf (declspec-type new) type)
177 (check-declspec new))))
178
179(defun canonify-declspec (spec)
180 "Transform the declaration specifiers SPEC into a canonical form.
181
182 The idea is that, however grim the SPEC, we can turn it into something
183 vaguely idiomatic, and pick precisely one of the possible synonyms.
184
185 The rules are that we suppress `signed' when it's redundant, and suppress
186 `int' if a size or signedness specifier is present. (Note that `signed
187 char' is not the same as `char', so stripping `signed' is only correct
188 when the type is `int'.)
189
190 The qualifiers are sorted and uniquified here; the relative ordering of
191 the sign/size/type specifiers will be determined by DECLSPEC-KEYWORDS."
192
193 (let ((quals (declspec-qualifiers spec))
194 (sign (declspec-sign spec))
195 (size (declspec-size spec))
196 (type (declspec-type spec)))
197 (cond ((eq type :int)
198 (when (eq sign :signed)
199 (setf (declspec-sign spec) nil))
200 (when (or sign size)
201 (setf (declspec-type spec) nil)))
202 ((not (or sign size type))
203 (setf (declspec-type spec) :int)))
204 (setf (declspec-qualifiers spec)
205 (delete-duplicates (sort (copy-list quals) #'string<)))
206 spec))
207
208(defun declspec-keywords (spec &optional qualsp)
209 "Return a list of strings for the declaration specifiers SPEC.
210
211 If QUALSP then return the type qualifiers as well."
212
213 (let ((quals (declspec-qualifiers spec))
214 (sign (declspec-sign spec))
215 (size (declspec-size spec))
216 (type (declspec-type spec)))
217 (nconc (and qualsp (mapcar #'string-downcase quals))
218 (and sign (list (string-downcase sign)))
219 (case size
220 ((nil) nil)
221 (:long-long (list "long long"))
222 (t (list (string-downcase size))))
223 (etypecase type
224 (null nil)
225 (keyword (list (string-downcase type)))
226 (simple-c-type (list (c-type-name type)))
227 (tagged-c-type (list (string-downcase (c-tagged-type-kind type))
228 (c-type-tag type)))))))
229
230(defun declspec-c-type (spec)
231 "Return a C-TYPE object corresponding to SPEC."
232 (canonify-declspec spec)
233 (let* ((type (declspec-type spec))
234 (base (etypecase type
235 (symbol (make-simple-type
236 (format nil "~{~A~^ ~}"
237 (declspec-keywords spec))))
238 (c-type type))))
239 (qualify-type base (declspec-qualifiers spec))))
240
241(defun declaration-specifier-p (lexer)
242 "Answer whether the current token might be a declaration specifier."
ddee4bb1
MW
243 (and (eq (token-type lexer) :id)
244 (let ((id (token-value lexer)))
245 (or (gethash id *declspec-map*)
246 (gethash id *type-map*)))))
abdf50aa
MW
247
248(defun parse-c-type (lexer)
249 "Parse declaration specifiers from LEXER and return a C-TYPE."
250
251 (let ((spec (make-declspec))
ddee4bb1
MW
252 (found-any nil)
253 tok)
254 (flet ((token (&optional (ty (next-token lexer)))
255 (setf tok
256 (or (and (eq ty :id)
257 (gethash (token-value lexer) *declspec-map*))
258 ty)))
259 (update (func value)
260 (let ((new (funcall func spec value)))
261 (cond (new (setf spec new))
262 (t (cerror* "Invalid declaration specifier ~(~A~) ~
263 following `~{~A~^ ~}' (ignored)"
264 (format-token tok (token-value lexer))
265 (declspec-keywords spec t))
266 nil)))))
267 (token (token-type lexer))
268 (loop
269 (typecase tok
270 (decl-qualifier (update #'update-declspec-qualifiers tok))
271 (decl-sign (when (update #'update-declspec-sign tok)
272 (setf found-any t)))
273 (decl-size (when (update #'update-declspec-size tok)
274 (setf found-any t)))
275 (decl-type (when (update #'update-declspec-type tok)
276 (setf found-any t)))
277 (decl-tagged (let ((class (ecase tok
278 (:enum 'c-enum-type)
279 (:struct 'c-struct-type)
280 (:union 'c-union-type))))
281 (let ((tag (require-token lexer :id)))
282 (when tag
283 (update #'update-declspec-type
284 (make-instance class :tag tag))))))
285 ((eql :id) (let ((ty (gethash (token-value lexer) *type-map*)))
286 (when (or found-any (not ty))
287 (return))
288 (when (update #'update-declspec-type ty)
289 (setf found-any t))))
290 (t (return)))
291 (token))
292 (unless found-any
293 (cerror* "Missing type name (guessing at `int')"))
294 (declspec-c-type spec))))
abdf50aa
MW
295
296;;;--------------------------------------------------------------------------
297;;; Parsing declarators.
298;;;
299;;; This is a whole different ball game. The syntax is simple enough, but
300;;; the semantics is inside-out in a particularly unpleasant way.
301;;;
302;;; The basic idea is that declarator operators closer to the identifier (or
303;;; where the identifier would be) should be applied last (with postfix
304;;; operators being considered `closer' than prefix).
305;;;
306;;; One might thing that we can process prefix operators immediately. For
307;;; outer prefix operators, this is indeed correct, but in `int (*id)[]', for
308;;; example, we must wait to process the array before applying the pointer.
309;;;
310;;; We can translate each declarator operator into a function which, given a
311;;; type, returns the appropriate derived type. If we can arrange these
312;;; functions in the right order during the parse, we have only to compose
313;;; them together and apply them to the base type in order to finish the job.
314;;;
315;;; Consider the following skeletal declarator, with <> as a parenthesized
316;;; subdeclarator within.
317;;;
318;;; * * <> [] [] ---> a b d c z
319;;; a b z c d
320;;;
321;;; The algorithm is therefore as follows. We first read the prefix
322;;; operators, translate them into closures, and push them onto a list. Each
323;;; parenthesized subdeclarator gets its own list, and we push those into a
324;;; stack each time we encounter a `('. We then parse the middle bit, which
325;;; is a little messy (see the comment there), and start an empty final list
326;;; of operators. Finally, we scan postfix operators; these get pushed onto
327;;; the front of the operator list as we find them. Each time we find a `)',
328;;; we reverse the current prefix-operators list, and attach it to the front
329;;; of the operator list, and pop a new prefix list off the stack: at this
330;;; point, the operator list reflects the type of the subdeclarator we've
331;;; just finished. Eventually we should reach the end with an empty stack
332;;; and a prefix list, which again we reverse and attach to the front of the
333;;; list.
334;;;
335;;; Finally, we apply the operator functions in order.
336
337(defun parse-c-declarator (lexer type &key abstractp dottedp)
338 "Parse a declarator. Return two values: the complete type, and the name.
339
340 Parse a declarator from LEXER. The base type is given by TYPE. If
341 ABSTRACTP is NIL, then require a name; if T then forbid a name; if :MAYBE
342 then don't care either way. If no name is given, return NIL.
343
344 If DOTTEDP then the name may be a dotted item name `NICK.NAME', returned
345 as a cons (NICK . NAME)."
346
347 (let ((ops nil)
348 (item nil)
349 (stack nil)
350 (prefix nil))
351
352 ;; Scan prefix operators.
353 (loop
354 (case (token-type lexer)
355
356 ;; Star: a pointer type.
357 (#\* (let ((quals nil)
358 (tok (next-token lexer)))
359
360 ;; Gather following qualifiers.
361 (loop
362 (case tok
363 ((:const :volatile :restrict)
364 (pushnew tok quals))
365 (t
366 (return))))
367
368 ;; And stash the item.
369 (setf quals (sort quals #'string<))
370 (push (lambda (ty)
371 (make-instance 'c-pointer-type
372 :qualifiers quals
373 :subtype ty))
374 prefix)))
375
376 ;; An open-paren: start a new level of nesting. Maybe. There's an
377 ;; unpleasant ambiguity (DR9, DR249) between a parenthesized
378 ;; subdeclarator and a postfix function argument list following an
379 ;; omitted name. If the next thing looks like it might appear as a
380 ;; declaration specifier then assume it is one, push the paren back,
381 ;; and leave; do the same if the parens are empty, because that's not
382 ;; allowed otherwise.
383 (#\( (let ((tok (next-token lexer)))
384 (when (and abstractp
385 (or (eql tok #\))
386 (declaration-specifier-p lexer)))
387 (pushback-token lexer #\()
388 (return))
389 (push prefix stack)
390 (setf prefix nil)))
391
392 ;; Anything else: we're done.
393 (t (return))))
394
395 ;; We're now at the middle of the declarator. If there's an item name
396 ;; here, we want to snarf it.
397 (when (and (not (eq abstractp t))
398 (eq (token-type lexer) :id))
399 (let ((name (token-value lexer)))
400 (next-token lexer)
a07d8d00
MW
401 (cond ((and dottedp (require-token lexer #\. :errorp nil))
402 (let ((sub (require-token lexer :id :default (gensym))))
abdf50aa
MW
403 (setf item (cons name sub))))
404 (t
405 (setf item name)))))
406
407 ;; If we were meant to have a name, but weren't given one, make one up.
408 (when (and (null item)
409 (not abstractp))
410 (cerror* "Missing name; inventing one")
411 (setf item (gensym)))
412
413 ;; Finally scan the postfix operators.
414 (loop
415 (case (token-type lexer)
416
417 ;; Open-bracket: an array. The dimensions are probably some
418 ;; gods-awful C expressions which we'll just tuck away rather than
419 ;; thinking about too carefully. Our representation of C types is
420 ;; capable of thinking about multidimensional arrays, so we slurp up
421 ;; as many dimensions as we can.
422 (#\[ (let ((dims nil))
423 (loop
424 (let* ((frag (scan-c-fragment lexer '(#\])))
425 (dim (c-fragment-text frag)))
426 (push (if (plusp (length dim)) dim nil) dims))
427 (next-token lexer)
428 (unless (eq (next-token lexer) #\[)
429 (return)))
430 (setf dims (nreverse dims))
431 (push (lambda (ty)
1f1d88f5
MW
432 (when (typep ty 'c-function-type)
433 (error "Array element type cannot be ~
434 a function type"))
abdf50aa
MW
435 (make-instance 'c-array-type
436 :dimensions dims
437 :subtype ty))
438 ops)))
439
440 ;; Open-paren: a function with arguments.
441 (#\( (let ((args nil))
442 (unless (eql (next-token lexer) #\))
443 (loop
444
445 ;; Grab an argument and stash it.
446 (cond ((eql (token-type lexer) :ellipsis)
447 (push :ellipsis args))
448 (t
449 (let ((base-type (parse-c-type lexer)))
450 (multiple-value-bind (type name)
451 (parse-c-declarator lexer base-type
452 :abstractp :maybe)
453 (push (make-argument name type) args)))))
454
455 ;; Decide whether to take another one.
456 (case (token-type lexer)
457 (#\) (return))
458 (#\, (next-token lexer))
459 (t (cerror* "Missing `)' inserted before ~A"
460 (format-token lexer))
461 (return)))))
462 (next-token lexer)
463
464 ;; Catch: if the only thing in the list is `void' (with no
465 ;; identifier) then kill the whole thing.
abdf50aa
MW
466 (setf args
467 (if (and args
468 (null (cdr args))
469 (eq (argument-type (car args)) (c-type void))
470 (not (argument-name (car args))))
471 nil
472 (nreverse args)))
473
474 ;; Stash the operator.
475 (push (lambda (ty)
1f1d88f5
MW
476 (when (typep ty '(or c-function-type c-array-type))
477 (error "Function return type cannot be ~
478 a function or array type"))
abdf50aa
MW
479 (make-instance 'c-function-type
480 :arguments args
481 :subtype ty))
482 ops)))
483
484 ;; Close-paren: exit a level of nesting. Prepend the current prefix
485 ;; list and pop a new level. If there isn't one, this isn't our
486 ;; paren, so we're done.
487 (#\) (unless stack
488 (return))
489 (setf ops (nreconc prefix ops)
490 prefix (pop stack))
491 (next-token lexer))
492
493 ;; Anything else means we've finished.
494 (t (return))))
495
496 ;; If we still have operators stacked then something went wrong.
497 (setf ops (nreconc prefix ops))
498 (when stack
499 (cerror* "Missing `)'(s) inserted before ~A"
500 (format-token lexer))
501 (dolist (prefix stack)
502 (setf ops (nreconc prefix ops))))
503
504 ;; Finally, grind through the list of operations.
505 (do ((ops ops (cdr ops))
506 (type type (funcall (car ops) type)))
507 ((endp ops) (values type item)))))
508
509;;;--------------------------------------------------------------------------
510;;; Testing cruft.
511
512#+test
513(with-input-from-string (in "
514// int stat(struct stat *st)
515// void foo(void)
ddee4bb1 516 int vsnprintf(size_t n, char *buf, va_list ap)
1f1d88f5 517// size_t size_t;
abdf50aa
MW
518// int (*signal(int sig, int (*handler)(int s)))(int t)
519")
520 (let* ((stream (make-instance 'position-aware-input-stream
521 :file "<string>"
522 :stream in))
ddee4bb1 523 (lex (make-instance 'sod-lexer :stream stream)))
abdf50aa
MW
524 (next-char lex)
525 (next-token lex)
526 (let ((ty (parse-c-type lex)))
527 (multiple-value-bind (type name) (parse-c-declarator lex ty)
1f1d88f5
MW
528 (list ty
529 (list type name)
530 (with-output-to-string (out)
531 (pprint-c-type type out name)
abdf50aa
MW
532 (format-token lex)))))))
533
534;;;----- That's all, folks --------------------------------------------------