doc/output.tex: Add a missing member reference in vtable initializer.
[sod] / src / c-types-parse.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Parser for C types
4 ;;;
5 ;;; (c) 2009 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Sensible Object Design, an object system for C.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 (cl:in-package #:sod)
27
28 ;;;--------------------------------------------------------------------------
29 ;;; Declaration specifiers.
30 ;;;
31 ;;; This stuff is distressingly complicated.
32 ;;;
33 ;;; Parsing a (single) declaration specifier is quite easy, and a declaration
34 ;;; is just a sequence of these things. Except that there are a stack of
35 ;;; rules about which ones are allowed to go together, and the language
36 ;;; doesn't require them to appear in any particular order.
37 ;;;
38 ;;; A collection of declaration specifiers is carried about in a purpose-made
39 ;;; object with a number of handy operations defined on it, and then I build
40 ;;; some parsers in terms of them. The basic strategy is to parse
41 ;;; declaration specifiers while they're valid, and keep track of what we've
42 ;;; read. When I've reached the end, we'll convert what we've got into a
43 ;;; `canonical form', and then convert that into a C type object of the
44 ;;; appropriate kind. The whole business is rather more complicated than it
45 ;;; really ought to be.
46
47 ;; Firstly, a table of interesting things about the various declaration
48 ;; specifiers that I might encounter. I categorize declaration specifiers
49 ;; into four kinds.
50 ;;
51 ;; * `Type specifiers' describe the actual type, whether that's integer,
52 ;; character, floating point, or some tagged or user-named type.
53 ;;
54 ;; * `Size specifiers' distinguish different sizes of the same basic type.
55 ;; This is how we tell the difference between `int' and `long'.
56 ;;
57 ;; * `Sign specifiers' distinguish different signednesses. This is how we
58 ;; tell the difference between `int' and `unsigned'.
59 ;;
60 ;; * `Qualifiers' are our old friends `const', `restrict' and `volatile'.
61 ;;
62 ;; These groupings are for my benefit here, in determining whether a
63 ;; particular declaration specifier is valid in the current context. I don't
64 ;; accept `function specifiers' (of which the only current example is
65 ;; `inline') since it's meaningless to me.
66
67 (defclass declspec ()
68 ;; Despite the fact that it looks pretty trivial, this can't be done with
69 ;; `defstruct' for the simple reason that we add more methods to the
70 ;; accessor functions later.
71 ((%label :type keyword :initarg :label :reader ds-label)
72 (name :type string :initarg :name :reader ds-name)
73 (kind :type (member %type complexity sign size qualifier %specs)
74 :initarg :kind :reader ds-kind)
75 (taggedp :type boolean :initarg :taggedp
76 :initform nil :reader ds-taggedp))
77 (:documentation
78 "Represents the important components of a declaration specifier.
79
80 The only interesting instances of this class are in the table
81 `*declspec-map*'."))
82
83 (defmethod shared-initialize :after ((ds declspec) slot-names &key)
84 "If no name is provided then derive one from the label.
85
86 Most declaration specifiers have simple names for which this works well."
87 (default-slot (ds 'name slot-names)
88 (string-downcase (ds-label ds))))
89
90 (defparameter *declspec-map*
91 (let ((map (make-hash-table :test #'equal)))
92 (dolist (item '((%type :char :int :float :double)
93 (complexity (:complex :compat "_Complex")
94 (:imaginary :compat "_Imaginary"))
95 ((type :taggedp t) :enum :struct :union)
96 (size :short :long (:long-long :name "long long"))
97 (sign :signed :unsigned)
98 (qualifier :const :restrict :volatile
99 (:atomic :compat "_Atomic"))))
100 (destructuring-bind (kind &key (taggedp nil))
101 (let ((spec (car item)))
102 (if (consp spec) spec (list spec)))
103 (dolist (spec (cdr item))
104 (destructuring-bind (label
105 &key (name (string-downcase label))
106 compat (taggedp taggedp))
107 (if (consp spec) spec (list spec))
108 (let ((ds (make-instance 'declspec
109 :label label
110 :name (or compat name)
111 :kind kind
112 :taggedp taggedp)))
113 (setf (gethash name map) ds
114 (gethash label map) ds)
115 (when compat
116 (setf (gethash compat map) ds)))))))
117 map)
118 "Maps symbolic labels and textual names to `declspec' instances.")
119
120 (defclass storespec ()
121 ((spec :initarg :spec :reader ds-spec))
122 (:documentation "Carrier for a storage specifier."))
123
124 (defmethod ds-label ((spec storespec)) spec)
125 (defmethod ds-kind ((spec storespec)) '%specs)
126
127 (defmethod ds-label ((ty c-type)) :c-type)
128 (defmethod ds-name ((ty c-type)) (princ-to-string ty))
129 (defmethod ds-kind ((ty c-type)) '%type)
130
131 ;; A collection of declaration specifiers, and how to merge them together.
132
133 (defclass declspecs ()
134 ;; This could have been done with `defstruct' just as well, but a
135 ;; `defclass' can be tweaked interactively, which is a win at the moment.
136 ((%type :initform nil :initarg :type :reader ds-type)
137 (complexity :initform nil :initarg :complexity :reader ds-complexity)
138 (sign :initform nil :initarg :sign :reader ds-sign)
139 (size :initform nil :initarg :size :reader ds-size)
140 (%specs :initform nil :initarg :specs :reader ds-specs)
141 (qualifier :initform nil :initarg :qualifiers :reader ds-qualifiers))
142 (:documentation "Represents a collection of declaration specifiers.
143
144 This is used during type parsing to represent the type under construction.
145 Instances are immutable: we build new ones rather than modifying existing
146 ones. This leads to a certain amount of churn, but we'll just have to
147 live with that.
148
149 (Why are instances immutable? Because it's much easier to merge a new
150 specifier into an existing collection and then check that the resulting
151 thing is valid, rather than having to deal with all of the possible
152 special cases of what the new thing might be. And if the merged
153 collection isn't good, I must roll back to the previous version. So I
154 don't get to take advantage of a mutable structure.)"))
155
156 (defparameter *good-declspecs*
157 '(((:int) (:signed :unsigned) (:short :long :long-long) ())
158 ((:char) (:signed :unsigned) () ())
159 ((:double) () (:long) (:complex :imaginary))
160 (t () () ()))
161 "List of good collections of declaration specifiers.
162
163 Each item is a list of the form (TYPES SIGNS SIZES COMPLEXITIES). Each of
164 TYPES, SIGNS, SIZES, and COMPLEXITIES, is either a list of acceptable
165 specifiers of the appropriate kind, or T, which matches any specifier.")
166
167 (defun good-declspecs-p (specs)
168 "Are SPECS a good collection of declaration specifiers?"
169 (let ((speclist (list (ds-type specs)
170 (ds-sign specs)
171 (ds-size specs)
172 (ds-complexity specs))))
173 (some (lambda (it)
174 (every (lambda (spec pat)
175 (or (eq pat t) (null spec)
176 (member (ds-label spec) pat)))
177 speclist it))
178 *good-declspecs*)))
179
180 (defun combine-declspec (specs ds)
181 "Combine the declspec DS with the existing SPECS.
182
183 Returns new DECLSPECS if they're OK, or `nil' if not. The old SPECS are
184 not modified."
185
186 (let* ((kind (ds-kind ds))
187 (old (slot-value specs kind)))
188 (multiple-value-bind (ok new)
189 (case kind
190 (qualifier (values t (adjoin ds old)))
191 (size (cond ((not old) (values t ds))
192 ((and (eq (ds-label old) :long) (eq ds old))
193 (values t (gethash :long-long *declspec-map*)))
194 (t (values nil nil))))
195 (specs (values t (adjoin (ds-spec ds) old)))
196 (t (values (not old) ds)))
197 (if ok
198 (let ((copy (copy-instance specs)))
199 (setf (slot-value copy kind) new)
200 (and (good-declspecs-p copy) copy))
201 nil))))
202
203 (defun declspecs-type (specs)
204 "Convert `declspecs' SPECS into a standalone C type object."
205 (let* ((base-type (ds-type specs))
206 (size (ds-size specs))
207 (sign (ds-sign specs))
208 (cplx (ds-complexity specs))
209 (quals (mapcar #'ds-label (ds-qualifiers specs)))
210 (specs (ds-specs specs))
211 (type (cond ((typep base-type 'c-type)
212 (qualify-c-type base-type quals))
213 ((or base-type size sign cplx)
214 (when (and sign (eq (ds-label sign) :signed)
215 (eq (ds-label base-type) :int))
216 (setf sign nil))
217 (cond ((and (or (null base-type)
218 (eq (ds-label base-type) :int))
219 (or size sign))
220 (setf base-type nil))
221 ((null base-type)
222 (setf base-type (gethash :int *declspec-map*))))
223 (let* ((things (list sign cplx size base-type))
224 (stripped (remove nil things))
225 (names (mapcar #'ds-name stripped)))
226 (make-simple-type (format nil "~{~A~^ ~}" names)
227 quals)))
228 (t
229 nil))))
230 (cond ((null type) nil)
231 ((null specs) type)
232 (t (make-storage-specifiers-type type specs)))))
233
234 ;; Parsing declaration specifiers.
235
236 (define-indicator :declspec "<declaration-specifier>")
237
238 (defun scan-simple-declspec
239 (scanner &key (predicate (constantly t)) (indicator :declspec))
240 "Scan a simple `declspec' from SCANNER.
241
242 Simple declspecs are the ones defined in the `*declspec-map*' or
243 `*module-type-map*'. This covers the remaining possibilities if the
244 `complex-declspec' pluggable parser didn't find anything to match.
245
246 If PREDICATE is provided then only succeed if (funcall PREDICATE DECLSPEC)
247 is true, where DECLSPEC is the raw declaration specifier or C-type object,
248 so we won't have fetched the tag for a tagged type yet. If the PREDICATE
249 returns false then the scan fails without consuming input.
250
251 If we couldn't find an acceptable declaration specifier then issue
252 INDICATOR as the failure indicator. Value on success is either a
253 `declspec' object or a `c-type' object."
254
255 ;; Turns out to be easier to do this by hand.
256 (let ((ds (and (eq (token-type scanner) :id)
257 (let ((kw (token-value scanner)))
258 (or (gethash kw *declspec-map*)
259 (and (boundp '*module-type-map*)
260 (gethash kw *module-type-map*))
261 (find-simple-c-type kw))))))
262 (cond ((or (not ds) (and predicate (not (funcall predicate ds))))
263 (values (list indicator) nil nil))
264 ((and (typep ds 'declspec) (ds-taggedp ds))
265 (scanner-step scanner)
266 (if (eq (token-type scanner) :id)
267 (let ((ty (make-c-tagged-type (ds-label ds)
268 (token-value scanner))))
269 (scanner-step scanner)
270 (values ty t t))
271 (values :tag nil t)))
272 (t
273 (scanner-step scanner)
274 (values ds t t)))))
275
276 (define-pluggable-parser complex-declspec atomic-typepsec (scanner)
277 ;; `atomic' `(' type-name `)'
278 ;; `_Atomic' `(' type-name `)'
279 (with-parser-context (token-scanner-context :scanner scanner)
280 (parse (peek (seq ((nil (or "atomic" "_Atomic"))
281 #\(
282 (decls (parse-c-type scanner))
283 (subtype (parse-declarator scanner decls
284 :kernel (parse-empty)
285 :abstractp t))
286 #\))
287 (make-atomic-type (car subtype)))))))
288
289 (define-pluggable-parser complex-declspec alignas (scanner)
290 ;; `alignas' `(' fragment `)'
291 ;; `_Alignas' `(' fragment `)'
292 (with-parser-context (token-scanner-context :scanner scanner)
293 (parse (peek (seq ((nil (or "alignas" "_Alignas"))
294 (nil (lisp (values #\(
295 (eq (token-type scanner) #\()
296 nil)))
297 (nil (commit))
298 (frag (parse-delimited-fragment scanner #\( #\))))
299 (make-instance 'storespec
300 :spec (make-instance
301 'alignas-storage-specifier
302 :alignment frag)))))))
303
304 (defun scan-and-merge-declspec (scanner specs)
305 "Scan a declaration specifier and merge it with SPECS.
306
307 This is a parser function. If it succeeds, it returns the merged
308 `declspecs' object. It can fail either if no valid declaration specifier
309 is found or it cannot merge the declaration specifier with the existing
310 SPECS."
311
312 (with-parser-context (token-scanner-context :scanner scanner)
313 (if-parse (:consumedp consumedp)
314 (or (plug complex-declspec scanner)
315 (scan-simple-declspec scanner))
316 (aif (combine-declspec specs it)
317 (values it t consumedp)
318 (values (list :declspec) nil consumedp)))))
319
320 (export 'parse-c-type)
321 (defun parse-c-type (scanner)
322 "Parse a C type from declaration specifiers.
323
324 This is a parser function. If it succeeds then the result is a `c-type'
325 object representing the type it found. Note that this function won't try
326 to parse a C declarator."
327
328 (with-parser-context (token-scanner-context :scanner scanner)
329 (if-parse (:result specs :consumedp cp)
330 (many (specs (make-instance 'declspecs) it :min 1)
331 (peek (scan-and-merge-declspec scanner specs)))
332 (let ((type (declspecs-type specs)))
333 (if type (values type t cp)
334 (values (list :declspec) nil cp))))))
335
336 ;;;--------------------------------------------------------------------------
337 ;;; Parsing declarators.
338 ;;;
339 ;;; The syntax of declaration specifiers was horrific. Declarators are a
340 ;;; very simple expression syntax, but this time the semantics are awful. In
341 ;;; particular, they're inside-out. If <> denotes mumble of foo, then op <>
342 ;;; is something like mumble of op of foo. Unfortunately, the expression
343 ;;; parser engine wants to apply op of mumble of foo, so I'll have to do some
344 ;;; work to fix the impedance mismatch.
345 ;;;
346 ;;; The currency we'll use is a pair (FUNC . NAME), with the semantics that
347 ;;; (funcall FUNC TYPE) returns the derived type. The result of
348 ;;; `parse-declarator' will be of this form.
349
350 (export 'parse-declarator)
351 (defun parse-declarator (scanner base-type &key kernel abstractp keywordp)
352 "Parse a C declarator, returning a pair (C-TYPE . NAME).
353
354 The SCANNER is a token scanner to read from. The BASE-TYPE is the type
355 extracted from the preceding declaration specifiers, as parsed by
356 `parse-c-type'.
357
358 The result contains both the resulting constructed C-TYPE (with any
359 qualifiers etc. as necessary), and the name from the middle of the
360 declarator. The name is parsed using the KERNEL parser provided, and
361 defaults to matching a simple identifier `:id'. This might, e.g., be
362 (? :id) to parse an `abstract declarator' which has optional names.
363
364 If KEYWORDP is true, then a keyword argument list is permitted in
365 function declarations.
366
367 There's an annoying ambiguity in the syntax, if an empty KERNEL is
368 permitted. In this case, you must ensure that ABSTRACTP is true so that
369 the appropriate heuristic can be applied. As a convenience, if ABSTRACTP
370 is true then `(? :id)' is used as the default KERNEL."
371
372 ;; This is a bit confusing. This is a strangely-shaped operator grammer,
373 ;; which wouldn't be so bad, but the `values' being operated on are pairs
374 ;; of the form (FUNC . NAME). The NAME is whatever the KERNEL parser
375 ;; produces as its result, and will be passed out unchanged. The FUNC is a
376 ;; type-constructor function which will be eventually be applied to the
377 ;; input BASE-TYPE, but we can't calculate the actual result as we go along
378 ;; because of the rather annoying inside-out nature of the declarator
379 ;; syntax.
380
381 (with-parser-context (token-scanner-context :scanner scanner)
382 (let ((kernel-parser (cond (kernel kernel)
383 (abstractp (parser () (? :id)))
384 (t (parser () :id)))))
385
386 (labels ((qualifiers ()
387 ;; qualifier*
388
389 (parse
390 (seq ((quals (list ()
391 (scan-simple-declspec
392 scanner
393 :indicator :qualifier
394 :predicate (lambda (ds)
395 (and (typep ds 'declspec)
396 (eq (ds-kind ds)
397 'qualifier)))))))
398 (mapcar #'ds-label quals))))
399
400 (disallow-keyword-functions (type)
401 (when (typep type 'c-keyword-function-type)
402 (error "Functions with keyword arguments are only ~
403 allowed at top-level")))
404
405 (star ()
406 ;; Prefix: `*' qualifiers
407
408 (parse (seq (#\* (quals (qualifiers)))
409 (preop "*" (state 9)
410 (cons (lambda (type)
411 (disallow-keyword-functions type)
412 (funcall (car state)
413 (make-pointer-type type quals)))
414 (cdr state))))))
415
416 (predict-argument-list-p ()
417 ;; See `prefix-lparen'. Predict an argument list rather
418 ;; than a nested declarator if (a) abstract declarators are
419 ;; permitted and (b) the next token is a declaration
420 ;; specifier or ellipsis.
421 (let ((type (token-type scanner))
422 (value (token-value scanner)))
423 (and abstractp
424 (or (eq type :ellipsis)
425 (and (eq type :id)
426 (or (gethash value *module-type-map*)
427 (gethash value *declspec-map*)))))))
428
429 (prefix-lparen ()
430 ;; Prefix: `('
431 ;;
432 ;; Opening parentheses are treated as prefix operators by
433 ;; the expression parsing engine. There's an annoying
434 ;; ambiguity in the syntax if abstract declarators are
435 ;; permitted: a `(' might be either the start of a nested
436 ;; subdeclarator or the start of a postfix function argument
437 ;; list. The two are disambiguated by stating that if the
438 ;; token following the `(' is a `)' or a declaration
439 ;; specifier, then we have a postfix argument list.
440 (parse
441 (peek (seq (#\(
442 (nil (if (predict-argument-list-p)
443 (values nil nil nil)
444 (values t t nil))))
445 (lparen #\))))))
446
447 (kernel ()
448 (parse (seq ((name (funcall kernel-parser)))
449 (cons #'identity name))))
450
451 (arg-decl (abstractp)
452 (parse (seq ((base-type (parse-c-type scanner))
453 (dtor (parse-declarator scanner base-type
454 :abstractp abstractp)))
455 dtor)))
456
457 (argument ()
458 ;; argument ::= type abstract-declspec
459
460 (parse (seq ((dtor (arg-decl t)))
461 (make-argument (cdr dtor) (car dtor)))))
462
463 (kw-argument ()
464 ;; kw-argument ::= type declspec [= c-fragment]
465
466 (parse (seq ((dtor (arg-decl nil))
467 (dflt (? (when (eq (token-type scanner) #\=)
468 (parse-delimited-fragment
469 scanner #\= '(#\, #\))
470 :keep-end t)))))
471 (make-argument (cdr dtor) (car dtor) dflt))))
472
473 (argument-list ()
474 ;; argument-list ::=
475 ;; [argument [`,' argument]* [`,' argument-tail]]
476 ;; | argument-tail
477 ;;
478 ;; argument-tail ::= `...' | keyword-tail
479 ;;
480 ;; keyword-tail ::= `?' [kw-argument [`,' kw-argument]*]
481 ;;
482 ;; kw-argument ::= argument [= c-fragment]
483 ;;
484 ;; The possibility of a trailing `,' `...' means that we
485 ;; can't use the standard `list' parser. Note that, unlike
486 ;; `real' C, we allow an ellipsis even if there are no
487 ;; explicit arguments.
488
489 (let ((args nil)
490 (keys nil)
491 (keysp nil))
492 (loop
493 (when (eq (token-type scanner) :ellipsis)
494 (push :ellipsis args)
495 (scanner-step scanner)
496 (return))
497 (when (and keywordp (eq (token-type scanner) #\?))
498 (setf keysp t)
499 (scanner-step scanner)
500 (multiple-value-bind (arg winp consumedp)
501 (parse (list (:min 0) (kw-argument) #\,))
502 (declare (ignore consumedp))
503 (unless winp
504 (return-from argument-list (values arg nil t)))
505 (setf keys arg)
506 (return)))
507 (multiple-value-bind (arg winp consumedp)
508 (argument)
509 (unless winp
510 (if (or consumedp args)
511 (return-from argument-list (values arg nil t))
512 (return)))
513 (push arg args))
514 (unless (eq (token-type scanner) #\,)
515 (return))
516 (scanner-step scanner))
517 (values (let ((rargs (nreverse args))
518 (rkeys (nreverse keys)))
519 (if keysp
520 (lambda (ret)
521 (make-keyword-function-type
522 ret rargs rkeys))
523 (lambda (ret)
524 (make-function-type ret rargs))))
525 t
526 (or args keysp))))
527
528 (postfix-lparen ()
529 ;; Postfix: `(' argument-list `)'
530
531 (parse (seq (#\( (make (argument-list)) #\))
532 (postop "()" (state 10)
533 (cons (lambda (type)
534 (disallow-keyword-functions type)
535 (funcall (car state)
536 (funcall make type)))
537 (cdr state))))))
538
539 (dimension ()
540 ;; `[' c-fragment ']'
541
542 (parse (seq ((frag (parse-delimited-fragment
543 scanner #\[ #\])))
544 (c-fragment-text frag))))
545
546 (lbracket ()
547 ;; Postfix: dimension+
548
549 (parse (seq ((dims (list (:min 1) (dimension))))
550 (postop "[]" (state 10)
551 (cons (lambda (type)
552 (disallow-keyword-functions type)
553 (funcall (car state)
554 (make-array-type type dims)))
555 (cdr state)))))))
556
557 ;; And now we actually do the declarator parsing.
558 (parse (seq ((value (expr (:nestedp nestedp)
559
560 ;; An actual operand.
561 (kernel)
562
563 ;; Binary operators. There aren't any.
564 nil
565
566 ;; Prefix operators.
567 (or (star)
568 (prefix-lparen))
569
570 ;; Postfix operators.
571 (or (postfix-lparen)
572 (lbracket)
573 (when nestedp (seq (#\)) (rparen #\))))))))
574 (cons (wrap-c-type (lambda (type)
575 (funcall (car value) type))
576 base-type)
577 (cdr value))))))))
578
579 ;;;----- That's all, folks --------------------------------------------------