An actual running implementation, which makes code that compiles.
[sod] / src / c-types-parse.lisp
CommitLineData
dea4d055
MW
1;;; -*-lisp-*-
2;;;
3;;; Parser for C types
4;;;
5;;; (c) 2009 Straylight/Edgeware
6;;;
7
8;;;----- Licensing notice ---------------------------------------------------
9;;;
10;;; This file is part of the Sensble Object Design, an object system for C.
11;;;
12;;; SOD is free software; you can redistribute it and/or modify
13;;; it under the terms of the GNU General Public License as published by
14;;; the Free Software Foundation; either version 2 of the License, or
15;;; (at your option) any later version.
16;;;
17;;; SOD is distributed in the hope that it will be useful,
18;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;;; GNU General Public License for more details.
21;;;
22;;; You should have received a copy of the GNU General Public License
23;;; along with SOD; if not, write to the Free Software Foundation,
24;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26(cl:in-package #:sod)
27
28;;;--------------------------------------------------------------------------
29;;; Declaration specifiers.
bf090e02
MW
30;;;
31;;; This stuff is distressingly complicated.
32;;;
33;;; Parsing a (single) declaration specifier is quite easy, and a declaration
34;;; is just a sequence of these things. Except that there are a stack of
35;;; rules about which ones are allowed to go together, and the language
36;;; doesn't require them to appear in any particular order.
37;;;
38;;; A collection of declaration specifiers is carried about in a purpose-made
39;;; object with a number of handy operations defined on it, and then I build
40;;; some parsers in terms of them. The basic strategy is to parse
41;;; declaration specifiers while they're valid, and keep track of what we've
42;;; read. When I've reached the end, we'll convert what we've got into a
43;;; `canonical form', and then convert that into a C type object of the
44;;; appropriate kind. The whole business is rather more complicated than it
45;;; really ought to be.
46
47;; Firstly, a table of interesting things about the various declaration
48;; specifiers that I might encounter. I categorize declaration specifiers
49;; into four kinds.
50;;
51;; * `Type specifiers' describe the actual type, whether that's integer,
52;; character, floating point, or some tagged or user-named type.
53;;
54;; * `Size specifiers' distinguish different sizes of the same basic type.
55;; This is how we tell the difference between `int' and `long'.
56;;
57;; * `Sign specifiers' distinguish different signednesses. This is how we
58;; tell the difference between `int' and `unsigned'.
59;;
60;; * `Qualifiers' are our old friends `const', `restrict' and `volatile'.
61;;
62;; These groupings are for my benefit here, in determining whether a
63;; particular declaration specifier is valid in the current context. I don't
64;; accept `function specifiers' (of which the only current example is
65;; `inline') since it's meaningless to me.
dea4d055
MW
66
67(defclass declspec ()
239fa5bd
MW
68 ;; Despite the fact that it looks pretty trivial, this can't be done with
69 ;; `defstruct' for the simple reason that we add more methods to the
70 ;; accessor functions later.
dea4d055
MW
71 ((label :type keyword :initarg :label :reader ds-label)
72 (name :type string :initarg :name :reader ds-name)
bf090e02
MW
73 (kind :type (member type sign size qualifier)
74 :initarg :kind :reader ds-kind)
75 (taggedp :type boolean :initarg :taggedp
76 :initform nil :reader ds-taggedp))
77 (:documentation
78 "Represents the important components of a declaration specifier.
79
80 The only interesting instances of this class are in the table
81 `*declspec-map*'."))
dea4d055
MW
82
83(defmethod shared-initialize :after ((ds declspec) slot-names &key)
bf090e02
MW
84 "If no name is provided then derive one from the label.
85
86 Most declaration specifiers have simple names for which this works well."
dea4d055
MW
87 (default-slot (ds 'name slot-names)
88 (string-downcase (ds-label ds))))
89
dea4d055
MW
90(defparameter *declspec-map*
91 (let ((map (make-hash-table :test #'equal)))
92 (dolist (item '((type :void :char :int :float :double)
bf090e02
MW
93 ((type :taggedp t) :enum :struct :union)
94 (size :short :long (:long-long :name "long long"))
dea4d055 95 (sign :signed :unsigned)
bf090e02
MW
96 (qualifier :const :restrict :volatile)))
97 (destructuring-bind (kind &key (taggedp nil))
98 (let ((spec (car item)))
99 (if (consp spec) spec (list spec)))
dea4d055 100 (dolist (spec (cdr item))
bf090e02
MW
101 (destructuring-bind (label
102 &key
103 (name (string-downcase label))
104 (taggedp taggedp))
105 (if (consp spec) spec (list spec))
dea4d055 106 (let ((ds (make-instance 'declspec
bf090e02
MW
107 :label label
108 :name name
109 :kind kind
110 :taggedp taggedp)))
dea4d055
MW
111 (setf (gethash name map) ds
112 (gethash label map) ds))))))
bf090e02 113 map)
3109662a 114 "Maps symbolic labels and textual names to `declspec' instances.")
bf090e02
MW
115
116;; A collection of declaration specifiers, and how to merge them together.
117
118(defclass declspecs ()
239fa5bd
MW
119 ;; This could have been done with `defstruct' just as well, but a
120 ;; `defclass' can be tweaked interactively, which is a win at the moment.
bf090e02
MW
121 ((type :initform nil :initarg :type :reader ds-type)
122 (sign :initform nil :initarg :sign :reader ds-sign)
123 (size :initform nil :initarg :size :reader ds-size)
124 (qualifier :initform nil :initarg :qualifiers :reader ds-qualifiers))
125 (:documentation
126 "Represents a collection of declaration specifiers.
127
128 This is used during type parsing to represent the type under
129 construction. Instances are immutable: we build new ones rather than
130 modifying existing ones. This leads to a certain amount of churn, but
131 we'll just have to live with that.
132
133 (Why are instances immutable? Because it's much easier to merge a new
3109662a
MW
134 specifier into an existing collection and then check that the resulting
135 thing is valid, rather than having to deal with all of the possible
bf090e02
MW
136 special cases of what the new thing might be. And if the merged
137 collection isn't good, I must roll back to the previous version. So I
138 don't get to take advantage of a mutable structure.)"))
dea4d055
MW
139
140(defmethod ds-label ((ty c-type)) :c-type)
141(defmethod ds-name ((ty c-type)) (princ-to-string ty))
142(defmethod ds-kind ((ty c-type)) 'type)
143
144(defparameter *good-declspecs*
145 '(((:int) (:signed :unsigned) (:short :long :long-long))
146 ((:char) (:signed :unsigned) ())
147 ((:double) () (:long))
148 (t () ()))
149 "List of good collections of declaration specifiers.
150
151 Each item is a list of the form (TYPES SIGNS SIZES). Each of TYPES, SIGNS
152 and SIZES is either a list of acceptable specifiers of the appropriate
153 kind, or T, which matches any specifier.")
154
dea4d055
MW
155(defun good-declspecs-p (specs)
156 "Are SPECS a good collection of declaration specifiers?"
157 (let ((speclist (list (ds-type specs) (ds-sign specs) (ds-size specs))))
158 (some (lambda (it)
159 (every (lambda (spec pat)
160 (or (eq pat t) (null spec)
161 (member (ds-label spec) pat)))
162 speclist it))
163 *good-declspecs*)))
164
165(defun combine-declspec (specs ds)
166 "Combine the declspec DS with the existing SPECS.
167
168 Returns new DECLSPECS if they're OK, or `nil' if not. The old SPECS are
169 not modified."
bf090e02 170
dea4d055
MW
171 (let* ((kind (ds-kind ds))
172 (old (slot-value specs kind)))
173 (multiple-value-bind (ok new)
174 (case kind
175 (qualifier (values t (adjoin ds old)))
176 (size (cond ((not old) (values t ds))
177 ((and (eq (ds-label old) :long) (eq ds old))
178 (values t (gethash :long-long *declspec-map*)))
179 (t (values nil nil))))
180 (t (values (not old) ds)))
181 (if ok
182 (let ((copy (copy-instance specs)))
183 (setf (slot-value copy kind) new)
184 (and (good-declspecs-p copy) copy))
185 nil))))
186
dea4d055 187(defun declspecs-type (specs)
bf090e02 188 "Convert `declspecs' SPECS into a standalone C type object."
dea4d055
MW
189 (let ((type (ds-type specs))
190 (size (ds-size specs))
bf090e02
MW
191 (sign (ds-sign specs))
192 (quals (mapcar #'ds-label (ds-qualifiers specs))))
193 (cond ((typep type 'c-type)
194 (qualify-c-type type quals))
195 ((or type size sign)
196 (when (and sign (eq (ds-label sign) :signed)
dea4d055
MW
197 (eq (ds-label type) :int))
198 (setf sign nil))
199 (cond ((and (or (null type) (eq (ds-label type) :int))
200 (or size sign))
201 (setf type nil))
202 ((null type)
203 (setf type (gethash :int *declspec-map*))))
204 (make-simple-type (format nil "~{~@[~A~^ ~]~}"
239fa5bd 205 (mapcar #'ds-name
dea4d055
MW
206 (remove nil
207 (list sign size type))))
bf090e02 208 quals))
dea4d055
MW
209 (t
210 nil))))
211
bf090e02 212;; Parsing declaration specifiers.
dea4d055 213
bf090e02 214(define-indicator :declspec "<declaration-specifier>")
dea4d055 215
bf090e02
MW
216(defun scan-declspec
217 (scanner &key (predicate (constantly t)) (indicator :declspec))
3109662a 218 "Scan a `declspec' from SCANNER.
dea4d055 219
bf090e02
MW
220 If PREDICATE is provided then only succeed if (funcall PREDICATE DECLSPEC)
221 is true, where DECLSPEC is the raw declaration specifier or C-type object,
222 so we won't have fetched the tag for a tagged type yet. If the PREDICATE
223 returns false then the scan fails without consuming input.
dea4d055 224
bf090e02
MW
225 If we couldn't find an acceptable declaration specifier then issue
226 INDICATOR as the failure indicator. Value on success is either a
227 `declspec' object or a `c-type' object."
dea4d055 228
bf090e02
MW
229 ;; Turns out to be easier to do this by hand.
230 (let ((ds (and (eq (token-type scanner) :id)
231 (let ((kw (token-value scanner)))
232 (or (gethash kw *module-type-map*)
233 (gethash kw *declspec-map*))))))
234 (cond ((or (not ds) (and predicate (not (funcall predicate ds))))
235 (values (list indicator) nil nil))
236 ((ds-taggedp ds)
237 (scanner-step scanner)
238 (if (eq (token-type scanner) :id)
239 (let ((ty (make-c-tagged-type (ds-label ds)
240 (token-value scanner))))
241 (scanner-step scanner)
242 (values ty t t))
243 (values :tag nil t)))
244 (t
245 (scanner-step scanner)
246 (values ds t t)))))
dea4d055 247
bf090e02
MW
248(defun scan-and-merge-declspec (scanner specs)
249 "Scan a declaration specifier and merge it with SPECS.
250
251 This is a parser function. If it succeeds, it returns the merged
252 `declspecs' object. It can fail either if no valid declaration specifier
253 is found or it cannot merge the declaration specifier with the existing
254 SPECS."
255
256 (with-parser-context (token-scanner-context :scanner scanner)
257 (if-parse (:consumedp consumedp) (scan-declspec scanner)
258 (aif (combine-declspec specs it)
259 (values it t consumedp)
260 (values (list :declspec) nil consumedp)))))
261
239fa5bd 262(export 'parse-c-type)
bf090e02
MW
263(defun parse-c-type (scanner)
264 "Parse a C type from declaration specifiers.
dea4d055 265
bf090e02
MW
266 This is a parser function. If it succeeds then the result is a `c-type'
267 object representing the type it found. Note that this function won't try
268 to parse a C declarator."
dea4d055 269
bf090e02
MW
270 (with-parser-context (token-scanner-context :scanner scanner)
271 (if-parse (:result specs :consumedp cp)
272 (many (specs (make-instance 'declspecs) it :min 1)
273 (peek (scan-and-merge-declspec scanner specs)))
274 (let ((type (declspecs-type specs)))
275 (if type (values type t cp)
276 (values (list :declspec) nil cp))))))
dea4d055 277
bf090e02
MW
278;;;--------------------------------------------------------------------------
279;;; Parsing declarators.
280;;;
281;;; The syntax of declaration specifiers was horrific. Declarators are a
282;;; very simple expression syntax, but this time the semantics are awful. In
283;;; particular, they're inside-out. If <> denotes mumble of foo, then op <>
284;;; is something like mumble of op of foo. Unfortunately, the expression
285;;; parser engine wants to apply op of mumble of foo, so I'll have to do some
286;;; work to fix the impedance mismatch.
287;;;
288;;; The currency we'll use is a pair (FUNC . NAME), with the semantics that
289;;; (funcall FUNC TYPE) returns the derived type. The result of
290;;; `parse-declarator' will be of this form.
dea4d055 291
239fa5bd 292(export 'parse-declarator)
ea578bb4 293(defun parse-declarator (scanner base-type &key kernel abstractp)
239fa5bd 294 "Parse a C declarator, returning a pair (C-TYPE . NAME).
dea4d055 295
239fa5bd
MW
296 The SCANNER is a token scanner to read from. The BASE-TYPE is the type
297 extracted from the preceding declaration specifiers, as parsed by
298 `parse-c-type'.
299
300 The result contains both the resulting constructed C-TYPE (with any
301 qualifiers etc. as necessary), and the name from the middle of the
ea578bb4 302 declarator. The name is parsed using the KERNEL parser provided, and
239fa5bd
MW
303 defaults to matching a simple identifier `:id'. This might, e.g., be
304 (? :id) to parse an `abstract declarator' which has optional names.
305
ea578bb4 306 There's an annoying ambiguity in the syntax, if an empty KERNEL is
239fa5bd
MW
307 permitted. In this case, you must ensure that ABSTRACTP is true so that
308 the appropriate heuristic can be applied. As a convenience, if ABSTRACTP
ea578bb4 309 is true then `(? :id)' is used as the default KERNEL."
239fa5bd 310 (with-parser-context (token-scanner-context :scanner scanner)
ea578bb4 311 (let ((kernel-parser (cond (kernel kernel)
239fa5bd
MW
312 (abstractp (parser () (? :id)))
313 (t (parser () :id)))))
314
315 (labels ((qualifiers ()
316 ;; qualifier*
317
318 (parse
319 (seq ((quals (list ()
320 (scan-declspec
321 scanner
322 :indicator :qualifier
323 :predicate (lambda (ds)
324 (and (typep ds 'declspec)
325 (eq (ds-kind ds)
326 'qualifier)))))))
327 (mapcar #'ds-label quals))))
328
329 (star ()
330 ;; Prefix: `*' qualifiers
331
332 (parse (seq (#\* (quals (qualifiers)))
333 (preop "*" (state 9)
334 (cons (lambda (type)
335 (funcall (car state)
336 (make-pointer-type type quals)))
337 (cdr state))))))
338
339 (next-declspec-p ()
340 ;; Ansert whether the next token is a valid declaration
341 ;; specifier, without consuming it.
342 (and (eq (token-type scanner) :id)
343 (let ((id (token-value scanner)))
344 (or (gethash id *module-type-map*)
345 (gethash id *declspec-map*)))))
346
347 (prefix-lparen ()
348 ;; Prefix: `('
349 ;;
350 ;; Opening parentheses are treated as prefix operators by
351 ;; the expression parsing engine. There's an annoying
352 ;; ambiguity in the syntax if abstract declarators are
353 ;; permitted: a `(' might be either the start of a nested
354 ;; subdeclarator or the start of a postfix function argument
355 ;; list. The two are disambiguated by stating that if the
356 ;; token following the `(' is a `)' or a declaration
357 ;; specifier, then we have a postfix argument list.
358 (parse
359 (peek (seq (#\(
360 (nil (if (and abstractp (next-declspec-p))
361 (values nil nil nil)
362 (values t t nil))))
363 (lparen #\))))))
364
ea578bb4
MW
365 (kernel ()
366 (parse (seq ((name (funcall kernel-parser)))
239fa5bd
MW
367 (cons #'identity name))))
368
369 (argument-list ()
370 ;; [ argument [ `,' argument ]* ]
371
9ec578d9
MW
372 (parse (list (:min 0)
373 (seq ((base-type (parse-c-type scanner))
374 (dtor (parse-declarator scanner
375 base-type
376 :abstractp t)))
377 (make-argument (cdr dtor) (car dtor)))
378 #\,)))
239fa5bd
MW
379
380 (postfix-lparen ()
381 ;; Postfix: `(' argument-list `)'
382
383 (parse (seq (#\( (args (argument-list)) #\))
384 (postop "()" (state 10)
385 (cons (lambda (type)
386 (funcall (car state)
387 (make-function-type type args)))
388 (cdr state))))))
389
390 (dimension ()
391 ;; `[' c-fragment ']'
392
393 (parse (seq ((frag (parse-delimited-fragment
394 scanner #\[ #\])))
395 (c-fragment-text frag))))
396
397 (lbracket ()
398 ;; Postfix: dimension+
399
400 (parse (seq ((dims (list (:min 1) (dimension))))
401 (postop "[]" (state 10)
402 (cons (lambda (type)
403 (funcall (car state)
404 (make-array-type type dims)))
405 (cdr state)))))))
406
407 ;; And now we actually do the declarator parsing.
408 (parse (seq ((value (expr (:nestedp nestedp)
409
410 ;; An actual operand.
ea578bb4 411 (kernel)
239fa5bd
MW
412
413 ;; Binary operators. There aren't any.
414 nil
415
416 ;; Prefix operators.
417 (or (star)
418 (prefix-lparen))
419
420 ;; Postfix operators.
421 (or (postfix-lparen)
422 (lbracket)
423 (when nestedp (seq (#\)) (rparen #\))))))))
424 (cons (funcall (car value) base-type) (cdr value))))))))
dea4d055
MW
425
426;;;----- That's all, folks --------------------------------------------------