;;; -*-lisp-*- ;;; ;;; Parser for C types ;;; ;;; (c) 2009 Straylight/Edgeware ;;; ;;;----- Licensing notice --------------------------------------------------- ;;; ;;; This file is part of the Simple Object Definition system. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 2 of the License, or ;;; (at your option) any later version. ;;; ;;; SOD is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. (cl:in-package #:sod) ;;;-------------------------------------------------------------------------- ;;; Declaration specifiers. ;;; ;;; This is a little messy. The C rules, which we're largely following, ;;; allow declaration specifiers to be written in any oreder, and allows an ;;; arbitrary number of the things. This is mainly an exercise in ;;; book-keeping, but we make an effort to categorize the various kinds of ;;; specifiers rather better than the C standard. ;;; ;;; We consider four kinds of declaration specifiers: ;;; ;;; * Type qualifiers: `const', `restrict', and `volatile'. ;;; * Sign specifiers: `signed' and `unsigned'. ;;; * Size specifiers: `short' and `long'. ;;; * Type specifiers: `void', `char', `int', `float', and `double', ;;; ;;; The C standard acknowledges the category of type qualifiers (6.7.3), but ;;; groups the other three kinds together and calls them all `type ;;; specifiers' (6.7.2). (defstruct (declspec (:predicate declspecp)) "Represents a declaration specifier being built." (qualifiers nil :type list) (sign nil :type (member nil :signed :unsigned)) (size nil :type (member nil :short :long :long-long)) (type nil :type (or (member nil :int :char :float :double :void) c-type))) (defun check-declspec (spec) "Check that the declaration specifiers in SPEC are a valid combination. This is surprisingly hairy. It could be even worse: at least validity is monotonic. Consider an alternate language where `double' is a size specifier like `long' rather than being a primary type specifier like `float' (so you'd be able to say things like `long double float'). Then `long float' would be invalid, but `long float double' would be OK. We'd therefore need an additional argument to know whether we were preparing a final set of specifiers (in which case we'd have to reject `long float') or whether this is an intermediate step (in which case we'd have to tentatively allow it in the hope that the user added the necessary `double' later)." (let ((sign (declspec-sign spec)) (size (declspec-size spec)) (type (declspec-type spec))) (and (loop for (good-type good-signs good-sizes) in ;; The entries in this table have the form (GOOD-TYPE ;; GOOD-SIGNS GOOD-SIZES). The GOOD-TYPE is either a keyword ;; or T (matches anything); the GOOD-SIZES and GOOD-SIGNS are ;; lists. The SPEC must match at least one entry, as follows: ;; the type must be NIL or match GOOD-TYPE; and the size and ;; sign must match one of the elements of the corresponding ;; GOOD list. '((:int (nil :signed :unsigned) (nil :short :long :long-long)) (:char (nil :signed :unsigned) (nil)) (:double (nil) (nil :long)) (t (nil) (nil))) thereis (and (or (eq type nil) (eq good-type t) (eq type good-type)) (member sign good-signs) (member size good-sizes))) spec))) (defun update-declspec-qualifiers (spec qual) "Update the qualifiers in SPEC by adding QUAL. The new declspec is returned if it's valid; otherwise NIL. SPEC is not modified." (let ((new (copy-declspec spec))) (pushnew qual (declspec-qualifiers new)) (check-declspec new))) (defun update-declspec-sign (spec sign) "Update the signedness in SPEC to be SIGN. The new declspec is returned if it's valid; otherwise NIL. SPEC is not modified." (and (null (declspec-sign spec)) (let ((new (copy-declspec spec))) (setf (declspec-sign new) sign) (check-declspec new)))) (defun update-declspec-size (spec size) "Update the size in SPEC according to SIZE. The new declspec is returned if it's valid; otherwise NIL. (This is a little subtle because :LONG in particular can modify an existing size entry.) SPEC is not modified." (let ((new-size (case (declspec-size spec) ((nil) size) (:long (if (eq size :long) :long-long nil))))) (and new-size (let ((new (copy-declspec spec))) (setf (declspec-size new) new-size) (check-declspec new))))) (defun update-declspec-type (spec type) "Update the type in SPEC to be TYPE. The new declspec is returned if it's valid; otherwise NIL. SPEC is not modified." (and (null (declspec-type spec)) (let ((new (copy-declspec spec))) (setf (declspec-type new) type) (check-declspec new)))) (defun canonify-declspec (spec) "Transform the declaration specifiers SPEC into a canonical form. The idea is that, however grim the SPEC, we can turn it into something vaguely idiomatic, and pick precisely one of the possible synonyms. The rules are that we suppress `signed' when it's redundant, and suppress `int' if a size or signedness specifier is present. (Note that `signed char' is not the same as `char', so stripping `signed' is only correct when the type is `int'.) The qualifiers are sorted and uniquified here; the relative ordering of the sign/size/type specifiers will be determined by DECLSPEC-KEYWORDS." (let ((quals (declspec-qualifiers spec)) (sign (declspec-sign spec)) (size (declspec-size spec)) (type (declspec-type spec))) (cond ((eq type :int) (when (eq sign :signed) (setf (declspec-sign spec) nil)) (when (or sign size) (setf (declspec-type spec) nil))) ((not (or sign size type)) (setf (declspec-type spec) :int))) (setf (declspec-qualifiers spec) (delete-duplicates (sort (copy-list quals) #'string<))) spec)) (defun declspec-keywords (spec &optional qualsp) "Return a list of strings for the declaration specifiers SPEC. If QUALSP then return the type qualifiers as well." (let ((quals (declspec-qualifiers spec)) (sign (declspec-sign spec)) (size (declspec-size spec)) (type (declspec-type spec))) (nconc (and qualsp (mapcar #'string-downcase quals)) (and sign (list (string-downcase sign))) (case size ((nil) nil) (:long-long (list "long long")) (t (list (string-downcase size)))) (etypecase type (null nil) (keyword (list (string-downcase type))) (simple-c-type (list (c-type-name type))) (tagged-c-type (list (string-downcase (c-tagged-type-kind type)) (c-type-tag type))))))) (defun declspec-c-type (spec) "Return a C-TYPE object corresponding to SPEC." (canonify-declspec spec) (let* ((type (declspec-type spec)) (base (etypecase type (symbol (make-simple-type (format nil "~{~A~^ ~}" (declspec-keywords spec)))) (c-type type)))) (qualify-type base (declspec-qualifiers spec)))) (defun declaration-specifier-p (lexer) "Answer whether the current token might be a declaration specifier." (case (token-type lexer) ((:const :volatile :restrict :signed :unsigned :short :long :void :char :int :float :double :enum :struct :union) t) (:id (gethash (token-value lexer) *type-map*)) (t nil))) (defun parse-c-type (lexer) "Parse declaration specifiers from LEXER and return a C-TYPE." (let ((spec (make-declspec)) (found-any nil)) (loop (let ((tok (token-type lexer))) (labels ((update (func value) (let ((new (funcall func spec value))) (cond (new (setf spec new)) (t (cerror* "Invalid declaration specifier ~(~A~) after `~{~A~^ ~}' (ignored)" (format-token tok (token-value lexer)) (declspec-keywords spec t)) nil)))) (tagged (class) (let ((kind tok)) (setf tok (next-token lexer)) (if (eql tok :id) (when (update #'update-declspec-type (make-instance class :tag (token-value lexer))) (setf found-any t)) (cerror* "Expected ~(~A~) tag; found ~A" kind (format-token lexer)))))) (case tok ((:const :volatile :restrict) (update #'update-declspec-qualifiers tok)) ((:signed :unsigned) (when (update #'update-declspec-sign tok) (setf found-any t))) ((:short :long) (when (update #'update-declspec-size tok) (setf found-any t))) ((:void :char :int :float :double) (when (update #'update-declspec-type tok) (setf found-any t))) (:enum (tagged 'c-enum-type)) (:struct (tagged 'c-struct-type)) (:union (tagged 'c-union-type)) (:id (let ((ty (gethash (token-value lexer) *type-map*))) (when (or found-any (not ty)) (return)) (when (update #'update-declspec-type ty) (setf found-any t)))) (t (return)))) (setf tok (next-token lexer)))) (unless found-any (cerror* "Missing type name (guessing at `int')")) (declspec-c-type spec))) ;;;-------------------------------------------------------------------------- ;;; Parsing declarators. ;;; ;;; This is a whole different ball game. The syntax is simple enough, but ;;; the semantics is inside-out in a particularly unpleasant way. ;;; ;;; The basic idea is that declarator operators closer to the identifier (or ;;; where the identifier would be) should be applied last (with postfix ;;; operators being considered `closer' than prefix). ;;; ;;; One might thing that we can process prefix operators immediately. For ;;; outer prefix operators, this is indeed correct, but in `int (*id)[]', for ;;; example, we must wait to process the array before applying the pointer. ;;; ;;; We can translate each declarator operator into a function which, given a ;;; type, returns the appropriate derived type. If we can arrange these ;;; functions in the right order during the parse, we have only to compose ;;; them together and apply them to the base type in order to finish the job. ;;; ;;; Consider the following skeletal declarator, with <> as a parenthesized ;;; subdeclarator within. ;;; ;;; * * <> [] [] ---> a b d c z ;;; a b z c d ;;; ;;; The algorithm is therefore as follows. We first read the prefix ;;; operators, translate them into closures, and push them onto a list. Each ;;; parenthesized subdeclarator gets its own list, and we push those into a ;;; stack each time we encounter a `('. We then parse the middle bit, which ;;; is a little messy (see the comment there), and start an empty final list ;;; of operators. Finally, we scan postfix operators; these get pushed onto ;;; the front of the operator list as we find them. Each time we find a `)', ;;; we reverse the current prefix-operators list, and attach it to the front ;;; of the operator list, and pop a new prefix list off the stack: at this ;;; point, the operator list reflects the type of the subdeclarator we've ;;; just finished. Eventually we should reach the end with an empty stack ;;; and a prefix list, which again we reverse and attach to the front of the ;;; list. ;;; ;;; Finally, we apply the operator functions in order. (defun parse-c-declarator (lexer type &key abstractp dottedp) "Parse a declarator. Return two values: the complete type, and the name. Parse a declarator from LEXER. The base type is given by TYPE. If ABSTRACTP is NIL, then require a name; if T then forbid a name; if :MAYBE then don't care either way. If no name is given, return NIL. If DOTTEDP then the name may be a dotted item name `NICK.NAME', returned as a cons (NICK . NAME)." (let ((ops nil) (item nil) (stack nil) (prefix nil)) ;; Scan prefix operators. (loop (case (token-type lexer) ;; Star: a pointer type. (#\* (let ((quals nil) (tok (next-token lexer))) ;; Gather following qualifiers. (loop (case tok ((:const :volatile :restrict) (pushnew tok quals)) (t (return)))) ;; And stash the item. (setf quals (sort quals #'string<)) (push (lambda (ty) (make-instance 'c-pointer-type :qualifiers quals :subtype ty)) prefix))) ;; An open-paren: start a new level of nesting. Maybe. There's an ;; unpleasant ambiguity (DR9, DR249) between a parenthesized ;; subdeclarator and a postfix function argument list following an ;; omitted name. If the next thing looks like it might appear as a ;; declaration specifier then assume it is one, push the paren back, ;; and leave; do the same if the parens are empty, because that's not ;; allowed otherwise. (#\( (let ((tok (next-token lexer))) (when (and abstractp (or (eql tok #\)) (declaration-specifier-p lexer))) (pushback-token lexer #\() (return)) (push prefix stack) (setf prefix nil))) ;; Anything else: we're done. (t (return)))) ;; We're now at the middle of the declarator. If there's an item name ;; here, we want to snarf it. (when (and (not (eq abstractp t)) (eq (token-type lexer) :id)) (let ((name (token-value lexer))) (next-token lexer) (cond ((and dottedp (eq (token-type lexer) #\.)) (let ((sub (require-token :id :default (gensym)))) (setf item (cons name sub)))) (t (setf item name))))) ;; If we were meant to have a name, but weren't given one, make one up. (when (and (null item) (not abstractp)) (cerror* "Missing name; inventing one") (setf item (gensym))) ;; Finally scan the postfix operators. (loop (case (token-type lexer) ;; Open-bracket: an array. The dimensions are probably some ;; gods-awful C expressions which we'll just tuck away rather than ;; thinking about too carefully. Our representation of C types is ;; capable of thinking about multidimensional arrays, so we slurp up ;; as many dimensions as we can. (#\[ (let ((dims nil)) (loop (let* ((frag (scan-c-fragment lexer '(#\]))) (dim (c-fragment-text frag))) (push (if (plusp (length dim)) dim nil) dims)) (next-token lexer) (unless (eq (next-token lexer) #\[) (return))) (setf dims (nreverse dims)) (push (lambda (ty) (when (typep ty 'c-function-type) (error "Array element type cannot be ~ a function type")) (make-instance 'c-array-type :dimensions dims :subtype ty)) ops))) ;; Open-paren: a function with arguments. (#\( (let ((args nil)) (unless (eql (next-token lexer) #\)) (loop ;; Grab an argument and stash it. (cond ((eql (token-type lexer) :ellipsis) (push :ellipsis args)) (t (let ((base-type (parse-c-type lexer))) (multiple-value-bind (type name) (parse-c-declarator lexer base-type :abstractp :maybe) (push (make-argument name type) args))))) ;; Decide whether to take another one. (case (token-type lexer) (#\) (return)) (#\, (next-token lexer)) (t (cerror* "Missing `)' inserted before ~A" (format-token lexer)) (return))))) (next-token lexer) ;; Catch: if the only thing in the list is `void' (with no ;; identifier) then kill the whole thing. (setf args (if (and args (null (cdr args)) (eq (argument-type (car args)) (c-type void)) (not (argument-name (car args)))) nil (nreverse args))) ;; Stash the operator. (push (lambda (ty) (when (typep ty '(or c-function-type c-array-type)) (error "Function return type cannot be ~ a function or array type")) (make-instance 'c-function-type :arguments args :subtype ty)) ops))) ;; Close-paren: exit a level of nesting. Prepend the current prefix ;; list and pop a new level. If there isn't one, this isn't our ;; paren, so we're done. (#\) (unless stack (return)) (setf ops (nreconc prefix ops) prefix (pop stack)) (next-token lexer)) ;; Anything else means we've finished. (t (return)))) ;; If we still have operators stacked then something went wrong. (setf ops (nreconc prefix ops)) (when stack (cerror* "Missing `)'(s) inserted before ~A" (format-token lexer)) (dolist (prefix stack) (setf ops (nreconc prefix ops)))) ;; Finally, grind through the list of operations. (do ((ops ops (cdr ops)) (type type (funcall (car ops) type))) ((endp ops) (values type item))))) ;;;-------------------------------------------------------------------------- ;;; Testing cruft. #+test (with-input-from-string (in " // int stat(struct stat *st) // void foo(void) // int vsnprintf(size_t n, char *buf, va_list ap) // size_t size_t; // int (*signal(int sig, int (*handler)(int s)))(int t) ") (let* ((stream (make-instance 'position-aware-input-stream :file "" :stream in)) (lex (make-instance 'sod-lexer :stream stream :keywords *sod-keywords*))) (next-char lex) (next-token lex) (let ((ty (parse-c-type lex))) (multiple-value-bind (type name) (parse-c-declarator lex ty) (list ty (list type name) (with-output-to-string (out) (pprint-c-type type out name) (format-token lex))))))) ;;;----- That's all, folks --------------------------------------------------