From c91b90c3bfd3d3e68cc3d3ff3f431d1e73920061 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Mon, 15 Jul 2013 01:54:09 +0100 Subject: [PATCH] Daily work in progress. --- src/fragment-parse.lisp | 50 ++++++++----- src/module-parse.lisp | 174 ++++++++++++++++++++++++++++++++++++++++++- src/parser/parser-proto.lisp | 2 +- src/pset-parse.lisp | 18 +++-- 4 files changed, 215 insertions(+), 29 deletions(-) diff --git a/src/fragment-parse.lisp b/src/fragment-parse.lisp index 6e71994..b1e9e8c 100644 --- a/src/fragment-parse.lisp +++ b/src/fragment-parse.lisp @@ -32,7 +32,9 @@ (defun scan-c-fragment (scanner end-chars) "Parse a C fragment from the SCANNER. - SCANNER must be a `sod-token-scanner' instance. + SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a + sequence of characters, any of which delimits the fragment. The + delimiting character is left current in the scanner. The parsing process is a simple approximation to C lexical analysis. It takes into account comments (both C and C++ style), string and character @@ -75,12 +77,14 @@ ((satisfies whitespace-char-p) (parse :whitespace)) ((scan-comment char-scanner)) - ;; See if we've reached the end. There's a small trick here: I - ;; capture the result in the `if-char' consequent to ensure - ;; that we don't include the delimiter. - ((if-char () (and (null delim) (member it end-chars)) - (values (result) t t) - (values end-chars nil nil)) + ;; See if we've reached the end. We must leave the delimiter + ;; in the scanner, so `if-char' and its various friends aren't + ;; appropriate. 
+ ((lisp (if (and (null delim) + (member (scanner-current-char char-scanner) + end-chars)) + (values (result) t t) + (values end-chars nil nil))) (return (values it t t))) (:eof (lexer-error char-scanner '(:any) cp) @@ -111,21 +115,33 @@ (lexer-error char-scanner exp cp))))))))) (export 'parse-delimited-fragment) -(defun parse-delimited-fragment (scanner begin end) +(defun parse-delimited-fragment (scanner begin end &key keep-end) "Parse a C fragment delimited by BEGIN and END. - The BEGIN and END arguments are characters. (Currently, BEGIN can be any - token type, but you probably shouldn't rely on this.)" + The BEGIN and END arguments are the start and end delimiters. BEGIN can + be any token type, but is usually a delimiter character; it may also be t + to mean `don't care' -- but there must be an initial token of some kind + for annoying technical reasons. END may be either a character or a list + of characters. If KEEP-END is true, the trailing delimiter is left in the + token scanner so that it's available for further parsing decisions: this + is probably what you want if END is a list." ;; This is decidedly nasty. The basic problem is that `scan-c-fragment' ;; works at the character level rather than at the lexical level, and if we - ;; commit to the `[' too early then `scanner-step' will eat the first few - ;; characters of the fragment -- and then the rest of the parse will get - ;; horrifically confused. - - (if (eql (token-type scanner) begin) - (multiple-value-prog1 (values (scan-c-fragment scanner (list end)) t t) - (scanner-step scanner)) + ;; commit to the BEGIN character too early then `scanner-step' will eat the + ;; first few characters of the fragment -- and then the rest of the parse + ;; will get horrifically confused. 
+ + (if (if (eq begin t) + (not (scanner-at-eof-p scanner)) + (eql (token-type scanner) begin)) + (multiple-value-prog1 (values (scan-c-fragment scanner + (if (listp end) + end + (list end))) + t t) + (scanner-step scanner) + (unless keep-end (scanner-step scanner))) (values (list begin) nil nil))) ;;;----- That's all, folks -------------------------------------------------- diff --git a/src/module-parse.lisp b/src/module-parse.lisp index f87c586..6fb6be8 100644 --- a/src/module-parse.lisp +++ b/src/module-parse.lisp @@ -155,12 +155,180 @@ ;;;-------------------------------------------------------------------------- ;;; Class declarations. +(defun parse-class-body (scanner pset name supers) + ;; class-body ::= `{' class-item* `}' + (with-parser-context (token-scanner-context :scanner scanner) + (make-class-type name) + (let* ((class (make-sod-class name (mapcat #'find-sod-class supers) + pset scanner)) + (nick (sod-class-nickname class))) + + (labels ((parse-maybe-dotted-declarator (base-type) + ;; Parse a declarator or dotted-declarator, i.e., one whose + ;; centre is + ;; + ;; maybe-dotted-identifier ::= [id `.'] id + ;; + ;; A plain identifier is returned as a string, as usual; a + ;; dotted identifier is returned as a cons cell of the two + ;; names. + (parse-declarator + scanner base-type + :centre (parser () + (seq ((name-a :id) + (name-b (? (seq (#\. 
(id :id)) id)))) + (if name-b (cons name-a name-b) + name-a))))) + + ;; class-item ::= [property-set] raw-class-item + ;; + + (parse-message-item (sub-pset type name) + ;; message-item ::= + ;; declspec+ declarator -!- (method-body | `;') + (make-sod-message class name type sub-pset scanner) + (parse (or #\; (parse-method-item nil type nick name)))) + + (parse-method-item (sub-pset type sub-nick name) + ;; method-item ::= + ;; declspec+ dotted-declarator -!- method-body + ;; + ;; method-body ::= `{' c-fragment `}' | `extern' `;' + (parse (seq ((body (or (seq ("extern" #\;) nil) + (parse-delimited-fragment + scanner #\{ #\})))) + (make-sod-method class sub-nick name type + body sub-pset scanner)))) + + (parse-initializer () + ;; initializer ::= `=' c-fragment | `=' `{' c-fragment `}' + ;; + ;; Return (VALUE-KIND . VALUE-FORM), ready for passing to a + ;; `sod-initializer' constructor. + (parse (or (peek (seq (#\= (frag (parse-delimited-fragment + scanner #\{ #\}))) + (cons :compound frag))) + (seq ((frag (parse-delimited-fragment + scanner #\= '(#\; #\,) + :keep-end t))) + (cons :simple frag))))) + + (parse-slot-item (sub-pset base-type type name) + ;; slot-item ::= + ;; declspec+ declarator -!- [initializer] + ;; [`,' init-declarator-list] `;' + ;; + ;; init-declarator-list ::= + ;; declarator [initializer] [`,' init-declarator-list] + (parse (and (seq ((init (? (parse-initializer)))) + (make-sod-slot class name type + sub-pset scanner) + (when init + (make-sod-instance-initializer + class nick name (car init) (cdr init) + nil scanner))) + (skip-many () + (seq (#\, + (ds (parse-declarator scanner + base-type)) + (init (? 
(parse-initializer)))) + (make-sod-slot class (cdr ds) (car ds) + sub-pset scanner) + (when init + (make-sod-instance-initializer + class nick (cdr ds) + (car init) (cdr init) + nil scanner)))) + #\;))) + + (parse-initializer-item (sub-pset constructor) + ;; initializer-item ::= + ;; [`class'] -!- slot-initializer-list `;' + ;; + ;; slot-initializer ::= id `.' id initializer + (parse (and (skip-many () + (seq ((name-a :id) #\. (name-b :id) + (init (parse-initializer))) + (funcall constructor class + name-a name-b + (car init) (cdr init) + sub-pset scanner)) + #\,) + #\;))) + + (class-item-dispatch (sub-pset base-type type name) + ;; Logically part of `parse-raw-class-item', but the + ;; indentation was getting crazy. We're currently at + ;; + ;; raw-class-item ::= + ;; declspec+ (declarator | dotted-declarator) -!- ... + ;; | other-items + ;; + ;; If the declarator is dotted then this must be a method + ;; definition; otherwise it might be a message or slot. + (cond ((not (typep type 'c-function-type)) + (when (consp name) + (cerror*-with-location + scanner + "Method declarations must have function type.") + (setf name (cdr name))) + (parse-slot-item sub-pset base-type type name)) + ((consp name) + (parse-method-item sub-pset type + (car name) (cdr name))) + (t + (parse-message-item sub-pset type name)))) + + (parse-raw-class-item (sub-pset) + ;; raw-class-item ::= + ;; message-item + ;; | method-item + ;; | slot-item + ;; | initializer-item + ;; + ;; Most of the above begin with declspecs and a declarator + ;; (which might be dotted). So we parse that here and + ;; dispatch based on what we find. 
+ (parse (or (peek + (seq ((ds (parse-c-type scanner)) + (dc (parse-maybe-dotted-declarator ds)) + (result (class-item-dispatch sub-pset + ds + (car dc) + (cdr dc)))) + result)) + (and "class" + (parse-initializer-item + sub-pset + #'make-sod-class-initializer)) + (parse-initializer-item + sub-pset + #'make-sod-instance-initializer))))) + + (parse (and #\{ + (skip-many () + (seq ((sub-pset (? (parse-property-set scanner))) + (nil (parse-raw-class-item sub-pset))))) + #\})))))) + (define-pluggable-parser module class (scanner) - ;; `class' id [`:' id-list] `{' class-item* `}' + ;; `class' id [`:' id-list] class-body + ;; `class' id `;' (with-parser-context (token-scanner-context :scanner scanner) - (labels ((parse-item () - ;; class-item ::= property-set + (parse (seq ("class" + (name :id) + (nil (or (seq (#\;) + (make-class-type name)) + (seq ((supers (? (seq (#\: (ids (list () :id #\,))) + ids))) + (nil (parse-class-body + scanner + pset name supers))))))))))) + + + + (parse (seq ("class" (name :id) (supers (? (seq (#\: (supers (list (:min 1) :id #\,))) diff --git a/src/parser/parser-proto.lisp b/src/parser/parser-proto.lisp index 879db4c..f60e425 100644 --- a/src/parser/parser-proto.lisp +++ b/src/parser/parser-proto.lisp @@ -843,7 +843,7 @@ A token matches under the following conditions: * If the value of TYPE is `t' then the match succeeds if and only if the - parser it not at end-of-file. + parser is not at end-of-file. * If the value of TYPE is not `eql' to the token type then the match fails. diff --git a/src/pset-parse.lisp b/src/pset-parse.lisp index be7984e..0bc4680 100644 --- a/src/pset-parse.lisp +++ b/src/pset-parse.lisp @@ -23,6 +23,8 @@ ;;; along with SOD; if not, write to the Free Software Foundation, ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +(in-package #:sod) + ;;;-------------------------------------------------------------------------- ;;; The expression parser. 
@@ -127,14 +129,14 @@ (export 'parse-property-set) (defun parse-property-set (scanner) - "Parse an optional property set from the SCANNER and return it, or `nil'." - ;; property-set ::= [`[' property-list `]'] + "Parse a property set from the SCANNER and return it." + ;; property-set ::= `[' property-list `]' (with-parser-context (token-scanner-context :scanner scanner) - (parse (? (seq (#\[ - (pset (many (pset (make-property-set) pset) - (parse-property scanner pset) - #\,)) - #\]) - pset))))) + (parse (seq (#\[ + (pset (many (pset (make-property-set) pset) + (parse-property scanner pset) + #\,)) + #\]) + pset)))) ;;;----- That's all, folks -------------------------------------------------- -- 2.11.0