X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/aa14a4cddcb96b681d5c19a2ec8bad382f43b264..12949379840101e2d65883f29c5e8f0f6de49e9c:/src/fragment-parse.lisp diff --git a/src/fragment-parse.lisp b/src/fragment-parse.lisp index 5f58885..fcaa92e 100644 --- a/src/fragment-parse.lisp +++ b/src/fragment-parse.lisp @@ -7,7 +7,7 @@ ;;;----- Licensing notice --------------------------------------------------- ;;; -;;; This file is part of the Sensble Object Design, an object system for C. +;;; This file is part of the Sensible Object Design, an object system for C. ;;; ;;; SOD is free software; you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by @@ -32,15 +32,17 @@ (defun scan-c-fragment (scanner end-chars) "Parse a C fragment from the SCANNER. - SCANNER must be a `sod-token-scanner' instance. + SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a + sequence of characters, any of which delimits the fragment. The + delimiting character is left current in the scanner. The parsing process is a simple approximation to C lexical analysis. It takes into account comments (both C and C++ style), string and character literals." - (let ((char-scanner (token-scanner-char-scanner scanner)) - (delim nil) - (stack nil)) + (let* ((char-scanner (token-scanner-char-scanner scanner)) + (delim-match nil) (delim-found nil) (delim-loc nil) + (stack nil) (start nil) (tokstart nil) (eofwhine t)) (with-parser-context (character-scanner-context :scanner char-scanner) ;; Hack. If the first character is a newline then discard it @@ -49,11 +51,20 @@ (parse #\newline) ;; This seems the easiest way of gathering stuff. + (setf start (file-location char-scanner)) (with-scanner-place (place char-scanner) - (flet ((push-delim (d) - (push delim stack) - (setf delim d)) + (flet ((push-delim (found match) + (push (list delim-found delim-match delim-loc) stack) + (setf delim-found found + delim-match match + delim-loc tokstart)) + + (pop-delim () + (destructuring-bind (found match loc) (pop stack) + (setf delim-found found + delim-match match + delim-loc loc))) (result () (let* ((output (scanner-interval char-scanner place)) @@ -69,38 +80,68 @@ ;; March through characters until we reach the end. (loop + (setf tokstart (file-location char-scanner)) (cond-parse (:consumedp cp :expected exp) ;; Whitespace and comments are universally dull. ((satisfies whitespace-char-p) (parse :whitespace)) ((scan-comment char-scanner)) - ;; See if we've reached the end. There's a small trick here: I - ;; capture the result in the `if-char' consequent to ensure - ;; that we don't include the delimiter. - ((if-char () (and (null delim) (member it end-chars)) - (values (result) t t) - (values end-chars nil nil)) + ;; See if we've reached the end. We must leave the delimiter + ;; in the scanner, so `if-char' and its various friends aren't + ;; appropriate. + ((lisp (if (and (null delim-match) + (not (scanner-at-eof-p char-scanner)) + (member (scanner-current-char char-scanner) + end-chars)) + (values (result) t t) + (values end-chars nil nil))) (return (values it t t))) (:eof - (lexer-error char-scanner '(:any) cp) + (when eofwhine + (lexer-error char-scanner nil)) + (loop + (unless delim-found (return)) + (info-with-location delim-loc + "Unmatched `~C' found here" delim-found) + (pop-delim)) + (info-with-location start "C fragment started here") (return (values (result) t t))) ;; Opening and closing brackets. Opening brackets push things - ;; onto a stack; closing brackets pop things off again. - (#\( (push-delim #\))) - (#\[ (push-delim #\])) - (#\{ (push-delim #\})) - ((or #\) #\] #\}) - (if (eql it delim) - (setf delim (pop stack)) - (cerror* "Unmatched `~C.'." it))) + ;; onto a stack; closing brackets pop things off again. Pop a + ;; bracket even if it doesn't match, to encourage progress + ;; towards finding an end-delimiter. + (#\( (push-delim #\( #\))) + (#\[ (push-delim #\[ #\])) + (#\{ (push-delim #\{ #\})) + ((lisp (let ((char (scanner-current-char char-scanner))) + (case char + ((#\) #\] #\}) + (unless (eql char delim-match) + (lexer-error char-scanner + (and delim-match + (list delim-match))) + (when delim-loc + (info-with-location + delim-loc + "Mismatched `~C' found here" delim-found))) + (scanner-step char-scanner) + (when delim-match (pop-delim)) + (values char t t)) + (t + (values '(#\) #\] #\}) nil nil)))))) ;; String and character literals. ((seq ((quote (or #\" #\')) (nil (skip-many () - (or (and #\\ :any) (not quote)))) - (nil (char quote))))) + (or (and #\\ :any) (not quote)))) + (nil (or (char quote) + (seq (:eof) + (lexer-error char-scanner (list quote)) + (info-with-location tokstart + "Literal started here") + (setf eofwhine nil))))))) ;; Anything else. (:any) @@ -108,24 +149,39 @@ ;; This really shouldn't be able to happen. (t (assert cp) - (lexer-error char-scanner exp cp))))))))) + (when (scanner-at-eof-p char-scanner) + (setf eofwhine nil)) + (lexer-error char-scanner exp))))))))) (export 'parse-delimited-fragment) -(defun parse-delimited-fragment (scanner begin end) +(defun parse-delimited-fragment (scanner begin end &key keep-end) "Parse a C fragment delimited by BEGIN and END. - The BEGIN and END arguments are characters. (Currently, BEGIN can be any - token type, but you probably shouldn't rely on this.)" + The BEGIN and END arguments are the start and end delimiters. BEGIN can + be any token type, but is usually a delimiter character; it may also be t + to mean `don't care' -- but there must be an initial token of some kind + for annoying technical reasons. END may be either a character or a list + of characters. If KEEP-END is true, the trailing delimiter is left in the + token scanner so that it's available for further parsing decisions: this + is probably what you want if END is a list." ;; This is decidedly nasty. The basic problem is that `scan-c-fragment' ;; works at the character level rather than at the lexical level, and if we - ;; commit to the `[' too early then `scanner-step' will eat the first few - ;; characters of the fragment -- and then the rest of the parse will get - ;; horrifically confused. - - (if (eql (token-type scanner) begin) - (multiple-value-prog1 (values (scan-c-fragment scanner (list end)) t t) - (scanner-step scanner)) + ;; commit to the BEGIN character too early then `scanner-step' will eat the + ;; first few characters of the fragment -- and then the rest of the parse + ;; will get horrifically confused. + + (if (if (eq begin t) + (not (scanner-at-eof-p scanner)) + (eql (token-type scanner) begin)) + (multiple-value-prog1 + (values (scan-c-fragment scanner + (if (listp end) end + (list end))) + t + t) + (scanner-step scanner) + (unless keep-end (scanner-step scanner))) (values (list begin) nil nil))) ;;;----- That's all, folks --------------------------------------------------