X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/3109662aca9c06495ac22c5c58b46e1c036aca5c..61d083c621c983b7bb06a2d7f3d2dc2a80a442b7:/src/fragment-parse.lisp diff --git a/src/fragment-parse.lisp b/src/fragment-parse.lisp index 6e71994..b1e9e8c 100644 --- a/src/fragment-parse.lisp +++ b/src/fragment-parse.lisp @@ -32,7 +32,9 @@ (defun scan-c-fragment (scanner end-chars) "Parse a C fragment from the SCANNER. - SCANNER must be a `sod-token-scanner' instance. + SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a + sequence of characters, any of which delimits the fragment. The + delimiting character is left current in the scanner. The parsing process is a simple approximation to C lexical analysis. It takes into account comments (both C and C++ style), string and character @@ -75,12 +77,14 @@ ((satisfies whitespace-char-p) (parse :whitespace)) ((scan-comment char-scanner)) - ;; See if we've reached the end. There's a small trick here: I - ;; capture the result in the `if-char' consequent to ensure - ;; that we don't include the delimiter. - ((if-char () (and (null delim) (member it end-chars)) - (values (result) t t) - (values end-chars nil nil)) + ;; See if we've reached the end. We must leave the delimiter + ;; in the scanner, so `if-char' and its various friends aren't + ;; appropriate. + ((lisp (if (and (null delim) + (member (scanner-current-char char-scanner) + end-chars)) + (values (result) t t) + (values end-chars nil nil))) (return (values it t t))) (:eof (lexer-error char-scanner '(:any) cp) @@ -111,21 +115,33 @@ (lexer-error char-scanner exp cp))))))))) (export 'parse-delimited-fragment) -(defun parse-delimited-fragment (scanner begin end) +(defun parse-delimited-fragment (scanner begin end &key keep-end) "Parse a C fragment delimited by BEGIN and END. - The BEGIN and END arguments are characters. (Currently, BEGIN can be any - token type, but you probably shouldn't rely on this.)" + The BEGIN and END arguments are the start and end delimiters. BEGIN can + be any token type, but is usually a delimiter character; it may also be t + to mean `don't care' -- but there must be an initial token of some kind + for annoying technical reasons. END may be either a character or a list + of characters. If KEEP-END is true, the trailing delimiter is left in the + token scanner so that it's available for further parsing decisions: this + is probably what you want if END is a list." ;; This is decidedly nasty. The basic problem is that `scan-c-fragment' ;; works at the character level rather than at the lexical level, and if we - ;; commit to the `[' too early then `scanner-step' will eat the first few - ;; characters of the fragment -- and then the rest of the parse will get - ;; horrifically confused. - - (if (eql (token-type scanner) begin) - (multiple-value-prog1 (values (scan-c-fragment scanner (list end)) t t) - (scanner-step scanner)) + ;; commit to the BEGIN character too early then `scanner-step' will eat the + ;; first few characters of the fragment -- and then the rest of the parse + ;; will get horrifically confused. + + (if (if (eq begin t) + (not (scanner-at-eof-p scanner)) + (eql (token-type scanner) begin)) + (multiple-value-prog1 (values (scan-c-fragment scanner + (if (listp end) + end + (list end))) + t t) + (scanner-step scanner) + (unless keep-end (scanner-step scanner))) (values (list begin) nil nil))) ;;;----- That's all, folks --------------------------------------------------