X-Git-Url: https://git.distorted.org.uk/~mdw/sod/blobdiff_plain/3109662aca9c06495ac22c5c58b46e1c036aca5c..61d083c621c983b7bb06a2d7f3d2dc2a80a442b7:/src/fragment-parse.lisp

diff --git a/src/fragment-parse.lisp b/src/fragment-parse.lisp
index 6e71994..b1e9e8c 100644
--- a/src/fragment-parse.lisp
+++ b/src/fragment-parse.lisp
@@ -32,7 +32,9 @@
 (defun scan-c-fragment (scanner end-chars)
   "Parse a C fragment from the SCANNER.
 
-   SCANNER must be a `sod-token-scanner' instance.
+   SCANNER must be a `sod-token-scanner' instance.  END-CHARS is a list of
+   characters, any of which delimits the fragment.  The delimiting character
+   is left current in the scanner.
 
    The parsing process is a simple approximation to C lexical analysis.  It
    takes into account comments (both C and C++ style), string and character
@@ -75,12 +77,14 @@
 	      ((satisfies whitespace-char-p) (parse :whitespace))
 	      ((scan-comment char-scanner))
 
-	      ;; See if we've reached the end.  There's a small trick here: I
-	      ;; capture the result in the `if-char' consequent to ensure
-	      ;; that we don't include the delimiter.
-	      ((if-char () (and (null delim) (member it end-chars))
-		 (values (result) t t)
-		 (values end-chars nil nil))
+	      ;; See if we've reached the end.  We must leave the delimiter
+	      ;; in the scanner, so `if-char' and its various friends aren't
+	      ;; appropriate.
+	      ((lisp (if (and (null delim)
+			      (member (scanner-current-char char-scanner)
+				      end-chars))
+			 (values (result) t t)
+			 (values end-chars nil nil)))
 	       (return (values it t t)))
 	      (:eof
 	       (lexer-error char-scanner '(:any) cp)
@@ -111,21 +115,33 @@
 	       (lexer-error char-scanner exp cp)))))))))
 
 (export 'parse-delimited-fragment)
-(defun parse-delimited-fragment (scanner begin end)
+(defun parse-delimited-fragment (scanner begin end &key keep-end)
   "Parse a C fragment delimited by BEGIN and END.
 
-   The BEGIN and END arguments are characters.  (Currently, BEGIN can be any
-   token type, but you probably shouldn't rely on this.)"
+   The BEGIN and END arguments are the start and end delimiters.  BEGIN can
+   be any token type, but is usually a delimiter character; it may also be t
+   to mean `don't care' -- but there must be an initial token of some kind
+   for annoying technical reasons.  END may be either a character or a list
+   of characters.  If KEEP-END is true, the trailing delimiter is left in the
+   token scanner so that it's available for further parsing decisions: this
+   is probably what you want if END is a list."
 
   ;; This is decidedly nasty.  The basic problem is that `scan-c-fragment'
   ;; works at the character level rather than at the lexical level, and if we
-  ;; commit to the `[' too early then `scanner-step' will eat the first few
-  ;; characters of the fragment -- and then the rest of the parse will get
-  ;; horrifically confused.
-
-  (if (eql (token-type scanner) begin)
-      (multiple-value-prog1 (values (scan-c-fragment scanner (list end)) t t)
-	(scanner-step scanner))
+  ;; commit to the BEGIN character too early then `scanner-step' will eat the
+  ;; first few characters of the fragment -- and then the rest of the parse
+  ;; will get horrifically confused.
+
+  (if (if (eq begin t)
+	  (not (scanner-at-eof-p scanner))
+	  (eql (token-type scanner) begin))
+      (multiple-value-prog1 (values (scan-c-fragment scanner
+						     (if (listp end)
+							 end
+							 (list end)))
+				    t t)
+	(scanner-step scanner)
+	(unless keep-end (scanner-step scanner)))
       (values (list begin) nil nil)))
 
 ;;;----- That's all, folks --------------------------------------------------