| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Parsing C fragments from a scanner |
| 4 | ;;; |
| 5 | ;;; (c) 2010 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Fragment parsing. |
| 30 | |
| 31 | (export 'scan-c-fragment) |
| 32 | (defun scan-c-fragment (scanner end-chars) |
| 33 | "Parse a C fragment from the SCANNER. |
| 34 | |
| 35 | SCANNER must be a `sod-token-scanner' instance. The END-CHARS are a |
| 36 | sequence of characters, any of which delimits the fragment. The |
| 37 | delimiting character is left current in the scanner. |
| 38 | |
| 39 | The parsing process is a simple approximation to C lexical analysis. It |
| 40 | takes into account comments (both C and C++ style), string and character |
| 41 | literals." |
| 42 | |
| 43 | (let* ((char-scanner (token-scanner-char-scanner scanner)) |
| 44 | (delim-match nil) (delim-found nil) (delim-loc nil) |
| 45 | (stack nil) (start nil) (tokstart nil) (eofwhine t)) |
| 46 | (with-parser-context (character-scanner-context :scanner char-scanner) |
| 47 | |
| 48 | ;; Hack. If the first character is a newline then discard it |
| 49 | ;; immediately. If I don't, then the output will look strange and the |
| 50 | ;; location information will be unhelpful. |
| 51 | (parse #\newline) |
| 52 | |
| 53 | ;; This seems the easiest way of gathering stuff. |
| 54 | (setf start (file-location char-scanner)) |
| 55 | (with-scanner-place (place char-scanner) |
| 56 | |
| 57 | (flet ((push-delim (found match) |
| 58 | (push (list delim-found delim-match delim-loc) stack) |
| 59 | (setf delim-found found |
| 60 | delim-match match |
| 61 | delim-loc tokstart)) |
| 62 | |
| 63 | (pop-delim () |
| 64 | (destructuring-bind (found match loc) (pop stack) |
| 65 | (setf delim-found found |
| 66 | delim-match match |
| 67 | delim-loc loc))) |
| 68 | |
| 69 | (result () |
| 70 | (let* ((output (scanner-interval char-scanner place)) |
| 71 | (end (position-if (lambda (char) |
| 72 | (or (char= char #\newline) |
| 73 | (not |
| 74 | (whitespace-char-p char)))) |
| 75 | output :from-end t)) |
| 76 | (trimmed (if end (subseq output 0 (1+ end)) ""))) |
| 77 | (make-instance 'c-fragment |
| 78 | :location (file-location place) |
| 79 | :text trimmed)))) |
| 80 | |
| 81 | ;; March through characters until we reach the end. |
| 82 | (loop |
| 83 | (setf tokstart (file-location char-scanner)) |
| 84 | (cond-parse (:consumedp cp :expected exp) |
| 85 | |
| 86 | ;; Whitespace and comments are universally dull. |
| 87 | ((satisfies whitespace-char-p) (parse :whitespace)) |
| 88 | ((scan-comment char-scanner)) |
| 89 | |
| 90 | ;; See if we've reached the end. We must leave the delimiter |
| 91 | ;; in the scanner, so `if-char' and its various friends aren't |
| 92 | ;; appropriate. |
| 93 | ((lisp (if (and (null delim-match) |
| 94 | (not (scanner-at-eof-p char-scanner)) |
| 95 | (member (scanner-current-char char-scanner) |
| 96 | end-chars)) |
| 97 | (values (result) t t) |
| 98 | (values end-chars nil nil))) |
| 99 | (return (values it t t))) |
| 100 | (:eof |
| 101 | (when eofwhine |
| 102 | (lexer-error char-scanner nil)) |
| 103 | (loop |
| 104 | (unless delim-found (return)) |
| 105 | (info-with-location delim-loc |
| 106 | "Unmatched `~C' found here" delim-found) |
| 107 | (pop-delim)) |
| 108 | (info-with-location start "C fragment started here") |
| 109 | (return (values (result) t t))) |
| 110 | |
| 111 | ;; Opening and closing brackets. Opening brackets push things |
| 112 | ;; onto a stack; closing brackets pop things off again. Pop a |
| 113 | ;; bracket even if it doesn't match, to encourage progress |
| 114 | ;; towards finding an end-delimiter. |
| 115 | (#\( (push-delim #\( #\))) |
| 116 | (#\[ (push-delim #\[ #\])) |
| 117 | (#\{ (push-delim #\{ #\})) |
| 118 | ((lisp (let ((char (scanner-current-char char-scanner))) |
| 119 | (case char |
| 120 | ((#\) #\] #\}) |
| 121 | (unless (eql char delim-match) |
| 122 | (lexer-error char-scanner |
| 123 | (and delim-match |
| 124 | (list delim-match))) |
| 125 | (when delim-loc |
| 126 | (info-with-location |
| 127 | delim-loc |
| 128 | "Mismatched `~C' found here" delim-found))) |
| 129 | (scanner-step char-scanner) |
| 130 | (when delim-match (pop-delim)) |
| 131 | (values char t t)) |
| 132 | (t |
| 133 | (values '(#\) #\] #\}) nil nil)))))) |
| 134 | |
| 135 | ;; String and character literals. |
| 136 | ((seq ((quote (or #\" #\')) |
| 137 | (nil (skip-many () |
| 138 | (or (and #\\ :any) (not quote)))) |
| 139 | (nil (or (char quote) |
| 140 | (seq (:eof) |
| 141 | (lexer-error char-scanner (list quote)) |
| 142 | (info-with-location tokstart |
| 143 | "Literal started here") |
| 144 | (setf eofwhine nil))))))) |
| 145 | |
| 146 | ;; Anything else. |
| 147 | (:any) |
| 148 | |
| 149 | ;; This really shouldn't be able to happen. |
| 150 | (t |
| 151 | (assert cp) |
| 152 | (when (scanner-at-eof-p char-scanner) |
| 153 | (setf eofwhine nil)) |
| 154 | (lexer-error char-scanner exp))))))))) |
| 155 | |
| 156 | (export 'parse-delimited-fragment) |
| 157 | (defun parse-delimited-fragment (scanner begin end &key keep-end) |
| 158 | "Parse a C fragment delimited by BEGIN and END. |
| 159 | |
| 160 | The BEGIN and END arguments are the start and end delimiters. BEGIN can |
| 161 | be any token type, but is usually a delimiter character; it may also be t |
| 162 | to mean `don't care' -- but there must be an initial token of some kind |
| 163 | for annoying technical reasons. END may be either a character or a list |
| 164 | of characters. If KEEP-END is true, the trailing delimiter is left in the |
| 165 | token scanner so that it's available for further parsing decisions: this |
| 166 | is probably what you want if END is a list." |
| 167 | |
| 168 | ;; This is decidedly nasty. The basic problem is that `scan-c-fragment' |
| 169 | ;; works at the character level rather than at the lexical level, and if we |
| 170 | ;; commit to the BEGIN character too early then `scanner-step' will eat the |
| 171 | ;; first few characters of the fragment -- and then the rest of the parse |
| 172 | ;; will get horrifically confused. |
| 173 | |
| 174 | (if (if (eq begin t) |
| 175 | (not (scanner-at-eof-p scanner)) |
| 176 | (eql (token-type scanner) begin)) |
| 177 | (multiple-value-prog1 |
| 178 | (values (scan-c-fragment scanner |
| 179 | (if (listp end) end |
| 180 | (list end))) |
| 181 | t |
| 182 | t) |
| 183 | (scanner-step scanner) |
| 184 | (unless keep-end (scanner-step scanner))) |
| 185 | (values (list begin) nil nil))) |
| 186 | |
| 187 | ;;;----- That's all, folks -------------------------------------------------- |