| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Parsing C fragments from a scanner |
| 4 | ;;; |
| 5 | ;;; (c) 2010 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Fragment parsing. |
| 30 | |
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  The END-CHARS are a
   sequence of characters (a list, a string, or any other vector), any of
   which delimits the fragment when it is found outside of all brackets.
   The delimiting character is left current in the scanner.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals, and the nesting of round, square and curly brackets.

   Returns three values: a `c-fragment' object carrying the scanned text and
   the file location of its start, followed by two true values (presumably
   the parser protocol's success and consumed-input flags -- confirm against
   the parser macros).  Whitespace following the last newline or
   non-whitespace character of the text is trimmed off.  If end-of-file is
   reached before a delimiter, `lexer-error' is called and the text
   gathered so far is returned in the same way."

  (let ((char-scanner (token-scanner-char-scanner scanner))

        ;; The docstring promises that any sequence will do, but the
        ;; end-of-fragment test below uses `member', which demands a list.
        ;; Convert once, up front; a list argument is returned unchanged.
        (end-chars (coerce end-chars 'list))

        ;; DELIM is the closing bracket we're currently waiting for, or nil
        ;; if we're not inside any brackets; STACK holds the values of DELIM
        ;; for the enclosing bracket levels.
        (delim nil)
        (stack nil))
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Enter a bracketed region which will be closed by D,
                 ;; remembering the delimiter we were waiting for before.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment object from everything scanned since
                 ;; PLACE.  END is the index of the last character which is
                 ;; either a newline or not whitespace; anything after it is
                 ;; dropped.  If there is no such character at all then the
                 ;; text is empty.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  We must leave the delimiter
              ;; in the scanner, so `if-char' and its various friends aren't
              ;; appropriate.  An end character only counts if we're not
              ;; inside any brackets.
              ((lisp (if (and (null delim)
                              (member (scanner-current-char char-scanner)
                                      end-chars))
                         (values (result) t t)
                         (values end-chars nil nil)))
               (return (values it t t)))

              ;; Running out of input is an error, but hand back whatever
              ;; we managed to collect anyway.
              (:eof
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.  A
              ;; closing bracket which doesn't match the one we're waiting
              ;; for is reported, and the bracket state is left alone.
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  Skip to the matching
              ;; closing quote, stepping over backslash-escaped characters
              ;; so that an escaped quote doesn't end the literal early.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
| 116 | |
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end &key keep-end)
  "Parse a C fragment enclosed between the delimiters BEGIN and END.

   BEGIN is the token type expected to be current in the token SCANNER; it
   is usually a delimiter character.  It may instead be t, meaning that any
   token will do -- though, for annoying technical reasons, there must still
   be some token there, i.e., the scanner must not be at end-of-file.  END
   is either a single character or a list of characters, any of which ends
   the fragment.  If KEEP-END is true then the closing delimiter is left as
   the current token so that later parsing can inspect it: this is probably
   what you want if END is a list.

   On success, return the fragment and two true values.  If the current
   token doesn't match BEGIN, return a list containing BEGIN and two nil
   values, without touching the scanner."

  ;; This is decidedly nasty.  `scan-c-fragment' reads raw characters rather
  ;; than tokens.  If we stepped the token scanner past BEGIN before calling
  ;; it, `scanner-step' would swallow the first few characters of the
  ;; fragment and the rest of the parse would get horrifically confused.  So
  ;; we only peek at the current token, scan the fragment, and step the
  ;; token scanner afterwards.

  (flet ((at-begin-p ()
           ;; Is the current token an acceptable opening delimiter?
           (if (eq begin t)
               (not (scanner-at-eof-p scanner))
               (eql (token-type scanner) begin))))
    (cond ((not (at-begin-p))
           (values (list begin) nil nil))
          (t
           (let ((fragment (scan-c-fragment scanner
                                            (if (listp end)
                                                end
                                                (list end)))))

             ;; Step the token scanner now that the fragment has been read,
             ;; and once more to move past the closing delimiter unless the
             ;; caller asked to keep it.
             (scanner-step scanner)
             (unless keep-end (scanner-step scanner))
             (values fragment t t))))))
| 146 | |
| 147 | ;;;----- That's all, folks -------------------------------------------------- |