| 1 | ;;; -*-lisp-*- |
| 2 | ;;; |
| 3 | ;;; Parsing C fragments from a scanner |
| 4 | ;;; |
| 5 | ;;; (c) 2010 Straylight/Edgeware |
| 6 | ;;; |
| 7 | |
| 8 | ;;;----- Licensing notice --------------------------------------------------- |
| 9 | ;;; |
| 10 | ;;; This file is part of the Sensible Object Design, an object system for C. |
| 11 | ;;; |
| 12 | ;;; SOD is free software; you can redistribute it and/or modify |
| 13 | ;;; it under the terms of the GNU General Public License as published by |
| 14 | ;;; the Free Software Foundation; either version 2 of the License, or |
| 15 | ;;; (at your option) any later version. |
| 16 | ;;; |
| 17 | ;;; SOD is distributed in the hope that it will be useful, |
| 18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;;; GNU General Public License for more details. |
| 21 | ;;; |
| 22 | ;;; You should have received a copy of the GNU General Public License |
| 23 | ;;; along with SOD; if not, write to the Free Software Foundation, |
| 24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 25 | |
| 26 | (in-package #:sod) |
| 27 | |
| 28 | ;;;-------------------------------------------------------------------------- |
| 29 | ;;; Fragment parsing. |
| 30 | |
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  END-CHARS is a list of
   characters; the fragment ends at the first of these characters which is
   found while no bracket is open and which is not part of a comment or of
   a string or character literal.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals.  Round, square and curly brackets are tracked so that they
   nest; a closing bracket which doesn't match the innermost open one is
   reported as a continuable error and scanning carries on.

   Returns three values: a `c-fragment' object, then `t' and `t'.  The
   fragment's text is everything scanned up to, but not including, the
   terminating character, with trailing whitespace after the last newline
   or non-whitespace character removed; its location is that of the start
   of the fragment.  If end-of-file is reached before a terminating
   character is found, a lexer error is reported and the text gathered so
   far is returned in the same way."

  (let ((char-scanner (token-scanner-char-scanner scanner))
        (delim nil)
        (stack nil))
    ;; DELIM is the closing bracket we're currently waiting for, or nil if no
    ;; bracket is open; STACK holds the values of DELIM for the enclosing
    ;; brackets.
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Remember the delimiter we were waiting for, and start
                 ;; waiting for D instead.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment from the text between PLACE and the
                 ;; scanner's current position.  Trailing whitespace is
                 ;; dropped, but only back as far as the last newline or
                 ;; non-whitespace character, which is itself kept.  If
                 ;; there is no such character, the text is empty.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  There's a small trick here: I
              ;; capture the result in the `if-char' consequent to ensure
              ;; that we don't include the delimiter.  The alternative
              ;; branch hands back END-CHARS with two nil flags; this looks
              ;; like the parser protocol's way of reporting a failed match
              ;; together with what was expected -- confirm against the
              ;; definition of `if-char'.
              ((if-char () (and (null delim) (member it end-chars))
                   (values (result) t t)
                   (values end-chars nil nil))
               (return (values it t t)))

              ;; Running out of input is an error, but we still hand back
              ;; whatever we managed to collect.
              (:eof
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   ;; A mismatched closer leaves DELIM and STACK alone.
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  Inside a literal, a
              ;; backslash is skipped together with the character after it,
              ;; so an escaped quote doesn't end the literal.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
| 112 | |
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end)
  "Parse a C fragment which is bracketed by BEGIN and END.

   BEGIN and END are both characters.  (As things stand, BEGIN is simply
   compared against the scanner's current token type, so any token type
   would do; but it's best not to depend on that.)

   If the current token type of SCANNER is BEGIN, then the fragment is read
   by `scan-c-fragment', using END as its only terminating character, the
   scanner is stepped once, and the values returned are the fragment, `t'
   and `t'.  Otherwise nothing is read, and the values returned are a list
   containing just BEGIN, nil and nil."

  ;; This is rather unpleasant.  `scan-c-fragment' reads individual
  ;; characters, not tokens.  So we must only look at the current token type
  ;; here, and not step the token scanner past the opening delimiter: doing
  ;; so would let `scanner-step' swallow the first few characters of the
  ;; fragment, and everything after that would be thoroughly muddled.  We
  ;; therefore step the scanner only once the fragment has been collected.

  (cond ((eql (token-type scanner) begin)
         ;; Only the primary value of `scan-c-fragment' is of interest.
         (let ((fragment (scan-c-fragment scanner (list end))))
           (scanner-step scanner)
           (values fragment t t)))
        (t
         (values (list begin) nil nil))))
| 130 | |
| 131 | ;;;----- That's all, folks -------------------------------------------------- |