Commit | Line | Data |
---|---|---|
bf090e02 MW |
1 | ;;; -*-lisp-*- |
2 | ;;; | |
3 | ;;; Parsing C fragments from a scanner | |
4 | ;;; | |
5 | ;;; (c) 2010 Straylight/Edgeware | |
6 | ;;; | |
7 | ||
8 | ;;;----- Licensing notice --------------------------------------------------- | |
9 | ;;; | |
10 | ;;; This file is part of the Sensible Object Design, an object system for C. | |
11 | ;;; | |
12 | ;;; SOD is free software; you can redistribute it and/or modify | |
13 | ;;; it under the terms of the GNU General Public License as published by | |
14 | ;;; the Free Software Foundation; either version 2 of the License, or | |
15 | ;;; (at your option) any later version. | |
16 | ;;; | |
17 | ;;; SOD is distributed in the hope that it will be useful, | |
18 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;;; GNU General Public License for more details. | |
21 | ;;; | |
22 | ;;; You should have received a copy of the GNU General Public License | |
23 | ;;; along with SOD; if not, write to the Free Software Foundation, | |
24 | ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | (in-package #:sod) | |
27 | ||
28 | ;;;-------------------------------------------------------------------------- | |
29 | ;;; Fragment parsing. | |
30 | ||
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  END-CHARS is a list of
   characters; the fragment ends at the first of them found while no
   bracket opened within the fragment is still waiting to be closed.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals, and the nesting of round, square and curly brackets.

   Returns three values: a `c-fragment' object, followed by `t' and `t'.
   The fragment's text runs from the place where collection started up to
   (but not including) the terminating character, with trailing whitespace
   after the last newline or non-whitespace character removed; its location
   is taken from that starting place.  If end-of-file arrives first, the
   problem is reported through `lexer-error' and whatever was collected is
   returned in the same way."

  (let ((char-scanner (token-scanner-char-scanner scanner))
        (delim nil)                     ; closer expected for innermost bracket
        (stack nil))                    ; closers for the enclosing brackets
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Remember the closer we were waiting for, and start
                 ;; waiting for D instead.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment from everything scanned since PLACE,
                 ;; cutting the text just after the last character which is
                 ;; either a newline or not whitespace.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  There's a small trick here: I
              ;; capture the result in the `if-char' consequent to ensure
              ;; that we don't include the delimiter.  An end character only
              ;; counts while DELIM is nil, i.e., outside all open brackets.
              ((if-char () (and (null delim) (member it end-chars))
                 (values (result) t t)
                 (values end-chars nil nil))
               (return (values it t t)))
              (:eof
               ;; Ran out of input: complain, then hand back what we have.
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   ;; Continuable error: report the stray closer and carry
                   ;; on scanning with the bracket state unchanged.
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  A backslash always takes
              ;; the following character with it, so an escaped quote does
              ;; not end the literal.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
112 | ||
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end)
  "Parse a C fragment enclosed between BEGIN and END.

   BEGIN and END are meant to be characters.  (As things stand, BEGIN is
   simply compared against the current token type, so any token type would
   do; but it's best not to depend on that.)

   If the current token of SCANNER is not BEGIN, nothing is consumed and the
   values returned are a list containing BEGIN, `nil' and `nil'.  Otherwise
   the fragment is scanned up to END, the token scanner is stepped once, and
   the values returned are the fragment, `t' and `t'."

  ;; Ugly, but necessary.  `scan-c-fragment' reads raw characters, not
  ;; tokens.  Were we to step the token scanner past the opening delimiter
  ;; before scanning, `scanner-step' would already have swallowed the first
  ;; few characters of the fragment, and everything after would be parsed
  ;; from the wrong place.  So we only peek at the current token, let the
  ;; character-level scan run, and step the token scanner afterwards.

  (cond ((not (eql (token-type scanner) begin))
         (values (list begin) nil nil))
        (t
         (let ((fragment (scan-c-fragment scanner (list end))))
           (scanner-step scanner)
           (values fragment t t)))))
130 | ||
131 | ;;;----- That's all, folks -------------------------------------------------- |