5f58885f2b6f59c4669d013be0672cc9839d2183
[sod] / src / fragment-parse.lisp
1 ;;; -*-lisp-*-
2 ;;;
3 ;;; Parsing C fragments from a scanner
4 ;;;
5 ;;; (c) 2010 Straylight/Edgeware
6 ;;;
7
8 ;;;----- Licensing notice ---------------------------------------------------
9 ;;;
10 ;;; This file is part of the Sensible Object Design, an object system for C.
11 ;;;
12 ;;; SOD is free software; you can redistribute it and/or modify
13 ;;; it under the terms of the GNU General Public License as published by
14 ;;; the Free Software Foundation; either version 2 of the License, or
15 ;;; (at your option) any later version.
16 ;;;
17 ;;; SOD is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;;; GNU General Public License for more details.
21 ;;;
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with SOD; if not, write to the Free Software Foundation,
24 ;;; Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25
26 (in-package #:sod)
27
28 ;;;--------------------------------------------------------------------------
29 ;;; Fragment parsing.
30
(export 'scan-c-fragment)
(defun scan-c-fragment (scanner end-chars)
  "Parse a C fragment from the SCANNER.

   SCANNER must be a `sod-token-scanner' instance.  END-CHARS is a list of
   characters: the fragment ends at the first of these which is found
   outside of any brackets.  The terminating character is not included in
   the fragment text.

   The parsing process is a simple approximation to C lexical analysis.  It
   takes into account comments (both C and C++ style), string and character
   literals.  Round, square and curly brackets are tracked so that an end
   character nested inside them doesn't finish the fragment; a closing
   bracket which doesn't match the innermost open bracket is reported as a
   continuable error.

   Returns three values: a `c-fragment' object holding the text collected
   (with trailing non-newline whitespace removed) and the location at which
   the fragment started, followed by `t' and `t'.  If end-of-file is reached
   first, a lexer error is reported and the text collected so far is
   returned in the same way."

  (let ((char-scanner (token-scanner-char-scanner scanner))
        (delim nil)                     ; closer expected for innermost bracket
        (stack nil))                    ; closers for enclosing brackets
    (with-parser-context (character-scanner-context :scanner char-scanner)

      ;; Hack.  If the first character is a newline then discard it
      ;; immediately.  If I don't, then the output will look strange and the
      ;; location information will be unhelpful.
      (parse #\newline)

      ;; This seems the easiest way of gathering stuff.
      (with-scanner-place (place char-scanner)

        (flet ((push-delim (d)
                 ;; Remember the current expected closer and start expecting
                 ;; D instead.
                 (push delim stack)
                 (setf delim d))

               (result ()
                 ;; Build the fragment from everything scanned since PLACE.
                 ;; The text is cut just after the last character which is
                 ;; either a newline or not whitespace, so trailing blanks
                 ;; are dropped but a final newline is kept.
                 (let* ((output (scanner-interval char-scanner place))
                        (end (position-if (lambda (char)
                                            (or (char= char #\newline)
                                                (not
                                                 (whitespace-char-p char))))
                                          output :from-end t))
                        (trimmed (if end (subseq output 0 (1+ end)) "")))
                   (make-instance 'c-fragment
                                  :location (file-location place)
                                  :text trimmed))))

          ;; March through characters until we reach the end.
          (loop
            (cond-parse (:consumedp cp :expected exp)

              ;; Whitespace and comments are universally dull.
              ((satisfies whitespace-char-p) (parse :whitespace))
              ((scan-comment char-scanner))

              ;; See if we've reached the end.  There's a small trick here: I
              ;; capture the result in the `if-char' consequent to ensure
              ;; that we don't include the delimiter.
              ((if-char () (and (null delim) (member it end-chars))
                 (values (result) t t)
                 (values end-chars nil nil))
               (return (values it t t)))
              (:eof
               (lexer-error char-scanner '(:any) cp)
               (return (values (result) t t)))

              ;; Opening and closing brackets.  Opening brackets push things
              ;; onto a stack; closing brackets pop things off again.
              (#\( (push-delim #\)))
              (#\[ (push-delim #\]))
              (#\{ (push-delim #\}))
              ((or #\) #\] #\})
               (if (eql it delim)
                   (setf delim (pop stack))
                   (cerror* "Unmatched `~C'." it)))

              ;; String and character literals.  A backslash always takes
              ;; the following character with it, so an escaped quote
              ;; doesn't end the literal.
              ((seq ((quote (or #\" #\'))
                     (nil (skip-many ()
                            (or (and #\\ :any) (not quote))))
                     (nil (char quote)))))

              ;; Anything else.
              (:any)

              ;; This really shouldn't be able to happen.
              (t
               (assert cp)
               (lexer-error char-scanner exp cp)))))))))
112
(export 'parse-delimited-fragment)
(defun parse-delimited-fragment (scanner begin end)
  "Parse a C fragment delimited by BEGIN and END.

   The BEGIN and END arguments are characters.  (Currently, BEGIN can be any
   token type, but you probably shouldn't rely on this.)

   If the SCANNER's current token type is BEGIN, scan a fragment terminated
   by END, step the scanner, and return the fragment, `t' and `t'.
   Otherwise return a list containing BEGIN, `nil' and `nil', leaving the
   scanner alone."

  ;; This is decidedly nasty.  The basic problem is that `scan-c-fragment'
  ;; works at the character level rather than at the lexical level, and if
  ;; we commit to the opening delimiter too early then `scanner-step' will
  ;; eat the first few characters of the fragment -- and then the rest of
  ;; the parse will get horrifically confused.  So: only peek at the current
  ;; token type, collect the fragment, and step the token scanner
  ;; afterwards.

  (cond ((eql (token-type scanner) begin)
         (let ((fragment (scan-c-fragment scanner (list end))))
           (scanner-step scanner)
           (values fragment t t)))
        (t
         (values (list begin) nil nil))))
130
131 ;;;----- That's all, folks --------------------------------------------------