Commit | Line | Data |
---|---|---|
3cd4b0f8 MW |
1 | %{ |
2 | ||
3 | #include "cparse.h" | |
4 | ||
5 | static int yywrap(void) { | |
6 | BEGIN(INITIAL); | |
7 | return 1; | |
8 | } | |
9 | ||
/* State of the literal or identifier currently being accumulated. */
static char *string;                    /* bytes collected so far */
static size_t stringlen;                /* number of bytes in string */
static int stringtype;                  /* quote character that opened it */
static int stringcode;                  /* token code returned at the end */
static int cppstring;                   /* 1 for a '#' line filename, else 0 */
16 | ||
17 | /* append some bytes to the string */ | |
18 | static void morestring(const char *ptr, size_t len) { | |
19 | if(len > SIZE_MAX - stringlen) | |
20 | fatal(0, "string too long"); | |
21 | string = xrealloc_noptr(string, stringlen + len); | |
22 | memcpy(string + stringlen, ptr, len); | |
23 | stringlen += len; | |
24 | } | |
25 | ||
26 | static void newstring(const char *ptr, size_t len) { | |
27 | stringlen = 0; | |
28 | string = 0; | |
29 | morestring(ptr, len); | |
30 | } | |
31 | ||
32 | /* validate a UCN - return 1 if valid, 0 if not, and report the error. | |
33 | * Assumes yylloc set. */ | |
34 | static int ucnvalid(const char *s) { | |
35 | /* 6.4.3 range constraints for UCNs */ | |
36 | unsigned long u; | |
37 | ||
38 | errno = 0; | |
39 | u = strtoul(s + 2, 0, 16); | |
40 | if(errno) { | |
41 | inputerror(&yylloc, "cannot convert UCN '%s'", yytext); | |
42 | return 0; | |
43 | } else switch(u) { | |
44 | case 0x0024: | |
45 | case 0x0040: | |
46 | case 0x0060: | |
47 | return 1; | |
48 | default: | |
49 | if(u < 0x00A0 | |
50 | || (u >= 0xD800 && u <= 0xDFFF)) { | |
51 | inputerror(&yylloc, "illegal UCN escape '%s'", yytext); | |
52 | return 0; | |
53 | } else | |
54 | return 1; | |
55 | } | |
56 | } | |
57 | ||
58 | /* set yylloc */ | |
59 | static inline void loc(void) { | |
60 | yylloc.path = path; | |
61 | yylloc.line = line; | |
62 | } | |
63 | ||
64 | %} | |
65 | ||
%option debug
%option batch
%option perf-report

/* Exclusive start conditions:
 *   CPP      - inside a '#' line directive, before the filename
 *   CPPSKIP  - rest of a '#' line directive, after the filename
 *   LITERAL  - inside a string/character literal or a '#' line filename
 *   IDENT    - continuing an identifier
 */
%x CPP CPPSKIP LITERAL IDENT

/* HEXDIGIT used to be [0-9a-fA-f]; the range A-f also covers G-Z and several
 * punctuation characters, so e.g. \uABCZ was accepted as a well-formed UCN. */
HEXDIGIT [0-9a-fA-F]
UCN \\(u{HEXDIGIT}{4}|U{HEXDIGIT}{8})
/* A \u or \U followed by too few hex digits.  Only hex digits are consumed so
 * that a malformed UCN cannot swallow the closing quote of a literal or the
 * punctuation following an identifier.  A complete UCN is always longer than
 * anything this can match, so {UCN} still wins for well-formed input. */
BADUCN \\(u{HEXDIGIT}{0,3}|U{HEXDIGIT}{0,7})
DIGIT [0-9]
IDND ([a-zA-Z_]|{UCN})
WS [ \t\r\v\f]
78 | ||
79 | %% | |
80 | ||
^#{WS}*                 { BEGIN(CPP); /* a '#' line directive starts here */ }

<CPP>[0-9]+             {
  /* the line number given by the directive becomes the current line; give
   * up if it cannot be represented */
  long value;

  errno = 0;
  value = strtol(yytext, 0, 10);
  if(errno)
    fatal(errno, "cannot convert line number '%s'", yytext);
  if(value > INT_MAX)
    fatal(0, "line number '%s' is too big", yytext);
  line = value;
}
<CPP,CPPSKIP>{WS}+      { /* whitespace inside a directive is ignored */ }
<CPP>\"                 {
  /* opening quote of the filename: collect it using the LITERAL rules */
  stringtype = yytext[0];
  cppstring = 1;
  newstring(0, 0);
  BEGIN(LITERAL);
}
<CPPSKIP>[0-9]+         { /* numbers after the filename are ignored */ }
<CPP,CPPSKIP>\n         { BEGIN(INITIAL); /* the directive ends with its line */ }
104 | ||
\n                      {
  /* count source lines, refusing to overflow the counter */
  if(line == INT_MAX)
    fatal(0, "cannot count beyond line %d", INT_MAX);
  line += 1;
}
{WS}+                   { /* whitespace between tokens is ignored */ }
111 | ||
[\"\']                  {
  /* opening quote of an ordinary string or character literal */
  loc();
  cppstring = 0;
  stringtype = yytext[0];
  stringcode = (stringtype == '"') ? STRINGLIT : CHARLIT;
  newstring(0, 0);
  BEGIN(LITERAL);
}
L[\"\']                 {
  /* opening quote of a wide literal; the quote follows the L prefix */
  loc();
  cppstring = 0;
  stringtype = yytext[1];
  stringcode = (stringtype == '"') ? WSTRINGLIT : WCHARLIT;
  newstring(0, 0);
  BEGIN(LITERAL);
}
<LITERAL>([^\'\"\\\n]|\\[\'\"\?\\abtnvfr])+ {
  /* ordinary characters and simple escapes are stored exactly as spelled */
  morestring(yytext, yyleng);
}
<LITERAL>\\[0-7]{1,3}   {
  /* 6.4.4.4: the value of an octal escape must fit in an unsigned char.
   * The escape is stored as spelled whether or not it was valid. */
  unsigned long value;

  errno = 0;
  value = strtoul(yytext + 1, 0, 8);
  if(errno)
    inputerror(&yylloc, "cannot convert octal escape sequence '%s'", yytext);
  else if(value > P_UCHAR_MAX)
    inputerror(&yylloc, "octal escape sequence '%s' out of range", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\\x[0-9a-fA-F]+ {
  /* 6.4.4.4: the value of a hexadecimal escape must fit in an unsigned
   * char.  The escape is stored as spelled whether or not it was valid. */
  unsigned long value;

  errno = 0;
  value = strtoul(yytext + 2, 0, 16);
  if(errno)
    inputerror(&yylloc, "cannot convert hexadecimal escape sequence '%s'",
               yytext);
  else if(value > P_UCHAR_MAX)
    inputerror(&yylloc, "hexadecimal escape sequence '%s' out of range",
               yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\\x.           {
  /* \x not followed by a hex digit */
  inputerror(&yylloc, "invalid hexadecimal escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>{UCN}          {
  /* well-formed UCN: ucnvalid() reports any range violation itself */
  ucnvalid(yytext);
  morestring(yytext, yyleng);
}
<LITERAL>{BADUCN}       {
  /* malformed UCN */
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>[\"\']         {
  if(yytext[0] == stringtype) {
  finish_string:
    /* also reached by goto from the newline and end-of-file rules below */
    if(cppstring) {
      /* '#' line filename: 0-terminate it and make it the current path */
      morestring("", 1);
      path = string;
      BEGIN(CPPSKIP);
    } else {
      BEGIN(INITIAL);
      /* XXX support strings with embedded 0s */
      yylval.s = xstrndup(string, stringlen);
      return stringcode;
    }
  } else
    /* the other kind of quote is just an ordinary character here */
    morestring(yytext, 1);
}
<LITERAL>\\.            {
  /* any backslash sequence not recognized by an earlier rule */
  inputerror(&yylloc, "invalid escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\n             {
  inputerror(&yylloc, "unterminated literal");
  /* Give the newline back so that it is scanned again in the start condition
   * that finish_string selects.  In INITIAL that keeps the line count right;
   * in CPPSKIP it ends the directive instead of leaving the scanner in
   * CPPSKIP for the whole of the following line. */
  yyless(0);
  goto finish_string;
}
<LITERAL><<EOF>>        {
  inputerror(&yylloc, "unterminated literal");
  goto finish_string;
}
197 | ||
\.?{DIGIT}({DIGIT}|{IDND}|[eEpP][+\-]|\.)* {
  /* numbers are passed on as their spelling, unconverted */
  loc();
  yylval.s = xstrndup(yytext, yyleng);
  return NUMBER;
}
203 | ||
"->"                    { loc(); return yylval.i = MEMBER; }
"++"                    { loc(); return yylval.i = INCR; }
"--"                    { loc(); return yylval.i = DECR; }
"<<"                    { loc(); return yylval.i = SL; }
">>"                    { loc(); return yylval.i = SR; }
"<="                    { loc(); return yylval.i = LE; }
">="                    { loc(); return yylval.i = GE; }
"=="                    { loc(); return yylval.i = EQ; }
"!="                    { loc(); return yylval.i = NE; }
"&&"                    { loc(); return yylval.i = AND; }
"||"                    { loc(); return yylval.i = OR; }
"..."                   { loc(); return yylval.i = VARARG; }
"*="                    { loc(); return yylval.i = MULEQ; }
"/="                    { loc(); return yylval.i = DIVEQ; }
"%="                    { loc(); return yylval.i = MODEQ; }
"+="                    { loc(); return yylval.i = ADDEQ; }
"-="                    { loc(); return yylval.i = SUBEQ; }
"<<="                   { loc(); return yylval.i = SLEQ; }
">>="                   { loc(); return yylval.i = SREQ; }
"&="                    { loc(); return yylval.i = ANDEQ; }
"^="                    { loc(); return yylval.i = XOREQ; }
"|="                    { loc(); return yylval.i = OREQ; }
"<:"                    { /* digraph */ loc(); return yylval.i = '['; }
":>"                    { /* digraph */ loc(); return yylval.i = ']'; }
"<%"                    { /* digraph */ loc(); return yylval.i = '{'; }
"%>"                    { /* digraph */ loc(); return yylval.i = '}'; }
"%:"                    { /* digraph */ loc(); return yylval.i = '#'; }
231 | ||
auto                    { loc(); yylval.u = SCS_AUTO; return AUTO; }
break                   { loc(); return BREAK; }
case                    { loc(); return CASE; }
char                    { loc(); yylval.u = TS_CHAR; return CHAR; }
const|__const|__const__ { loc(); yylval.u = TQ_CONST; return CONST; }
continue                { loc(); return CONTINUE; }
default                 { loc(); return DEFAULT; }
do                      { loc(); return DO; }
double                  { loc(); yylval.u = TS_DOUBLE; return DOUBLE; }
else                    { loc(); return ELSE; }
enum                    { loc(); return ENUM; }
extern                  { loc(); yylval.u = SCS_EXTERN; return EXTERN; }
float                   { loc(); yylval.u = TS_FLOAT; return FLOAT; }
for                     { loc(); return FOR; }
goto                    { loc(); return GOTO; }
if                      { loc(); return IF; }
inline|__inline|__inline__ { loc(); yylval.u = FS_INLINE; return INLINE; }
int                     { loc(); yylval.u = TS_INT; return INT; }
long                    { loc(); yylval.u = TS_LONG; return LONG; }
register                { loc(); yylval.u = SCS_REGISTER; return REGISTER; }
restrict|__restrict|__restrict__ {
  loc();
  yylval.u = TQ_RESTRICT;
  return RESTRICT;
}
return                  { loc(); return RETURN; }
short                   { loc(); yylval.u = TS_SHORT; return SHORT; }
signed                  { loc(); yylval.u = TS_SIGNED; return SIGNED; }
sizeof                  { loc(); return SIZEOF; }
static                  { loc(); yylval.u = SCS_STATIC; return STATIC; }
struct                  { loc(); yylval.u = TS_STRUCT; return STRUCT; }
switch                  { loc(); return SWITCH; }
typedef                 { loc(); yylval.u = SCS_TYPEDEF; return TYPEDEF; }
union                   { loc(); yylval.u = TS_UNION; return UNION; }
unsigned                { loc(); yylval.u = TS_UNSIGNED; return UNSIGNED; }
void                    { loc(); yylval.u = TS_VOID; return VOID; }
volatile|__volatile|__volatile__ {
  loc();
  yylval.u = TQ_VOLATILE;
  return VOLATILE;
}
while                   { loc(); return WHILE; }
_Bool                   { loc(); yylval.u = TS_BOOL; return BOOL; }
_Complex                { loc(); yylval.u = TS_COMPLEX; return COMPLEX; }
_Imaginary              { loc(); yylval.u = TS_IMAGINARY; return IMAGINARY; }
269 | ||
__extension__           { /* discarded: no token is produced */ }
__attribute__           { loc(); return ATTRIBUTE; }

__builtin_va_list       { loc(); yylval.u = TS_GCC_VA_LIST; return GCC_VA_LIST; }
__builtin_expect        { loc(); return GCC_EXPECT; }

__builtin_va_arg        { loc(); return GCC_VA_ARG; }
280 | ||
[a-zA-Z_][a-zA-Z0-9_]*  {
  /* Identifiers may contain UCNs, each of which is checked strictly, so an
   * identifier is assembled piece by piece in the IDENT start condition
   * rather than matched in one go. */
  loc();
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
{UCN}                   {
  /* identifier starting with a well-formed UCN; loc() comes first because
   * ucnvalid() reports errors against yylloc */
  loc();
  ucnvalid(yytext);
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
{BADUCN}                {
  /* identifier starting with a malformed UCN: complain, then carry on */
  loc();
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
300 | ||
<IDENT>[a-zA-Z0-9_]+    {
  /* more plain identifier characters */
  morestring(yytext, yyleng);
}
<IDENT>{UCN}            {
  /* well-formed UCN: ucnvalid() reports any range violation itself */
  ucnvalid(yytext);
  morestring(yytext, yyleng);
}
<IDENT>{BADUCN}         {
  /* malformed UCN: complain but keep it as part of the name */
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}
312 | ||
<IDENT>.|\n             {
  /* this character is not part of the identifier: give it back */
  unput(yytext[0]);
got_id:
  /* also reached by goto from the end-of-file rule below */
  BEGIN(INITIAL);
  /* Every name has its declaration looked up anyway, so the result is
   * recorded for all of them.  No error can be raised here, since the name
   * might be a struct member or something similar. */
  yylval.name.name = xstrndup(string, stringlen);
  yylval.name.declarator = lookup(yylval.name.name);
  if(!yylval.name.declarator)
    return ID;
  if(!yylval.name.declarator->declaration_specifiers)
    return ID;
  if(yylval.name.declarator->declaration_specifiers->storage_class_specifiers
     & SCS_TYPEDEF)
    return TYPEDEF_NAME;
  return ID;
}

<IDENT><<EOF>>          { goto got_id; }
334 | ||
.                       {
  /* any other character is a token whose code is the character itself */
  loc();
  yylval.i = yytext[0];
  return yylval.i;
}
339 | ||
340 | %% | |
341 | ||
342 | /* | |
343 | Local Variables: | |
344 | mode:c | |
345 | c-basic-offset:2 | |
346 | comment-column:40 | |
347 | fill-column:79 | |
348 | End: | |
349 | */ |