Import upstream sources.
[cparse] / c-lex.l
1 %{
2
3 #include "cparse.h"
4
5 static int yywrap(void) {
6 BEGIN(INITIAL);
7 return 1;
8 }
9
/* Dynamic string buffer used to accumulate the literal or identifier that is
 * currently being scanned. */
static int stringtype;          /* quote character that opened the literal */
static int cppstring;           /* nonzero while reading a '#' line's filename */
static size_t stringlen;        /* number of bytes accumulated in string */
static char *string;            /* the bytes so far; not 0-terminated as such */
static int stringcode;          /* token code returned when the literal ends */

/* Append LEN bytes starting at PTR to the buffer.
 *
 * Calls fatal() if the combined length would not fit in a size_t.  PTR may be
 * a null pointer provided LEN is 0 (newstring(0, 0) relies on this); in that
 * case the buffer is still passed through xrealloc_noptr() but nothing is
 * copied. */
static void morestring(const char *ptr, size_t len) {
  if(len > SIZE_MAX - stringlen)
    fatal(0, "string too long");
  string = xrealloc_noptr(string, stringlen + len);
  /* memcpy() must not be given a null pointer, even to copy zero bytes */
  if(len)
    memcpy(string + stringlen, ptr, len);
  stringlen += len;
}
25
26 static void newstring(const char *ptr, size_t len) {
27 stringlen = 0;
28 string = 0;
29 morestring(ptr, len);
30 }
31
32 /* validate a UCN - return 1 if valid, 0 if not, and report the error.
33 * Assumes yylloc set. */
34 static int ucnvalid(const char *s) {
35 /* 6.4.3 range constraints for UCNs */
36 unsigned long u;
37
38 errno = 0;
39 u = strtoul(s + 2, 0, 16);
40 if(errno) {
41 inputerror(&yylloc, "cannot convert UCN '%s'", yytext);
42 return 0;
43 } else switch(u) {
44 case 0x0024:
45 case 0x0040:
46 case 0x0060:
47 return 1;
48 default:
49 if(u < 0x00A0
50 || (u >= 0xD800 && u <= 0xDFFF)) {
51 inputerror(&yylloc, "illegal UCN escape '%s'", yytext);
52 return 0;
53 } else
54 return 1;
55 }
56 }
57
58 /* set yylloc */
59 static inline void loc(void) {
60 yylloc.path = path;
61 yylloc.line = line;
62 }
63
64 %}
65
%option debug
%option batch
%option perf-report

/* Exclusive start conditions:
 *   CPP      after the '#' that starts a preprocessor line marker
 *   CPPSKIP  rest of a line marker once its filename has been read
 *   LITERAL  inside a string or character literal (or a marker's filename)
 *   IDENT    inside an identifier
 */
%x CPP CPPSKIP LITERAL IDENT

/* The upper-case range must be A-F: writing A-f would also accept G-Z and the
 * characters [ \ ] ^ _ ` as hexadecimal digits. */
HEXDIGIT [0-9a-fA-F]
UCN \\(u{HEXDIGIT}{4}|U{HEXDIGIT}{8})
/* Anything that starts like a UCN; used to diagnose malformed ones.  Note that
 * '.' accepts any character except newline, not just hexadecimal digits. */
BADUCN \\(u.{1,4}|U.{1,8})
DIGIT [0-9]
IDND ([a-zA-Z_]|{UCN})
WS [ \t\r\v\f]
78
79 %%
80
^#{WS}* {
  /* a '#' at the start of a line introduces a preprocessor line marker */
  BEGIN(CPP);
}

<CPP>[0-9]+ {
  /* the marker's line number: give up if it cannot be held in an int */
  long number;

  errno = 0;
  number = strtol(yytext, 0, 10);
  if(errno)
    fatal(errno, "cannot convert line number '%s'", yytext);
  if(number > INT_MAX)
    fatal(0, "line number '%s' is too big", yytext);
  line = number;
}
<CPP,CPPSKIP>{WS}+ { /* blanks inside a line marker are ignored */ }
<CPP>\" {
  /* the marker's filename is gathered by the LITERAL rules; cppstring tells
   * them to store the result in path rather than return a token */
  newstring(0, 0);
  stringtype = yytext[0];
  cppstring = 1;
  BEGIN(LITERAL);
}
<CPPSKIP>[0-9]+ { /* numbers following the filename are ignored */ }
<CPP,CPPSKIP>\n {
  /* the newline ends the line marker */
  BEGIN(INITIAL);
}
104
\n {
  /* count the line, refusing to run the counter past INT_MAX */
  if(line == INT_MAX)
    fatal(0, "cannot count beyond line %d", INT_MAX);
  line++;
}
{WS}+ { /* other whitespace is skipped */ }
111
[\"\'] {
  /* opening quote of an ordinary string or character literal: remember which
   * quote must close it and which token to return, then collect the body in
   * the LITERAL state */
  loc();
  cppstring = 0;
  stringtype = yytext[0];
  if(yytext[0] == '"')
    stringcode = STRINGLIT;
  else
    stringcode = CHARLIT;
  newstring(0, 0);
  BEGIN(LITERAL);
}
L[\"\'] {
  /* as above for a wide literal; the quote is the second character */
  loc();
  cppstring = 0;
  stringtype = yytext[1];
  if(yytext[1] == '"')
    stringcode = WSTRINGLIT;
  else
    stringcode = WCHARLIT;
  newstring(0, 0);
  BEGIN(LITERAL);
}
<LITERAL>([^\'\"\\\n]|\\[\'\"\?\\abtnvfr])+ {
  /* ordinary characters and simple escapes are stored exactly as spelled */
  morestring(yytext, yyleng);
}
<LITERAL>\\[0-7]{1,3} {
  /* 6.4.4.4: the value of an octal escape must fit in an unsigned char.  The
   * escape is stored as spelled whether or not an error was reported.
   * NOTE(review): the same limit is applied inside wide literals - confirm
   * that this is intended. */
  unsigned long value;

  errno = 0;
  value = strtoul(yytext + 1, 0, 8);
  if(errno)
    inputerror(&yylloc, "cannot convert octal escape sequence '%s'", yytext);
  else if(value > P_UCHAR_MAX)
    inputerror(&yylloc, "octal escape sequence '%s' out of range", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\\x[0-9a-fA-F]+ {
  /* 6.4.4.4: likewise for a hexadecimal escape */
  unsigned long value;

  errno = 0;
  value = strtoul(yytext + 2, 0, 16);
  if(errno)
    inputerror(&yylloc, "cannot convert hexadecimal escape sequence '%s'",
               yytext);
  else if(value > P_UCHAR_MAX)
    inputerror(&yylloc, "hexadecimal escape sequence '%s' out of range",
               yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\\x. {
  /* \x not followed by a hexadecimal digit */
  inputerror(&yylloc, "invalid hexadecimal escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>{UCN} {
  /* well-formed UCN: ucnvalid() reports any range violation; the text is
   * stored either way */
  ucnvalid(yytext);
  morestring(yytext, yyleng);
}
<LITERAL>{BADUCN} {
  /* \u or \U without the required number of hexadecimal digits */
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>[\"\'] {
  /* A quote ends the literal only if it is the kind that opened it; the other
   * kind is just another character of the body. */
  if(yytext[0] == stringtype) {
  finish_string:
    if(cppstring) {
      /* line marker filename: 0-terminate it, make it the current path and
       * skip whatever else is on the marker line */
      morestring("", 1);
      path = string;
      BEGIN(CPPSKIP);
    } else {
      BEGIN(INITIAL);
      /* XXX support strings with embedded 0s */
      yylval.s = xstrndup(string, stringlen);
      return stringcode;
    }
  } else
    morestring(yytext, 1);
}
<LITERAL>\\. {
  /* any escape not recognized by an earlier rule */
  inputerror(&yylloc, "invalid escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\n {
  inputerror(&yylloc, "unterminated literal");
  /* Hand the newline back to the input so that the state we switch to sees
   * it: INITIAL has to count the line, and CPPSKIP has to end the line marker
   * there rather than swallow the following line. */
  yyless(0);
  goto finish_string;
}
<LITERAL><<EOF>> {
  inputerror(&yylloc, "unterminated literal");
  goto finish_string;
}
197
\.?{DIGIT}({DIGIT}|{IDND}|[eEpP][+\-]|\.)* {
  /* a number: a copy of its spelling is passed on in yylval.s, uninterpreted */
  loc();
  yylval.s = xstrndup(yytext, yyleng);
  return NUMBER;
}
203
-\> {
  /* Multi-character punctuators.  Each rule records the token's location,
   * stores the token code in yylval.i and returns that code.  The digraphs at
   * the end of the list yield the single character they stand for. */
  loc();
  yylval.i = MEMBER;
  return yylval.i;
}
\+\+      { loc(); yylval.i = INCR; return yylval.i; }
--        { loc(); yylval.i = DECR; return yylval.i; }
\<\<      { loc(); yylval.i = SL; return yylval.i; }
\>\>      { loc(); yylval.i = SR; return yylval.i; }
\<=       { loc(); yylval.i = LE; return yylval.i; }
\>=       { loc(); yylval.i = GE; return yylval.i; }
==        { loc(); yylval.i = EQ; return yylval.i; }
!=        { loc(); yylval.i = NE; return yylval.i; }
&&        { loc(); yylval.i = AND; return yylval.i; }
\|\|      { loc(); yylval.i = OR; return yylval.i; }
\.\.\.    { loc(); yylval.i = VARARG; return yylval.i; }
\*=       { loc(); yylval.i = MULEQ; return yylval.i; }
\/=       { loc(); yylval.i = DIVEQ; return yylval.i; }
%=        { loc(); yylval.i = MODEQ; return yylval.i; }
\+=       { loc(); yylval.i = ADDEQ; return yylval.i; }
-=        { loc(); yylval.i = SUBEQ; return yylval.i; }
\<\<=     { loc(); yylval.i = SLEQ; return yylval.i; }
\>\>=     { loc(); yylval.i = SREQ; return yylval.i; }
&=        { loc(); yylval.i = ANDEQ; return yylval.i; }
\^=       { loc(); yylval.i = XOREQ; return yylval.i; }
\|=       { loc(); yylval.i = OREQ; return yylval.i; }
\<:       { loc(); yylval.i = '['; return yylval.i; }
:\>       { loc(); yylval.i = ']'; return yylval.i; }
\<%       { loc(); yylval.i = '{'; return yylval.i; }
%\>       { loc(); yylval.i = '}'; return yylval.i; }
%:        { loc(); yylval.i = '#'; return yylval.i; }
231
auto {
  /* Keywords.  Every rule records the token's location and returns the
   * keyword's token code; specifier and qualifier keywords also store their
   * flag value in yylval.u. */
  loc();
  yylval.u = SCS_AUTO;
  return AUTO;
}
break                             { loc(); return BREAK; }
case                              { loc(); return CASE; }
char                              { loc(); yylval.u = TS_CHAR; return CHAR; }
const|__const|__const__           { loc(); yylval.u = TQ_CONST; return CONST; }
continue                          { loc(); return CONTINUE; }
default                           { loc(); return DEFAULT; }
do                                { loc(); return DO; }
double                            { loc(); yylval.u = TS_DOUBLE; return DOUBLE; }
else                              { loc(); return ELSE; }
enum                              { loc(); return ENUM; }
extern                            { loc(); yylval.u = SCS_EXTERN; return EXTERN; }
float                             { loc(); yylval.u = TS_FLOAT; return FLOAT; }
for                               { loc(); return FOR; }
goto                              { loc(); return GOTO; }
if                                { loc(); return IF; }
inline|__inline|__inline__        { loc(); yylval.u = FS_INLINE; return INLINE; }
int                               { loc(); yylval.u = TS_INT; return INT; }
long                              { loc(); yylval.u = TS_LONG; return LONG; }
register                          { loc(); yylval.u = SCS_REGISTER; return REGISTER; }
restrict|__restrict|__restrict__  { loc(); yylval.u = TQ_RESTRICT; return RESTRICT; }
return                            { loc(); return RETURN; }
short                             { loc(); yylval.u = TS_SHORT; return SHORT; }
signed                            { loc(); yylval.u = TS_SIGNED; return SIGNED; }
sizeof                            { loc(); return SIZEOF; }
static                            { loc(); yylval.u = SCS_STATIC; return STATIC; }
struct                            { loc(); yylval.u = TS_STRUCT; return STRUCT; }
switch                            { loc(); return SWITCH; }
typedef                           { loc(); yylval.u = SCS_TYPEDEF; return TYPEDEF; }
union                             { loc(); yylval.u = TS_UNION; return UNION; }
unsigned                          { loc(); yylval.u = TS_UNSIGNED; return UNSIGNED; }
void                              { loc(); yylval.u = TS_VOID; return VOID; }
volatile|__volatile|__volatile__  { loc(); yylval.u = TQ_VOLATILE; return VOLATILE; }
while                             { loc(); return WHILE; }
_Bool                             { loc(); yylval.u = TS_BOOL; return BOOL; }
_Complex                          { loc(); yylval.u = TS_COMPLEX; return COMPLEX; }
_Imaginary                        { loc(); yylval.u = TS_IMAGINARY; return IMAGINARY; }

__extension__                     { /* accepted and discarded */ }
__attribute__                     { loc(); return ATTRIBUTE; }

__builtin_va_list                 { loc(); yylval.u = TS_GCC_VA_LIST; return GCC_VA_LIST; }
__builtin_expect                  { loc(); return GCC_EXPECT; }

__builtin_va_arg                  { loc(); return GCC_VA_ARG; }
280
[a-zA-Z_][a-zA-Z0-9_]* {
  /* Start of an identifier.  Identifiers are assembled piecewise in the IDENT
   * state so that every UCN inside them can be checked individually. */
  loc();
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
{UCN} {
  /* identifier that begins with a well-formed UCN; range errors are reported
   * by ucnvalid() but the identifier is collected regardless */
  loc();
  ucnvalid(yytext);
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
{BADUCN} {
  /* identifier that begins with a malformed UCN: report it and carry on */
  loc();
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
300
<IDENT>[a-zA-Z0-9_]+ {
  /* more plain identifier characters */
  morestring(yytext, yyleng);
}
<IDENT>{UCN} {
  /* embedded well-formed UCN; stored whether or not ucnvalid() complains */
  ucnvalid(yytext);
  morestring(yytext, yyleng);
}
<IDENT>{BADUCN} {
  /* embedded malformed UCN */
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}

<IDENT>.|\n {
  /* any other character ends the identifier; push it back so that it is
   * scanned again in the INITIAL state */
  unput(yytext[0]);
 got_id:
  BEGIN(INITIAL);
  /* Every name is looked up and its declarator recorded.  No error can be
   * raised here for an unknown name, since it might be a struct member or
   * something similar. */
  yylval.name.name = xstrndup(string, stringlen);
  yylval.name.declarator = lookup(yylval.name.name);
  if(!yylval.name.declarator)
    return ID;
  if(!yylval.name.declarator->declaration_specifiers)
    return ID;
  /* a name whose declaration carries the typedef storage class is a
   * TYPEDEF_NAME; everything else is a plain ID */
  if(yylval.name.declarator->declaration_specifiers->storage_class_specifiers
     & SCS_TYPEDEF)
    return TYPEDEF_NAME;
  return ID;
}

<IDENT><<EOF>> {
  /* end of input also ends the identifier */
  goto got_id;
}
334
. {
  /* any other single character is returned as its own token code */
  loc();
  yylval.i = yytext[0];
  return yylval.i;
}
339
340 %%
341
342 /*
343 Local Variables:
344 mode:c
345 c-basic-offset:2
346 comment-column:40
347 fill-column:79
348 End:
349 */