Import upstream sources.
[cparse] / c-lex.l
CommitLineData
3cd4b0f8
MW
1%{
2
3 #include "cparse.h"
4
5 static int yywrap(void) {
6 BEGIN(INITIAL);
7 return 1;
8 }
9
/* cheesy dynamic string stuff: state shared by the literal and identifier
 * rules while the text of a token is being accumulated */
static char *string;                    /* bytes collected so far (no 0 terminator) */
static size_t stringlen;                /* number of bytes held in string */
static int stringtype;                  /* quote character that opened the literal */
static int stringcode;                  /* token code returned when the literal ends */
static int cppstring;                   /* nonzero while collecting a # line filename */
16
17 /* append some bytes to the string */
18 static void morestring(const char *ptr, size_t len) {
19 if(len > SIZE_MAX - stringlen)
20 fatal(0, "string too long");
21 string = xrealloc_noptr(string, stringlen + len);
22 memcpy(string + stringlen, ptr, len);
23 stringlen += len;
24 }
25
26 static void newstring(const char *ptr, size_t len) {
27 stringlen = 0;
28 string = 0;
29 morestring(ptr, len);
30 }
31
32 /* validate a UCN - return 1 if valid, 0 if not, and report the error.
33 * Assumes yylloc set. */
34 static int ucnvalid(const char *s) {
35 /* 6.4.3 range constraints for UCNs */
36 unsigned long u;
37
38 errno = 0;
39 u = strtoul(s + 2, 0, 16);
40 if(errno) {
41 inputerror(&yylloc, "cannot convert UCN '%s'", yytext);
42 return 0;
43 } else switch(u) {
44 case 0x0024:
45 case 0x0040:
46 case 0x0060:
47 return 1;
48 default:
49 if(u < 0x00A0
50 || (u >= 0xD800 && u <= 0xDFFF)) {
51 inputerror(&yylloc, "illegal UCN escape '%s'", yytext);
52 return 0;
53 } else
54 return 1;
55 }
56 }
57
58 /* set yylloc */
59 static inline void loc(void) {
60 yylloc.path = path;
61 yylloc.line = line;
62 }
63
64%}
65
66%option debug
67%option batch
68%option perf-report
69
70%x CPP CPPSKIP LITERAL IDENT
71
/* hexadecimal digit; the upper-case range has to stop at F, because a range
 * written A-f also admits G-Z and several punctuation characters, letting
 * malformed UCNs slip past the BADUCN rules */
HEXDIGIT [0-9a-fA-F]
/* well-formed universal character name: backslash, then u with 4 hex digits
 * or U with 8 hex digits */
UCN \\(u{HEXDIGIT}{4}|U{HEXDIGIT}{8})
/* anything else that starts like a UCN; only reached when UCN does not match
 * at least as much text */
BADUCN \\(u.{1,4}|U.{1,8})
DIGIT [0-9]
/* first character of an identifier, also used inside numbers */
IDND ([a-zA-Z_]|{UCN})
/* horizontal whitespace; newline is handled by its own rules */
WS [ \t\r\v\f]
78
79%%
80
^#{WS}* {
  /* a # at the start of a line switches to the line-marker rules */
  BEGIN(CPP);
}

<CPP>[0-9]+ {
  /* collect line number, giving up if we cannot represent it */
  long number;

  errno = 0;
  number = strtol(yytext, 0, 10);
  if(errno) {
    fatal(errno, "cannot convert line number '%s'", yytext);
  }
  if(number > INT_MAX) {
    fatal(0, "line number '%s' is too big", yytext);
  }
  line = number;
}
<CPP,CPPSKIP>{WS}+ { /* blanks inside the marker are ignored */ }
<CPP>\" {
  /* collect filename: the literal rules gather it and store it in path */
  stringtype = yytext[0];
  cppstring = 1;
  newstring(0, 0);
  BEGIN(LITERAL);
}
<CPPSKIP>[0-9]+ { /* numbers after the filename are ignored */ }
<CPP,CPPSKIP>\n {
  /* end of the marker line: back to ordinary scanning */
  BEGIN(INITIAL);
}
104
\n {
  /* count the line, refusing to run the counter past INT_MAX */
  if(line == INT_MAX) {
    fatal(0, "cannot count beyond line %d", INT_MAX);
  }
  line += 1;
}
{WS}+ { /* whitespace between tokens is dropped */ }
111
[\"\'] {
  /* opening quote of a plain string or character literal: remember which
   * quote closes it and which token to return, then collect the body */
  loc();
  cppstring = 0;
  stringtype = yytext[0];
  if(stringtype == '"')
    stringcode = STRINGLIT;
  else
    stringcode = CHARLIT;
  newstring(0, 0);
  BEGIN(LITERAL);
}
L[\"\'] {
  /* same again for the wide forms; the quote is the second character */
  loc();
  cppstring = 0;
  stringtype = yytext[1];
  if(stringtype == '"')
    stringcode = WSTRINGLIT;
  else
    stringcode = WCHARLIT;
  newstring(0, 0);
  BEGIN(LITERAL);
}
<LITERAL>([^\'\"\\\n]|\\[\'\"\?\\abtnvfr])+ {
  /* ordinary characters and simple escapes are kept exactly as written */
  morestring(yytext, yyleng);
}
<LITERAL>\\[0-7]{1,3} {
  /* 6.4.4.4 escapes must fit in an unsigned char */
  unsigned long value;

  errno = 0;
  value = strtoul(yytext + 1, 0, 8);
  if(errno) {
    inputerror(&yylloc, "cannot convert octal escape sequence '%s'", yytext);
  } else if(value > P_UCHAR_MAX) {
    inputerror(&yylloc, "octal escape sequence '%s' out of range", yytext);
  }
  /* the escape is stored as spelled whether or not it was diagnosed */
  morestring(yytext, yyleng);
}
<LITERAL>\\x[0-9a-fA-F]+ {
  /* 6.4.4.4 escapes must fit in an unsigned char */
  unsigned long value;

  errno = 0;
  value = strtoul(yytext + 2, 0, 16);
  if(errno) {
    inputerror(&yylloc, "cannot convert hexadecimal escape sequence '%s'",
               yytext);
  } else if(value > P_UCHAR_MAX) {
    inputerror(&yylloc, "hexadecimal escape sequence '%s' out of range",
               yytext);
  }
  /* the escape is stored as spelled whether or not it was diagnosed */
  morestring(yytext, yyleng);
}
<LITERAL>\\x. {
  /* backslash-x followed by something that is not a hex digit */
  inputerror(&yylloc, "invalid hexadecimal escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>{UCN} {
  /* well-formed UCN: range-check it (errors are reported by ucnvalid) and
   * keep the spelling either way */
  (void)ucnvalid(yytext);
  morestring(yytext, yyleng);
}
<LITERAL>{BADUCN} {
  /* starts like a UCN but is not followed by the right hex digits */
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>[\"\'] {
  if(yytext[0] != stringtype) {
    /* the other kind of quote is just an ordinary character in here */
    morestring(yytext, 1);
  } else {
    /* the unterminated-literal rules also jump to this label */
  finish_string:
    if(!cppstring) {
      BEGIN(INITIAL);
      /* XXX support strings with embedded 0s */
      yylval.s = xstrndup(string, stringlen);
      return stringcode;
    }
    /* filename from a line marker: 0-terminate it, make it the current
     * path and skip whatever else is on the marker line */
    morestring("", 1);
    path = string;
    BEGIN(CPPSKIP);
  }
}
<LITERAL>\\. {
  /* a backslash followed by something no other escape rule accepted */
  inputerror(&yylloc, "invalid escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}
<LITERAL>\n {
  inputerror(&yylloc, "unterminated literal");
  /* Give the newline back before finishing the literal.  The start condition
   * chosen at finish_string (INITIAL or CPPSKIP) then sees it: INITIAL counts
   * the line, CPPSKIP ends the marker line.  Consuming it here left the line
   * counter one short and let CPPSKIP run on into the following line. */
  yyless(0);
  goto finish_string;
}
<LITERAL><<EOF>> {
  /* input ended inside the literal; finish it with what was collected */
  inputerror(&yylloc, "unterminated literal");
  goto finish_string;
}
197
\.?{DIGIT}({DIGIT}|{IDND}|[eEpP][+\-]|\.)* {
  /* numbers are handed on as a copy of their spelling */
  loc();
  yylval.s = xstrndup(yytext, yyleng);
  return NUMBER;
}
203
-\>     { loc(); yylval.i = MEMBER; return yylval.i; }
\+\+    { loc(); yylval.i = INCR;   return yylval.i; }
--      { loc(); yylval.i = DECR;   return yylval.i; }
\<\<    { loc(); yylval.i = SL;     return yylval.i; }
\>\>    { loc(); yylval.i = SR;     return yylval.i; }
\<=     { loc(); yylval.i = LE;     return yylval.i; }
\>=     { loc(); yylval.i = GE;     return yylval.i; }
==      { loc(); yylval.i = EQ;     return yylval.i; }
!=      { loc(); yylval.i = NE;     return yylval.i; }
&&      { loc(); yylval.i = AND;    return yylval.i; }
\|\|    { loc(); yylval.i = OR;     return yylval.i; }
\.\.\.  { loc(); yylval.i = VARARG; return yylval.i; }
\*=     { loc(); yylval.i = MULEQ;  return yylval.i; }
\/=     { loc(); yylval.i = DIVEQ;  return yylval.i; }
%=      { loc(); yylval.i = MODEQ;  return yylval.i; }
\+=     { loc(); yylval.i = ADDEQ;  return yylval.i; }
-=      { loc(); yylval.i = SUBEQ;  return yylval.i; }
\<\<=   { loc(); yylval.i = SLEQ;   return yylval.i; }
\>\>=   { loc(); yylval.i = SREQ;   return yylval.i; }
&=      { loc(); yylval.i = ANDEQ;  return yylval.i; }
\^=     { loc(); yylval.i = XOREQ;  return yylval.i; }
\|=     { loc(); yylval.i = OREQ;   return yylval.i; }
 /* digraphs are returned as the single character they stand for */
\<:     { loc(); yylval.i = '[';    return yylval.i; }
:\>     { loc(); yylval.i = ']';    return yylval.i; }
\<%     { loc(); yylval.i = '{';    return yylval.i; }
%\>     { loc(); yylval.i = '}';    return yylval.i; }
%:      { loc(); yylval.i = '#';    return yylval.i; }
231
auto                                { loc(); yylval.u = SCS_AUTO;      return AUTO; }
break                               { loc();                           return BREAK; }
case                                { loc();                           return CASE; }
char                                { loc(); yylval.u = TS_CHAR;       return CHAR; }
const|__const|__const__             { loc(); yylval.u = TQ_CONST;      return CONST; }
continue                            { loc();                           return CONTINUE; }
default                             { loc();                           return DEFAULT; }
do                                  { loc();                           return DO; }
double                              { loc(); yylval.u = TS_DOUBLE;     return DOUBLE; }
else                                { loc();                           return ELSE; }
enum                                { loc();                           return ENUM; }
extern                              { loc(); yylval.u = SCS_EXTERN;    return EXTERN; }
float                               { loc(); yylval.u = TS_FLOAT;      return FLOAT; }
for                                 { loc();                           return FOR; }
goto                                { loc();                           return GOTO; }
if                                  { loc();                           return IF; }
inline|__inline|__inline__          { loc(); yylval.u = FS_INLINE;     return INLINE; }
int                                 { loc(); yylval.u = TS_INT;        return INT; }
long                                { loc(); yylval.u = TS_LONG;       return LONG; }
register                            { loc(); yylval.u = SCS_REGISTER;  return REGISTER; }
restrict|__restrict|__restrict__    { loc(); yylval.u = TQ_RESTRICT;   return RESTRICT; }
return                              { loc();                           return RETURN; }
short                               { loc(); yylval.u = TS_SHORT;      return SHORT; }
signed                              { loc(); yylval.u = TS_SIGNED;     return SIGNED; }
sizeof                              { loc();                           return SIZEOF; }
static                              { loc(); yylval.u = SCS_STATIC;    return STATIC; }
struct                              { loc(); yylval.u = TS_STRUCT;     return STRUCT; }
switch                              { loc();                           return SWITCH; }
typedef                             { loc(); yylval.u = SCS_TYPEDEF;   return TYPEDEF; }
union                               { loc(); yylval.u = TS_UNION;      return UNION; }
unsigned                            { loc(); yylval.u = TS_UNSIGNED;   return UNSIGNED; }
void                                { loc(); yylval.u = TS_VOID;       return VOID; }
volatile|__volatile|__volatile__    { loc(); yylval.u = TQ_VOLATILE;   return VOLATILE; }
while                               { loc();                           return WHILE; }
_Bool                               { loc(); yylval.u = TS_BOOL;       return BOOL; }
_Complex                            { loc(); yylval.u = TS_COMPLEX;    return COMPLEX; }
_Imaginary                          { loc(); yylval.u = TS_IMAGINARY;  return IMAGINARY; }
269
__extension__       { /* recognised and dropped: no token is returned */ }
__attribute__       { loc(); return ATTRIBUTE; }

__builtin_va_list   { loc(); yylval.u = TS_GCC_VA_LIST; return GCC_VA_LIST; }
__builtin_expect    { loc(); return GCC_EXPECT; }

__builtin_va_arg    { loc(); return GCC_VA_ARG; }
280
[a-zA-Z_][a-zA-Z0-9_]* {
  /* identifiers are complicated by our desire to strictly check any UCNs they
   * contain: the plain prefix is stored here and the IDENT rules gather the
   * rest */
  loc();
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
{UCN} {
  /* identifier that begins with a well-formed UCN; it is range-checked and
   * kept as spelled */
  loc();
  (void)ucnvalid(yytext);
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
{BADUCN} {
  /* identifier that begins with a malformed UCN; diagnose it and carry on */
  loc();
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  newstring(yytext, yyleng);
  BEGIN(IDENT);
}
300
<IDENT>[a-zA-Z0-9_]+ {
  /* more plain identifier characters */
  morestring(yytext, yyleng);
}
<IDENT>{UCN} {
  /* well-formed UCN inside the identifier: range-check it, keep the spelling */
  (void)ucnvalid(yytext);
  morestring(yytext, yyleng);
}
<IDENT>{BADUCN} {
  /* malformed UCN inside the identifier: diagnose it, keep the spelling */
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}
312
<IDENT>.|\n {
  /* any other character ends the identifier; hand it back to be rescanned */
  unput(yytext[0]);
got_id:
  BEGIN(INITIAL);
  /* we need to look up the declaration of each name anyway so we record the
   * type for all of them.  But we can't produce an error here as it might be a
   * struct member or something. */
  yylval.name.name = xstrndup(string, stringlen);
  yylval.name.declarator = lookup(yylval.name.name);
  /* only a name whose declaration carries the typedef storage class is
   * reported as a typedef name; everything else is an ordinary identifier */
  if(!yylval.name.declarator)
    return ID;
  if(!yylval.name.declarator->declaration_specifiers)
    return ID;
  if(yylval.name.declarator->declaration_specifiers->storage_class_specifiers
     & SCS_TYPEDEF)
    return TYPEDEF_NAME;
  return ID;
}

<IDENT><<EOF>> {
  /* input ended right after the identifier: finish it the same way */
  goto got_id;
}
334
. {
  /* any other single character is returned as its own token value */
  loc();
  yylval.i = yytext[0];
  return yylval.i;
}
339
340%%
341
342/*
343Local Variables:
344mode:c
345c-basic-offset:2
346comment-column:40
347fill-column:79
348End:
349*/