%{
#include "cparse.h"

/* End-of-input hook used by the generated scanner: put the scanner back into
 * the INITIAL start condition and return 1 (no further input). */
static int yywrap(void) {
  BEGIN(INITIAL);
  return 1;
}

/* cheesy dynamic string stuff */
static int stringtype;                  /* quote character that opened the literal */
static int cppstring;                   /* 1 while collecting a '#' line filename, else 0 */
static size_t stringlen;                /* number of bytes accumulated in string */
static char *string;                    /* accumulated bytes (not 0-terminated) */
static int stringcode;                  /* token code to return for the literal */

/* Append LEN bytes starting at PTR to the string being accumulated.
 * Calls fatal() if the combined length would not fit in a size_t. */
static void morestring(const char *ptr, size_t len) {
  if(len > SIZE_MAX - stringlen)
    fatal(0, "string too long");
  string = xrealloc_noptr(string, stringlen + len);
  /* newstring(0, 0) reaches here with a null PTR; passing a null pointer to
   * memcpy is undefined even for a zero length, so skip the empty copy. */
  if(len)
    memcpy(string + stringlen, ptr, len);
  stringlen += len;
}

/* Start accumulating a fresh string whose initial contents are the LEN bytes
 * at PTR (PTR may be 0 when LEN is 0).
 * NOTE(review): the previous buffer is dropped without being freed here;
 * presumably the allocator or the buffer's new owner deals with it - confirm. */
static void newstring(const char *ptr, size_t len) {
  stringlen = 0;
  string = 0;
  morestring(ptr, len);
}

/* validate a UCN - return 1 if valid, 0 if not, and report the error.
 * S points at the backslash; the hex digits start at S + 2.
 * Assumes yylloc set. */
static int ucnvalid(const char *s) {
  /* 6.4.3 range constraints for UCNs */
  unsigned long u;

  errno = 0;
  u = strtoul(s + 2, 0, 16);
  if(errno) {
    inputerror(&yylloc, "cannot convert UCN '%s'", yytext);
    return 0;
  } else switch(u) {
  case 0x0024:
  case 0x0040:
  case 0x0060:
    /* the three code points below 0x00A0 that are accepted */
    return 1;
  default:
    /* reject everything else below 0x00A0 and the range 0xD800-0xDFFF */
    if(u < 0x00A0 || (u >= 0xD800 && u <= 0xDFFF)) {
      inputerror(&yylloc, "illegal UCN escape '%s'", yytext);
      return 0;
    } else
      return 1;
  }
}

/* set yylloc */
static inline void loc(void) {
  yylloc.path = path;
  yylloc.line = line;
}

%}

%option debug
%option batch
%option perf-report

%x CPP CPPSKIP LITERAL IDENT

HEXDIGIT [0-9a-fA-F]
UCN \\(u{HEXDIGIT}{4}|U{HEXDIGIT}{8})
BADUCN \\(u.{1,4}|U.{1,8})
DIGIT [0-9]
IDND ([a-zA-Z_]|{UCN})
WS [ \t\r\v\f]

%%

  /* NOTE(review): the start-condition prefixes and the end-of-file patterns
   * on the rules below were reconstructed from the %x declaration above and
   * from the BEGIN() calls in the actions; in particular the CPP,CPPSKIP
   * pairing on the whitespace and newline rules is an assumption.  Confirm
   * against the upstream scanner before relying on it. */

  /* ---- preprocessor line markers: '# <line> "<file>" ...' ---- */

^#{WS}* { BEGIN(CPP); }

<CPP>[0-9]+ {
  long l;

  /* collect line number, crapping out if we can't represent it */
  errno = 0;
  l = strtol(yytext, 0, 10);
  if(errno)
    fatal(errno, "cannot convert line number '%s'", yytext);
  if(l > INT_MAX)
    fatal(0, "line number '%s' is too big", yytext);
  line = l;
}

<CPP,CPPSKIP>{WS}+ { }

<CPP>\" {
  /* collect filename */
  cppstring = 1;
  stringtype = yytext[0];
  BEGIN(LITERAL);
  newstring(0, 0);
}

<CPPSKIP>[0-9]+ { }

<CPP,CPPSKIP>\n { BEGIN(INITIAL); }

  /* ---- ordinary input: newlines and whitespace ---- */

\n {
  if(line == INT_MAX)
    fatal(0, "cannot count beyond line %d", INT_MAX);
  ++line;
}

{WS}+ { }

  /* ---- string and character literals ---- */

[\"\'] {
  stringtype = yytext[0];
  stringcode = yytext[0] == '"' ? STRINGLIT : CHARLIT;
  loc();
  cppstring = 0;
  BEGIN(LITERAL);
  newstring(0, 0);
}

L[\"\'] {
  stringtype = yytext[1];
  stringcode = yytext[1] == '"' ? WSTRINGLIT : WCHARLIT;
  loc();
  cppstring = 0;
  BEGIN(LITERAL);
  newstring(0, 0);
}

<LITERAL>([^\'\"\\\n]|\\[\'\"\?\\abtnvfr])+ {
  morestring(yytext, yyleng);
}

<LITERAL>\\[0-7]{1,3} {
  /* 6.4.4.4 escapes must fit in an unsigned char */
  unsigned long n;

  errno = 0;
  n = strtoul(yytext + 1, 0, 8);
  if(errno)
    inputerror(&yylloc, "cannot convert octal escape sequence '%s'", yytext);
  else if(n > P_UCHAR_MAX)
    inputerror(&yylloc, "octal escape sequence '%s' out of range", yytext);
  /* the escape is kept in the literal text even when it was reported */
  morestring(yytext, yyleng);
}

<LITERAL>\\x[0-9a-fA-F]+ {
  /* 6.4.4.4 escapes must fit in an unsigned char */
  unsigned long n;

  errno = 0;
  n = strtoul(yytext + 2, 0, 16);
  if(errno)
    inputerror(&yylloc, "cannot convert hexadecimal escape sequence '%s'", yytext);
  else if(n > P_UCHAR_MAX)
    inputerror(&yylloc, "hexadecimal escape sequence '%s' out of range", yytext);
  morestring(yytext, yyleng);
}

<LITERAL>\\x. {
  inputerror(&yylloc, "invalid hexadecimal escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}

<LITERAL>{UCN} {
  ucnvalid(yytext);
  morestring(yytext, yyleng);
}

<LITERAL>{BADUCN} {
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}

<LITERAL>[\"\'] {
  if(yytext[0] == stringtype) {
    /* also entered by goto from the unterminated-literal rules below */
  finish_string:
    if(cppstring) {
      /* filename from a '#' line: 0-terminate it and make it the path */
      morestring("", 1);
      path = string;
      BEGIN(CPPSKIP);
    } else {
      BEGIN(INITIAL);
      /* XXX support strings with embedded 0s */
      yylval.s = xstrndup(string, stringlen);
      return stringcode;
    }
  } else
    /* the other kind of quote is just part of the literal */
    morestring(yytext, 1);
}

<LITERAL>\\. {
  inputerror(&yylloc, "invalid escape sequence '%s'", yytext);
  morestring(yytext, yyleng);
}

<LITERAL>\n {
  inputerror(&yylloc, "unterminated literal");
  goto finish_string;
}

<LITERAL><<EOF>> {
  inputerror(&yylloc, "unterminated literal");
  goto finish_string;
}

  /* ---- numbers: returned as their raw text ---- */

\.?{DIGIT}({DIGIT}|{IDND}|[eEpP][+\-]|\.)* {
  yylval.s = xstrndup(yytext, yyleng);
  loc();
  return NUMBER;
}

  /* ---- multi-character punctuators ---- */

-\> { loc(); return yylval.i = MEMBER; }
\+\+ { loc(); return yylval.i = INCR; }
-- { loc(); return yylval.i = DECR; }
\<\< { loc(); return yylval.i = SL; }
\>\> { loc(); return yylval.i = SR; }
\<= { loc(); return yylval.i = LE; }
\>= { loc(); return yylval.i = GE; }
== { loc(); return yylval.i = EQ; }
!= { loc(); return yylval.i = NE; }
&& { loc(); return yylval.i = AND; }
\|\| { loc(); return yylval.i = OR; }
\.\.\. { loc(); return yylval.i = VARARG; }
\*= { loc(); return yylval.i = MULEQ; }
\/= { loc(); return yylval.i = DIVEQ; }
%= { loc(); return yylval.i = MODEQ; }
\+= { loc(); return yylval.i = ADDEQ; }
-= { loc(); return yylval.i = SUBEQ; }
\<\<= { loc(); return yylval.i = SLEQ; }
\>\>= { loc(); return yylval.i = SREQ; }
&= { loc(); return yylval.i = ANDEQ; }
\^= { loc(); return yylval.i = XOREQ; }
\|= { loc(); return yylval.i = OREQ; }

  /* digraphs are returned as the single character they stand for */

\<: { loc(); return yylval.i = '['; }
:\> { loc(); return yylval.i = ']'; }
\<% { loc(); return yylval.i = '{'; }
%\> { loc(); return yylval.i = '}'; }
%: { loc(); return yylval.i = '#'; }

  /* ---- keywords ---- */

auto { yylval.u = SCS_AUTO; loc(); return AUTO; }
break { loc(); return BREAK; }
case { loc(); return CASE; }
char { yylval.u = TS_CHAR; loc(); return CHAR; }
const|__const|__const__ { yylval.u = TQ_CONST; loc(); return CONST; }
continue { loc(); return CONTINUE; }
default { loc(); return DEFAULT; }
do { loc(); return DO; }
double { yylval.u = TS_DOUBLE; loc(); return DOUBLE; }
else { loc(); return ELSE; }
enum { loc(); return ENUM; }
extern { yylval.u = SCS_EXTERN; loc(); return EXTERN; }
float { yylval.u = TS_FLOAT; loc(); return FLOAT; }
for { loc(); return FOR; }
goto { loc(); return GOTO; }
if { loc(); return IF; }
inline|__inline|__inline__ { yylval.u = FS_INLINE; loc(); return INLINE; }
int { yylval.u = TS_INT; loc(); return INT; }
long { yylval.u = TS_LONG; loc(); return LONG; }
register { yylval.u = SCS_REGISTER; loc(); return REGISTER; }
restrict|__restrict|__restrict__ { yylval.u = TQ_RESTRICT; loc(); return RESTRICT; }
return { loc(); return RETURN; }
short { yylval.u = TS_SHORT; loc(); return SHORT; }
signed { yylval.u = TS_SIGNED; loc(); return SIGNED; }
sizeof { loc(); return SIZEOF; }
static { yylval.u = SCS_STATIC; loc(); return STATIC; }
struct { yylval.u = TS_STRUCT; loc(); return STRUCT; }
switch { loc(); return SWITCH; }
typedef { yylval.u = SCS_TYPEDEF; loc(); return TYPEDEF; }
union { yylval.u = TS_UNION; loc(); return UNION; }
unsigned { yylval.u = TS_UNSIGNED; loc(); return UNSIGNED; }
void { yylval.u = TS_VOID; loc(); return VOID; }
volatile|__volatile|__volatile__ { yylval.u = TQ_VOLATILE; loc(); return VOLATILE; }
while { loc(); return WHILE; }
_Bool { yylval.u = TS_BOOL; loc(); return BOOL; }
_Complex { yylval.u = TS_COMPLEX; loc(); return COMPLEX; }
_Imaginary { yylval.u = TS_IMAGINARY; loc(); return IMAGINARY; }

  /* ---- GCC extensions; __extension__ is discarded ---- */

__extension__ { }
__attribute__ { loc(); return ATTRIBUTE; }
__builtin_va_list { yylval.u = TS_GCC_VA_LIST; loc(); return GCC_VA_LIST; }
__builtin_expect { loc(); return GCC_EXPECT; }
__builtin_va_arg { loc(); return GCC_VA_ARG; }

  /* ---- identifiers: the first piece switches to IDENT, which collects
   *      the rest of the name piece by piece ---- */

[a-zA-Z_][a-zA-Z0-9_]* {
  /* identifiers are complicated by our desire to strictly check any UCNs they
   * contain */
  loc();
  BEGIN(IDENT);
  newstring(yytext, yyleng);
}

{UCN} {
  loc();
  ucnvalid(yytext);
  BEGIN(IDENT);
  newstring(yytext, yyleng);
}

{BADUCN} {
  loc();
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  BEGIN(IDENT);
  newstring(yytext, yyleng);
}

<IDENT>[a-zA-Z0-9_]+ {
  morestring(yytext, yyleng);
}

<IDENT>{UCN} {
  ucnvalid(yytext);
  morestring(yytext, yyleng);
}

<IDENT>{BADUCN} {
  inputerror(&yylloc, "invalid UCN '%s'", yytext);
  morestring(yytext, yyleng);
}

<IDENT>.|\n {
  /* anything else ends the identifier; push it back for the next token */
  unput(yytext[0]);
 got_id:
  BEGIN(INITIAL);
  /* we need to look up the declaration of each name anyway so we record the
   * type for all of them.  But we can't produce an error here as it might be a
   * struct member or something. */
  yylval.name.name = xstrndup(string, stringlen);
  yylval.name.declarator = lookup(yylval.name.name);
  if(yylval.name.declarator
     && yylval.name.declarator->declaration_specifiers
     && (yylval.name.declarator->declaration_specifiers->storage_class_specifiers
         & SCS_TYPEDEF))
    return TYPEDEF_NAME;
  else
    return ID;
}

<IDENT><<EOF>> { goto got_id; }

  /* ---- any other single character is its own token ---- */

. { loc(); return yylval.i = yytext[0]; }

%%

/*
Local Variables:
mode:c
c-basic-offset:2
comment-column:40
fill-column:79
End:
*/