2 * input.c: read the source form
/*
 * Width of a tab stop, in columns. Used when advancing the column
 * counter over a tab character for error-position reporting.
 */
#define TAB_STOP 8		       /* for column number tracking */
12 static void setpos(input
*in
, char *fname
) {
13 in
->pos
.filename
= fname
;
15 in
->pos
.col
= (in
->reportcols ?
1 : -1);
18 static void unget(input
*in
, int c
, filepos
*pos
) {
19 if (in
->npushback
>= in
->pushbacksize
) {
20 in
->pushbacksize
= in
->npushback
+ 16;
21 in
->pushback
= resize(in
->pushback
, in
->pushbacksize
);
23 in
->pushback
[in
->npushback
].chr
= c
;
24 in
->pushback
[in
->npushback
].pos
= *pos
; /* structure copy */
28 /* ---------------------------------------------------------------------- */
32 typedef struct macro_Tag macro
;
36 struct macrostack_Tag
{
42 static int macrocmp(void *av
, void *bv
) {
43 macro
*a
= (macro
*)av
, *b
= (macro
*)bv
;
44 return ustrcmp(a
->name
, b
->name
);
46 static void macrodef(tree234
*macros
, wchar_t *name
, wchar_t *text
,
48 macro
*m
= mknew(macro
);
51 if (add234(macros
, m
) != m
) {
52 error(err_macroexists
, &fpos
, name
);
57 static int macrolookup(tree234
*macros
, input
*in
, wchar_t *name
,
61 gotit
= find234(macros
, &m
, NULL
);
63 macrostack
*expansion
= mknew(macrostack
);
64 expansion
->next
= in
->stack
;
65 expansion
->text
= gotit
->text
;
66 expansion
->pos
= *pos
; /* structure copy */
68 expansion
->npushback
= in
->npushback
;
69 in
->stack
= expansion
;
74 static void macrocleanup(tree234
*macros
) {
77 for (ti
= 0; (m
= (macro
*)index234(macros
, ti
)) != NULL
; ti
++) {
88 static int get(input
*in
, filepos
*pos
) {
89 int pushbackpt
= in
->stack ? in
->stack
->npushback
: 0;
90 if (in
->npushback
> pushbackpt
) {
93 *pos
= in
->pushback
[in
->npushback
].pos
; /* structure copy */
94 return in
->pushback
[in
->npushback
].chr
;
97 wchar_t c
= in
->stack
->text
[in
->stack
->ptr
];
98 if (in
->stack
->text
[++in
->stack
->ptr
] == L
'\0') {
99 macrostack
*tmp
= in
->stack
;
100 in
->stack
= tmp
->next
;
105 else if (in
->currfp
) {
106 int c
= getc(in
->currfp
);
112 /* Track line numbers, for error reporting */
115 if (in
->reportcols
) {
118 in
->pos
.col
= 1 + (in
->pos
.col
+ TAB_STOP
-1) % TAB_STOP
;
133 /* FIXME: do input charmap translation. We should be returning
141 * Lexical analysis of source files.
143 typedef struct token_Tag token
;
151 tok_eof
, /* end of file */
152 tok_eop
, /* end of paragraph */
153 tok_white
, /* whitespace */
154 tok_word
, /* a word or word fragment */
155 tok_cmd
, /* \command */
160 /* Halibut command keywords. */
162 c__invalid
, /* invalid command */
163 c__comment
, /* comment command (\#) */
164 c__escaped
, /* escaped character */
165 c__nbsp
, /* nonbreaking space */
166 c_A
, /* appendix heading */
167 c_B
, /* bibliography entry */
168 c_BR
, /* bibliography rewrite */
169 c_C
, /* chapter heading */
171 c_I
, /* invisible index mark */
172 c_IM
, /* index merge/rewrite */
173 c_K
, /* capitalised cross-reference */
174 c_S
, /* aux field is 0, 1, 2, ... */
175 c_U
, /* unnumbered-chapter heading */
176 c_W
, /* Web hyperlink */
177 c_b
, /* bulletted list */
179 c_cfg
, /* configuration directive */
180 c_copyright
, /* copyright statement */
181 c_cw
, /* weak code */
182 c_date
, /* document processing date */
183 c_dd
, /* description list: description */
184 c_define
, /* macro definition */
185 c_dt
, /* description list: described thing */
187 c_i
, /* visible index mark */
188 c_ii
, /* uncapitalised visible index mark */
189 c_k
, /* uncapitalised cross-reference */
190 c_lcont
, /* continuation para(s) for list item */
191 c_n
, /* numbered list */
192 c_nocite
, /* bibliography trickery */
193 c_preamble
, /* document preamble text */
194 c_q
, /* quote marks */
195 c_rule
, /* horizontal rule */
196 c_title
, /* document title */
197 c_u
, /* aux field is char code */
198 c_versionid
/* document RCS id */
/*
 * Character-class helpers used by the lexer.
 *
 * These are function-like macros, so each may evaluate its argument
 * more than once: pass only expressions without side effects. They
 * are plain integer comparisons and therefore work both on wchar_t
 * values and on the int returned by get(); a negative EOF value
 * matches none of the classes.
 *
 * Perhaps whitespace should be defined in a more Unicode-friendly way?
 */

/* Space (32), tab (9), carriage return (13) or line feed (10). */
#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )

/* Line feed (10) only; a carriage return is not a newline here. */
#define isnl(c) ( (c)==10 )

/* Decimal digit '0'..'9'. */
#define isdec(c) ( ((c)>='0'&&(c)<='9') )

/* Value of a decimal digit; meaningful only when isdec(c) holds. */
#define fromdec(c) ( (c)-'0' )

/* Hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'. */
#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))

/*
 * Value of a hexadecimal digit; meaningful only when ishex(c) holds.
 * Masking with 0xDF clears bit 0x20, which folds an ASCII lower-case
 * letter onto its upper-case counterpart before the offset is removed.
 */
#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )

/* Character allowed in a multi-character command name: [0-9A-Za-z]. */
#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
211 * Keyword comparison function. Like strcmp, but between a wchar_t *
214 static int kwcmp(wchar_t const *p
, char const *q
) {
218 } while (*p
++ && *q
++ && !i
);
225 static void match_kw(token
*tok
) {
227 * FIXME. The ids are explicit in here so as to allow long-name
228 * equivalents to the various very short keywords.
230 static const struct { char const *name
; int id
; } keywords
[] = {
231 {"#", c__comment
}, /* comment command (\#) */
232 {"-", c__escaped
}, /* nonbreaking hyphen */
233 {"A", c_A
}, /* appendix heading */
234 {"B", c_B
}, /* bibliography entry */
235 {"BR", c_BR
}, /* bibliography rewrite */
236 {"C", c_C
}, /* chapter heading */
237 {"H", c_H
}, /* heading */
238 {"I", c_I
}, /* invisible index mark */
239 {"IM", c_IM
}, /* index merge/rewrite */
240 {"K", c_K
}, /* capitalised cross-reference */
241 {"U", c_U
}, /* unnumbered-chapter heading */
242 {"W", c_W
}, /* Web hyperlink */
243 {"\\", c__escaped
}, /* escaped backslash (\\) */
244 {"_", c__nbsp
}, /* nonbreaking space (\_) */
245 {"b", c_b
}, /* bulletted list */
246 {"c", c_c
}, /* code */
247 {"cfg", c_cfg
}, /* configuration directive */
248 {"copyright", c_copyright
}, /* copyright statement */
249 {"cw", c_cw
}, /* weak code */
250 {"date", c_date
}, /* document processing date */
251 {"dd", c_dd
}, /* description list: description */
252 {"define", c_define
}, /* macro definition */
253 {"dt", c_dt
}, /* description list: described thing */
254 {"e", c_e
}, /* emphasis */
255 {"i", c_i
}, /* visible index mark */
256 {"ii", c_ii
}, /* uncapitalised visible index mark */
257 {"k", c_k
}, /* uncapitalised cross-reference */
258 {"lcont", c_lcont
}, /* continuation para(s) for list item */
259 {"n", c_n
}, /* numbered list */
260 {"nocite", c_nocite
}, /* bibliography trickery */
261 {"preamble", c_preamble
}, /* document preamble text */
262 {"q", c_q
}, /* quote marks */
263 {"rule", c_rule
}, /* horizontal rule */
264 {"title", c_title
}, /* document title */
265 {"versionid", c_versionid
}, /* document RCS id */
266 {"{", c__escaped
}, /* escaped lbrace (\{) */
267 {"}", c__escaped
}, /* escaped rbrace (\}) */
272 * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
273 * doesn't match correctly, we just fall through to the
274 * binary-search phase.
276 if (tok
->text
[0] == 'S') {
277 /* We expect numeric characters thereafter. */
278 wchar_t *p
= tok
->text
+1;
284 while (*p
&& isdec(*p
)) {
285 n
= 10 * n
+ fromdec(*p
);
294 } else if (tok
->text
[0] == 'u') {
295 /* We expect hex characters thereafter. */
296 wchar_t *p
= tok
->text
+1;
298 while (*p
&& ishex(*p
)) {
299 n
= 16 * n
+ fromhex(*p
);
310 j
= sizeof(keywords
)/sizeof(*keywords
);
313 c
= kwcmp(tok
->text
, keywords
[k
].name
);
319 tok
->cmd
= keywords
[k
].id
;
324 tok
->cmd
= c__invalid
;
329 * Read a token from the input file, in the normal way (`normal' in
330 * the sense that code paragraphs work a different way).
332 token
get_token(input
*in
) {
336 rdstring rs
= { 0, 0, NULL
};
339 ret
.text
= NULL
; /* default */
342 if (iswhite(c
)) { /* tok_white or tok_eop */
347 } while ((c
= get(in
, &cpos
)) != EOF
&& iswhite(c
));
353 ret
.type
= (nls
> 1 ? tok_eop
: tok_white
);
355 } else if (c
== EOF
) { /* tok_eof */
358 } else if (c
== '\\') { /* tok_cmd */
360 if (c
== '-' || c
== '\\' || c
== '_' ||
361 c
== '#' || c
== '{' || c
== '}') {
362 /* single-char command */
364 } else if (c
== 'u') {
370 } while (ishex(c
) && len
< 5);
372 } else if (iscmd(c
)) {
380 * Now match the command against the list of available
384 ret
.text
= ustrdup(rs
.text
);
388 } else if (c
== '{') { /* tok_lbrace */
389 ret
.type
= tok_lbrace
;
391 } else if (c
== '}') { /* tok_rbrace */
392 ret
.type
= tok_rbrace
;
394 } else { /* tok_word */
396 * Read a word: the longest possible contiguous sequence of
397 * things other than whitespace, backslash, braces and
398 * hyphen. A hyphen terminates the word but is returned as
399 * part of it; everything else is pushed back for the next
400 * token. The `aux' field contains TRUE if the word ends in
403 ret
.aux
= FALSE
; /* assumed for now */
405 if (iswhite(c
) || c
=='{' || c
=='}' || c
=='\\' || c
==EOF
) {
406 /* Put back the character that caused termination */
413 break; /* hyphen terminates word */
419 ret
.text
= ustrdup(rs
.text
);
426 * Determine whether the next input character is an open brace (for
427 * telling code paragraphs from paragraphs which merely start with
430 int isbrace(input
*in
) {
440 * Read the rest of a line that starts `\c'. Including nothing at
441 * all (tok_word with empty text).
443 token
get_codepar_token(input
*in
) {
446 rdstring rs
= { 0, 0, NULL
};
450 c
= get(in
, &cpos
); /* expect (and discard) one space */
456 while (!isnl(c
) && c
!= EOF
) {
459 /* Discard \r just before \n. */
460 if (c2
!= 13 || !isnl(c
))
464 ret
.text
= ustrdup(rs
.text
);
470 * Adds a new word to a linked list
472 static word
*addword(word newword
, word
***hptrptr
) {
476 mnewword
= mknew(word
);
477 *mnewword
= newword
; /* structure copy */
478 mnewword
->next
= NULL
;
479 **hptrptr
= mnewword
;
480 *hptrptr
= &mnewword
->next
;
485 * Adds a new paragraph to a linked list
487 static paragraph
*addpara(paragraph newpara
, paragraph
***hptrptr
) {
488 paragraph
*mnewpara
= mknew(paragraph
);
489 *mnewpara
= newpara
; /* structure copy */
490 mnewpara
->next
= NULL
;
491 **hptrptr
= mnewpara
;
492 *hptrptr
= &mnewpara
->next
;
497 * Destructor before token is reassigned; should catch most memory
/*
 * dtor(t): release the text buffer held by token `t` by passing
 * t.text to sfree(). Call it before `t` is overwritten with a new
 * token. It expands to a single expression, so it can be chained with
 * the comma operator as in `dtor(t), t = get_token(in);`.
 *
 * The argument is parenthesised in the expansion so that any
 * expression designating a token (for example `*tokptr`) selects the
 * intended `.text` member.
 */
#define dtor(t) ( sfree((t).text) )
503 * Reads a single file (ie until get() returns EOF)
505 static void read_file(paragraph
***ret
, input
*in
, indexdata
*idx
) {
508 word wd
, **whptr
, **idximplicit
;
510 wchar_t utext
[2], *wdtext
;
513 int iswhite
, seenwhite
;
518 stack_nop
= 0, /* do nothing (for error recovery) */
519 stack_ualt
= 1, /* \u alternative */
520 stack_style
= 2, /* \e, \c, \cw */
521 stack_idx
= 4, /* \I, \i, \ii */
522 stack_hyper
= 8, /* \W */
523 stack_quote
= 16, /* \q */
525 word
**whptr
; /* to restore from \u alternatives */
526 word
**idximplicit
; /* to restore from \u alternatives */
529 struct crossparaitem
{
530 int type
; /* currently c_lcont or -1 */
534 word
*indexword
, *uword
, *iword
;
537 int index_downcase
, index_visible
, indexing
;
538 const rdstring nullrs
= { 0, 0, NULL
};
542 macros
= newtree234(macrocmp
);
545 crossparastk
= stk_new();
548 * Loop on each paragraph.
551 int start_cmd
= c__invalid
;
561 dtor(t
), t
= get_token(in
);
564 } while (t
.type
== tok_eop
);
565 if (t
.type
== tok_eof
)
569 * Parse code paragraphs separately.
571 if (t
.type
== tok_cmd
&& t
.cmd
== c_c
&& !isbrace(in
)) {
572 par
.type
= para_Code
;
575 dtor(t
), t
= get_codepar_token(in
);
576 wd
.type
= word_WeakCode
;
577 wd
.breaks
= FALSE
; /* shouldn't need this... */
578 wd
.text
= ustrdup(t
.text
);
582 dtor(t
), t
= get_token(in
);
583 if (t
.type
== tok_white
) {
585 * The newline after a code-paragraph line
587 dtor(t
), t
= get_token(in
);
589 if (t
.type
== tok_eop
|| t
.type
== tok_eof
)
591 else if (t
.type
!= tok_cmd
|| t
.cmd
!= c_c
) {
592 error(err_brokencodepara
, &t
.pos
);
593 prev_para_type
= par
.type
;
595 while (t
.type
!= tok_eop
) /* error recovery: */
596 dtor(t
), t
= get_token(in
); /* eat rest of paragraph */
597 goto codeparabroken
; /* ick, but such is life */
600 prev_para_type
= par
.type
;
607 * Spot the special commands that define a grouping of more
608 * than one paragraph, and also the closing braces that
611 if (t
.type
== tok_cmd
&&
613 struct crossparaitem
*sitem
, *stop
;
616 * Expect, and swallow, an open brace.
618 dtor(t
), t
= get_token(in
);
619 if (t
.type
!= tok_lbrace
) {
620 error(err_explbr
, &t
.pos
);
625 * \lcont causes a continuation of a list item into
626 * multiple paragraphs (which may in turn contain
627 * nested lists, code paras etc). Hence, the previous
628 * paragraph must be of a list type.
630 sitem
= mknew(struct crossparaitem
);
631 stop
= (struct crossparaitem
*)stk_top(crossparastk
);
632 if (prev_para_type
== para_Bullet
||
633 prev_para_type
== para_NumberedList
||
634 prev_para_type
== para_Description
) {
635 sitem
->type
= c_lcont
;
636 sitem
->seen_lcont
= 1;
637 par
.type
= para_LcontPush
;
638 prev_para_type
= par
.type
;
642 * Push a null item on the cross-para stack so that
643 * when we see the corresponding closing brace we
644 * don't give a cascade error.
647 sitem
->seen_lcont
= (stop ? stop
->seen_lcont
: 0);
648 error(err_misplacedlcont
, &t
.pos
);
650 stk_push(crossparastk
, sitem
);
652 } else if (t
.type
== tok_rbrace
) {
653 struct crossparaitem
*sitem
= stk_pop(crossparastk
);
655 error(err_unexbrace
, &t
.pos
);
657 switch (sitem
->type
) {
659 par
.type
= para_LcontPop
;
660 prev_para_type
= par
.type
;
670 * This token begins a paragraph. See if it's one of the
671 * special commands that define a paragraph type.
673 * (note that \# is special in a way, and \nocite takes no
676 par
.type
= para_Normal
;
677 if (t
.type
== tok_cmd
) {
679 int is_macro
= FALSE
;
687 error(err_badparatype
, t
.text
, &t
.pos
);
692 break; /* `\#{': isn't a comment para */
694 dtor(t
), t
= get_token(in
);
695 } while (t
.type
!= tok_eop
&& t
.type
!= tok_eof
);
696 continue; /* next paragraph */
700 * 1 -- exactly one keyword
701 * 2 -- at least one keyword
702 * 4 -- any number of keywords including zero
703 * 8 -- at least one keyword and then nothing else
704 * 16 -- nothing at all! no keywords, no body
705 * 32 -- no keywords at all
707 case c_A
: needkw
= 2; par
.type
= para_Appendix
; break;
708 case c_B
: needkw
= 2; par
.type
= para_Biblio
; break;
709 case c_BR
: needkw
= 1; par
.type
= para_BR
;
710 start_cmd
= c_BR
; break;
711 case c_C
: needkw
= 2; par
.type
= para_Chapter
; break;
712 case c_H
: needkw
= 2; par
.type
= para_Heading
;
715 case c_IM
: needkw
= 2; par
.type
= para_IM
;
716 start_cmd
= c_IM
; break;
717 case c_S
: needkw
= 2; par
.type
= para_Subsect
;
718 par
.aux
= t
.aux
; break;
719 case c_U
: needkw
= 32; par
.type
= para_UnnumberedChapter
; break;
720 /* For \b and \n the keyword is optional */
721 case c_b
: needkw
= 4; par
.type
= para_Bullet
; break;
722 case c_dt
: needkw
= 4; par
.type
= para_DescribedThing
; break;
723 case c_dd
: needkw
= 4; par
.type
= para_Description
; break;
724 case c_n
: needkw
= 4; par
.type
= para_NumberedList
; break;
725 case c_cfg
: needkw
= 8; par
.type
= para_Config
;
726 start_cmd
= c_cfg
; break;
727 case c_copyright
: needkw
= 32; par
.type
= para_Copyright
; break;
728 case c_define
: is_macro
= TRUE
; needkw
= 1; break;
729 /* For \nocite the keyword is _everything_ */
730 case c_nocite
: needkw
= 8; par
.type
= para_NoCite
; break;
731 case c_preamble
: needkw
= 32; par
.type
= para_Preamble
; break;
732 case c_rule
: needkw
= 16; par
.type
= para_Rule
; break;
733 case c_title
: needkw
= 32; par
.type
= para_Title
; break;
734 case c_versionid
: needkw
= 32; par
.type
= para_VersionID
; break;
737 if (par
.type
== para_Chapter
||
738 par
.type
== para_Heading
||
739 par
.type
== para_Subsect
||
740 par
.type
== para_Appendix
||
741 par
.type
== para_UnnumberedChapter
) {
742 struct crossparaitem
*sitem
= stk_top(crossparastk
);
743 if (sitem
&& sitem
->seen_lcont
) {
744 error(err_sectmarkerinlcont
, &t
.pos
);
749 rdstring rs
= { 0, 0, NULL
};
754 dtor(t
), t
= get_token(in
);
756 while (t
.type
== tok_lbrace
) {
757 /* This is a keyword. */
759 /* FIXME: there will be bugs if anyone specifies an
760 * empty keyword (\foo{}), so trap this case. */
761 while (dtor(t
), t
= get_token(in
),
762 t
.type
== tok_word
||
763 t
.type
== tok_white
||
764 (t
.type
== tok_cmd
&& t
.cmd
== c__nbsp
) ||
765 (t
.type
== tok_cmd
&& t
.cmd
== c__escaped
)) {
766 if (t
.type
== tok_white
||
767 (t
.type
== tok_cmd
&& t
.cmd
== c__nbsp
))
772 if (t
.type
!= tok_rbrace
) {
773 error(err_kwunclosed
, &t
.pos
);
776 rdadd(&rs
, 0); /* add string terminator */
777 dtor(t
), t
= get_token(in
); /* eat right brace */
780 rdadd(&rs
, 0); /* add string terminator */
782 /* See whether we have the right number of keywords. */
783 if ((needkw
& 48) && nkeys
> 0)
784 error(err_kwillegal
, &fp
);
785 if ((needkw
& 11) && nkeys
== 0)
786 error(err_kwexpected
, &fp
);
787 if ((needkw
& 5) && nkeys
> 1)
788 error(err_kwtoomany
, &fp
);
792 * Macro definition. Get the rest of the line
793 * as a code-paragraph token, repeatedly until
794 * there's nothing more left of it. Separate
797 rdstring macrotext
= { 0, 0, NULL
};
799 dtor(t
), t
= get_codepar_token(in
);
800 if (macrotext
.pos
> 0)
801 rdadd(¯otext
, L
'\n');
802 rdadds(¯otext
, t
.text
);
803 dtor(t
), t
= get_token(in
);
804 if (t
.type
== tok_eop
) break;
806 macrodef(macros
, rs
.text
, macrotext
.text
, fp
);
807 continue; /* next paragraph */
810 par
.keyword
= rdtrim(&rs
);
812 /* Move to EOP in case of needkw==8 or 16 (no body) */
814 /* We allow whitespace even when we expect no para body */
815 while (t
.type
== tok_white
)
816 dtor(t
), t
= get_token(in
);
817 if (t
.type
!= tok_eop
&& t
.type
!= tok_eof
&&
818 (start_cmd
== c__invalid
||
819 t
.type
!= tok_cmd
|| t
.cmd
!= start_cmd
)) {
820 error(err_bodyillegal
, &t
.pos
);
821 /* Error recovery: eat the rest of the paragraph */
822 while (t
.type
!= tok_eop
&& t
.type
!= tok_eof
&&
823 (start_cmd
== c__invalid
||
824 t
.type
!= tok_cmd
|| t
.cmd
!= start_cmd
))
825 dtor(t
), t
= get_token(in
);
827 if (t
.type
== tok_cmd
)
828 already
= TRUE
;/* inhibit get_token at top of loop */
829 prev_para_type
= par
.type
;
831 continue; /* next paragraph */
837 * Now read the actual paragraph, word by word, adding to
838 * the paragraph list.
840 * Mid-paragraph commands:
852 parsestk
= stk_new();
854 spcstyle
= word_WhiteSpace
;
857 while (t
.type
!= tok_eop
&& t
.type
!= tok_eof
) {
861 /* Handle implicit paragraph breaks after \IM, \BR etc */
862 if (start_cmd
!= c__invalid
&&
863 t
.type
== tok_cmd
&& t
.cmd
== start_cmd
) {
864 already
= TRUE
; /* inhibit get_token at top of loop */
868 if (t
.type
== tok_cmd
&& t
.cmd
== c__escaped
) {
869 t
.type
= tok_word
; /* nice and simple */
870 t
.aux
= 0; /* even if `\-' - nonbreaking! */
872 if (t
.type
== tok_cmd
&& t
.cmd
== c__nbsp
) {
873 t
.type
= tok_word
; /* nice and simple */
875 t
.text
= ustrdup(L
" "); /* text is ` ' not `_' */
876 t
.aux
= 0; /* (nonbreaking) */
880 if (whptr
== &par
.words
)
881 break; /* strip whitespace at start of para */
890 * Inhibit use of whitespace if it's (probably the
891 * newline) before a repeat \IM / \BR type
894 if (start_cmd
!= c__invalid
) {
895 dtor(t
), t
= get_token(in
);
897 if (t
.type
== tok_cmd
&& t
.cmd
== start_cmd
)
902 rdadd(&indexstr
, ' ');
903 if (!indexing
|| index_visible
)
906 addword(wd
, &idximplicit
);
911 rdadds(&indexstr
, t
.text
);
917 if (!indexing
|| index_visible
) {
918 wd
.text
= ustrdup(t
.text
);
922 wd
.text
= ustrdup(t
.text
);
923 addword(wd
, &idximplicit
);
927 error(err_unexbrace
, &t
.pos
);
928 /* Error recovery: push nop */
929 sitem
= mknew(struct stack_item
);
930 sitem
->type
= stack_nop
;
931 stk_push(parsestk
, sitem
);
934 sitem
= stk_pop(parsestk
);
937 * This closing brace could have been an
938 * indication that the cross-paragraph stack
939 * wants popping. Accordingly, we treat it here
940 * as an indication that the paragraph is over.
945 if (sitem
->type
& stack_ualt
) {
946 whptr
= sitem
->whptr
;
947 idximplicit
= sitem
->idximplicit
;
949 if (sitem
->type
& stack_style
) {
951 spcstyle
= word_WhiteSpace
;
953 if (sitem
->type
& stack_idx
) {
954 indexword
->text
= ustrdup(indexstr
.text
);
956 ustrlow(indexword
->text
);
958 rdadd(&indexstr
, L
'\0');
959 index_merge(idx
, FALSE
, indexstr
.text
, idxwordlist
);
960 sfree(indexstr
.text
);
962 if (sitem
->type
& stack_hyper
) {
964 wd
.type
= word_HyperEnd
;
969 if (!indexing
|| index_visible
)
972 addword(wd
, &idximplicit
);
974 if (sitem
->type
& stack_quote
) {
976 wd
.type
= toquotestyle(style
);
978 wd
.aux
= quote_Close
;
981 if (!indexing
|| index_visible
)
984 rdadd(&indexstr
, L
'"');
985 addword(wd
, &idximplicit
);
995 * In-paragraph comment: \#{ balanced braces }
997 * Anything goes here; even tok_eop. We should
998 * eat whitespace after the close brace _if_
999 * there was whitespace before the \#.
1001 dtor(t
), t
= get_token(in
);
1002 if (t
.type
!= tok_lbrace
) {
1003 error(err_explbr
, &t
.pos
);
1006 while (braces
> 0) {
1007 dtor(t
), t
= get_token(in
);
1008 if (t
.type
== tok_lbrace
)
1010 else if (t
.type
== tok_rbrace
)
1012 else if (t
.type
== tok_eof
) {
1013 error(err_commenteof
, &t
.pos
);
1020 dtor(t
), t
= get_token(in
);
1021 if (t
.type
== tok_white
) {
1028 dtor(t
), t
= get_token(in
);
1029 if (t
.type
!= tok_lbrace
) {
1030 error(err_explbr
, &t
.pos
);
1033 wd
.type
= toquotestyle(style
);
1035 wd
.aux
= quote_Open
;
1038 if (!indexing
|| index_visible
)
1039 addword(wd
, &whptr
);
1041 rdadd(&indexstr
, L
'"');
1042 addword(wd
, &idximplicit
);
1044 sitem
= mknew(struct stack_item
);
1045 sitem
->type
= stack_quote
;
1046 stk_push(parsestk
, sitem
);
1054 * Keyword, hyperlink, or \date. We expect a
1055 * left brace, some text, and then a right
1056 * brace. No nesting; no arguments.
1061 wd
.type
= word_UpperXref
;
1062 else if (t
.cmd
== c_k
)
1063 wd
.type
= word_LowerXref
;
1064 else if (t
.cmd
== c_W
)
1065 wd
.type
= word_HyperLink
;
1067 wd
.type
= word_Normal
;
1068 dtor(t
), t
= get_token(in
);
1069 if (t
.type
!= tok_lbrace
) {
1070 if (wd
.type
== word_Normal
) {
1071 time_t thetime
= time(NULL
);
1072 struct tm
*broken
= localtime(&thetime
);
1074 wdtext
= ustrftime(NULL
, broken
);
1077 error(err_explbr
, &t
.pos
);
1081 rdstring rs
= { 0, 0, NULL
};
1082 while (dtor(t
), t
= get_token(in
),
1083 t
.type
== tok_word
|| t
.type
== tok_white
) {
1084 if (t
.type
== tok_white
)
1087 rdadds(&rs
, t
.text
);
1089 if (wd
.type
== word_Normal
) {
1090 time_t thetime
= time(NULL
);
1091 struct tm
*broken
= localtime(&thetime
);
1092 wdtext
= ustrftime(rs
.text
, broken
);
1095 wdtext
= ustrdup(rs
.text
);
1098 if (t
.type
!= tok_rbrace
) {
1099 error(err_kwexprbr
, &t
.pos
);
1104 if (!indexing
|| index_visible
) {
1105 wd
.text
= ustrdup(wdtext
);
1106 addword(wd
, &whptr
);
1109 wd
.text
= ustrdup(wdtext
);
1110 addword(wd
, &idximplicit
);
1113 if (wd
.type
== word_HyperLink
) {
1115 * Hyperlinks are different: they then
1116 * expect another left brace, to begin
1117 * delimiting the text marked by the link.
1119 dtor(t
), t
= get_token(in
);
1121 * Special cases: \W{}\c, \W{}\e, \W{}\cw
1123 sitem
= mknew(struct stack_item
);
1124 sitem
->type
= stack_hyper
;
1125 if (t
.type
== tok_cmd
&&
1126 (t
.cmd
== c_e
|| t
.cmd
== c_c
|| t
.cmd
== c_cw
)) {
1127 if (style
!= word_Normal
)
1128 error(err_nestedstyles
, &t
.pos
);
1130 style
= (t
.cmd
== c_c ? word_Code
:
1131 t
.cmd
== c_cw ? word_WeakCode
:
1133 spcstyle
= tospacestyle(style
);
1134 sitem
->type
|= stack_style
;
1136 dtor(t
), t
= get_token(in
);
1138 if (t
.type
!= tok_lbrace
) {
1139 error(err_explbr
, &t
.pos
);
1142 stk_push(parsestk
, sitem
);
1150 if (style
!= word_Normal
) {
1151 error(err_nestedstyles
, &t
.pos
);
1152 /* Error recovery: eat lbrace, push nop. */
1153 dtor(t
), t
= get_token(in
);
1154 sitem
= mknew(struct stack_item
);
1155 sitem
->type
= stack_nop
;
1156 stk_push(parsestk
, sitem
);
1158 dtor(t
), t
= get_token(in
);
1159 if (t
.type
!= tok_lbrace
) {
1160 error(err_explbr
, &t
.pos
);
1162 style
= (type
== c_c ? word_Code
:
1163 type
== c_cw ? word_WeakCode
:
1165 spcstyle
= tospacestyle(style
);
1166 sitem
= mknew(struct stack_item
);
1167 sitem
->type
= stack_style
;
1168 stk_push(parsestk
, sitem
);
1176 error(err_nestedindex
, &t
.pos
);
1177 /* Error recovery: eat lbrace, push nop. */
1178 dtor(t
), t
= get_token(in
);
1179 sitem
= mknew(struct stack_item
);
1180 sitem
->type
= stack_nop
;
1181 stk_push(parsestk
, sitem
);
1183 sitem
= mknew(struct stack_item
);
1184 sitem
->type
= stack_idx
;
1185 dtor(t
), t
= get_token(in
);
1187 * Special cases: \i\c, \i\e, \i\cw
1190 if (t
.type
== tok_cmd
&&
1191 (t
.cmd
== c_e
|| t
.cmd
== c_c
|| t
.cmd
== c_cw
)) {
1192 if (style
!= word_Normal
)
1193 error(err_nestedstyles
, &t
.pos
);
1195 style
= (t
.cmd
== c_c ? word_Code
:
1196 t
.cmd
== c_cw ? word_WeakCode
:
1198 spcstyle
= tospacestyle(style
);
1199 sitem
->type
|= stack_style
;
1201 dtor(t
), t
= get_token(in
);
1203 if (t
.type
!= tok_lbrace
) {
1205 error(err_explbr
, &t
.pos
);
1207 /* Add an index-reference word with no text as yet */
1208 wd
.type
= word_IndexRef
;
1213 indexword
= addword(wd
, &whptr
);
1214 /* Set up a rdstring to read the index text */
1216 /* Flags so that we do the Right Things with text */
1217 index_visible
= (type
!= c_I
);
1218 index_downcase
= (type
== c_ii
);
1221 idximplicit
= &idxwordlist
;
1222 /* Stack item to close the indexing on exit */
1223 stk_push(parsestk
, sitem
);
1228 utext
[0] = uchr
; utext
[1] = 0;
1234 if (!indexing
|| index_visible
) {
1235 wd
.text
= ustrdup(utext
);
1236 uword
= addword(wd
, &whptr
);
1240 wd
.text
= ustrdup(utext
);
1241 iword
= addword(wd
, &idximplicit
);
1244 dtor(t
), t
= get_token(in
);
1245 if (t
.type
== tok_lbrace
) {
1247 * \u with a left brace. Until the brace
1248 * closes, all further words go on a
1249 * sidetrack from the main thread of the
1252 sitem
= mknew(struct stack_item
);
1253 sitem
->type
= stack_ualt
;
1254 sitem
->whptr
= whptr
;
1255 sitem
->idximplicit
= idximplicit
;
1256 stk_push(parsestk
, sitem
);
1257 whptr
= uword ?
&uword
->alt
: NULL
;
1258 idximplicit
= iword ?
&iword
->alt
: NULL
;
1261 rdadd(&indexstr
, uchr
);
1266 if (!macrolookup(macros
, in
, t
.text
, &t
.pos
))
1267 error(err_badmidcmd
, t
.text
, &t
.pos
);
1272 dtor(t
), t
= get_token(in
);
1273 seenwhite
= iswhite
;
1276 /* Check the stack is empty */
1277 if (stk_top(parsestk
)) {
1278 while ((sitem
= stk_pop(parsestk
)))
1280 error(err_missingrbrace
, &t
.pos
);
1283 prev_para_type
= par
.type
;
1285 if (t
.type
== tok_eof
)
1289 if (stk_top(crossparastk
)) {
1292 error(err_missingrbrace2
, &t
.pos
);
1293 while ((p
= stk_pop(crossparastk
)))
1298 * We break to here rather than returning, because otherwise
1299 * this cleanup doesn't happen.
1302 macrocleanup(macros
);
1304 stk_free(crossparastk
);
1307 paragraph
*read_input(input
*in
, indexdata
*idx
) {
1308 paragraph
*head
= NULL
;
1309 paragraph
**hptr
= &head
;
1311 while (in
->currindex
< in
->nfiles
) {
1312 in
->currfp
= fopen(in
->filenames
[in
->currindex
], "r");
1314 setpos(in
, in
->filenames
[in
->currindex
]);
1315 read_file(&hptr
, in
, idx
);