d7482997 |
1 | #ifndef HALIBUT_HALIBUT_H |
2 | #define HALIBUT_HALIBUT_H |
3 | |
4 | #include <stdio.h> |
5 | #include <wchar.h> |
6 | #include <time.h> |
9c1cf191 |
7 | #include <string.h> |
d7482997 |
8 | |
e34ba5c3 |
9 | #include "charset.h" |
10 | |
d7482997 |
11 | #ifdef __GNUC__ /* NORETURN: the noreturn attribute under GNU C, expands to nothing elsewhere */ |
12 | #define NORETURN __attribute__((__noreturn__)) |
13 | #else |
14 | #define NORETURN /* nothing */ |
15 | #endif |
16 | |
17 | #ifndef TRUE /* boolean constants; each is defined only if not already defined */ |
18 | #define TRUE 1 |
19 | #endif |
20 | #ifndef FALSE |
21 | #define FALSE 0 |
22 | #endif |
23 | |
24 | /* For suppressing unused-parameter warnings: assigns x to itself, so x must be a modifiable lvalue */ |
25 | #define IGNORE(x) ( (x) = (x) ) |
26 | |
27 | #include "tree234.h" |
28 | |
29 | /* |
30 | * Structure tags, each with a typedef name, declared ahead of any struct bodies |
31 | */ |
32 | typedef struct input_Tag input; |
33 | typedef struct filepos_Tag filepos; |
34 | typedef struct paragraph_Tag paragraph; |
35 | typedef struct word_Tag word; |
36 | typedef struct keywordlist_Tag keywordlist; |
37 | typedef struct keyword_Tag keyword; |
d7482997 |
38 | typedef struct numberstate_Tag numberstate; |
39 | typedef struct indexdata_Tag indexdata; |
40 | typedef struct indextag_Tag indextag; |
41 | typedef struct indexentry_Tag indexentry; |
42 | typedef struct macrostack_Tag macrostack; |
43 | |
44 | /* |
45 | * Data structure to hold a file name, a line number and a |
46 | * column number, for reporting errors |
47 | */ |
48 | struct filepos_Tag { |
49 | char *filename; |
50 | int line, col; |
51 | }; |
52 | |
53 | /* |
54 | * Data structure to hold all the file names etc for input |
55 | */ |
56 | typedef struct pushback_Tag { |
57 | int chr; /* one pushed-back input character */ |
58 | filepos pos; /* file position kept with it (presumably where it was read -- confirm) */ |
59 | } pushback; |
60 | struct input_Tag { |
61 | char **filenames; /* complete list of input files */ |
62 | int nfiles; /* how many in the list */ |
63 | FILE *currfp; /* the currently open one */ |
64 | int currindex; /* which one is that in the list */ |
65 | pushback *pushback; /* pushed-back input characters */ |
66 | int npushback, pushbacksize; /* presumably entries in use and entries allocated in pushback[] -- confirm */ |
67 | filepos pos; /* presumably the current read position -- confirm */ |
68 | int reportcols; /* report column numbers in errors */ |
69 | macrostack *stack; /* macro expansions in force */ |
e34ba5c3 |
70 | int defcharset, charset; /* character sets for input files */ |
71 | charset_state csstate; /* NOTE(review): type not declared here; presumably decoder state from charset.h -- confirm */ |
72 | wchar_t wc[16]; /* wide chars from input conversion */ |
73 | int nwc, wcpos; /* size of, and position in, wc[] */ |
e4ea58f8 |
74 | char *pushback_chars; /* used to save input-encoding data */ |
d7482997 |
75 | }; |
76 | |
77 | /* |
78 | * Data structure to hold the input form of the source, ie a linked |
79 | * list of paragraphs |
80 | */ |
81 | struct paragraph_Tag { |
82 | paragraph *next; /* next paragraph in the linked list */ |
83 | int type; /* presumably one of the para_* type codes -- confirm */ |
84 | wchar_t *keyword; /* for most special paragraphs */ |
e4ea58f8 |
85 | char *origkeyword; /* same again in original charset */ |
d7482997 |
86 | word *words; /* list of words in paragraph */ |
87 | int aux; /* number, in a numbered paragraph |
88 | * or subsection level |
89 | */ |
90 | word *kwtext; /* chapter/section indication */ |
91 | word *kwtext2; /* numeric-only form of kwtext */ |
92 | filepos fpos; /* presumably the source position of the paragraph, for error reports -- confirm */ |
93 | |
94 | paragraph *parent, *child, *sibling; /* for hierarchy navigation */ |
95 | |
96 | void *private_data; /* for temp use in backends */ |
97 | }; |
98 | enum { /* paragraph type codes (presumably the values held in a paragraph's type field -- confirm) */ |
99 | para_IM, /* index merge */ |
100 | para_BR, /* bibliography rewrite */ |
101 | para_Rule, /* random horizontal rule */ |
102 | para_Chapter, |
103 | para_Appendix, |
104 | para_UnnumberedChapter, |
105 | para_Heading, |
106 | para_Subsect, |
107 | para_Normal, |
108 | para_Biblio, /* causes no output unless turned ... */ |
109 | para_BiblioCited, /* ... into this paragraph type */ |
110 | para_Bullet, |
111 | para_NumberedList, |
7136a6c7 |
112 | para_DescribedThing, |
113 | para_Description, |
d7482997 |
114 | para_Code, |
115 | para_Copyright, |
d7482997 |
116 | para_NoCite, |
117 | para_Title, |
118 | para_VersionID, |
119 | para_Config, /* configuration directive */ |
7136a6c7 |
120 | para_LcontPush, /* begin continuation of list item */ |
121 | para_LcontPop, /* end continuation of list item */ |
2614b01d |
122 | para_QuotePush, /* begin block quote */ |
123 | para_QuotePop, /* end block quote */ |
3f3d1acc |
124 | /* |
125 | * Back ends may define their own paragraph types beyond here, |
126 | * in case they need to use them internally. |
127 | */ |
d7482997 |
128 | para_NotParaType /* placeholder value; the last enumerator in this list */ |
129 | }; |
130 | |
131 | /* |
132 | * Data structure to hold an individual word |
133 | */ |
134 | struct word_Tag { |
135 | word *next, *alt; |
136 | int type; /* presumably one of the word_* type codes -- confirm */ |
137 | int aux; /* for attributed and quote-type words, holds the attr_ and quote_ values */ |
138 | int breaks; /* can a line break after it? */ |
139 | wchar_t *text; /* NULL or ignorable for the white-space and quote word types */ |
140 | filepos fpos; |
5dd44dce |
141 | |
142 | void *private_data; /* for temp use in backends */ |
d7482997 |
143 | }; |
144 | enum { /* word type codes: three parallel groups of four, then the non-attributed types */ |
145 | /* ORDERING CONSTRAINT: these normal-word types ... */ |
146 | word_Normal, |
147 | word_Emph, |
148 | word_Code, /* monospaced; `quoted' in text */ |
149 | word_WeakCode, /* monospaced, normal in text */ |
150 | /* ... must be in the same order as these space types ... */ |
151 | word_WhiteSpace, /* text is NULL or ignorable */ |
152 | word_EmphSpace, /* WhiteSpace when emphasised */ |
153 | word_CodeSpace, /* WhiteSpace when code */ |
154 | word_WkCodeSpace, /* WhiteSpace when weak code */ |
155 | /* ... and must be in the same order as these quote types ... */ |
156 | word_Quote, /* text is NULL or ignorable */ |
157 | word_EmphQuote, /* Quote when emphasised */ |
158 | word_CodeQuote, /* (can't happen) */ |
159 | word_WkCodeQuote, /* (can't happen) */ |
160 | /* END ORDERING CONSTRAINT */ |
161 | word_internal_endattrs, /* exclusive upper bound tested by isattr() */ |
162 | word_UpperXref, /* \K */ |
163 | word_LowerXref, /* \k */ |
164 | word_XrefEnd, /* (invisible; no text) */ |
165 | word_IndexRef, /* (always an invisible one) */ |
166 | word_HyperLink, /* (invisible) */ |
3f3d1acc |
167 | word_HyperEnd, /* (also invisible; no text) */ |
168 | /* |
169 | * Back ends may define their own word types beyond here, in |
170 | * case they need to use them internally. |
171 | */ |
172 | word_NotWordType /* placeholder value */ |
d7482997 |
173 | }; |
174 | /* aux values for attributed words; these occupy the bits covered by attr_mask */ |
175 | enum { |
176 | attr_Only = 0x0000, /* a lone word with the attribute */ |
177 | attr_First = 0x0001, /* the first of a series */ |
178 | attr_Last = 0x0002, /* the last of a series */ |
179 | attr_Always = 0x0003, /* any other part of a series */ |
ee90d1f0 |
180 | attr_mask = 0x0003 /* mask applied by attraux() */ |
d7482997 |
181 | }; |
182 | /* aux values for quote-type words; these occupy the bits covered by quote_mask */ |
183 | enum { |
184 | quote_Open = 0x0010, |
185 | quote_Close = 0x0020, |
ee90d1f0 |
186 | quote_mask = 0x0030 /* mask applied by quoteaux() */ |
d7482997 |
187 | }; |
b9e27ab6 |
188 | #define isvis(x) ( ( (x) >= word_Normal && (x) <= word_LowerXref ) ) /* x lies in word_Normal..word_LowerXref inclusive */ |
d7482997 |
189 | #define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \ |
190 | ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) ) /* strictly between word_Normal and word_internal_endattrs, excluding word_WhiteSpace */ |
191 | #define sameattr(x,y) ( (((x)-(y)) & 3) == 0 ) /* x and y differ by a multiple of 4, ie same slot within their group of four */ |
192 | #define towordstyle(x) ( word_Normal + ((x) & 3) ) /* low two bits of x, rebased onto the normal-word group */ |
193 | #define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) ) /* low two bits of x, rebased onto the space group */ |
194 | #define toquotestyle(x) ( word_Quote + ((x) & 3) ) /* low two bits of x, rebased onto the quote group */ |
195 | #define removeattr(x) ( word_Normal + ((x) &~ 3) ) /* x with its low two bits cleared */ |
196 | |
197 | #define attraux(x) ( (x) & attr_mask ) /* keep only the attr_mask bits of an aux value */ |
198 | #define quoteaux(x) ( (x) & quote_mask ) /* keep only the quote_mask bits of an aux value */ |
199 | |
200 | /* |
201 | * error.c |
202 | */ |
203 | void fatal(int code, ...) NORETURN; /* marked NORETURN; code is presumably one of the err_* values below -- confirm */ |
204 | void error(int code, ...); /* same parameters as fatal(), without the NORETURN marking */ |
205 | enum { |
206 | err_nomemory, /* out of memory */ |
207 | err_optnoarg, /* option `-%s' requires an argument */ |
208 | err_nosuchopt, /* unrecognised option `-%s' */ |
675958c3 |
209 | err_cmdcharset, /* unrecognised charset %s (cmdline) */ |
210 | err_futileopt, /* futile option `-%s'%s */ |
d7482997 |
211 | err_noinput, /* no input files */ |
212 | err_cantopen, /* unable to open input file `%s' */ |
213 | err_nodata, /* no data in input files */ |
214 | err_brokencodepara, /* line in codepara didn't begin `\c' */ |
215 | err_kwunclosed, /* expected `}' after keyword */ |
216 | err_kwillegal, /* paragraph type expects no keyword */ |
217 | err_kwexpected, /* paragraph type expects a keyword */ |
218 | err_kwtoomany, /* paragraph type expects only 1 */ |
219 | err_bodyillegal, /* paragraph type expects only kws! */ |
220 | err_badparatype, /* invalid command at start of para */ |
221 | err_badmidcmd, /* invalid command in mid-para */ |
222 | err_unexbrace, /* unexpected brace */ |
223 | err_explbr, /* expected `{' after command */ |
224 | err_commenteof, /* EOF inside braced comment */ |
225 | err_kwexprbr, /* expected `}' after cross-ref */ |
6ff15f2b |
226 | err_codequote, /* \q within \c is not supported */ |
d7482997 |
227 | err_missingrbrace, /* unclosed braces at end of para */ |
7136a6c7 |
228 | err_missingrbrace2, /* unclosed braces at end of file */ |
d7482997 |
229 | err_nestedstyles, /* unable to nest text styles */ |
230 | err_nestedindex, /* unable to nest `\i' thingys */ |
da090173 |
231 | err_indexcase, /* two \i differing only in case */ |
d7482997 |
232 | err_nosuchkw, /* unresolved cross-reference */ |
233 | err_multiBR, /* multiple \BRs on same keyword */ |
234 | err_nosuchidxtag, /* \IM on unknown index tag (warning) */ |
235 | err_cantopenw, /* can't open output file for write */ |
236 | err_macroexists, /* this macro already exists */ |
237 | err_sectjump, /* jump a heading level, eg \C -> \S */ |
238 | err_winhelp_ctxclash, /* WinHelp context ID hash clash */ |
239 | err_multikw, /* keyword clash in sections */ |
7136a6c7 |
240 | err_misplacedlcont, /* \lcont not after a list item */ |
2614b01d |
241 | err_sectmarkerinblock, /* section marker appeared in block */ |
12f0ee84 |
242 | err_cfginsufarg, /* \cfg{%s} insufficient args (<%d) */ |
f4551933 |
243 | err_infonodechar, /* colon/comma in node name in info */ |
db662ca1 |
244 | err_text_codeline, /* \c line too long in text backend */ |
27bdc5ab |
245 | err_htmlver, /* unrecognised HTML version keyword */ |
0960a3d8 |
246 | err_charset, /* unrecognised character set name */ |
ba0fe3ec |
247 | err_nofont, /* unrecognised font name */ |
248 | err_afmeof, /* eof in AFM file */ |
249 | err_afmkey, /* missing expected keyword in AFM */ |
250 | err_afmvers, /* unsupported AFM version */ |
251 | err_afmval, /* missing value(s) for AFM key */ |
c885c2ff |
252 | err_pfeof, /* eof in Type 1 font file */ |
253 | err_pfhead, /* bad Type 1 header line */ |
254 | err_pfbad, /* otherwise invalid Type 1 font */ |
255 | err_pfnoafm, /* Type 1 font but no AFM */ |
f2ef00b5 |
256 | err_chmnames, /* need both or neither of hhp+chm */ |
d7482997 |
257 | err_whatever /* random error of another type */ |
258 | }; |
259 | |
260 | /* |
261 | * malloc.c |
262 | */ |
263 | #ifdef LOGALLOC /* with LOGALLOC the allocators also take a file name and line, which the macros below fill in from __FILE__ and __LINE__ */ |
264 | void *smalloc(char *file, int line, int size); |
265 | void *srealloc(char *file, int line, void *p, int size); |
266 | void sfree(char *file, int line, void *p); |
267 | #define smalloc(x) smalloc(__FILE__, __LINE__, x) |
268 | #define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y) |
269 | #define sfree(x) sfree(__FILE__, __LINE__, x) |
270 | #else |
271 | void *smalloc(int size); |
272 | void *srealloc(void *p, int size); |
273 | void sfree(void *p); |
274 | #endif |
275 | void free_word_list(word *w); |
276 | void free_para_list(paragraph *p); |
277 | word *dup_word_list(word *w); |
278 | char *dupstr(char *s); |
279 | |
f1530049 |
280 | #define snew(type) ( (type *) smalloc (sizeof (type)) ) /* smalloc room for one object of the type, cast to type * */ |
281 | #define snewn(number, type) ( (type *) smalloc ((number) * sizeof (type)) ) /* likewise for number objects */ |
282 | #define sresize(array, number, type) \ |
283 | ( (type *) srealloc ((array), (number) * sizeof (type)) ) /* srealloc array to hold number objects of the type */ |
d7482997 |
284 | #define lenof(array) ( sizeof(array) / sizeof(*(array)) ) /* element count; only meaningful for a true array, not a pointer */ |
285 | |
286 | /* |
287 | * ustring.c: functions on wchar_t strings; several also take an int charset |
288 | */ |
e4ea58f8 |
289 | wchar_t *ustrdup(wchar_t const *s); |
290 | char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset); |
291 | char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset); |
292 | wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset); |
293 | char *utoa_dup(wchar_t const *s, int charset); |
294 | char *utoa_dup_len(wchar_t const *s, int charset, int *len); |
295 | char *utoa_careful_dup(wchar_t const *s, int charset); |
296 | wchar_t *ufroma_dup(char const *s, int charset); |
7e976207 |
297 | char *utoa_locale_dup(wchar_t const *s); |
298 | wchar_t *ufroma_locale_dup(char const *s); |
5dd44dce |
299 | int ustrlen(wchar_t const *s); |
d7482997 |
300 | wchar_t *uadv(wchar_t *s); |
5dd44dce |
301 | wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); |
08e78486 |
302 | wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n); |
d7482997 |
303 | wchar_t utolower(wchar_t); |
831da32e |
304 | int uisalpha(wchar_t); |
d7482997 |
305 | int ustrcmp(wchar_t *lhs, wchar_t *rhs); /* NOTE(review): takes non-const pointers, unlike ustricmp and ustrnicmp below */ |
78c73085 |
306 | int ustricmp(wchar_t const *lhs, wchar_t const *rhs); |
307 | int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen); |
dd567011 |
308 | int utoi(wchar_t const *); |
309 | double utof(wchar_t const *); |
310 | int utob(wchar_t const *); |
d7482997 |
311 | int uisdigit(wchar_t); |
312 | wchar_t *ustrlow(wchar_t *s); |
c8422236 |
313 | wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec); |
91f93b94 |
314 | int cvt_ok(int charset, const wchar_t *s); |
0960a3d8 |
315 | int charset_from_ustr(filepos *fpos, const wchar_t *name); |
d7482997 |
316 | |
317 | /* |
e5cd393f |
318 | * wcwidth.c: one function for char strings and one for wchar_t strings, each also given a charset (presumably returning a display width -- confirm) |
319 | */ |
320 | int strwid(char const *s, int charset); |
321 | int ustrwid(wchar_t const *s, int charset); |
322 | |
323 | /* |
d7482997 |
324 | * help.c: routines taking no arguments and returning nothing (presumably each prints the text its name suggests -- confirm) |
325 | */ |
326 | void help(void); |
327 | void usage(void); |
328 | void showversion(void); |
f336fa9a |
329 | void listcharsets(void); |
d7482997 |
330 | |
331 | /* |
332 | * licence.c: takes no arguments and returns nothing (presumably prints the licence text -- confirm) |
333 | */ |
334 | void licence(void); |
335 | |
336 | /* |
337 | * version.c: only declared here; a constant pointer to constant characters |
338 | */ |
961ee75b |
339 | extern const char *const version; |
d7482997 |
340 | |
341 | /* |
342 | * misc.c |
343 | */ |
e4ea58f8 |
344 | char *adv(char *s); /* NOTE(review): behaviour not visible here; same shape as uadv() but for char strings -- confirm */ |
345 | |
d7482997 |
346 | typedef struct stackTag *stack; /* note: `stack' is itself a pointer type; struct stackTag has no body in this header */ |
347 | stack stk_new(void); |
348 | void stk_free(stack); |
349 | void stk_push(stack, void *); /* elements are passed and returned as untyped void * */ |
350 | void *stk_pop(stack); |
7136a6c7 |
351 | void *stk_top(stack); |
d7482997 |
352 | |
353 | typedef struct tagRdstring rdstring; /* wchar_t text plus two counters (presumably pos = used, size = allocated -- confirm) */ |
354 | struct tagRdstring { |
355 | int pos, size; |
356 | wchar_t *text; |
357 | }; |
358 | typedef struct tagRdstringc rdstringc; /* same layout as rdstring, with char text */ |
359 | struct tagRdstringc { |
360 | int pos, size; |
361 | char *text; |
362 | }; |
363 | extern const rdstring empty_rdstring; /* constant instances, defined elsewhere (presumably for use as initialisers -- confirm) */ |
364 | extern const rdstringc empty_rdstringc; |
365 | void rdadd(rdstring *rs, wchar_t c); /* rdstring operations: one wchar_t, a wchar_t string */ |
5dd44dce |
366 | void rdadds(rdstring *rs, wchar_t const *p); |
d7482997 |
367 | wchar_t *rdtrim(rdstring *rs); |
368 | void rdaddc(rdstringc *rs, char c); /* rdstringc operations: one char, a char string, a char string with explicit length */ |
5dd44dce |
369 | void rdaddsc(rdstringc *rs, char const *p); |
7e2417cc |
370 | void rdaddsn(rdstringc *rc, char const *p, int len); |
d7482997 |
371 | char *rdtrimc(rdstringc *rs); |
372 | |
373 | int compare_wordlists(word *a, word *b); /* NOTE(review): meaning of the int result is not visible here -- confirm */ |
374 | |
bb9e7835 |
375 | void mark_attr_ends(word *words); /* presumably sets the attr_ aux values along the list -- confirm */ |
d7482997 |
376 | |
377 | typedef struct tagWrappedLine wrappedline; /* one wrapped line; lines are chained through next */ |
378 | struct tagWrappedLine { |
379 | wrappedline *next; |
380 | word *begin, *end; /* first & last words of line */ |
381 | int nspaces; /* number of whitespaces in line */ |
382 | int shortfall; /* how much shorter than max width */ |
383 | }; |
43341922 |
384 | wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int); /* the callback gets a void * and a word and returns int; the void * after it is presumably what it is called with -- confirm */ |
d7482997 |
385 | void wrap_free(wrappedline *); |
e4ea58f8 |
386 | void cmdline_cfg_add(paragraph *cfg, char *string); /* these three work on a paragraph (presumably a configuration paragraph built from command-line strings -- confirm) */ |
387 | paragraph *cmdline_cfg_new(void); |
388 | paragraph *cmdline_cfg_simple(char *string, ...); |
d7482997 |
389 | |
390 | /* |
391 | * input.c: takes the input description and the index data, returns a paragraph list |
392 | */ |
393 | paragraph *read_input(input *in, indexdata *idx); |
394 | |
395 | /* |
ba0fe3ec |
396 | * in_afm.c: takes the same input structure, returns nothing |
397 | */ |
398 | void read_afm_file(input *in); |
399 | |
400 | /* |
44407fea |
401 | * in_pf.c: takes the same input structure, returns nothing |
402 | */ |
403 | void read_pfa_file(input *in); |
404 | |
405 | /* |
d7482997 |
406 | * keywords.c |
407 | */ |
408 | struct keywordlist_Tag { |
409 | int nkeywords; |
410 | int size; |
411 | tree234 *keys; /* sorted by `key' field */ |
412 | word **looseends; /* non-keyword list element numbers */ |
413 | int nlooseends; /* presumably entries in use in looseends[] -- confirm */ |
414 | int looseendssize; /* presumably entries allocated in looseends[] -- confirm */ |
415 | }; |
416 | struct keyword_Tag { |
417 | wchar_t *key; /* the keyword itself */ |
418 | word *text; /* "Chapter 2", "Appendix Q"... */ |
419 | /* (NB: filepos are not set) */ |
420 | paragraph *para; /* the paragraph referenced */ |
421 | }; |
422 | keyword *kw_lookup(keywordlist *, wchar_t *); /* returns a keyword for a list and a wchar_t string (presumably NULL if absent -- confirm) */ |
423 | keywordlist *get_keywords(paragraph *); |
424 | void free_keywords(keywordlist *); |
425 | void subst_keywords(paragraph *, keywordlist *); |
426 | |
427 | /* |
428 | * index.c |
429 | */ |
430 | |
431 | /* |
432 | * Data structure to hold both sides of the index. |
433 | */ |
434 | struct indexdata_Tag { |
435 | tree234 *tags; /* holds type `indextag' */ |
436 | tree234 *entries; /* holds type `indexentry' */ |
437 | }; |
438 | |
439 | /* |
440 | * Data structure to hold an index tag (LHS of index). |
441 | */ |
442 | struct indextag_Tag { |
443 | wchar_t *name; |
444 | word *implicit_text; |
f4551933 |
445 | filepos implicit_fpos; |
d7482997 |
446 | word **explicit_texts; |
f4551933 |
447 | filepos *explicit_fpos; |
d7482997 |
448 | int nexplicit, explicit_size; /* presumably entries in use and allocated in explicit_texts[] and explicit_fpos[] -- confirm */ |
449 | int nrefs; /* presumably the number of entries in refs[] -- confirm */ |
450 | indexentry **refs; /* array of entries referenced by tag */ |
451 | }; |
452 | |
453 | /* |
454 | * Data structure to hold an index entry (RHS of index). |
455 | */ |
456 | struct indexentry_Tag { |
457 | word *text; |
458 | void *backend_data; /* private to back end */ |
f4551933 |
459 | filepos fpos; |
d7482997 |
460 | }; |
461 | |
462 | indexdata *make_index(void); |
463 | void cleanup_index(indexdata *); |
464 | /* index_merge takes responsibility for freeing arg 3 iff implicit; never |
465 | * takes responsibility for arg 2. NOTE(review): the numbering looks zero-based (arg 2 = the wchar_t *, arg 3 = the word *), since counting from one would make arg 2 the int -- confirm */ |
f4551933 |
466 | void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *); |
d7482997 |
467 | void build_index(indexdata *); |
468 | void index_debug(indexdata *); |
469 | indextag *index_findtag(indexdata *idx, wchar_t *name); |
470 | |
471 | /* |
472 | * contents.c: number_init() returns a numberstate, which the other three functions take (number_free() presumably releases it -- confirm) |
473 | */ |
474 | numberstate *number_init(void); |
475 | void number_cfg(numberstate *, paragraph *); |
96f3af16 |
476 | word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *); |
d7482997 |
477 | void number_free(numberstate *); |
478 | |
479 | /* |
480 | * biblio.c: takes the paragraph list and the keyword list, returns nothing |
481 | */ |
482 | void gen_citations(paragraph *, keywordlist *); |
483 | |
484 | /* |
d7482997 |
485 | * bk_text.c (all the back-end entry points below share one signature: paragraph list, keyword list, index data and a void *) |
486 | */ |
43341922 |
487 | void text_backend(paragraph *, keywordlist *, indexdata *, void *); |
ba9c1487 |
488 | paragraph *text_config_filename(char *filename); |
d7482997 |
489 | |
490 | /* |
78c73085 |
491 | * bk_html.c |
d7482997 |
492 | */ |
78c73085 |
493 | void html_backend(paragraph *, keywordlist *, indexdata *, void *); |
494 | paragraph *html_config_filename(char *filename); |
d7482997 |
495 | |
496 | /* |
497 | * bk_whlp.c |
498 | */ |
43341922 |
499 | void whlp_backend(paragraph *, keywordlist *, indexdata *, void *); |
ba9c1487 |
500 | paragraph *whlp_config_filename(char *filename); |
d7482997 |
501 | |
7136a6c7 |
502 | /* |
503 | * bk_man.c |
504 | */ |
43341922 |
505 | void man_backend(paragraph *, keywordlist *, indexdata *, void *); |
ba9c1487 |
506 | paragraph *man_config_filename(char *filename); |
7136a6c7 |
507 | |
5dd44dce |
508 | /* |
509 | * bk_info.c |
510 | */ |
43341922 |
511 | void info_backend(paragraph *, keywordlist *, indexdata *, void *); |
5dd44dce |
512 | paragraph *info_config_filename(char *filename); |
513 | |
43341922 |
514 | /* |
515 | * bk_paper.c (takes the same first three arguments and returns a void *; presumably that result is what some back ends receive as their void * argument -- confirm) |
516 | */ |
517 | void *paper_pre_backend(paragraph *, keywordlist *, indexdata *); |
518 | |
519 | /* |
520 | * bk_ps.c |
521 | */ |
522 | void ps_backend(paragraph *, keywordlist *, indexdata *, void *); |
523 | paragraph *ps_config_filename(char *filename); |
524 | |
525 | /* |
526 | * bk_pdf.c |
527 | */ |
528 | void pdf_backend(paragraph *, keywordlist *, indexdata *, void *); |
529 | paragraph *pdf_config_filename(char *filename); |
530 | |
d7482997 |
531 | #endif |