Make the Deflate decoder correctly handle the special case of a
[sgt/halibut] / halibut.h
1 #ifndef HALIBUT_HALIBUT_H
2 #define HALIBUT_HALIBUT_H
3
4 #include <stdio.h>
5 #include <wchar.h>
6 #include <time.h>
7 #include <string.h>
8
9 #include "charset.h"
10
11 #ifdef __GNUC__ /* NORETURN is appended to the declaration of fatal() further down */
12 #define NORETURN __attribute__((__noreturn__))
13 #else
14 #define NORETURN /* nothing: the annotation simply disappears when __GNUC__ is not defined */
15 #endif
16
17 #ifndef TRUE /* boolean constants; each is defined only if not already provided */
18 #define TRUE 1
19 #endif
20 #ifndef FALSE
21 #define FALSE 0
22 #endif
23
/*
 * For suppressing unused-parameter warnings.
 *
 * Evaluates its argument exactly once and discards the result. A cast
 * to void is used instead of assigning the argument to itself, so the
 * macro also accepts const-qualified parameters, performs no store,
 * and does not provoke self-assignment warnings.
 */
#define IGNORE(x) ( (void)(x) )
26
27 #include "tree234.h"
28
29 /*
30 * Structure tags: short typedef names for the structures used in this header
31 */
32 typedef struct input_Tag input;
33 typedef struct filepos_Tag filepos;
34 typedef struct paragraph_Tag paragraph;
35 typedef struct word_Tag word;
36 typedef struct keywordlist_Tag keywordlist;
37 typedef struct keyword_Tag keyword;
38 typedef struct numberstate_Tag numberstate; /* never defined in this header; only used via pointer here */
39 typedef struct indexdata_Tag indexdata;
40 typedef struct indextag_Tag indextag;
41 typedef struct indexentry_Tag indexentry;
42 typedef struct macrostack_Tag macrostack; /* never defined in this header; only used via pointer here */
43
44 /*
45 * Data structure to hold a file name, a line and a
46 * column number, for reporting errors
47 */
48 struct filepos_Tag {
49 char *filename;
50 int line, col;
51 };
52
53 /*
54 * Data structure to hold all the file names etc for input
55 */
56 typedef struct pushback_Tag {
57 int chr; /* one pushed-back input character */
58 filepos pos; /* NOTE(review): presumably the position that character came from -- confirm in input.c */
59 } pushback;
60 struct input_Tag {
61 char **filenames; /* complete list of input files */
62 int nfiles; /* how many in the list */
63 FILE *currfp; /* the currently open one */
64 int currindex; /* which one is that in the list */
65 int wantclose; /* does the current file want closing */
66 pushback *pushback; /* pushed-back input characters */
67 int npushback, pushbacksize; /* NOTE(review): presumably used / allocated entries of pushback[] -- confirm */
68 filepos pos; /* NOTE(review): presumably the current read position -- confirm */
69 int reportcols; /* report column numbers in errors */
70 macrostack *stack; /* macro expansions in force */
71 int defcharset, charset; /* character sets for input files */
72 charset_state csstate; /* charset_state is not declared in this header; presumably it comes from charset.h */
73 wchar_t wc[16]; /* wide chars from input conversion */
74 int nwc, wcpos; /* size of, and position in, wc[] */
75 char *pushback_chars; /* used to save input-encoding data */
76 };
77
78 /*
79 * Data structure to hold the input form of the source, ie a linked
80 * list of paragraphs
81 */
82 struct paragraph_Tag {
83 paragraph *next; /* next paragraph in the list */
84 int type; /* paragraph type: a para_* code */
85 wchar_t *keyword; /* for most special paragraphs */
86 char *origkeyword; /* same again in original charset */
87 word *words; /* list of words in paragraph */
88 int aux; /* number, in a numbered paragraph
89 * or subsection level
90 */
91 word *kwtext; /* chapter/section indication */
92 word *kwtext2; /* numeric-only form of kwtext */
93 filepos fpos; /* source position (filepos exists for error reporting) */
94
95 paragraph *parent, *child, *sibling; /* for hierarchy navigation */
96
97 void *private_data; /* for temp use in backends */
98 };
99 enum { /* paragraph type codes; no explicit values, so numbering starts at 0 and follows declaration order */
100 para_IM, /* index merge */
101 para_BR, /* bibliography rewrite */
102 para_Rule, /* random horizontal rule */
103 para_Chapter,
104 para_Appendix,
105 para_UnnumberedChapter,
106 para_Heading,
107 para_Subsect,
108 para_Normal,
109 para_Biblio, /* causes no output unless turned ... */
110 para_BiblioCited, /* ... into this paragraph type */
111 para_Bullet,
112 para_NumberedList,
113 para_DescribedThing,
114 para_Description,
115 para_Code,
116 para_Copyright,
117 para_NoCite,
118 para_Title,
119 para_VersionID,
120 para_Config, /* configuration directive */
121 para_LcontPush, /* begin continuation of list item */
122 para_LcontPop, /* end continuation of list item */
123 para_QuotePush, /* begin block quote */
124 para_QuotePop, /* end block quote */
125 /*
126 * Back ends may define their own paragraph types beyond here,
127 * in case they need to use them internally.
128 */
129 para_NotParaType /* placeholder value: one past the last type declared here */
130 };
131
132 /*
133 * Data structure to hold an individual word
134 */
135 struct word_Tag {
136 word *next, *alt;
137 int type; /* word type: a word_* code */
138 int aux; /* attr_* and quote_* bits; read them with attraux() / quoteaux() */
139 int breaks; /* can a line break after it? */
140 wchar_t *text;
141 filepos fpos; /* source position (filepos exists for error reporting) */
142
143 void *private_data; /* for temp use in backends */
144 };
145 enum { /* word type codes; numbering starts at 0, so the three groups of four below occupy 0-3, 4-7 and 8-11 */
146 /* ORDERING CONSTRAINT: these normal-word types ... */
147 word_Normal,
148 word_Emph,
149 word_Code, /* monospaced; `quoted' in text */
150 word_WeakCode, /* monospaced, normal in text */
151 /* ... must be in the same order as these space types ... */
152 word_WhiteSpace, /* text is NULL or ignorable */
153 word_EmphSpace, /* WhiteSpace when emphasised */
154 word_CodeSpace, /* WhiteSpace when code */
155 word_WkCodeSpace, /* WhiteSpace when weak code */
156 /* ... and must be in the same order as these quote types ... */
157 word_Quote, /* text is NULL or ignorable */
158 word_EmphQuote, /* Quote when emphasised */
159 word_CodeQuote, /* (can't happen) */
160 word_WkCodeQuote, /* (can't happen) */
161 /* END ORDERING CONSTRAINT */
162 word_internal_endattrs, /* exclusive upper bound of the range tested by isattr() */
163 word_UpperXref, /* \K */
164 word_LowerXref, /* \k; last type for which isvis() is true */
165 word_XrefEnd, /* (invisible; no text) */
166 word_IndexRef, /* (always an invisible one) */
167 word_HyperLink, /* (invisible) */
168 word_HyperEnd, /* (also invisible; no text) */
169 /*
170 * Back ends may define their own word types beyond here, in
171 * case they need to use them internally.
172 */
173 word_NotWordType /* placeholder value: one past the last type declared here */
174 };
175 /* aux values for attributed words (the two low bits of aux; extract with attraux()) */
176 enum {
177 attr_Only = 0x0000, /* a lone word with the attribute */
178 attr_First = 0x0001, /* the first of a series */
179 attr_Last = 0x0002, /* the last of a series */
180 attr_Always = 0x0003, /* any other part of a series */
181 attr_mask = 0x0003
182 };
183 /* aux values for quote-type words (bits 0x10 and 0x20 of aux; extract with quoteaux()) */
184 enum {
185 quote_Open = 0x0010,
186 quote_Close = 0x0020,
187 quote_mask = 0x0030
188 };
189 #define isvis(x) ( ( (x) >= word_Normal && (x) <= word_LowerXref ) ) /* true for word_Normal through word_LowerXref inclusive; evaluates x twice */
190 #define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \
191 ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) ) /* true strictly between word_Normal and word_internal_endattrs, except word_WhiteSpace itself; evaluates x up to four times */
192 #define sameattr(x,y) ( (((x)-(y)) & 3) == 0 ) /* true when the two types differ by a multiple of 4, i.e. occupy the same slot of their group of four */
193 #define towordstyle(x) ( word_Normal + ((x) & 3) ) /* same slot as x, in the normal-word group */
194 #define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) ) /* same slot as x, in the space group */
195 #define toquotestyle(x) ( word_Quote + ((x) & 3) ) /* same slot as x, in the quote group */
196 #define removeattr(x) ( word_Normal + ((x) &~ 3) ) /* clears the two low bits of x, giving the first type of x's group of four */
197
198 #define attraux(x) ( (x) & attr_mask ) /* the attr_* part of an aux value */
199 #define quoteaux(x) ( (x) & quote_mask ) /* the quote_* part of an aux value */
200
201 /*
202 * error.c
203 */
204 void fatal(int code, ...) NORETURN; /* declared as never returning (see NORETURN) */
205 void error(int code, ...); /* NOTE(review): code is presumably an err_* value and the varargs fill the message shown beside it -- confirm in error.c */
206 enum {
207 err_nomemory, /* out of memory */
208 err_optnoarg, /* option `-%s' requires an argument */
209 err_nosuchopt, /* unrecognised option `-%s' */
210 err_cmdcharset, /* unrecognised charset %s (cmdline) */
211 err_futileopt, /* futile option `-%s'%s */
212 err_noinput, /* no input files */
213 err_cantopen, /* unable to open input file `%s' */
214 err_nodata, /* no data in input files */
215 err_brokencodepara, /* line in codepara didn't begin `\c' */
216 err_kwunclosed, /* expected `}' after keyword */
217 err_kwillegal, /* paragraph type expects no keyword */
218 err_kwexpected, /* paragraph type expects a keyword */
219 err_kwtoomany, /* paragraph type expects only 1 */
220 err_bodyillegal, /* paragraph type expects only kws! */
221 err_badparatype, /* invalid command at start of para */
222 err_badmidcmd, /* invalid command in mid-para */
223 err_unexbrace, /* unexpected brace */
224 err_explbr, /* expected `{' after command */
225 err_commenteof, /* EOF inside braced comment */
226 err_kwexprbr, /* expected `}' after cross-ref */
227 err_codequote, /* \q within \c is not supported */
228 err_missingrbrace, /* unclosed braces at end of para */
229 err_missingrbrace2, /* unclosed braces at end of file */
230 err_nestedstyles, /* unable to nest text styles */
231 err_nestedindex, /* unable to nest `\i' thingys */
232 err_indexcase, /* two \i differing only in case */
233 err_nosuchkw, /* unresolved cross-reference */
234 err_multiBR, /* multiple \BRs on same keyword */
235 err_nosuchidxtag, /* \IM on unknown index tag (warning) */
236 err_cantopenw, /* can't open output file for write */
237 err_macroexists, /* this macro already exists */
238 err_sectjump, /* jump a heading level, eg \C -> \S */
239 err_winhelp_ctxclash, /* WinHelp context ID hash clash */
240 err_multikw, /* keyword clash in sections */
241 err_misplacedlcont, /* \lcont not after a list item */
242 err_sectmarkerinblock, /* section marker appeared in block */
243 err_cfginsufarg, /* \cfg{%s} insufficient args (<%d) */
244 err_infonodechar, /* colon/comma in node name in info */
245 err_text_codeline, /* \c line too long in text backend */
246 err_htmlver, /* unrecognised HTML version keyword */
247 err_charset, /* unrecognised character set name */
248 err_nofont, /* unrecognised font name */
249 err_afmeof, /* eof in AFM file */
250 err_afmkey, /* missing expected keyword in AFM */
251 err_afmvers, /* unsupported AFM version */
252 err_afmval, /* missing value(s) for AFM key */
253 err_pfeof, /* eof in Type 1 font file */
254 err_pfhead, /* bad Type 1 header line */
255 err_pfbad, /* otherwise invalid Type 1 font */
256 err_pfnoafm, /* Type 1 font but no AFM */
257 err_chmnames, /* need both or neither of hhp+chm */
258 err_sfntnotable, /* required sfnt table missing */
259 err_sfntnopsname, /* sfnt has no PostScript name */
260 err_sfntbadtable, /* sfnt table not valid */
261 err_sfntnounicmap, /* sfnt has no UCS-2 cmap */
262 err_sfnttablevers, /* sfnt table version unknown */
263 err_sfntbadhdr, /* sfnt has bad header */
264 err_sfntbadglyph, /* sfnt cmap references bad glyph */
265 err_whatever /* random error of another type */
266 };
267
268 /*
269 * malloc.c
270 */
271 #ifdef LOGALLOC /* logging build: the allocator functions also take the caller's file and line */
272 void *smalloc(char *file, int line, int size);
273 void *srealloc(char *file, int line, void *p, int size);
274 void sfree(char *file, int line, void *p);
275 #define smalloc(x) smalloc(__FILE__, __LINE__, x) /* these wrappers keep the short call form and supply __FILE__/__LINE__ */
276 #define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
277 #define sfree(x) sfree(__FILE__, __LINE__, x)
278 #else
279 void *smalloc(int size); /* note: sizes are int, so the size_t values passed by the macros below are converted */
280 void *srealloc(void *p, int size);
281 void sfree(void *p);
282 #endif
283 void free_word_list(word *w);
284 void free_para_list(paragraph *p);
285 word *dup_word_list(word *w);
286 char *dupstr(char const *s);
287
288 #define snew(type) ( (type *) smalloc (sizeof (type)) ) /* smalloc one object of `type', cast to type * */
289 #define snewn(number, type) ( (type *) smalloc ((number) * sizeof (type)) ) /* smalloc an array of `number' objects; the multiplication is not checked for overflow */
290 #define sresize(array, number, type) \
291 ( (type *) srealloc ((array), (number) * sizeof (type)) ) /* srealloc `array' to hold `number' objects; same unchecked multiplication */
292 #define lenof(array) ( sizeof(array) / sizeof(*(array)) ) /* element count; correct only for a true array, not a pointer */
293
294 /*
295 * ustring.c (prototypes only; all of these operate on wchar_t strings or characters)
296 */
297 wchar_t *ustrdup(wchar_t const *s);
298 char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset);
299 char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset);
300 wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset);
301 char *utoa_dup(wchar_t const *s, int charset);
302 char *utoa_dup_len(wchar_t const *s, int charset, int *len);
303 char *utoa_careful_dup(wchar_t const *s, int charset);
304 wchar_t *ufroma_dup(char const *s, int charset);
305 char *utoa_locale_dup(wchar_t const *s);
306 wchar_t *ufroma_locale_dup(char const *s);
307 int ustrlen(wchar_t const *s);
308 wchar_t *uadv(wchar_t *s);
309 wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
310 wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n);
311 wchar_t utolower(wchar_t);
312 int uisalpha(wchar_t);
313 int ustrcmp(wchar_t *lhs, wchar_t *rhs); /* NOTE(review): takes non-const pointers, unlike ustricmp/ustrnicmp below */
314 int ustricmp(wchar_t const *lhs, wchar_t const *rhs);
315 int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen);
316 int utoi(wchar_t const *);
317 double utof(wchar_t const *);
318 int utob(wchar_t const *);
319 int uisdigit(wchar_t);
320 wchar_t *ustrlow(wchar_t *s);
321 wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec); /* struct tm comes from the <time.h> include at the top */
322 int cvt_ok(int charset, const wchar_t *s);
323 int charset_from_ustr(filepos *fpos, const wchar_t *name);
324
325 /*
326 * wcwidth.c
327 */
328 int strwid(char const *s, int charset); /* char-string variant */
329 int ustrwid(wchar_t const *s, int charset); /* wchar_t-string variant of the same call */
330
331 /*
332 * help.c
333 */
334 void help(void);
335 void usage(void);
336 void showversion(void);
337 void listcharsets(void);
338
339 /*
340 * licence.c
341 */
342 void licence(void);
343
344 /*
345 * version.c
346 */
347 extern const char *const version; /* declaration only: a const pointer to const chars, defined outside this header */
348
349 /*
350 * misc.c
351 */
352 char *adv(char *s);
353
354 typedef struct stackTag *stack; /* `stack' is itself a pointer type; struct stackTag is not defined in this header */
355 stack stk_new(void);
356 void stk_free(stack);
357 void stk_push(stack, void *); /* elements are untyped void * values */
358 void *stk_pop(stack);
359 void *stk_top(stack);
360
361 typedef struct tagRdstring rdstring; /* string of wchar_t */
362 struct tagRdstring {
363 int pos, size; /* NOTE(review): presumably used length and allocated capacity of text -- confirm in misc.c */
364 wchar_t *text;
365 };
366 typedef struct tagRdstringc rdstringc; /* same layout, but holding char instead of wchar_t */
367 struct tagRdstringc {
368 int pos, size; /* NOTE(review): presumably used length and allocated capacity of text -- confirm in misc.c */
369 char *text;
370 };
371 extern const rdstring empty_rdstring; /* declarations only; both constants are defined outside this header */
372 extern const rdstringc empty_rdstringc;
373 void rdadd(rdstring *rs, wchar_t c);
374 void rdadds(rdstring *rs, wchar_t const *p);
375 wchar_t *rdtrim(rdstring *rs);
376 void rdaddc(rdstringc *rs, char c);
377 void rdaddsc(rdstringc *rs, char const *p);
378 void rdaddsn(rdstringc *rc, char const *p, int len);
379 char *rdtrimc(rdstringc *rs);
380
381 int compare_wordlists(word *a, word *b);
382
383 void mark_attr_ends(word *words);
384
385 typedef struct tagWrappedLine wrappedline;
386 struct tagWrappedLine {
387 wrappedline *next; /* singly linked list of lines */
388 word *begin, *end; /* first & last words of line */
389 int nspaces; /* number of whitespaces in line */
390 int shortfall; /* how much shorter than max width */
391 };
392 wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int); /* NOTE(review): parameters are unnamed here; take their meanings from misc.c. The void * presumably goes to the callback as its first argument -- confirm */
393 void wrap_free(wrappedline *);
394 void cmdline_cfg_add(paragraph *cfg, char *string);
395 paragraph *cmdline_cfg_new(void);
396 paragraph *cmdline_cfg_simple(char *string, ...);
397
398 /*
399 * input.c
400 */
401 paragraph *read_input(input *in, indexdata *idx); /* returns a paragraph pointer, unlike the font readers below, which return void */
402
403 /*
404 * in_afm.c
405 */
406 void read_afm_file(input *in);
407
408 /*
409 * in_pf.c
410 */
411 void read_pfa_file(input *in);
412 void read_pfb_file(input *in);
413
414 /*
415 * in_sfnt.c
416 */
417 void read_sfnt_file(input *in);
418
419 /*
420 * keywords.c
421 */
422 struct keywordlist_Tag {
423 int nkeywords;
424 int size;
425 tree234 *keys; /* sorted by `key' field */
426 word **looseends; /* non-keyword list element numbers */
427 int nlooseends; /* NOTE(review): presumably the used and allocated counts for looseends[] -- confirm */
428 int looseendssize;
429 };
430 struct keyword_Tag {
431 wchar_t *key; /* the keyword itself */
432 word *text; /* "Chapter 2", "Appendix Q"... */
433 /* (NB: filepos are not set) */
434 paragraph *para; /* the paragraph referenced */
435 };
436 keyword *kw_lookup(keywordlist *, wchar_t *);
437 keywordlist *get_keywords(paragraph *);
438 void free_keywords(keywordlist *);
439 void subst_keywords(paragraph *, keywordlist *);
440
441 /*
442 * index.c
443 */
444
445 /*
446 * Data structure to hold both sides of the index.
447 */
448 struct indexdata_Tag {
449 tree234 *tags; /* holds type `indextag' */
450 tree234 *entries; /* holds type `indexentry' */
451 };
452
453 /*
454 * Data structure to hold an index tag (LHS of index).
455 */
456 struct indextag_Tag {
457 wchar_t *name;
458 word *implicit_text;
459 filepos implicit_fpos;
460 word **explicit_texts;
461 filepos *explicit_fpos;
462 int nexplicit, explicit_size; /* NOTE(review): presumably used / allocated counts for the two explicit_* arrays -- confirm */
463 int nrefs;
464 indexentry **refs; /* array of entries referenced by tag */
465 };
466
467 /*
468 * Data structure to hold an index entry (RHS of index).
469 */
470 struct indexentry_Tag {
471 word *text;
472 void *backend_data; /* private to back end */
473 filepos fpos;
474 };
475
476 indexdata *make_index(void);
477 void cleanup_index(indexdata *);
478 /* index_merge takes responsibility for freeing arg 3 iff implicit; never
479 * takes responsibility for arg 2. NOTE(review): the numbering looks zero-based (arg 2 = the wchar_t *, arg 3 = the word *), since counting from one would make "arg 2" the int flag -- confirm against index.c */
480 void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *);
481 void build_index(indexdata *);
482 void index_debug(indexdata *);
483 indextag *index_findtag(indexdata *idx, wchar_t *name);
484
485 /*
486 * contents.c (numberstate is opaque here: its structure is not defined in this header)
487 */
488 numberstate *number_init(void);
489 void number_cfg(numberstate *, paragraph *);
490 word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *); /* NOTE(review): parameters are unnamed; the two int * look like outputs -- confirm in contents.c */
491 void number_free(numberstate *);
492
493 /*
494 * biblio.c
495 */
496 void gen_citations(paragraph *, keywordlist *);
497
498 /*
499 * bk_text.c (every *_backend below has the same signature: paragraph *, keywordlist *, indexdata *, void *)
500 */
501 void text_backend(paragraph *, keywordlist *, indexdata *, void *);
502 paragraph *text_config_filename(char *filename);
503
504 /*
505 * bk_html.c
506 */
507 void html_backend(paragraph *, keywordlist *, indexdata *, void *);
508 paragraph *html_config_filename(char *filename);
509
510 /*
511 * bk_whlp.c
512 */
513 void whlp_backend(paragraph *, keywordlist *, indexdata *, void *);
514 paragraph *whlp_config_filename(char *filename);
515
516 /*
517 * bk_man.c
518 */
519 void man_backend(paragraph *, keywordlist *, indexdata *, void *);
520 paragraph *man_config_filename(char *filename);
521
522 /*
523 * bk_info.c
524 */
525 void info_backend(paragraph *, keywordlist *, indexdata *, void *);
526 paragraph *info_config_filename(char *filename);
527
528 /*
529 * bk_paper.c
530 */
531 void *paper_pre_backend(paragraph *, keywordlist *, indexdata *); /* returns void * and takes no trailing void *; NOTE(review): presumably its result is what the ps/pdf back ends receive as their last argument -- confirm */
532 void listfonts(void);
533
534 /*
535 * bk_ps.c
536 */
537 void ps_backend(paragraph *, keywordlist *, indexdata *, void *);
538 paragraph *ps_config_filename(char *filename);
539
540 /*
541 * bk_pdf.c
542 */
543 void pdf_backend(paragraph *, keywordlist *, indexdata *, void *);
544 paragraph *pdf_config_filename(char *filename);
545
546 #endif