Charset support for the man page backend (\cfg{man-charset}).
[sgt/halibut] / halibut.h
CommitLineData
d7482997 1#ifndef HALIBUT_HALIBUT_H
2#define HALIBUT_HALIBUT_H
3
4#include <stdio.h>
5#include <wchar.h>
6#include <time.h>
9c1cf191 7#include <string.h>
d7482997 8
e34ba5c3 9#include "charset.h"
10
/*
 * NORETURN is appended to the declaration of a function that never
 * returns. GNU-compatible compilers get the noreturn attribute; on
 * anything else the macro expands to nothing.
 */
#if defined(__GNUC__)
#  define NORETURN __attribute__((__noreturn__))
#else
#  define NORETURN
#endif
16
/* Boolean constants, supplied only if nothing earlier defined them. */
#if !defined(TRUE)
#  define TRUE 1
#endif
#if !defined(FALSE)
#  define FALSE 0
#endif
23
/*
 * For suppressing unused-parameter warnings.
 *
 * The argument is evaluated once and the result discarded by a cast
 * to void. Nothing is ever written to the operand, so the macro can
 * be applied to const-qualified parameters and does not provoke a
 * self-assignment diagnostic.
 */
#define IGNORE(x) ( (void) (x) )
26
27#include "tree234.h"
28
/*
 * FIXME: temporary charset workarounds. Both names currently expand
 * to CS_ISO8859_1.
 */
#define CS_FIXME CS_ISO8859_1
#define CS_LOCAL CS_ISO8859_1
34
/*
 * Structure tags: short typedef names for the struct types used
 * throughout this header. Declaring them here lets later
 * declarations use the short names before the struct bodies appear.
 */
typedef struct input_Tag        input;
typedef struct filepos_Tag      filepos;
typedef struct paragraph_Tag    paragraph;
typedef struct word_Tag         word;
typedef struct keywordlist_Tag  keywordlist;
typedef struct keyword_Tag      keyword;
typedef struct userstyle_Tag    userstyle;
typedef struct numberstate_Tag  numberstate;
typedef struct indexdata_Tag    indexdata;
typedef struct indextag_Tag     indextag;
typedef struct indexentry_Tag   indexentry;
typedef struct macrostack_Tag   macrostack;
50
/*
 * A position in the input, for reporting errors: a file name
 * together with a line number and a column number.
 */
struct filepos_Tag {
    char *filename;
    int line;
    int col;
};
59
60/*
61 * Data structure to hold all the file names etc for input
62 */
63typedef struct pushback_Tag {
64 int chr;
65 filepos pos;
66} pushback;
67struct input_Tag {
68 char **filenames; /* complete list of input files */
69 int nfiles; /* how many in the list */
70 FILE *currfp; /* the currently open one */
71 int currindex; /* which one is that in the list */
72 pushback *pushback; /* pushed-back input characters */
73 int npushback, pushbacksize;
74 filepos pos;
75 int reportcols; /* report column numbers in errors */
76 macrostack *stack; /* macro expansions in force */
e34ba5c3 77 int defcharset, charset; /* character sets for input files */
78 charset_state csstate;
79 wchar_t wc[16]; /* wide chars from input conversion */
80 int nwc, wcpos; /* size of, and position in, wc[] */
e4ea58f8 81 char *pushback_chars; /* used to save input-encoding data */
d7482997 82};
83
84/*
85 * Data structure to hold the input form of the source, ie a linked
86 * list of paragraphs
87 */
88struct paragraph_Tag {
89 paragraph *next;
90 int type;
91 wchar_t *keyword; /* for most special paragraphs */
e4ea58f8 92 char *origkeyword; /* same again in original charset */
d7482997 93 word *words; /* list of words in paragraph */
94 int aux; /* number, in a numbered paragraph
95 * or subsection level
96 */
97 word *kwtext; /* chapter/section indication */
98 word *kwtext2; /* numeric-only form of kwtext */
99 filepos fpos;
100
101 paragraph *parent, *child, *sibling; /* for hierarchy navigation */
102
103 void *private_data; /* for temp use in backends */
104};
/*
 * Paragraph types. Values are assigned consecutively from zero, so
 * the order of the enumerators matters.
 */
enum {
    para_IM = 0,                       /* index merge */
    para_BR,                           /* bibliography rewrite */
    para_Rule,                         /* random horizontal rule */
    para_Chapter,
    para_Appendix,
    para_UnnumberedChapter,
    para_Heading,
    para_Subsect,
    para_Normal,
    para_Biblio,                       /* causes no output unless turned ... */
    para_BiblioCited,                  /* ... into this paragraph type */
    para_Bullet,
    para_NumberedList,
    para_DescribedThing,
    para_Description,
    para_Code,
    para_Copyright,
    para_NoCite,
    para_Title,
    para_VersionID,
    para_Config,                       /* configuration directive */
    para_LcontPush,                    /* begin continuation of list item */
    para_LcontPop,                     /* end continuation of list item */
    para_QuotePush,                    /* begin block quote */
    para_QuotePop,                     /* end block quote */
    /*
     * Back ends may define their own paragraph types beyond here,
     * in case they need to use them internally.
     */
    para_NotParaType                   /* placeholder value */
};
137
138/*
139 * Data structure to hold an individual word
140 */
141struct word_Tag {
142 word *next, *alt;
143 int type;
144 int aux;
145 int breaks; /* can a line break after it? */
146 wchar_t *text;
147 filepos fpos;
5dd44dce 148
149 void *private_data; /* for temp use in backends */
d7482997 150};
/*
 * Word types. The first three groups of four run in parallel
 * (normal / emphasis / code / weak code), which is what the
 * style-conversion macros rely on; do not reorder them.
 */
enum {
    /* ORDERING CONSTRAINT: these normal-word types ... */
    word_Normal = 0,
    word_Emph,
    word_Code,                         /* monospaced; `quoted' in text */
    word_WeakCode,                     /* monospaced, normal in text */
    /* ... must be in the same order as these space types ... */
    word_WhiteSpace,                   /* text is NULL or ignorable */
    word_EmphSpace,                    /* WhiteSpace when emphasised */
    word_CodeSpace,                    /* WhiteSpace when code */
    word_WkCodeSpace,                  /* WhiteSpace when weak code */
    /* ... and must be in the same order as these quote types ... */
    word_Quote,                        /* text is NULL or ignorable */
    word_EmphQuote,                    /* Quote when emphasised */
    word_CodeQuote,                    /* (can't happen) */
    word_WkCodeQuote,                  /* (can't happen) */
    /* END ORDERING CONSTRAINT */
    word_internal_endattrs,
    word_UpperXref,                    /* \K */
    word_LowerXref,                    /* \k */
    word_XrefEnd,                      /* (invisible; no text) */
    word_IndexRef,                     /* (always an invisible one) */
    word_HyperLink,                    /* (invisible) */
    word_HyperEnd,                     /* (also invisible; no text) */
    /*
     * Back ends may define their own word types beyond here, in
     * case they need to use them internally.
     */
    word_NotWordType                   /* placeholder value */
};
/*
 * aux values for attributed words. The position of a word within a
 * run of same-attribute words is held in the low two bits, which
 * attr_mask selects.
 *
 * No comma follows the last enumerator: C90 does not allow one.
 */
enum {
    attr_Only   = 0x0000,              /* a lone word with the attribute */
    attr_First  = 0x0001,              /* the first of a series */
    attr_Last   = 0x0002,              /* the last of a series */
    attr_Always = 0x0003,              /* any other part of a series */
    attr_mask   = 0x0003
};
/*
 * aux values for quote-type words; quote_mask selects the two
 * open/close bits.
 *
 * No comma follows the last enumerator: C90 does not allow one.
 */
enum {
    quote_Open  = 0x0010,
    quote_Close = 0x0020,
    quote_mask  = 0x0030
};
/*
 * Word-type helpers. They depend on the ordering constraint in the
 * word type enum: the low two bits of a type in the constrained
 * range select the style, the remaining bits select the group.
 * Arguments may be evaluated more than once.
 *
 * isattr(x)       nonzero for any type strictly between word_Normal
 *                 and word_internal_endattrs, other than
 *                 word_WhiteSpace
 * sameattr(x,y)   nonzero if x and y agree in their low two bits
 * towordstyle(x)  the normal-word type with x's low two bits
 * tospacestyle(x) the space type with x's low two bits
 * toquotestyle(x) the quote type with x's low two bits
 * removeattr(x)   x with its low two bits cleared
 */
#define isattr(x) ( (x) > word_Normal && \
                    (x) < word_internal_endattrs && \
                    (x) != word_WhiteSpace )
#define sameattr(x,y) ( !(((x) - (y)) & 3) )
#define towordstyle(x) ( ((x) & 3) + word_Normal )
#define tospacestyle(x) ( ((x) & 3) + word_WhiteSpace )
#define toquotestyle(x) ( ((x) & 3) + word_Quote )
#define removeattr(x) ( ((x) & ~3) + word_Normal )

/* Extract the attribute bits, or the quote bits, of an aux value. */
#define attraux(x) ( attr_mask & (x) )
#define quoteaux(x) ( quote_mask & (x) )
205
206/*
207 * error.c
208 */
209void fatal(int code, ...) NORETURN;
210void error(int code, ...);
/*
 * Error codes, numbered consecutively from zero. The comment beside
 * each one summarises the condition it reports.
 */
enum {
    err_nomemory = 0,                  /* out of memory */
    err_optnoarg,                      /* option `-%s' requires an argument */
    err_nosuchopt,                     /* unrecognised option `-%s' */
    err_noinput,                       /* no input files */
    err_cantopen,                      /* unable to open input file `%s' */
    err_nodata,                        /* no data in input files */
    err_brokencodepara,                /* line in codepara didn't begin `\c' */
    err_kwunclosed,                    /* expected `}' after keyword */
    err_kwillegal,                     /* paragraph type expects no keyword */
    err_kwexpected,                    /* paragraph type expects a keyword */
    err_kwtoomany,                     /* paragraph type expects only 1 */
    err_bodyillegal,                   /* paragraph type expects only kws! */
    err_badparatype,                   /* invalid command at start of para */
    err_badmidcmd,                     /* invalid command in mid-para */
    err_unexbrace,                     /* unexpected brace */
    err_explbr,                        /* expected `{' after command */
    err_commenteof,                    /* EOF inside braced comment */
    err_kwexprbr,                      /* expected `}' after cross-ref */
    err_missingrbrace,                 /* unclosed braces at end of para */
    err_missingrbrace2,                /* unclosed braces at end of file */
    err_nestedstyles,                  /* unable to nest text styles */
    err_nestedindex,                   /* unable to nest `\i' thingys */
    err_nosuchkw,                      /* unresolved cross-reference */
    err_multiBR,                       /* multiple \BRs on same keyword */
    err_nosuchidxtag,                  /* \IM on unknown index tag (warning) */
    err_cantopenw,                     /* can't open output file for write */
    err_macroexists,                   /* this macro already exists */
    err_sectjump,                      /* jump a heading level, eg \C -> \S */
    err_winhelp_ctxclash,              /* WinHelp context ID hash clash */
    err_multikw,                       /* keyword clash in sections */
    err_misplacedlcont,                /* \lcont not after a list item */
    err_sectmarkerinblock,             /* section marker appeared in block */
    err_infodirentry,                  /* \cfg{info-dir-entry} missing param */
    err_infonodechar,                  /* colon/comma in node name in info */
    err_whatever                       /* random error of another type */
};
248
/*
 * malloc.c
 *
 * Allocation wrappers. When LOGALLOC is defined, each wrapper takes
 * a file name and line number as extra leading arguments, and
 * same-named macros supply __FILE__ and __LINE__ so that call sites
 * are written identically in both configurations.
 */
#ifndef LOGALLOC
void *smalloc(int size);
void *srealloc(void *p, int size);
void sfree(void *p);
#else
void *smalloc(char *file, int line, int size);
void *srealloc(char *file, int line, void *p, int size);
void sfree(char *file, int line, void *p);
#define smalloc(x) smalloc(__FILE__, __LINE__, x)
#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
#define sfree(x) sfree(__FILE__, __LINE__, x)
#endif
264void free_word_list(word *w);
265void free_para_list(paragraph *p);
266word *dup_word_list(word *w);
267char *dupstr(char *s);
268
/*
 * Allocation convenience macros built on smalloc/srealloc:
 *   mknew(type)          room for one object of `type'
 *   mknewa(type, number) room for `number' objects of `type'
 *   resize(array, len)   srealloc `array' to hold `len' elements
 *   lenof(array)         element count of a true array (not a pointer)
 */
#define mknew(type) ( (type *) smalloc(sizeof(type)) )
#define mknewa(type, number) ( (type *) smalloc(sizeof(type) * (number)) )
#define resize(array, len) ( srealloc((array), sizeof(*(array)) * (len)) )
#define lenof(array) ( sizeof(array) / sizeof((array)[0]) )
273
/*
 * ustring.c
 *
 * Routines on wchar_t strings. Several take or return char strings
 * together with an int `charset' argument.
 */

/* wchar_t <-> char, with a charset argument */
char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset);
char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset);
wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset);
char *utoa_dup(wchar_t const *s, int charset);
char *utoa_dup_len(wchar_t const *s, int charset, int *len);
char *utoa_careful_dup(wchar_t const *s, int charset);
wchar_t *ufroma_dup(char const *s, int charset);
int cvt_ok(int charset, const wchar_t *s);

/* whole-string routines */
wchar_t *ustrdup(wchar_t const *s);
int ustrlen(wchar_t const *s);
wchar_t *uadv(wchar_t *s);
wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
int ustrcmp(wchar_t *lhs, wchar_t *rhs);
int ustricmp(wchar_t *lhs, wchar_t *rhs);
int utoi(wchar_t *s);
int utob(wchar_t *s);
wchar_t *ustrlow(wchar_t *s);
wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);

/* single-character routines */
wchar_t utolower(wchar_t c);
int uisalpha(wchar_t c);
int uisdigit(wchar_t c);
d7482997 298
/*
 * help.c and licence.c
 *
 * Entry points taking no arguments and returning nothing. The
 * first three are from help.c, the last from licence.c.
 */
void help(void);
void usage(void);
void showversion(void);
void licence(void);
310
/*
 * version.c
 *
 * Declared `extern' so that this header only refers to the version
 * string and never defines it. Without `extern', each translation
 * unit including this header would carry its own tentative
 * definition of a const object, which relies on common-symbol
 * merging at link time and fails with multiple-definition errors
 * when that is unavailable. The single definition is expected to
 * live in version.c.
 */
extern const char *const version;
315
/*
 * misc.c
 */
char *adv(char *s);

/*
 * Stack of void pointers. `stack' is itself a pointer type, to a
 * struct whose contents are not visible in this header.
 */
typedef struct stackTag *stack;
stack stk_new(void);
void stk_free(stack stk);
void stk_push(stack stk, void *item);
void *stk_pop(stack stk);
void *stk_top(stack stk);
d7482997 327
/*
 * String buffers with a position and a size, in a wide-character
 * flavour (rdstring) and a char flavour (rdstringc). The empty_*
 * constants are defined elsewhere.
 */
typedef struct tagRdstring rdstring;
struct tagRdstring {
    int pos;
    int size;
    wchar_t *text;
};
typedef struct tagRdstringc rdstringc;
struct tagRdstringc {
    int pos;
    int size;
    char *text;
};
extern const rdstring empty_rdstring;
extern const rdstringc empty_rdstringc;

/* wide-character buffer */
void rdadd(rdstring *rs, wchar_t c);
void rdadds(rdstring *rs, wchar_t const *p);
wchar_t *rdtrim(rdstring *rs);

/* char buffer */
void rdaddc(rdstringc *rs, char c);
void rdaddsc(rdstringc *rs, char const *p);
char *rdtrimc(rdstringc *rs);
346
347int compare_wordlists(word *a, word *b);
348
349void mark_attr_ends(paragraph *sourceform);
350
351typedef struct tagWrappedLine wrappedline;
352struct tagWrappedLine {
353 wrappedline *next;
354 word *begin, *end; /* first & last words of line */
355 int nspaces; /* number of whitespaces in line */
356 int shortfall; /* how much shorter than max width */
357};
43341922 358wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int);
d7482997 359void wrap_free(wrappedline *);
e4ea58f8 360void cmdline_cfg_add(paragraph *cfg, char *string);
361paragraph *cmdline_cfg_new(void);
362paragraph *cmdline_cfg_simple(char *string, ...);
d7482997 363
364/*
365 * input.c
366 */
367paragraph *read_input(input *in, indexdata *idx);
368
369/*
370 * keywords.c
371 */
372struct keywordlist_Tag {
373 int nkeywords;
374 int size;
375 tree234 *keys; /* sorted by `key' field */
376 word **looseends; /* non-keyword list element numbers */
377 int nlooseends;
378 int looseendssize;
379};
380struct keyword_Tag {
381 wchar_t *key; /* the keyword itself */
382 word *text; /* "Chapter 2", "Appendix Q"... */
383 /* (NB: filepos are not set) */
384 paragraph *para; /* the paragraph referenced */
385};
386keyword *kw_lookup(keywordlist *, wchar_t *);
387keywordlist *get_keywords(paragraph *);
388void free_keywords(keywordlist *);
389void subst_keywords(paragraph *, keywordlist *);
390
391/*
392 * index.c
393 */
394
395/*
396 * Data structure to hold both sides of the index.
397 */
398struct indexdata_Tag {
399 tree234 *tags; /* holds type `indextag' */
400 tree234 *entries; /* holds type `indexentry' */
401};
402
403/*
404 * Data structure to hold an index tag (LHS of index).
405 */
406struct indextag_Tag {
407 wchar_t *name;
408 word *implicit_text;
f4551933 409 filepos implicit_fpos;
d7482997 410 word **explicit_texts;
f4551933 411 filepos *explicit_fpos;
d7482997 412 int nexplicit, explicit_size;
413 int nrefs;
414 indexentry **refs; /* array of entries referenced by tag */
415};
416
417/*
418 * Data structure to hold an index entry (RHS of index).
419 */
420struct indexentry_Tag {
421 word *text;
422 void *backend_data; /* private to back end */
f4551933 423 filepos fpos;
d7482997 424};
425
426indexdata *make_index(void);
427void cleanup_index(indexdata *);
428/* index_merge takes responsibility for freeing arg 3 iff implicit; never
429 * takes responsibility for arg 2 */
f4551933 430void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *);
d7482997 431void build_index(indexdata *);
432void index_debug(indexdata *);
433indextag *index_findtag(indexdata *idx, wchar_t *name);
434
435/*
436 * contents.c
437 */
438numberstate *number_init(void);
439void number_cfg(numberstate *, paragraph *);
96f3af16 440word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *);
d7482997 441void number_free(numberstate *);
442
443/*
444 * biblio.c
445 */
446void gen_citations(paragraph *, keywordlist *);
447
/*
 * style.c
 *
 * No user-style data is defined yet. ISO C does not permit a struct
 * with an empty member list (compilers that accept one do so as an
 * extension, giving it size zero), so a single placeholder member
 * keeps the type well-formed everywhere.
 */
struct userstyle_Tag {
    int placeholder;                   /* unused; only makes the struct valid */
};
453
454/*
455 * bk_text.c
456 */
43341922 457void text_backend(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 458paragraph *text_config_filename(char *filename);
d7482997 459
460/*
461 * bk_xhtml.c
462 */
43341922 463void xhtml_backend(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 464paragraph *xhtml_config_filename(char *filename);
d7482997 465
466/*
467 * bk_whlp.c
468 */
43341922 469void whlp_backend(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 470paragraph *whlp_config_filename(char *filename);
d7482997 471
7136a6c7 472/*
473 * bk_man.c
474 */
43341922 475void man_backend(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 476paragraph *man_config_filename(char *filename);
7136a6c7 477
5dd44dce 478/*
479 * bk_info.c
480 */
43341922 481void info_backend(paragraph *, keywordlist *, indexdata *, void *);
5dd44dce 482paragraph *info_config_filename(char *filename);
483
43341922 484/*
485 * bk_paper.c
486 */
487void *paper_pre_backend(paragraph *, keywordlist *, indexdata *);
488
489/*
490 * bk_ps.c
491 */
492void ps_backend(paragraph *, keywordlist *, indexdata *, void *);
493paragraph *ps_config_filename(char *filename);
494
495/*
496 * bk_pdf.c
497 */
498void pdf_backend(paragraph *, keywordlist *, indexdata *, void *);
499paragraph *pdf_config_filename(char *filename);
500
d7482997 501#endif