When building the static Huffman tables, it's vital to include the
[sgt/halibut] / main.c
CommitLineData
/*
 * main.c: command line parsing and top level
 */
4
c8fb54d2 5#include <assert.h>
7e976207 6#include <locale.h>
d7482997 7#include <stdio.h>
8#include <stdlib.h>
9#include "halibut.h"
10
11static void dbg_prtsource(paragraph *sourceform);
12static void dbg_prtwordlist(int level, word *w);
13static void dbg_prtkws(keywordlist *kws);
14
43341922 15static const struct pre_backend {
16 void *(*func)(paragraph *, keywordlist *, indexdata *);
17 int bitfield;
18} pre_backends[] = {
19 {paper_pre_backend, 0x0001}
20};
21
c8fb54d2 22static const struct backend {
23 char *name;
43341922 24 void (*func)(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 25 paragraph *(*filename)(char *filename);
43341922 26 int bitfield, prebackend_bitfield;
c8fb54d2 27} backends[] = {
43341922 28 {"text", text_backend, text_config_filename, 0x0001, 0},
78c73085 29 {"xhtml", html_backend, html_config_filename, 0x0002, 0},
30 {"html", html_backend, html_config_filename, 0x0002, 0},
43341922 31 {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
32 {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
33 {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
34 {"man", man_backend, man_config_filename, 0x0008, 0},
35 {"info", info_backend, info_config_filename, 0x0010, 0},
36 {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
37 {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
c8fb54d2 38};
39
d7482997 40int main(int argc, char **argv) {
41 char **infiles;
d7482997 42 int nfiles;
43 int nogo;
44 int errs;
45 int reportcols;
675958c3 46 int input_charset;
d7482997 47 int debug;
43341922 48 int backendbits, prebackbits;
c8fb54d2 49 int k, b;
6a0b9d08 50 paragraph *cfg, *cfg_tail;
43341922 51 void *pre_backend_data[16];
d7482997 52
740a7d6a 53 /*
54 * Use the specified locale everywhere. It'll be used for
55 * output of error messages, and as the default character set
56 * for input files if one is not explicitly specified.
57 *
58 * However, we need to use standard numeric formatting for
59 * output of things like PDF.
60 */
7e976207 61 setlocale(LC_ALL, "");
740a7d6a 62 setlocale(LC_NUMERIC, "C");
7e976207 63
d7482997 64 /*
65 * Set up initial (default) parameters.
66 */
f1530049 67 infiles = snewn(argc, char *);
d7482997 68 nfiles = 0;
69 nogo = errs = FALSE;
70 reportcols = 0;
675958c3 71 input_charset = CS_ASCII;
d7482997 72 debug = 0;
c8fb54d2 73 backendbits = 0;
6a0b9d08 74 cfg = cfg_tail = NULL;
d7482997 75
76 if (argc == 1) {
77 usage();
78 exit(EXIT_SUCCESS);
79 }
80
81 /*
82 * Parse command line arguments.
83 */
84 while (--argc) {
85 char *p = *++argv;
86 if (*p == '-') {
87 /*
88 * An option.
89 */
90 while (p && *++p) {
91 char c = *p;
92 switch (c) {
93 case '-':
94 /*
95 * Long option.
96 */
97 {
98 char *opt, *val;
99 opt = p++; /* opt will have _one_ leading - */
100 while (*p && *p != '=')
101 p++; /* find end of option */
102 if (*p == '=') {
103 *p++ = '\0';
104 val = p;
105 } else
106 val = NULL;
c8fb54d2 107
108 assert(opt[0] == '-');
109 for (k = 0; k < (int)lenof(backends); k++)
110 if (!strcmp(opt+1, backends[k].name)) {
111 backendbits |= backends[k].bitfield;
ba9c1487 112 if (val) {
113 paragraph *p = backends[k].filename(val);
114 assert(p);
115 if (cfg_tail)
116 cfg_tail->next = p;
117 else
118 cfg = p;
119 while (p->next)
120 p = p->next;
121 cfg_tail = p;
122 }
c8fb54d2 123 break;
124 }
125 if (k < (int)lenof(backends)) {
126 /* do nothing */;
675958c3 127 } else if (!strcmp(opt, "-input-charset")) {
128 if (!val) {
129 errs = TRUE, error(err_optnoarg, opt);
130 } else {
131 int charset = charset_from_localenc(val);
132 if (charset == CS_NONE) {
133 errs = TRUE, error(err_cmdcharset, val);
134 } else {
135 input_charset = charset;
136 }
137 }
c8fb54d2 138 } else if (!strcmp(opt, "-help")) {
d7482997 139 help();
140 nogo = TRUE;
141 } else if (!strcmp(opt, "-version")) {
142 showversion();
143 nogo = TRUE;
144 } else if (!strcmp(opt, "-licence") ||
145 !strcmp(opt, "-license")) {
146 licence();
147 nogo = TRUE;
f336fa9a 148 } else if (!strcmp(opt, "-list-charsets")) {
149 listcharsets();
150 nogo = TRUE;
d7482997 151 } else if (!strcmp(opt, "-precise")) {
152 reportcols = 1;
153 } else {
154 errs = TRUE, error(err_nosuchopt, opt);
155 }
156 }
157 p = NULL;
158 break;
159 case 'h':
160 case 'V':
161 case 'L':
162 case 'P':
163 case 'd':
164 /*
165 * Option requiring no parameter.
166 */
167 switch (c) {
168 case 'h':
169 help();
170 nogo = TRUE;
171 break;
172 case 'V':
173 showversion();
174 nogo = TRUE;
175 break;
176 case 'L':
177 licence();
178 nogo = TRUE;
179 break;
180 case 'P':
181 reportcols = 1;
182 break;
183 case 'd':
184 debug = TRUE;
185 break;
186 }
187 break;
6a0b9d08 188 case 'C':
d7482997 189 /*
190 * Option requiring parameter.
191 */
192 p++;
193 if (!*p && argc > 1)
194 --argc, p = *++argv;
195 else if (!*p) {
196 char opt[2];
197 opt[0] = c;
198 opt[1] = '\0';
199 errs = TRUE, error(err_optnoarg, opt);
200 }
201 /*
202 * Now c is the option and p is the parameter.
203 */
204 switch (c) {
6a0b9d08 205 case 'C':
206 /*
207 * -C means we split our argument up into
208 * colon-separated chunks and assemble them
209 * into a config paragraph.
210 */
211 {
e4ea58f8 212 char *s = dupstr(p), *q, *r;
6a0b9d08 213 paragraph *para;
214
e4ea58f8 215 para = cmdline_cfg_new();
6a0b9d08 216
e4ea58f8 217 q = r = s;
6a0b9d08 218 while (*q) {
219 if (*q == ':') {
e4ea58f8 220 *r = '\0';
675958c3 221 /* XXX ad-hoc diagnostic */
222 if (!strcmp(s, "input-charset"))
223 error(err_futileopt, "Cinput-charset",
224 "; use --input-charset");
e4ea58f8 225 cmdline_cfg_add(para, s);
226 r = s;
6a0b9d08 227 } else {
228 if (*q == '\\' && q[1])
229 q++;
e4ea58f8 230 *r++ = *q;
6a0b9d08 231 }
232 q++;
233 }
57e17355 234 *r = '\0';
e4ea58f8 235 cmdline_cfg_add(para, s);
6a0b9d08 236
237 if (cfg_tail)
238 cfg_tail->next = para;
239 else
240 cfg = para;
241 cfg_tail = para;
242 }
d7482997 243 break;
244 }
245 p = NULL; /* prevent continued processing */
246 break;
247 default:
248 /*
249 * Unrecognised option.
250 */
251 {
252 char opt[2];
253 opt[0] = c;
254 opt[1] = '\0';
255 errs = TRUE, error(err_nosuchopt, opt);
256 }
257 }
258 }
259 } else {
260 /*
261 * A non-option argument.
262 */
263 infiles[nfiles++] = p;
264 }
265 }
266
267 if (errs)
268 exit(EXIT_FAILURE);
269 if (nogo)
270 exit(EXIT_SUCCESS);
271
272 /*
273 * Do the work.
274 */
275 if (nfiles == 0) {
276 error(err_noinput);
277 usage();
278 exit(EXIT_FAILURE);
279 }
280
281 {
282 input in;
283 paragraph *sourceform, *p;
284 indexdata *idx;
285 keywordlist *keywords;
286
287 in.filenames = infiles;
288 in.nfiles = nfiles;
289 in.currfp = NULL;
290 in.currindex = 0;
291 in.npushback = in.pushbacksize = 0;
292 in.pushback = NULL;
293 in.reportcols = reportcols;
294 in.stack = NULL;
675958c3 295 in.defcharset = input_charset;
d7482997 296
297 idx = make_index();
298
299 sourceform = read_input(&in, idx);
300 if (!sourceform)
301 exit(EXIT_FAILURE);
302
6a0b9d08 303 /*
304 * Append the config directives acquired from the command
305 * line.
306 */
307 {
308 paragraph *end;
309
310 end = sourceform;
311 while (end && end->next)
312 end = end->next;
313 assert(end);
314
315 end->next = cfg;
316 }
317
d7482997 318 sfree(in.pushback);
319
d7482997 320 sfree(infiles);
321
322 keywords = get_keywords(sourceform);
323 if (!keywords)
324 exit(EXIT_FAILURE);
325 gen_citations(sourceform, keywords);
326 subst_keywords(sourceform, keywords);
327
328 for (p = sourceform; p; p = p->next)
329 if (p->type == para_IM)
f4551933 330 index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
d7482997 331
332 build_index(idx);
333
bb9e7835 334 /*
335 * Set up attr_First / attr_Last / attr_Always, in the main
336 * document and in the index entries.
337 */
338 for (p = sourceform; p; p = p->next)
339 mark_attr_ends(p->words);
340 {
341 int i;
342 indexentry *entry;
343
344 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
345 mark_attr_ends(entry->text);
346 }
347
d7482997 348 if (debug) {
349 index_debug(idx);
350 dbg_prtkws(keywords);
351 dbg_prtsource(sourceform);
352 }
353
c8fb54d2 354 /*
43341922 355 * Select and run the pre-backends.
356 */
357 prebackbits = 0;
358 for (k = 0; k < (int)lenof(backends); k++)
359 if (backendbits == 0 || (backendbits & backends[k].bitfield))
360 prebackbits |= backends[k].prebackend_bitfield;
361 for (k = 0; k < (int)lenof(pre_backends); k++)
362 if (prebackbits & pre_backends[k].bitfield) {
363 assert(k < (int)lenof(pre_backend_data));
364 pre_backend_data[k] =
365 pre_backends[k].func(sourceform, keywords, idx);
366 }
367
368 /*
c8fb54d2 369 * Run the selected set of backends.
370 */
371 for (k = b = 0; k < (int)lenof(backends); k++)
372 if (b != backends[k].bitfield) {
373 b = backends[k].bitfield;
43341922 374 if (backendbits == 0 || (backendbits & b)) {
375 void *pbd = NULL;
376 int pbb = backends[k].prebackend_bitfield;
377 int m;
378
379 for (m = 0; m < (int)lenof(pre_backends); m++)
380 if (pbb & pre_backends[m].bitfield) {
381 assert(m < (int)lenof(pre_backend_data));
382 pbd = pre_backend_data[m];
383 break;
384 }
385
386 backends[k].func(sourceform, keywords, idx, pbd);
387 }
c8fb54d2 388 }
d7482997 389
390 free_para_list(sourceform);
391 free_keywords(keywords);
392 cleanup_index(idx);
393 }
394
395 return 0;
396}
397
398static void dbg_prtsource(paragraph *sourceform) {
399 /*
400 * Output source form in debugging format.
401 */
402
403 paragraph *p;
404 for (p = sourceform; p; p = p->next) {
405 wchar_t *wp;
406 printf("para %d ", p->type);
407 if (p->keyword) {
408 wp = p->keyword;
409 while (*wp) {
410 putchar('\"');
411 for (; *wp; wp++)
412 putchar(*wp);
413 putchar('\"');
414 if (*++wp)
415 printf(", ");
416 }
417 } else
418 printf("(no keyword)");
419 printf(" {\n");
420 dbg_prtwordlist(1, p->words);
421 printf("}\n");
422 }
423}
424
425static void dbg_prtkws(keywordlist *kws) {
426 /*
427 * Output keywords in debugging format.
428 */
429
430 int i;
431 keyword *kw;
432
433 for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) {
434 wchar_t *wp;
435 printf("keyword ");
436 wp = kw->key;
437 while (*wp) {
438 putchar('\"');
439 for (; *wp; wp++)
440 putchar(*wp);
441 putchar('\"');
442 if (*++wp)
443 printf(", ");
444 }
445 printf(" {\n");
446 dbg_prtwordlist(1, kw->text);
447 printf("}\n");
448 }
449}
450
451static void dbg_prtwordlist(int level, word *w) {
452 for (; w; w = w->next) {
453 wchar_t *wp;
454 printf("%*sword %d ", level*4, "", w->type);
455 if (w->text) {
456 printf("\"");
457 for (wp = w->text; *wp; wp++)
458 putchar(*wp);
459 printf("\"");
460 } else
461 printf("(no text)");
14b072e2 462 if (w->breaks)
463 printf(" [breaks]");
d7482997 464 if (w->alt) {
465 printf(" alt = {\n");
466 dbg_prtwordlist(level+1, w->alt);
467 printf("%*s}", level*4, "");
468 }
469 printf("\n");
470 }
471}