Add a `--list-charsets' option to Halibut to enumerate canonical names of known
[sgt/halibut] / main.c
1 /*
2 * main.c: command line parsing and top level
3 */
4
5 #include <assert.h>
6 #include <locale.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include "halibut.h"
10
/*
 * Forward declarations for the debugging dump routines defined further
 * down in this file. main() calls dbg_prtkws() and dbg_prtsource()
 * when its debug flag is set; dbg_prtwordlist() is the shared helper
 * both of them use to print a word list at a given nesting level.
 */
static void dbg_prtsource(paragraph *sourceform);
static void dbg_prtwordlist(int level, word *w);
static void dbg_prtkws(keywordlist *kws);
14
/*
 * Table of pre-backends. main() runs an entry's func once, before any
 * backend, if at least one selected backend has the entry's bit set in
 * its prebackend_bitfield; the pointer func returns is later handed to
 * those backends as their final argument.
 */
static const struct pre_backend {
    /* Called with the source paragraphs, the keyword list and the index. */
    void *(*func)(paragraph *, keywordlist *, indexdata *);
    /* Bit identifying this pre-backend in backends[].prebackend_bitfield. */
    int bitfield;
} pre_backends[] = {
    {paper_pre_backend, 0x0001}
};
21
/*
 * Table of output backends.
 *
 * Several names may share one bitfield value (e.g. "xhtml"/"html");
 * main() treats those as aliases and, when running backends, skips an
 * entry whose bitfield equals that of the entry immediately before it.
 * Aliases must therefore stay adjacent in this table.
 */
static const struct backend {
    /* Long-option name: selected on the command line as --<name>. */
    char *name;
    /*
     * The backend itself. The last argument is the result of the
     * matching pre-backend, or NULL if the backend needs none.
     */
    void (*func)(paragraph *, keywordlist *, indexdata *, void *);
    /*
     * Called with the value of --<name>=<value>; the paragraph list it
     * returns is appended to the command-line configuration.
     */
    paragraph *(*filename)(char *filename);
    /*
     * bitfield identifies the backend in main()'s backendbits mask;
     * prebackend_bitfield lists the pre_backends[] entries it needs.
     */
    int bitfield, prebackend_bitfield;
} backends[] = {
    {"text", text_backend, text_config_filename, 0x0001, 0},
    {"xhtml", html_backend, html_config_filename, 0x0002, 0},
    {"html", html_backend, html_config_filename, 0x0002, 0},
    {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
    {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
    {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
    {"man", man_backend, man_config_filename, 0x0008, 0},
    {"info", info_backend, info_config_filename, 0x0010, 0},
    {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
    {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
};
39
/*
 * Program entry point. Parses the command line, reads every input
 * file into a paragraph list, appends any configuration gathered from
 * the command line, then runs the required pre-backends followed by
 * the selected backends.
 *
 * Returns 0 after a full run. Exits early with EXIT_SUCCESS when run
 * with no arguments or when an informational option (help, version,
 * licence, list-charsets) was given, and with EXIT_FAILURE on a
 * command-line error, when no input file is named, or when
 * read_input() or get_keywords() returns NULL. Command-line errors
 * take precedence over informational options.
 */
int main(int argc, char **argv) {
    char **infiles;                    /* non-option arguments, in order */
    int nfiles;                        /* number of entries used in infiles */
    int nogo;                          /* set by informational options */
    int errs;                          /* set on any command-line error */
    int reportcols;
    int input_charset;
    int debug;
    int backendbits, prebackbits;      /* masks of selected (pre-)backends */
    int k, b;
    paragraph *cfg, *cfg_tail;         /* config list built from options */
    void *pre_backend_data[16];        /* indexed like pre_backends[] */

    setlocale(LC_ALL, "");

    /*
     * Set up initial (default) parameters.
     */
    infiles = snewn(argc, char *);
    nfiles = 0;
    nogo = errs = FALSE;
    reportcols = 0;
    input_charset = CS_ASCII;
    debug = 0;
    backendbits = 0;
    cfg = cfg_tail = NULL;

    if (argc == 1) {
        usage();
        exit(EXIT_SUCCESS);
    }

    /*
     * Parse command line arguments.
     */
    while (--argc) {
        char *p = *++argv;
        if (*p == '-') {
            /*
             * An option.
             */
            /*
             * Step through the characters after the leading '-', so
             * that several short options can share one argument.
             * Handlers that consume the rest of the argument set p to
             * NULL to end this loop.
             */
            while (p && *++p) {
                char c = *p;
                switch (c) {
                  case '-':
                    /*
                     * Long option.
                     */
                    {
                        char *opt, *val;
                        opt = p++;     /* opt will have _one_ leading - */
                        while (*p && *p != '=')
                            p++;               /* find end of option */
                        if (*p == '=') {
                            /* Split "name=value" in place inside argv. */
                            *p++ = '\0';
                            val = p;
                        } else
                            val = NULL;

                        assert(opt[0] == '-');
                        /* First see whether the option names a backend. */
                        for (k = 0; k < (int)lenof(backends); k++)
                            if (!strcmp(opt+1, backends[k].name)) {
                                backendbits |= backends[k].bitfield;
                                if (val) {
                                    /*
                                     * NB: this p shadows the char *p of
                                     * the enclosing loop. The returned
                                     * list is linked onto the end of
                                     * cfg, and cfg_tail is moved to the
                                     * list's last element.
                                     */
                                    paragraph *p = backends[k].filename(val);
                                    assert(p);
                                    if (cfg_tail)
                                        cfg_tail->next = p;
                                    else
                                        cfg = p;
                                    while (p->next)
                                        p = p->next;
                                    cfg_tail = p;
                                }
                                break;
                            }
                        /*
                         * k is below the table size only if the loop
                         * above stopped early on a match.
                         */
                        if (k < (int)lenof(backends)) {
                            /* do nothing */;
                        } else if (!strcmp(opt, "-input-charset")) {
                            if (!val) {
                                errs = TRUE, error(err_optnoarg, opt);
                            } else {
                                int charset = charset_from_localenc(val);
                                if (charset == CS_NONE) {
                                    errs = TRUE, error(err_cmdcharset, val);
                                } else {
                                    input_charset = charset;
                                }
                            }
                        } else if (!strcmp(opt, "-help")) {
                            help();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-version")) {
                            showversion();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-licence") ||
                                   !strcmp(opt, "-license")) {
                            licence();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-list-charsets")) {
                            listcharsets();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-precise")) {
                            reportcols = 1;
                        } else {
                            errs = TRUE, error(err_nosuchopt, opt);
                        }
                    }
                    /* A long option uses up the whole argument. */
                    p = NULL;
                    break;
                  case 'h':
                  case 'V':
                  case 'L':
                  case 'P':
                  case 'd':
                    /*
                     * Option requiring no parameter.
                     */
                    switch (c) {
                      case 'h':
                        help();
                        nogo = TRUE;
                        break;
                      case 'V':
                        showversion();
                        nogo = TRUE;
                        break;
                      case 'L':
                        licence();
                        nogo = TRUE;
                        break;
                      case 'P':
                        reportcols = 1;
                        break;
                      case 'd':
                        debug = TRUE;
                        break;
                    }
                    break;
                  case 'C':
                    /*
                     * Option requiring parameter.
                     */
                    /*
                     * The parameter is either the rest of this argument
                     * or, if that is empty, the next argument. If
                     * neither exists an error is reported and p is left
                     * pointing at an empty string.
                     */
                    p++;
                    if (!*p && argc > 1)
                        --argc, p = *++argv;
                    else if (!*p) {
                        char opt[2];
                        opt[0] = c;
                        opt[1] = '\0';
                        errs = TRUE, error(err_optnoarg, opt);
                    }
                    /*
                     * Now c is the option and p is the parameter.
                     */
                    switch (c) {
                      case 'C':
                        /*
                         * -C means we split our argument up into
                         * colon-separated chunks and assemble them
                         * into a config paragraph.
                         */
                        {
                            /*
                             * q reads through the copy while r writes
                             * the current chunk back at the start of s.
                             * NOTE(review): s is not freed in this
                             * block - confirm whether
                             * cmdline_cfg_add() keeps the pointer.
                             */
                            char *s = dupstr(p), *q, *r;
                            paragraph *para;

                            para = cmdline_cfg_new();

                            q = r = s;
                            while (*q) {
                                if (*q == ':') {
                                    *r = '\0';
                                    /* XXX ad-hoc diagnostic */
                                    if (!strcmp(s, "input-charset"))
                                        error(err_futileopt, "Cinput-charset",
                                              "; use --input-charset");
                                    cmdline_cfg_add(para, s);
                                    r = s;
                                } else {
                                    /*
                                     * A backslash makes the following
                                     * character literal, so an escaped
                                     * colon does not end the chunk.
                                     */
                                    if (*q == '\\' && q[1])
                                        q++;
                                    *r++ = *q;
                                }
                                q++;
                            }
                            /* Add the final chunk (after the last colon). */
                            *r = '\0';
                            cmdline_cfg_add(para, s);

                            if (cfg_tail)
                                cfg_tail->next = para;
                            else
                                cfg = para;
                            cfg_tail = para;
                        }
                        break;
                    }
                    p = NULL;          /* prevent continued processing */
                    break;
                  default:
                    /*
                     * Unrecognised option.
                     */
                    {
                        char opt[2];
                        opt[0] = c;
                        opt[1] = '\0';
                        errs = TRUE, error(err_nosuchopt, opt);
                    }
                }
            }
        } else {
            /*
             * A non-option argument.
             */
            infiles[nfiles++] = p;
        }
    }

    /* Errors win over informational options when both occurred. */
    if (errs)
        exit(EXIT_FAILURE);
    if (nogo)
        exit(EXIT_SUCCESS);

    /*
     * Do the work.
     */
    if (nfiles == 0) {
        error(err_noinput);
        usage();
        exit(EXIT_FAILURE);
    }

    {
        input in;
        paragraph *sourceform, *p;
        indexdata *idx;
        keywordlist *keywords;

        in.filenames = infiles;
        in.nfiles = nfiles;
        in.currfp = NULL;
        in.currindex = 0;
        in.npushback = in.pushbacksize = 0;
        in.pushback = NULL;
        in.reportcols = reportcols;
        in.stack = NULL;
        in.defcharset = input_charset;

        idx = make_index();

        sourceform = read_input(&in, idx);
        if (!sourceform)
            exit(EXIT_FAILURE);

        /*
         * Append the config directives acquired from the command
         * line.
         */
        {
            paragraph *end;

            end = sourceform;
            while (end && end->next)
                end = end->next;
            assert(end);

            end->next = cfg;
        }

        sfree(in.pushback);

        sfree(infiles);

        keywords = get_keywords(sourceform);
        if (!keywords)
            exit(EXIT_FAILURE);
        gen_citations(sourceform, keywords);
        subst_keywords(sourceform, keywords);

        /* Feed every para_IM paragraph to the index before building it. */
        for (p = sourceform; p; p = p->next)
            if (p->type == para_IM)
                index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);

        build_index(idx);

        /*
         * Set up attr_First / attr_Last / attr_Always, in the main
         * document and in the index entries.
         */
        for (p = sourceform; p; p = p->next)
            mark_attr_ends(p->words);
        {
            int i;
            indexentry *entry;

            for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
                mark_attr_ends(entry->text);
        }

        if (debug) {
            index_debug(idx);
            dbg_prtkws(keywords);
            dbg_prtsource(sourceform);
        }

        /*
         * Select and run the pre-backends.
         */
        /*
         * backendbits == 0 means no backend was named on the command
         * line, in which case every backend counts as selected.
         */
        prebackbits = 0;
        for (k = 0; k < (int)lenof(backends); k++)
            if (backendbits == 0 || (backendbits & backends[k].bitfield))
                prebackbits |= backends[k].prebackend_bitfield;
        for (k = 0; k < (int)lenof(pre_backends); k++)
            if (prebackbits & pre_backends[k].bitfield) {
                assert(k < (int)lenof(pre_backend_data));
                pre_backend_data[k] =
                    pre_backends[k].func(sourceform, keywords, idx);
            }

        /*
         * Run the selected set of backends.
         */
        /*
         * b holds the bitfield of the previous table entry, so that
         * consecutive entries sharing a bitfield (aliases of the same
         * backend) run it only once.
         */
        for (k = b = 0; k < (int)lenof(backends); k++)
            if (b != backends[k].bitfield) {
                b = backends[k].bitfield;
                if (backendbits == 0 || (backendbits & b)) {
                    void *pbd = NULL;
                    int pbb = backends[k].prebackend_bitfield;
                    int m;

                    /*
                     * Pass on the result of the first pre-backend this
                     * backend asks for; pbd stays NULL if it asks for
                     * none.
                     */
                    for (m = 0; m < (int)lenof(pre_backends); m++)
                        if (pbb & pre_backends[m].bitfield) {
                            assert(m < (int)lenof(pre_backend_data));
                            pbd = pre_backend_data[m];
                            break;
                        }

                    backends[k].func(sourceform, keywords, idx, pbd);
                }
            }

        free_para_list(sourceform);
        free_keywords(keywords);
        cleanup_index(idx);
    }

    return 0;
}
388
389 static void dbg_prtsource(paragraph *sourceform) {
390 /*
391 * Output source form in debugging format.
392 */
393
394 paragraph *p;
395 for (p = sourceform; p; p = p->next) {
396 wchar_t *wp;
397 printf("para %d ", p->type);
398 if (p->keyword) {
399 wp = p->keyword;
400 while (*wp) {
401 putchar('\"');
402 for (; *wp; wp++)
403 putchar(*wp);
404 putchar('\"');
405 if (*++wp)
406 printf(", ");
407 }
408 } else
409 printf("(no keyword)");
410 printf(" {\n");
411 dbg_prtwordlist(1, p->words);
412 printf("}\n");
413 }
414 }
415
416 static void dbg_prtkws(keywordlist *kws) {
417 /*
418 * Output keywords in debugging format.
419 */
420
421 int i;
422 keyword *kw;
423
424 for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) {
425 wchar_t *wp;
426 printf("keyword ");
427 wp = kw->key;
428 while (*wp) {
429 putchar('\"');
430 for (; *wp; wp++)
431 putchar(*wp);
432 putchar('\"');
433 if (*++wp)
434 printf(", ");
435 }
436 printf(" {\n");
437 dbg_prtwordlist(1, kw->text);
438 printf("}\n");
439 }
440 }
441
442 static void dbg_prtwordlist(int level, word *w) {
443 for (; w; w = w->next) {
444 wchar_t *wp;
445 printf("%*sword %d ", level*4, "", w->type);
446 if (w->text) {
447 printf("\"");
448 for (wp = w->text; *wp; wp++)
449 putchar(*wp);
450 printf("\"");
451 } else
452 printf("(no text)");
453 if (w->alt) {
454 printf(" alt = {\n");
455 dbg_prtwordlist(level+1, w->alt);
456 printf("%*s}", level*4, "");
457 }
458 printf("\n");
459 }
460 }