Indexing tweaks for recent changes.
[sgt/halibut] / main.c
CommitLineData
d7482997 1/*
2 * main.c: command line parsing and top level
3 */
4
c8fb54d2 5#include <assert.h>
7e976207 6#include <locale.h>
d7482997 7#include <stdio.h>
8#include <stdlib.h>
9#include "halibut.h"
10
11static void dbg_prtsource(paragraph *sourceform);
12static void dbg_prtwordlist(int level, word *w);
13static void dbg_prtkws(keywordlist *kws);
14
43341922 15static const struct pre_backend {
16 void *(*func)(paragraph *, keywordlist *, indexdata *);
17 int bitfield;
18} pre_backends[] = {
19 {paper_pre_backend, 0x0001}
20};
21
c8fb54d2 22static const struct backend {
23 char *name;
43341922 24 void (*func)(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 25 paragraph *(*filename)(char *filename);
43341922 26 int bitfield, prebackend_bitfield;
c8fb54d2 27} backends[] = {
43341922 28 {"text", text_backend, text_config_filename, 0x0001, 0},
78c73085 29 {"xhtml", html_backend, html_config_filename, 0x0002, 0},
30 {"html", html_backend, html_config_filename, 0x0002, 0},
43341922 31 {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
32 {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
33 {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
34 {"man", man_backend, man_config_filename, 0x0008, 0},
35 {"info", info_backend, info_config_filename, 0x0010, 0},
36 {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
37 {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
c8fb54d2 38};
39
d7482997 40int main(int argc, char **argv) {
41 char **infiles;
d7482997 42 int nfiles;
43 int nogo;
44 int errs;
45 int reportcols;
675958c3 46 int input_charset;
d7482997 47 int debug;
43341922 48 int backendbits, prebackbits;
c8fb54d2 49 int k, b;
6a0b9d08 50 paragraph *cfg, *cfg_tail;
43341922 51 void *pre_backend_data[16];
d7482997 52
7e976207 53 setlocale(LC_ALL, "");
54
d7482997 55 /*
56 * Set up initial (default) parameters.
57 */
f1530049 58 infiles = snewn(argc, char *);
d7482997 59 nfiles = 0;
60 nogo = errs = FALSE;
61 reportcols = 0;
675958c3 62 input_charset = CS_ASCII;
d7482997 63 debug = 0;
c8fb54d2 64 backendbits = 0;
6a0b9d08 65 cfg = cfg_tail = NULL;
d7482997 66
67 if (argc == 1) {
68 usage();
69 exit(EXIT_SUCCESS);
70 }
71
72 /*
73 * Parse command line arguments.
74 */
75 while (--argc) {
76 char *p = *++argv;
77 if (*p == '-') {
78 /*
79 * An option.
80 */
81 while (p && *++p) {
82 char c = *p;
83 switch (c) {
84 case '-':
85 /*
86 * Long option.
87 */
88 {
89 char *opt, *val;
90 opt = p++; /* opt will have _one_ leading - */
91 while (*p && *p != '=')
92 p++; /* find end of option */
93 if (*p == '=') {
94 *p++ = '\0';
95 val = p;
96 } else
97 val = NULL;
c8fb54d2 98
99 assert(opt[0] == '-');
100 for (k = 0; k < (int)lenof(backends); k++)
101 if (!strcmp(opt+1, backends[k].name)) {
102 backendbits |= backends[k].bitfield;
ba9c1487 103 if (val) {
104 paragraph *p = backends[k].filename(val);
105 assert(p);
106 if (cfg_tail)
107 cfg_tail->next = p;
108 else
109 cfg = p;
110 while (p->next)
111 p = p->next;
112 cfg_tail = p;
113 }
c8fb54d2 114 break;
115 }
116 if (k < (int)lenof(backends)) {
117 /* do nothing */;
675958c3 118 } else if (!strcmp(opt, "-input-charset")) {
119 if (!val) {
120 errs = TRUE, error(err_optnoarg, opt);
121 } else {
122 int charset = charset_from_localenc(val);
123 if (charset == CS_NONE) {
124 errs = TRUE, error(err_cmdcharset, val);
125 } else {
126 input_charset = charset;
127 }
128 }
c8fb54d2 129 } else if (!strcmp(opt, "-help")) {
d7482997 130 help();
131 nogo = TRUE;
132 } else if (!strcmp(opt, "-version")) {
133 showversion();
134 nogo = TRUE;
135 } else if (!strcmp(opt, "-licence") ||
136 !strcmp(opt, "-license")) {
137 licence();
138 nogo = TRUE;
d7482997 139 } else if (!strcmp(opt, "-precise")) {
140 reportcols = 1;
141 } else {
142 errs = TRUE, error(err_nosuchopt, opt);
143 }
144 }
145 p = NULL;
146 break;
147 case 'h':
148 case 'V':
149 case 'L':
150 case 'P':
151 case 'd':
152 /*
153 * Option requiring no parameter.
154 */
155 switch (c) {
156 case 'h':
157 help();
158 nogo = TRUE;
159 break;
160 case 'V':
161 showversion();
162 nogo = TRUE;
163 break;
164 case 'L':
165 licence();
166 nogo = TRUE;
167 break;
168 case 'P':
169 reportcols = 1;
170 break;
171 case 'd':
172 debug = TRUE;
173 break;
174 }
175 break;
6a0b9d08 176 case 'C':
d7482997 177 /*
178 * Option requiring parameter.
179 */
180 p++;
181 if (!*p && argc > 1)
182 --argc, p = *++argv;
183 else if (!*p) {
184 char opt[2];
185 opt[0] = c;
186 opt[1] = '\0';
187 errs = TRUE, error(err_optnoarg, opt);
188 }
189 /*
190 * Now c is the option and p is the parameter.
191 */
192 switch (c) {
6a0b9d08 193 case 'C':
194 /*
195 * -C means we split our argument up into
196 * colon-separated chunks and assemble them
197 * into a config paragraph.
198 */
199 {
e4ea58f8 200 char *s = dupstr(p), *q, *r;
6a0b9d08 201 paragraph *para;
202
e4ea58f8 203 para = cmdline_cfg_new();
6a0b9d08 204
e4ea58f8 205 q = r = s;
6a0b9d08 206 while (*q) {
207 if (*q == ':') {
e4ea58f8 208 *r = '\0';
675958c3 209 /* XXX ad-hoc diagnostic */
210 if (!strcmp(s, "input-charset"))
211 error(err_futileopt, "Cinput-charset",
212 "; use --input-charset");
e4ea58f8 213 cmdline_cfg_add(para, s);
214 r = s;
6a0b9d08 215 } else {
216 if (*q == '\\' && q[1])
217 q++;
e4ea58f8 218 *r++ = *q;
6a0b9d08 219 }
220 q++;
221 }
57e17355 222 *r = '\0';
e4ea58f8 223 cmdline_cfg_add(para, s);
6a0b9d08 224
225 if (cfg_tail)
226 cfg_tail->next = para;
227 else
228 cfg = para;
229 cfg_tail = para;
230 }
d7482997 231 break;
232 }
233 p = NULL; /* prevent continued processing */
234 break;
235 default:
236 /*
237 * Unrecognised option.
238 */
239 {
240 char opt[2];
241 opt[0] = c;
242 opt[1] = '\0';
243 errs = TRUE, error(err_nosuchopt, opt);
244 }
245 }
246 }
247 } else {
248 /*
249 * A non-option argument.
250 */
251 infiles[nfiles++] = p;
252 }
253 }
254
255 if (errs)
256 exit(EXIT_FAILURE);
257 if (nogo)
258 exit(EXIT_SUCCESS);
259
260 /*
261 * Do the work.
262 */
263 if (nfiles == 0) {
264 error(err_noinput);
265 usage();
266 exit(EXIT_FAILURE);
267 }
268
269 {
270 input in;
271 paragraph *sourceform, *p;
272 indexdata *idx;
273 keywordlist *keywords;
274
275 in.filenames = infiles;
276 in.nfiles = nfiles;
277 in.currfp = NULL;
278 in.currindex = 0;
279 in.npushback = in.pushbacksize = 0;
280 in.pushback = NULL;
281 in.reportcols = reportcols;
282 in.stack = NULL;
675958c3 283 in.defcharset = input_charset;
d7482997 284
285 idx = make_index();
286
287 sourceform = read_input(&in, idx);
288 if (!sourceform)
289 exit(EXIT_FAILURE);
290
6a0b9d08 291 /*
292 * Append the config directives acquired from the command
293 * line.
294 */
295 {
296 paragraph *end;
297
298 end = sourceform;
299 while (end && end->next)
300 end = end->next;
301 assert(end);
302
303 end->next = cfg;
304 }
305
d7482997 306 sfree(in.pushback);
307
d7482997 308 sfree(infiles);
309
310 keywords = get_keywords(sourceform);
311 if (!keywords)
312 exit(EXIT_FAILURE);
313 gen_citations(sourceform, keywords);
314 subst_keywords(sourceform, keywords);
315
316 for (p = sourceform; p; p = p->next)
317 if (p->type == para_IM)
f4551933 318 index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
d7482997 319
320 build_index(idx);
321
bb9e7835 322 /*
323 * Set up attr_First / attr_Last / attr_Always, in the main
324 * document and in the index entries.
325 */
326 for (p = sourceform; p; p = p->next)
327 mark_attr_ends(p->words);
328 {
329 int i;
330 indexentry *entry;
331
332 for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++)
333 mark_attr_ends(entry->text);
334 }
335
d7482997 336 if (debug) {
337 index_debug(idx);
338 dbg_prtkws(keywords);
339 dbg_prtsource(sourceform);
340 }
341
c8fb54d2 342 /*
43341922 343 * Select and run the pre-backends.
344 */
345 prebackbits = 0;
346 for (k = 0; k < (int)lenof(backends); k++)
347 if (backendbits == 0 || (backendbits & backends[k].bitfield))
348 prebackbits |= backends[k].prebackend_bitfield;
349 for (k = 0; k < (int)lenof(pre_backends); k++)
350 if (prebackbits & pre_backends[k].bitfield) {
351 assert(k < (int)lenof(pre_backend_data));
352 pre_backend_data[k] =
353 pre_backends[k].func(sourceform, keywords, idx);
354 }
355
356 /*
c8fb54d2 357 * Run the selected set of backends.
358 */
359 for (k = b = 0; k < (int)lenof(backends); k++)
360 if (b != backends[k].bitfield) {
361 b = backends[k].bitfield;
43341922 362 if (backendbits == 0 || (backendbits & b)) {
363 void *pbd = NULL;
364 int pbb = backends[k].prebackend_bitfield;
365 int m;
366
367 for (m = 0; m < (int)lenof(pre_backends); m++)
368 if (pbb & pre_backends[m].bitfield) {
369 assert(m < (int)lenof(pre_backend_data));
370 pbd = pre_backend_data[m];
371 break;
372 }
373
374 backends[k].func(sourceform, keywords, idx, pbd);
375 }
c8fb54d2 376 }
d7482997 377
378 free_para_list(sourceform);
379 free_keywords(keywords);
380 cleanup_index(idx);
381 }
382
383 return 0;
384}
385
386static void dbg_prtsource(paragraph *sourceform) {
387 /*
388 * Output source form in debugging format.
389 */
390
391 paragraph *p;
392 for (p = sourceform; p; p = p->next) {
393 wchar_t *wp;
394 printf("para %d ", p->type);
395 if (p->keyword) {
396 wp = p->keyword;
397 while (*wp) {
398 putchar('\"');
399 for (; *wp; wp++)
400 putchar(*wp);
401 putchar('\"');
402 if (*++wp)
403 printf(", ");
404 }
405 } else
406 printf("(no keyword)");
407 printf(" {\n");
408 dbg_prtwordlist(1, p->words);
409 printf("}\n");
410 }
411}
412
413static void dbg_prtkws(keywordlist *kws) {
414 /*
415 * Output keywords in debugging format.
416 */
417
418 int i;
419 keyword *kw;
420
421 for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) {
422 wchar_t *wp;
423 printf("keyword ");
424 wp = kw->key;
425 while (*wp) {
426 putchar('\"');
427 for (; *wp; wp++)
428 putchar(*wp);
429 putchar('\"');
430 if (*++wp)
431 printf(", ");
432 }
433 printf(" {\n");
434 dbg_prtwordlist(1, kw->text);
435 printf("}\n");
436 }
437}
438
439static void dbg_prtwordlist(int level, word *w) {
440 for (; w; w = w->next) {
441 wchar_t *wp;
442 printf("%*sword %d ", level*4, "", w->type);
443 if (w->text) {
444 printf("\"");
445 for (wp = w->text; *wp; wp++)
446 putchar(*wp);
447 printf("\"");
448 } else
449 printf("(no text)");
450 if (w->alt) {
451 printf(" alt = {\n");
452 dbg_prtwordlist(level+1, w->alt);
453 printf("%*s}", level*4, "");
454 }
455 printf("\n");
456 }
457}