Support for \cfg{input-charset}. Input files can now be in ASCII,
[sgt/halibut] / main.c
CommitLineData
d7482997 1/*
2 * main.c: command line parsing and top level
3 */
4
c8fb54d2 5#include <assert.h>
d7482997 6#include <stdio.h>
7#include <stdlib.h>
8#include "halibut.h"
9
10static void dbg_prtsource(paragraph *sourceform);
11static void dbg_prtwordlist(int level, word *w);
12static void dbg_prtkws(keywordlist *kws);
13
43341922 14static const struct pre_backend {
15 void *(*func)(paragraph *, keywordlist *, indexdata *);
16 int bitfield;
17} pre_backends[] = {
18 {paper_pre_backend, 0x0001}
19};
20
c8fb54d2 21static const struct backend {
22 char *name;
43341922 23 void (*func)(paragraph *, keywordlist *, indexdata *, void *);
ba9c1487 24 paragraph *(*filename)(char *filename);
43341922 25 int bitfield, prebackend_bitfield;
c8fb54d2 26} backends[] = {
43341922 27 {"text", text_backend, text_config_filename, 0x0001, 0},
28 {"xhtml", xhtml_backend, xhtml_config_filename, 0x0002, 0},
29 {"html", xhtml_backend, xhtml_config_filename, 0x0002, 0},
30 {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0},
31 {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0},
32 {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0},
33 {"man", man_backend, man_config_filename, 0x0008, 0},
34 {"info", info_backend, info_config_filename, 0x0010, 0},
35 {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
36 {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
c8fb54d2 37};
38
d7482997 39int main(int argc, char **argv) {
40 char **infiles;
d7482997 41 int nfiles;
42 int nogo;
43 int errs;
44 int reportcols;
45 int debug;
43341922 46 int backendbits, prebackbits;
c8fb54d2 47 int k, b;
6a0b9d08 48 paragraph *cfg, *cfg_tail;
43341922 49 void *pre_backend_data[16];
d7482997 50
51 /*
52 * Set up initial (default) parameters.
53 */
54 infiles = mknewa(char *, argc);
d7482997 55 nfiles = 0;
56 nogo = errs = FALSE;
57 reportcols = 0;
58 debug = 0;
c8fb54d2 59 backendbits = 0;
6a0b9d08 60 cfg = cfg_tail = NULL;
d7482997 61
62 if (argc == 1) {
63 usage();
64 exit(EXIT_SUCCESS);
65 }
66
67 /*
68 * Parse command line arguments.
69 */
70 while (--argc) {
71 char *p = *++argv;
72 if (*p == '-') {
73 /*
74 * An option.
75 */
76 while (p && *++p) {
77 char c = *p;
78 switch (c) {
79 case '-':
80 /*
81 * Long option.
82 */
83 {
84 char *opt, *val;
85 opt = p++; /* opt will have _one_ leading - */
86 while (*p && *p != '=')
87 p++; /* find end of option */
88 if (*p == '=') {
89 *p++ = '\0';
90 val = p;
91 } else
92 val = NULL;
c8fb54d2 93
94 assert(opt[0] == '-');
95 for (k = 0; k < (int)lenof(backends); k++)
96 if (!strcmp(opt+1, backends[k].name)) {
97 backendbits |= backends[k].bitfield;
ba9c1487 98 if (val) {
99 paragraph *p = backends[k].filename(val);
100 assert(p);
101 if (cfg_tail)
102 cfg_tail->next = p;
103 else
104 cfg = p;
105 while (p->next)
106 p = p->next;
107 cfg_tail = p;
108 }
c8fb54d2 109 break;
110 }
111 if (k < (int)lenof(backends)) {
112 /* do nothing */;
113 } else if (!strcmp(opt, "-help")) {
d7482997 114 help();
115 nogo = TRUE;
116 } else if (!strcmp(opt, "-version")) {
117 showversion();
118 nogo = TRUE;
119 } else if (!strcmp(opt, "-licence") ||
120 !strcmp(opt, "-license")) {
121 licence();
122 nogo = TRUE;
d7482997 123 } else if (!strcmp(opt, "-precise")) {
124 reportcols = 1;
125 } else {
126 errs = TRUE, error(err_nosuchopt, opt);
127 }
128 }
129 p = NULL;
130 break;
131 case 'h':
132 case 'V':
133 case 'L':
134 case 'P':
135 case 'd':
136 /*
137 * Option requiring no parameter.
138 */
139 switch (c) {
140 case 'h':
141 help();
142 nogo = TRUE;
143 break;
144 case 'V':
145 showversion();
146 nogo = TRUE;
147 break;
148 case 'L':
149 licence();
150 nogo = TRUE;
151 break;
152 case 'P':
153 reportcols = 1;
154 break;
155 case 'd':
156 debug = TRUE;
157 break;
158 }
159 break;
6a0b9d08 160 case 'C':
d7482997 161 /*
162 * Option requiring parameter.
163 */
164 p++;
165 if (!*p && argc > 1)
166 --argc, p = *++argv;
167 else if (!*p) {
168 char opt[2];
169 opt[0] = c;
170 opt[1] = '\0';
171 errs = TRUE, error(err_optnoarg, opt);
172 }
173 /*
174 * Now c is the option and p is the parameter.
175 */
176 switch (c) {
6a0b9d08 177 case 'C':
178 /*
179 * -C means we split our argument up into
180 * colon-separated chunks and assemble them
181 * into a config paragraph.
182 */
183 {
184 wchar_t *keywords;
185 char *q;
186 wchar_t *u;
187 paragraph *para;
188
189 keywords = mknewa(wchar_t, 2+strlen(p));
190
191 u = keywords;
192 q = p;
193
194 while (*q) {
195 if (*q == ':') {
196 *u++ = L'\0';
197 } else {
198 if (*q == '\\' && q[1])
199 q++;
200 /* FIXME: lacks charset flexibility */
201 *u++ = *q;
202 }
203 q++;
204 }
205 *u = L'\0';
206
207 para = mknew(paragraph);
208 memset(para, 0, sizeof(*para));
209 para->type = para_Config;
210 para->keyword = keywords;
211 para->next = NULL;
212 para->fpos.filename = "<command line>";
213 para->fpos.line = para->fpos.col = -1;
214
215 if (cfg_tail)
216 cfg_tail->next = para;
217 else
218 cfg = para;
219 cfg_tail = para;
220 }
d7482997 221 break;
222 }
223 p = NULL; /* prevent continued processing */
224 break;
225 default:
226 /*
227 * Unrecognised option.
228 */
229 {
230 char opt[2];
231 opt[0] = c;
232 opt[1] = '\0';
233 errs = TRUE, error(err_nosuchopt, opt);
234 }
235 }
236 }
237 } else {
238 /*
239 * A non-option argument.
240 */
241 infiles[nfiles++] = p;
242 }
243 }
244
245 if (errs)
246 exit(EXIT_FAILURE);
247 if (nogo)
248 exit(EXIT_SUCCESS);
249
250 /*
251 * Do the work.
252 */
253 if (nfiles == 0) {
254 error(err_noinput);
255 usage();
256 exit(EXIT_FAILURE);
257 }
258
259 {
260 input in;
261 paragraph *sourceform, *p;
262 indexdata *idx;
263 keywordlist *keywords;
264
265 in.filenames = infiles;
266 in.nfiles = nfiles;
267 in.currfp = NULL;
268 in.currindex = 0;
269 in.npushback = in.pushbacksize = 0;
270 in.pushback = NULL;
271 in.reportcols = reportcols;
272 in.stack = NULL;
e34ba5c3 273 in.defcharset = CS_ASCII;
d7482997 274
275 idx = make_index();
276
277 sourceform = read_input(&in, idx);
278 if (!sourceform)
279 exit(EXIT_FAILURE);
280
6a0b9d08 281 /*
282 * Append the config directives acquired from the command
283 * line.
284 */
285 {
286 paragraph *end;
287
288 end = sourceform;
289 while (end && end->next)
290 end = end->next;
291 assert(end);
292
293 end->next = cfg;
294 }
295
d7482997 296 sfree(in.pushback);
297
298 mark_attr_ends(sourceform);
299
300 sfree(infiles);
301
302 keywords = get_keywords(sourceform);
303 if (!keywords)
304 exit(EXIT_FAILURE);
305 gen_citations(sourceform, keywords);
306 subst_keywords(sourceform, keywords);
307
308 for (p = sourceform; p; p = p->next)
309 if (p->type == para_IM)
f4551933 310 index_merge(idx, TRUE, p->keyword, p->words, &p->fpos);
d7482997 311
312 build_index(idx);
313
314 if (debug) {
315 index_debug(idx);
316 dbg_prtkws(keywords);
317 dbg_prtsource(sourceform);
318 }
319
c8fb54d2 320 /*
43341922 321 * Select and run the pre-backends.
322 */
323 prebackbits = 0;
324 for (k = 0; k < (int)lenof(backends); k++)
325 if (backendbits == 0 || (backendbits & backends[k].bitfield))
326 prebackbits |= backends[k].prebackend_bitfield;
327 for (k = 0; k < (int)lenof(pre_backends); k++)
328 if (prebackbits & pre_backends[k].bitfield) {
329 assert(k < (int)lenof(pre_backend_data));
330 pre_backend_data[k] =
331 pre_backends[k].func(sourceform, keywords, idx);
332 }
333
334 /*
c8fb54d2 335 * Run the selected set of backends.
336 */
337 for (k = b = 0; k < (int)lenof(backends); k++)
338 if (b != backends[k].bitfield) {
339 b = backends[k].bitfield;
43341922 340 if (backendbits == 0 || (backendbits & b)) {
341 void *pbd = NULL;
342 int pbb = backends[k].prebackend_bitfield;
343 int m;
344
345 for (m = 0; m < (int)lenof(pre_backends); m++)
346 if (pbb & pre_backends[m].bitfield) {
347 assert(m < (int)lenof(pre_backend_data));
348 pbd = pre_backend_data[m];
349 break;
350 }
351
352 backends[k].func(sourceform, keywords, idx, pbd);
353 }
c8fb54d2 354 }
d7482997 355
356 free_para_list(sourceform);
357 free_keywords(keywords);
358 cleanup_index(idx);
359 }
360
361 return 0;
362}
363
364static void dbg_prtsource(paragraph *sourceform) {
365 /*
366 * Output source form in debugging format.
367 */
368
369 paragraph *p;
370 for (p = sourceform; p; p = p->next) {
371 wchar_t *wp;
372 printf("para %d ", p->type);
373 if (p->keyword) {
374 wp = p->keyword;
375 while (*wp) {
376 putchar('\"');
377 for (; *wp; wp++)
378 putchar(*wp);
379 putchar('\"');
380 if (*++wp)
381 printf(", ");
382 }
383 } else
384 printf("(no keyword)");
385 printf(" {\n");
386 dbg_prtwordlist(1, p->words);
387 printf("}\n");
388 }
389}
390
391static void dbg_prtkws(keywordlist *kws) {
392 /*
393 * Output keywords in debugging format.
394 */
395
396 int i;
397 keyword *kw;
398
399 for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) {
400 wchar_t *wp;
401 printf("keyword ");
402 wp = kw->key;
403 while (*wp) {
404 putchar('\"');
405 for (; *wp; wp++)
406 putchar(*wp);
407 putchar('\"');
408 if (*++wp)
409 printf(", ");
410 }
411 printf(" {\n");
412 dbg_prtwordlist(1, kw->text);
413 printf("}\n");
414 }
415}
416
417static void dbg_prtwordlist(int level, word *w) {
418 for (; w; w = w->next) {
419 wchar_t *wp;
420 printf("%*sword %d ", level*4, "", w->type);
421 if (w->text) {
422 printf("\"");
423 for (wp = w->text; *wp; wp++)
424 putchar(*wp);
425 printf("\"");
426 } else
427 printf("(no text)");
428 if (w->alt) {
429 printf(" alt = {\n");
430 dbg_prtwordlist(level+1, w->alt);
431 printf("%*s}", level*4, "");
432 }
433 printf("\n");
434 }
435}