d7482997 |
1 | /* |
2 | * text backend for Halibut |
3 | */ |
4 | |
5 | #include <stdio.h> |
6 | #include <stdlib.h> |
7 | #include <assert.h> |
8 | #include "halibut.h" |
9 | |
/*
 * Horizontal placement of a heading. In text_heading(), LEFT and
 * CENTRE headings may use the full `indent + width' columns, while a
 * LEFTPLUS heading puts its prefix in the left margin and starts the
 * heading text at the body indent.
 */
typedef enum {
    LEFT,
    LEFTPLUS,
    CENTRE
} alignment;

/*
 * Presentation settings for one class of heading (title, chapter, or
 * one section level).
 */
typedef struct {
    alignment align;       /* where the heading sits on the line */
    int just_numbers;      /* nonzero: prefix with the number only */
    wchar_t underline;     /* underline character; L'\0' for none */
} alignstruct;
16 | |
17 | typedef struct { |
18 | int indent, indent_code; |
19 | int listindentbefore, listindentafter; |
20 | int width; |
21 | alignstruct atitle, achapter, *asect; |
22 | int nasect; |
23 | int include_version_id; |
24 | int indent_preambles; |
25 | word bullet; |
26 | } textconfig; |
27 | |
28 | static int text_convert(wchar_t *, char **); |
29 | |
30 | static void text_heading(FILE *, word *, word *, word *, alignstruct, int,int); |
31 | static void text_rule(FILE *, int, int); |
32 | static void text_para(FILE *, word *, char *, word *, int, int, int); |
33 | static void text_codepara(FILE *, word *, int, int); |
34 | static void text_versionid(FILE *, word *); |
35 | |
36 | static alignment utoalign(wchar_t *p) { |
37 | if (!ustricmp(p, L"centre") || !ustricmp(p, L"center")) |
38 | return CENTRE; |
39 | if (!ustricmp(p, L"leftplus")) |
40 | return LEFTPLUS; |
41 | return LEFT; |
42 | } |
43 | |
44 | static textconfig text_configure(paragraph *source) { |
45 | textconfig ret; |
46 | |
47 | /* |
48 | * Non-negotiables. |
49 | */ |
50 | ret.bullet.next = NULL; |
51 | ret.bullet.alt = NULL; |
52 | ret.bullet.type = word_Normal; |
53 | ret.atitle.just_numbers = FALSE; /* ignored */ |
54 | |
55 | /* |
56 | * Defaults. |
57 | */ |
58 | ret.indent = 7; |
59 | ret.indent_code = 2; |
60 | ret.listindentbefore = 1; |
61 | ret.listindentafter = 3; |
62 | ret.width = 68; |
63 | ret.atitle.align = CENTRE; |
64 | ret.atitle.underline = L'='; |
65 | ret.achapter.align = LEFT; |
66 | ret.achapter.just_numbers = FALSE; |
67 | ret.achapter.underline = L'-'; |
68 | ret.nasect = 1; |
69 | ret.asect = mknewa(alignstruct, ret.nasect); |
70 | ret.asect[0].align = LEFTPLUS; |
71 | ret.asect[0].just_numbers = TRUE; |
72 | ret.asect[0].underline = L'\0'; |
73 | ret.include_version_id = TRUE; |
74 | ret.indent_preambles = FALSE; |
75 | ret.bullet.text = ustrdup(L"-"); |
76 | |
77 | for (; source; source = source->next) { |
78 | if (source->type == para_Config) { |
79 | if (!ustricmp(source->keyword, L"text-indent")) { |
80 | ret.indent = utoi(uadv(source->keyword)); |
81 | } else if (!ustricmp(source->keyword, L"text-indent-code")) { |
82 | ret.indent_code = utoi(uadv(source->keyword)); |
83 | } else if (!ustricmp(source->keyword, L"text-width")) { |
84 | ret.width = utoi(uadv(source->keyword)); |
85 | } else if (!ustricmp(source->keyword, L"text-list-indent")) { |
86 | ret.listindentbefore = utoi(uadv(source->keyword)); |
87 | } else if (!ustricmp(source->keyword, L"text-listitem-indent")) { |
88 | ret.listindentafter = utoi(uadv(source->keyword)); |
89 | } else if (!ustricmp(source->keyword, L"text-chapter-align")) { |
90 | ret.achapter.align = utoalign(uadv(source->keyword)); |
91 | } else if (!ustricmp(source->keyword, L"text-chapter-underline")) { |
92 | ret.achapter.underline = *uadv(source->keyword); |
93 | } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) { |
94 | ret.achapter.underline = utob(uadv(source->keyword)); |
95 | } else if (!ustricmp(source->keyword, L"text-section-align")) { |
96 | wchar_t *p = uadv(source->keyword); |
97 | int n = 0; |
98 | if (uisdigit(*p)) { |
99 | n = utoi(p); |
100 | p = uadv(p); |
101 | } |
102 | if (n >= ret.nasect) { |
103 | int i; |
104 | ret.asect = resize(ret.asect, n+1); |
105 | for (i = ret.nasect; i <= n; i++) |
106 | ret.asect[i] = ret.asect[ret.nasect-1]; |
107 | ret.nasect = n+1; |
108 | } |
109 | ret.asect[n].align = utoalign(p); |
110 | } else if (!ustricmp(source->keyword, L"text-section-underline")) { |
111 | wchar_t *p = uadv(source->keyword); |
112 | int n = 0; |
113 | if (uisdigit(*p)) { |
114 | n = utoi(p); |
115 | p = uadv(p); |
116 | } |
117 | if (n >= ret.nasect) { |
118 | int i; |
119 | ret.asect = resize(ret.asect, n+1); |
120 | for (i = ret.nasect; i <= n; i++) |
121 | ret.asect[i] = ret.asect[ret.nasect-1]; |
122 | ret.nasect = n+1; |
123 | } |
124 | ret.asect[n].underline = *p; |
125 | } else if (!ustricmp(source->keyword, L"text-section-numeric")) { |
126 | wchar_t *p = uadv(source->keyword); |
127 | int n = 0; |
128 | if (uisdigit(*p)) { |
129 | n = utoi(p); |
130 | p = uadv(p); |
131 | } |
132 | if (n >= ret.nasect) { |
133 | int i; |
134 | ret.asect = resize(ret.asect, n+1); |
135 | for (i = ret.nasect; i <= n; i++) |
136 | ret.asect[i] = ret.asect[ret.nasect-1]; |
137 | ret.nasect = n+1; |
138 | } |
139 | ret.asect[n].just_numbers = utob(p); |
140 | } else if (!ustricmp(source->keyword, L"text-title-align")) { |
141 | ret.atitle.align = utoalign(uadv(source->keyword)); |
142 | } else if (!ustricmp(source->keyword, L"text-title-underline")) { |
143 | ret.atitle.underline = *uadv(source->keyword); |
144 | } else if (!ustricmp(source->keyword, L"text-versionid")) { |
145 | ret.include_version_id = utob(uadv(source->keyword)); |
146 | } else if (!ustricmp(source->keyword, L"text-indent-preamble")) { |
147 | ret.indent_preambles = utob(uadv(source->keyword)); |
148 | } else if (!ustricmp(source->keyword, L"text-bullet")) { |
149 | ret.bullet.text = uadv(source->keyword); |
150 | } |
151 | } |
152 | } |
153 | |
154 | return ret; |
155 | } |
156 | |
157 | void text_backend(paragraph *sourceform, keywordlist *keywords, |
158 | indexdata *idx) { |
159 | paragraph *p; |
160 | textconfig conf; |
161 | word *prefix, *body, *wp; |
162 | word spaceword; |
163 | FILE *fp; |
164 | char *prefixextra; |
165 | int indentb, indenta; |
166 | |
167 | IGNORE(keywords); /* we don't happen to need this */ |
168 | IGNORE(idx); /* or this */ |
169 | |
170 | conf = text_configure(sourceform); |
171 | |
172 | /* |
173 | * Determine the output file name, and open the output file |
174 | * |
175 | * FIXME: want configurable output file names here. For the |
176 | * moment, we'll just call it `output.txt'. |
177 | */ |
178 | fp = fopen("output.txt", "w"); |
179 | if (!fp) { |
180 | error(err_cantopenw, "output.txt"); |
181 | return; |
182 | } |
183 | |
184 | /* Do the title */ |
185 | for (p = sourceform; p; p = p->next) |
186 | if (p->type == para_Title) |
187 | text_heading(fp, NULL, NULL, p->words, |
188 | conf.atitle, conf.indent, conf.width); |
189 | |
190 | /* Do the preamble and copyright */ |
191 | for (p = sourceform; p; p = p->next) |
192 | if (p->type == para_Preamble) |
193 | text_para(fp, NULL, NULL, p->words, |
194 | conf.indent_preambles ? conf.indent : 0, 0, |
195 | conf.width + (conf.indent_preambles ? 0 : conf.indent)); |
196 | for (p = sourceform; p; p = p->next) |
197 | if (p->type == para_Copyright) |
198 | text_para(fp, NULL, NULL, p->words, |
199 | conf.indent_preambles ? conf.indent : 0, 0, |
200 | conf.width + (conf.indent_preambles ? 0 : conf.indent)); |
201 | |
202 | /* Do the main document */ |
203 | for (p = sourceform; p; p = p->next) switch (p->type) { |
204 | |
205 | /* |
206 | * Things we ignore because we've already processed them or |
207 | * aren't going to touch them in this pass. |
208 | */ |
209 | case para_IM: |
210 | case para_BR: |
211 | case para_Biblio: /* only touch BiblioCited */ |
212 | case para_VersionID: |
213 | case para_Copyright: |
214 | case para_Preamble: |
215 | case para_NoCite: |
216 | case para_Title: |
217 | break; |
218 | |
219 | /* |
220 | * Chapter titles. |
221 | */ |
222 | case para_Chapter: |
223 | case para_Appendix: |
224 | case para_UnnumberedChapter: |
225 | text_heading(fp, p->kwtext, p->kwtext2, p->words, |
226 | conf.achapter, conf.indent, conf.width); |
227 | break; |
228 | |
229 | case para_Heading: |
230 | case para_Subsect: |
231 | text_heading(fp, p->kwtext, p->kwtext2, p->words, |
232 | conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux], |
233 | conf.indent, conf.width); |
234 | break; |
235 | |
236 | case para_Rule: |
237 | text_rule(fp, conf.indent, conf.width); |
238 | break; |
239 | |
240 | case para_Normal: |
241 | case para_BiblioCited: |
242 | case para_Bullet: |
243 | case para_NumberedList: |
244 | if (p->type == para_Bullet) { |
245 | prefix = &conf.bullet; |
246 | prefixextra = NULL; |
247 | indentb = conf.listindentbefore; |
248 | indenta = conf.listindentafter; |
249 | } else if (p->type == para_NumberedList) { |
250 | prefix = p->kwtext; |
251 | prefixextra = "."; /* FIXME: configurability */ |
252 | indentb = conf.listindentbefore; |
253 | indenta = conf.listindentafter; |
254 | } else { |
255 | prefix = NULL; |
256 | prefixextra = NULL; |
257 | indentb = indenta = 0; |
258 | } |
259 | if (p->type == para_BiblioCited) { |
260 | body = dup_word_list(p->kwtext); |
261 | for (wp = body; wp->next; wp = wp->next); |
262 | wp->next = &spaceword; |
263 | spaceword.next = p->words; |
264 | spaceword.alt = NULL; |
265 | spaceword.type = word_WhiteSpace; |
266 | spaceword.text = NULL; |
267 | } else { |
268 | wp = NULL; |
269 | body = p->words; |
270 | } |
271 | text_para(fp, prefix, prefixextra, body, |
272 | conf.indent + indentb, indenta, conf.width); |
273 | if (wp) { |
274 | wp->next = NULL; |
275 | free_word_list(body); |
276 | } |
277 | break; |
278 | |
279 | case para_Code: |
280 | text_codepara(fp, p->words, conf.indent + conf.indent_code, conf.width - 2 * conf.indent_code); |
281 | break; |
282 | } |
283 | |
284 | /* Do the version ID */ |
285 | if (conf.include_version_id) { |
286 | for (p = sourceform; p; p = p->next) |
287 | if (p->type == para_VersionID) |
288 | text_versionid(fp, p->words); |
289 | } |
290 | |
291 | /* |
292 | * Tidy up |
293 | */ |
294 | fclose(fp); |
295 | sfree(conf.bullet.text); |
296 | } |
297 | |
298 | /* |
299 | * Convert a wide string into a string of chars. If `result' is |
300 | * non-NULL, mallocs the resulting string and stores a pointer to |
301 | * it in `*result'. If `result' is NULL, merely checks whether all |
302 | * characters in the string are feasible for the output character |
303 | * set. |
304 | * |
305 | * Return is nonzero if all characters are OK. If not all |
306 | * characters are OK but `result' is non-NULL, a result _will_ |
307 | * still be generated! |
308 | */ |
309 | static int text_convert(wchar_t *s, char **result) { |
310 | /* |
311 | * FIXME. Currently this is ISO8859-1 only. |
312 | */ |
313 | int doing = (result != 0); |
314 | int ok = TRUE; |
315 | char *p = NULL; |
316 | int plen = 0, psize = 0; |
317 | |
318 | for (; *s; s++) { |
319 | wchar_t c = *s; |
320 | char outc; |
321 | |
322 | if ((c >= 32 && c <= 126) || |
323 | (c >= 160 && c <= 255)) { |
324 | /* Char is OK. */ |
325 | outc = (char)c; |
326 | } else { |
327 | /* Char is not OK. */ |
328 | ok = FALSE; |
329 | outc = 0xBF; /* approximate the good old DEC `uh?' */ |
330 | } |
331 | if (doing) { |
332 | if (plen >= psize) { |
333 | psize = plen + 256; |
334 | p = resize(p, psize); |
335 | } |
336 | p[plen++] = outc; |
337 | } |
338 | } |
339 | if (doing) { |
340 | p = resize(p, plen+1); |
341 | p[plen] = '\0'; |
342 | *result = p; |
343 | } |
344 | return ok; |
345 | } |
346 | |
347 | static void text_rdaddwc(rdstringc *rs, word *text, word *end) { |
348 | char *c; |
349 | |
350 | for (; text && text != end; text = text->next) switch (text->type) { |
351 | case word_HyperLink: |
352 | case word_HyperEnd: |
353 | case word_UpperXref: |
354 | case word_LowerXref: |
355 | case word_XrefEnd: |
356 | case word_IndexRef: |
357 | break; |
358 | |
359 | case word_Normal: |
360 | case word_Emph: |
361 | case word_Code: |
362 | case word_WeakCode: |
363 | case word_WhiteSpace: |
364 | case word_EmphSpace: |
365 | case word_CodeSpace: |
366 | case word_WkCodeSpace: |
367 | case word_Quote: |
368 | case word_EmphQuote: |
369 | case word_CodeQuote: |
370 | case word_WkCodeQuote: |
371 | assert(text->type != word_CodeQuote && |
372 | text->type != word_WkCodeQuote); |
373 | if (towordstyle(text->type) == word_Emph && |
374 | (attraux(text->aux) == attr_First || |
375 | attraux(text->aux) == attr_Only)) |
376 | rdaddc(rs, '_'); /* FIXME: configurability */ |
377 | else if (towordstyle(text->type) == word_Code && |
378 | (attraux(text->aux) == attr_First || |
379 | attraux(text->aux) == attr_Only)) |
380 | rdaddc(rs, '`'); /* FIXME: configurability */ |
381 | if (removeattr(text->type) == word_Normal) { |
382 | if (text_convert(text->text, &c)) |
383 | rdaddsc(rs, c); |
384 | else |
385 | text_rdaddwc(rs, text->alt, NULL); |
386 | sfree(c); |
387 | } else if (removeattr(text->type) == word_WhiteSpace) { |
388 | rdaddc(rs, ' '); |
389 | } else if (removeattr(text->type) == word_Quote) { |
390 | rdaddc(rs, quoteaux(text->aux) == quote_Open ? '`' : '\''); |
391 | /* FIXME: configurability */ |
392 | } |
393 | if (towordstyle(text->type) == word_Emph && |
394 | (attraux(text->aux) == attr_Last || |
395 | attraux(text->aux) == attr_Only)) |
396 | rdaddc(rs, '_'); /* FIXME: configurability */ |
397 | else if (towordstyle(text->type) == word_Code && |
398 | (attraux(text->aux) == attr_Last || |
399 | attraux(text->aux) == attr_Only)) |
400 | rdaddc(rs, '\''); /* FIXME: configurability */ |
401 | break; |
402 | } |
403 | } |
404 | |
405 | static int text_width(word *); |
406 | |
407 | static int text_width_list(word *text) { |
408 | int w = 0; |
409 | while (text) { |
410 | w += text_width(text); |
411 | text = text->next; |
412 | } |
413 | return w; |
414 | } |
415 | |
416 | static int text_width(word *text) { |
417 | switch (text->type) { |
418 | case word_HyperLink: |
419 | case word_HyperEnd: |
420 | case word_UpperXref: |
421 | case word_LowerXref: |
422 | case word_XrefEnd: |
423 | case word_IndexRef: |
424 | return 0; |
425 | |
426 | case word_Normal: |
427 | case word_Emph: |
428 | case word_Code: |
429 | case word_WeakCode: |
430 | return (((text->type == word_Emph || |
431 | text->type == word_Code) |
432 | ? (attraux(text->aux) == attr_Only ? 2 : |
433 | attraux(text->aux) == attr_Always ? 0 : 1) |
434 | : 0) + |
435 | (text_convert(text->text, NULL) ? |
436 | ustrlen(text->text) : |
437 | text_width_list(text->alt))); |
438 | |
439 | case word_WhiteSpace: |
440 | case word_EmphSpace: |
441 | case word_CodeSpace: |
442 | case word_WkCodeSpace: |
443 | case word_Quote: |
444 | case word_EmphQuote: |
445 | case word_CodeQuote: |
446 | case word_WkCodeQuote: |
447 | assert(text->type != word_CodeQuote && |
448 | text->type != word_WkCodeQuote); |
449 | return (((towordstyle(text->type) == word_Emph || |
450 | towordstyle(text->type) == word_Code) |
451 | ? (attraux(text->aux) == attr_Only ? 2 : |
452 | attraux(text->aux) == attr_Always ? 0 : 1) |
453 | : 0) + 1); |
454 | } |
455 | return 0; /* should never happen */ |
456 | } |
457 | |
458 | static void text_heading(FILE *fp, word *tprefix, word *nprefix, word *text, |
459 | alignstruct align, int indent, int width) { |
460 | rdstringc t = { 0, 0, NULL }; |
461 | int margin, length; |
462 | int firstlinewidth, wrapwidth; |
463 | wrappedline *wrapping, *p; |
464 | |
465 | if (align.just_numbers && nprefix) { |
466 | text_rdaddwc(&t, nprefix, NULL); |
467 | rdaddc(&t, ' '); /* FIXME: as below */ |
468 | } else if (!align.just_numbers && tprefix) { |
469 | text_rdaddwc(&t, tprefix, NULL); |
470 | rdaddsc(&t, ": "); /* FIXME: configurability */ |
471 | } |
472 | margin = length = (t.text ? strlen(t.text) : 0); |
473 | |
474 | if (align.align == LEFTPLUS) { |
475 | margin = indent - margin; |
476 | if (margin < 0) margin = 0; |
477 | firstlinewidth = indent + width - margin - length; |
478 | wrapwidth = width; |
479 | } else if (align.align == LEFT || align.align == CENTRE) { |
480 | margin = 0; |
481 | firstlinewidth = indent + width - length; |
482 | wrapwidth = indent + width; |
483 | } |
484 | |
485 | wrapping = wrap_para(text, firstlinewidth, wrapwidth, text_width); |
486 | for (p = wrapping; p; p = p->next) { |
487 | text_rdaddwc(&t, p->begin, p->end); |
488 | length = (t.text ? strlen(t.text) : 0); |
489 | if (align.align == CENTRE) { |
490 | margin = (indent + width - length)/2; |
491 | if (margin < 0) margin = 0; |
492 | } |
493 | fprintf(fp, "%*s%s\n", margin, "", t.text); |
494 | if (align.underline != L'\0') { |
495 | char *u, uc; |
496 | wchar_t uw[2]; |
497 | uw[0] = align.underline; uw[1] = L'\0'; |
498 | text_convert(uw, &u); |
499 | uc = u[0]; |
500 | sfree(u); |
501 | fprintf(fp, "%*s", margin, ""); |
502 | while (length--) |
503 | putc(uc, fp); |
504 | putc('\n', fp); |
505 | } |
506 | if (align.align == LEFTPLUS) |
507 | margin = indent; |
508 | else |
509 | margin = 0; |
510 | sfree(t.text); |
511 | t = empty_rdstringc; |
512 | } |
513 | wrap_free(wrapping); |
514 | putc('\n', fp); |
515 | |
516 | sfree(t.text); |
517 | } |
518 | |
/*
 * Write a horizontal rule to `fp': `indent' spaces, then `width'
 * hyphens, then a newline and a blank line.
 *
 * Zero or negative values of `indent' or `width' produce no
 * characters for that part, rather than counting down through
 * negative numbers.
 */
static void text_rule(FILE *fp, int indent, int width) {
    int i;

    for (i = 0; i < indent; i++)
        putc(' ', fp);
    for (i = 0; i < width; i++)
        putc('-', fp);                 /* FIXME: configurability! */
    putc('\n', fp);
    putc('\n', fp);
}
525 | |
526 | static void text_para(FILE *fp, word *prefix, char *prefixextra, word *text, |
527 | int indent, int extraindent, int width) { |
528 | wrappedline *wrapping, *p; |
529 | rdstringc pfx = { 0, 0, NULL }; |
530 | int e; |
531 | int firstlinewidth = width; |
532 | |
533 | if (prefix) { |
534 | text_rdaddwc(&pfx, prefix, NULL); |
535 | if (prefixextra) |
536 | rdaddsc(&pfx, prefixextra); |
537 | fprintf(fp, "%*s%s", indent, "", pfx.text); |
538 | e = extraindent - strlen(pfx.text); |
539 | if (e < 0) { |
540 | e = 0; |
541 | firstlinewidth -= e; |
542 | if (firstlinewidth < 0) { |
543 | e = indent + extraindent; |
544 | firstlinewidth = width; |
545 | fprintf(fp, "\n"); |
546 | } |
547 | } |
548 | sfree(pfx.text); |
549 | } else |
550 | e = indent + extraindent; |
551 | |
552 | wrapping = wrap_para(text, firstlinewidth, width, text_width); |
553 | for (p = wrapping; p; p = p->next) { |
554 | rdstringc t = { 0, 0, NULL }; |
555 | text_rdaddwc(&t, p->begin, p->end); |
556 | fprintf(fp, "%*s%s\n", e, "", t.text); |
557 | e = indent + extraindent; |
558 | sfree(t.text); |
559 | } |
560 | wrap_free(wrapping); |
561 | putc('\n', fp); |
562 | } |
563 | |
564 | static void text_codepara(FILE *fp, word *text, int indent, int width) { |
565 | for (; text; text = text->next) if (text->type == word_WeakCode) { |
566 | char *c; |
567 | text_convert(text->text, &c); |
568 | if (strlen(c) > (size_t)width) { |
569 | /* FIXME: warn */ |
570 | } |
571 | fprintf(fp, "%*s%s\n", indent, "", c); |
572 | sfree(c); |
573 | } |
574 | |
575 | putc('\n', fp); |
576 | } |
577 | |
578 | static void text_versionid(FILE *fp, word *text) { |
579 | rdstringc t = { 0, 0, NULL }; |
580 | |
581 | rdaddc(&t, '['); /* FIXME: configurability */ |
582 | text_rdaddwc(&t, text, NULL); |
583 | rdaddc(&t, ']'); /* FIXME: configurability */ |
584 | |
585 | fprintf(fp, "%s\n", t.text); |
586 | sfree(t.text); |
587 | } |