d7482997 |
1 | /* |
2 | * xhtml backend for Halibut |
3 | * (initial implementation by James Aylett) |
4 | * |
5 | * Still to do: |
6 | * |
7 | * +++ doesn't handle non-breaking hyphens. Not sure how to yet. |
8 | * +++ entity names (from a file -- ideally supply normal SGML files) |
9 | * +++ configuration directive to file split where the current layout |
10 | * code wouldn't. Needs changes to _ponder_layout() and _do_paras(), |
11 | * perhaps others. |
12 | * |
13 | * Limitations: |
14 | * |
15 | * +++ biblio/index references target the nearest section marker, rather |
16 | * than having a dedicated target themselves. In large bibliographies |
17 | * this will cause problems. (The solution is to fake up a response |
18 | * from xhtml_find_section(), probably linking it into the sections |
19 | * chain just in case we need it again, and to make freeing it up |
20 | * easier.) docsrc.pl used to work as we do, however, and SGT agrees that |
21 | * this is acceptable for now. |
22 | * +++ can't cope with leaf-level == 0. It's all to do with the |
23 | * top-level file not being normal, probably not even having a valid |
24 | * section level, and stuff like that. I question whether this is an |
25 | * issue, frankly; small manuals that fit on one page should probably |
26 | * not be written in halibut at all. |
27 | */ |
28 | |
29 | #include <stdio.h> |
30 | #include <stdlib.h> |
31 | #include <assert.h> |
32 | #include "halibut.h" |
33 | |
34 | struct xhtmlsection_Struct { |
35 | struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */ |
36 | struct xhtmlsection_Struct *child; /* NULL if split across files */ |
37 | struct xhtmlsection_Struct *parent; /* NULL if split across files */ |
38 | struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */ |
39 | paragraph *para; |
40 | struct xhtmlfile_Struct *file; /* which file is this a part of? */ |
41 | char *fragment; /* fragment id within the file */ |
42 | int level; |
43 | }; |
44 | |
/*
 * One output file. Files form a tree via next/child/parent.
 */
struct xhtmlfile_Struct {
    struct xhtmlfile_Struct *next;        /* next sibling file */
    struct xhtmlfile_Struct *child;       /* first child file */
    struct xhtmlfile_Struct *parent;      /* parent file */
    char *filename;                       /* name of the file written to disk */
    struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
    int is_leaf;                          /* non-zero if this file has no child files */
};
53 | |
54 | typedef struct xhtmlsection_Struct xhtmlsection; |
55 | typedef struct xhtmlfile_Struct xhtmlfile; |
56 | typedef struct xhtmlindex_Struct xhtmlindex; |
57 | |
58 | struct xhtmlindex_Struct { |
59 | int nsection; |
60 | int size; |
61 | xhtmlsection **sections; |
62 | }; |
63 | |
/*
 * Backend configuration, filled in by xhtml_configure().
 */
typedef struct {
    int contents_depth[6];      /* contents depth limit, indexed by section level 0..5 */
    int leaf_contains_contents; /* boolean: may leaf files carry a contents list? */
    int leaf_level;             /* section level at which files become leaves */
    int leaf_smallest_contents; /* minimum entry count for a leaf contents list */
    int include_version_id;     /* boolean */

    /* Text settings; each stays NULL unless configured. */
    wchar_t *author;
    wchar_t *description;
    wchar_t *head_end;
    wchar_t *body;
    wchar_t *body_start;
    wchar_t *body_end;
    wchar_t *address_start;
    wchar_t *address_end;
    wchar_t *nav_attrs;

    int suppress_address;       /* boolean */
} xhtmlconfig;
74 | |
75 | /*static void xhtml_level(paragraph *, int); |
76 | static void xhtml_level_0(paragraph *); |
77 | static void xhtml_docontents(FILE *, paragraph *, int); |
78 | static void xhtml_dosections(FILE *, paragraph *, int); |
79 | static void xhtml_dobody(FILE *, paragraph *, int);*/ |
80 | |
81 | static void xhtml_doheader(FILE *, word *); |
82 | static void xhtml_dofooter(FILE *); |
83 | static void xhtml_versionid(FILE *, word *, int); |
84 | |
85 | static void xhtml_utostr(wchar_t *, char **); |
86 | static int xhtml_para_level(paragraph *); |
87 | static int xhtml_reservedchar(int); |
88 | |
89 | static int xhtml_convert(wchar_t *, char **, int); |
90 | static void xhtml_rdaddwc(rdstringc *, word *, word *); |
91 | static void xhtml_para(FILE *, word *); |
92 | static void xhtml_codepara(FILE *, word *); |
93 | static void xhtml_heading(FILE *, paragraph *); |
94 | |
95 | /* File-global variables are much easier than passing these things |
96 | * all over the place. Evil, but easier. We can replace this with a single |
97 | * structure at some point. |
98 | */ |
99 | static xhtmlconfig conf; |
100 | static keywordlist *keywords; |
101 | static indexdata *idx; |
102 | static xhtmlfile *topfile; |
103 | static xhtmlsection *topsection; |
104 | static paragraph *sourceparas; |
105 | static xhtmlfile *lastfile; |
106 | static xhtmlfile *xhtml_last_file = NULL; |
107 | static int last_level=-1; |
108 | static xhtmlsection *currentsection; |
109 | |
110 | static xhtmlconfig xhtml_configure(paragraph *source) |
111 | { |
112 | xhtmlconfig ret; |
113 | |
114 | /* |
115 | * Defaults. |
116 | */ |
117 | ret.contents_depth[0] = 2; |
118 | ret.contents_depth[1] = 3; |
119 | ret.contents_depth[2] = 4; |
120 | ret.contents_depth[3] = 5; |
121 | ret.contents_depth[4] = 6; |
122 | ret.contents_depth[5] = 7; |
123 | ret.leaf_level = 2; |
124 | ret.leaf_smallest_contents = 4; |
125 | ret.leaf_contains_contents = FALSE; |
126 | ret.include_version_id = TRUE; |
127 | ret.author = NULL; |
128 | ret.description = NULL; |
129 | ret.head_end = NULL; |
130 | ret.body = NULL; |
131 | ret.body_start = NULL; |
132 | ret.body_end = NULL; |
133 | ret.address_start = NULL; |
134 | ret.address_end = NULL; |
135 | ret.nav_attrs = NULL; |
136 | ret.suppress_address = FALSE; |
137 | |
138 | for (; source; source = source->next) |
139 | { |
140 | if (source->type == para_Config) |
141 | { |
142 | if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) { |
143 | ret.contents_depth[0] = utoi(uadv(source->keyword)); |
144 | } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) { |
145 | ret.contents_depth[1] = utoi(uadv(source->keyword)); |
146 | } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) { |
147 | ret.contents_depth[2] = utoi(uadv(source->keyword)); |
148 | } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) { |
149 | ret.contents_depth[3] = utoi(uadv(source->keyword)); |
150 | } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) { |
151 | ret.contents_depth[4] = utoi(uadv(source->keyword)); |
152 | } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) { |
153 | ret.contents_depth[5] = utoi(uadv(source->keyword)); |
154 | } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) { |
155 | ret.leaf_level = utoi(uadv(source->keyword)); |
d7482997 |
156 | } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) { |
157 | ret.leaf_smallest_contents = utoi(uadv(source->keyword)); |
158 | } else if (!ustricmp(source->keyword, L"xhtml-versionid")) { |
159 | ret.include_version_id = utob(uadv(source->keyword)); |
160 | } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) { |
161 | ret.leaf_contains_contents = utob(uadv(source->keyword)); |
162 | } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) { |
163 | ret.suppress_address = utob(uadv(source->keyword)); |
164 | } else if (!ustricmp(source->keyword, L"xhtml-author")) { |
165 | ret.author = uadv(source->keyword); |
166 | } else if (!ustricmp(source->keyword, L"xhtml-description")) { |
167 | ret.description = uadv(source->keyword); |
168 | } else if (!ustricmp(source->keyword, L"xhtml-head-end")) { |
169 | ret.head_end = uadv(source->keyword); |
170 | } else if (!ustricmp(source->keyword, L"xhtml-body-start")) { |
171 | ret.body_start = uadv(source->keyword); |
172 | } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) { |
173 | ret.body = uadv(source->keyword); |
174 | } else if (!ustricmp(source->keyword, L"xhtml-body-end")) { |
175 | ret.body_end = uadv(source->keyword); |
176 | } else if (!ustricmp(source->keyword, L"xhtml-address-start")) { |
177 | ret.address_start = uadv(source->keyword); |
178 | } else if (!ustricmp(source->keyword, L"xhtml-address-end")) { |
179 | ret.address_end = uadv(source->keyword); |
180 | } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) { |
181 | ret.nav_attrs = uadv(source->keyword); |
182 | } |
183 | } |
184 | } |
185 | |
186 | /* printf(" !!! leaf_level = %i\n", ret.leaf_level); |
187 | printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]); |
188 | printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]); |
189 | printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]); |
190 | printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]); |
191 | printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]); |
192 | printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]); |
193 | printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/ |
194 | return ret; |
195 | } |
196 | |
197 | static xhtmlsection *xhtml_new_section(xhtmlsection *last) |
198 | { |
199 | xhtmlsection *ret = mknew(xhtmlsection); |
200 | ret->next=NULL; |
201 | ret->child=NULL; |
202 | ret->parent=NULL; |
203 | ret->chain=last; |
204 | ret->para=NULL; |
205 | ret->file=NULL; |
206 | ret->fragment=NULL; |
207 | ret->level=-1; /* marker: end of chain */ |
208 | return ret; |
209 | } |
210 | |
211 | /* Returns NULL or the section that marks that paragraph */ |
212 | static xhtmlsection *xhtml_find_section(paragraph *p) |
213 | { |
214 | xhtmlsection *ret = topsection; |
215 | if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */ |
216 | paragraph *p2 = sourceparas; |
217 | paragraph *p3 = NULL; |
218 | while (p2 && p2!=p) { |
219 | if (xhtml_para_level(p2)!=-1) { |
220 | p3 = p2; |
221 | } |
222 | p2=p2->next; |
223 | } |
224 | if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */ |
225 | /* Note that this can happen, if you have a cross-reference to before the first chapter starts. |
226 | * So don't do that, then. |
227 | */ |
228 | return NULL; |
229 | } |
230 | p=p3; |
231 | } |
232 | while (ret && ret->para != p) { |
233 | /* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/ |
234 | ret=ret->chain; |
235 | } |
236 | return ret; |
237 | } |
238 | |
239 | static xhtmlfile *xhtml_new_file(xhtmlsection *sect) |
240 | { |
241 | xhtmlfile *ret = mknew(xhtmlfile); |
242 | |
243 | ret->next=NULL; |
244 | ret->child=NULL; |
245 | ret->parent=NULL; |
246 | ret->filename=NULL; |
247 | ret->sections=sect; |
248 | ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level); |
249 | if (sect==NULL) { |
250 | if (conf.leaf_level==0) { /* currently unused */ |
251 | #define FILENAME_MANUAL "Manual.html" |
252 | #define FILENAME_CONTENTS "Contents.html" |
253 | ret->filename = smalloc(strlen(FILENAME_MANUAL)+1); |
254 | sprintf(ret->filename, FILENAME_MANUAL); |
255 | } else { |
256 | ret->filename = smalloc(strlen(FILENAME_CONTENTS)+1); |
257 | sprintf(ret->filename, FILENAME_CONTENTS); |
258 | } |
259 | } else { |
260 | paragraph *p = sect->para; |
261 | rdstringc fname_c = { 0, 0, NULL }; |
262 | char *c; |
263 | word *w; |
264 | for (w=(p->kwtext)?(p->kwtext):(p->words); w; w=w->next) |
265 | { |
266 | switch (removeattr(w->type)) |
267 | { |
268 | case word_Normal: |
269 | /*case word_Emph: |
270 | case word_Code: |
271 | case word_WeakCode:*/ |
272 | xhtml_utostr(w->text, &c); |
273 | rdaddsc(&fname_c,c); |
274 | sfree(c); |
275 | break; |
276 | } |
277 | } |
278 | rdaddsc(&fname_c, ".html"); |
279 | ret->filename = rdtrimc(&fname_c); |
280 | } |
281 | /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/ |
282 | return ret; |
283 | } |
284 | |
285 | /* |
286 | * Walk the tree fixing up files which are actually leaf (ie |
287 | * have no children) but aren't at leaf level, so they have the |
288 | * leaf flag set. |
289 | */ |
290 | void xhtml_fixup_layout(xhtmlfile* file) |
291 | { |
292 | if (file->child==NULL) { |
293 | file->is_leaf = TRUE; |
294 | } else { |
295 | xhtml_fixup_layout(file->child); |
296 | } |
297 | if (file->next) |
298 | xhtml_fixup_layout(file->next); |
299 | } |
300 | |
301 | /* |
302 | * Create the tree structure so we know where everything goes. |
303 | * Method: |
304 | * |
305 | * Ignoring file splitting, we have three choices with each new section: |
306 | * |
307 | * +-----------------+-----------------+ |
308 | * | | | |
309 | * X +----X----+ (1) |
310 | * | | |
311 | * Y (3) |
312 | * | |
313 | * (3) |
314 | * |
315 | * Y is the last section we added (currentsect). |
316 | * If sect is the section we want to add, then: |
317 | * |
318 | * (1) if sect->level < currentsect->level |
319 | * (2) if sect->level == currentsect->level |
320 | * (3) if sect->level > currentsect->level |
321 | * |
322 | * This requires the constraint that you never skip section numbers |
323 | * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing). |
324 | * |
325 | * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change |
326 | * more than one level at a time. Lots of asserts, and probably part of |
327 | * the algorithm here, rely on this being true. (It currently isn't |
328 | * enforced by halibut, however.) |
329 | * |
330 | * File splitting makes this harder. For instance, say we added at (3) |
331 | * above and now need to add another section. We are splitting at level |
332 | * 2, ie the level of Y. Z is the last section we added: |
333 | * |
334 | * +-----------------+-----------------+ |
335 | * | | | |
336 | * X +----X----+ (1) |
337 | * | | |
338 | * +----Y----+ (1) |
339 | * | | |
340 | * Z (2) |
341 | * | |
342 | * (3) |
343 | * |
344 | * The (1) case is now split; we need to search upwards to find where |
345 | * to actually link in. The other two cases remain the same (and will |
346 | * always be like this). |
347 | * |
348 | * Whichever case applies, the decision of whether to split to a new |
349 | * file is always made on the same condition: is the level of this |
350 | * section higher than the leaf_level configuration value (same file) |
351 | * or not (new file)? |
352 | * |
353 | * Treating the cases backwards: |
354 | * |
355 | * (3) same file if sect->level > conf.leaf_level, otherwise new file |
356 | * |
357 | * if in the same file, currentsect->child points to sect |
358 | * otherwise the linking is done through the file tree (which works |
359 | * in more or less the same way, ie currentfile->child points to |
360 | * the new file) |
361 | * |
362 | * (2) same file if sect->level > conf.leaf_level, otherwise new file |
363 | * |
364 | * if in the same file, currentsect->next points to sect |
365 | * otherwise file linking and currentfile->next points to the new |
366 | * file (we know that Z must have caused a new file to be created) |
367 | * |
368 | * (1) same file if sect->level > conf.leaf_level, otherwise new file |
369 | * |
370 | * this is actually effectively the same case as (2) here, |
371 | * except that we first have to travel up the sections to figure |
372 | * out which section this new one will be a sibling of. In doing |
373 | * so, we may disappear off the top of a file and have to go up |
374 | * to its parent in the file tree. |
375 | * |
376 | */ |
377 | static void xhtml_ponder_layout(paragraph *p) |
378 | { |
379 | xhtmlsection *lastsection; |
380 | xhtmlsection *currentsect; |
381 | xhtmlfile *currentfile; |
382 | |
383 | lastfile = NULL; |
384 | topsection = xhtml_new_section(NULL); |
385 | topfile = xhtml_new_file(NULL); |
386 | lastsection = topsection; |
387 | currentfile = topfile; |
388 | currentsect = topsection; |
389 | |
d2e74722 |
390 | if (conf.leaf_level == 0) { |
391 | topfile->is_leaf = 1; |
392 | topfile->sections = topsection; |
393 | topsection->file = topfile; |
394 | } |
395 | |
d7482997 |
396 | for (; p; p=p->next) |
397 | { |
398 | int level = xhtml_para_level(p); |
399 | if (level>0) /* actually a section */ |
400 | { |
401 | xhtmlsection *sect; |
402 | word *w; |
403 | char *c; |
404 | rdstringc fname_c = { 0, 0, NULL }; |
405 | |
406 | sect = xhtml_new_section(lastsection); |
407 | lastsection = sect; |
408 | sect->para = p; |
409 | for (w=(p->kwtext2)?(p->kwtext2):(p->words); w; w=w->next) /* kwtext2 because we want numbers only! */ |
410 | { |
411 | switch (removeattr(w->type)) |
412 | { |
413 | case word_Normal: |
414 | /*case word_Emph: |
415 | case word_Code: |
416 | case word_WeakCode:*/ |
417 | xhtml_utostr(w->text, &c); |
418 | rdaddsc(&fname_c,c); |
419 | sfree(c); |
420 | break; |
421 | } |
422 | } |
423 | /* rdaddsc(&fname_c, ".html");*/ |
424 | sect->fragment = rdtrimc(&fname_c); |
425 | sect->level = level; |
426 | /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/ |
427 | |
428 | if (level>currentsect->level) { /* case (3) */ |
429 | if (level>conf.leaf_level) { /* same file */ |
430 | assert(currentfile->is_leaf); |
431 | currentsect->child = sect; |
432 | sect->parent=currentsect; |
433 | sect->file=currentfile; |
434 | /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/ |
435 | currentsect=sect; |
436 | } else { /* new file */ |
437 | xhtmlfile *file = xhtml_new_file(sect); |
438 | assert(!currentfile->is_leaf); |
439 | currentfile->child=file; |
440 | sect->file=file; |
441 | file->parent=currentfile; |
442 | /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/ |
443 | currentfile=file; |
444 | currentsect=sect; |
445 | } |
446 | } else if (level >= currentsect->file->sections->level) { |
447 | /* Case (1) or (2) *AND* still under the section that starts |
448 | * the current file. |
449 | * |
450 | * I'm not convinced that this couldn't be rolled in with the |
451 | * final else {} leg further down. It seems a lot of effort |
452 | * this way. |
453 | */ |
454 | if (level>conf.leaf_level) { /* stick within the same file */ |
455 | assert(currentfile->is_leaf); |
456 | sect->file = currentfile; |
457 | while (currentsect && currentsect->level > level && |
458 | currentsect->file==currentsect->parent->file) { |
459 | currentsect = currentsect->parent; |
460 | } |
461 | assert(currentsect); |
462 | currentsect->next = sect; |
463 | assert(currentsect->level == sect->level); |
464 | sect->parent = currentsect->parent; |
465 | currentsect = sect; |
466 | /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/ |
467 | } else { /* new file */ |
468 | xhtmlfile *file = xhtml_new_file(sect); |
469 | sect->file=file; |
470 | currentfile->next=file; |
471 | file->parent=currentfile->parent; |
472 | file->is_leaf=(level==conf.leaf_level); |
473 | file->sections=sect; |
474 | /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/ |
475 | currentfile=file; |
476 | currentsect=sect; |
477 | } |
478 | } else { /* Case (1) or (2) and we must move up the file tree first */ |
479 | /* this loop is now probably irrelevant - we know we can't connect |
480 | * to anything in the current file */ |
481 | while (currentsect && level<currentsect->level) { |
482 | currentsect=currentsect->parent; |
483 | if (currentsect) { |
484 | /* printf(" * up one level to '%s'\n", currentsect->fragment);*/ |
485 | } else { |
486 | /* printf(" * up one level (off top of current file)\n");*/ |
487 | } |
488 | } |
489 | if (currentsect) { |
490 | /* I'm pretty sure this can now never fire */ |
491 | assert(currentfile->is_leaf); |
492 | /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/ |
493 | sect->file = currentfile; |
494 | currentsect->next=sect; |
495 | currentsect=sect; |
496 | } else { /* find a file we can attach to */ |
497 | while (currentfile && currentfile->sections && level<currentfile->sections->level) { |
498 | currentfile=currentfile->parent; |
499 | if (currentfile) { |
500 | /* printf(" * up one file level to '%s'\n", currentfile->filename);*/ |
501 | } else { |
502 | /* printf(" * up one file level (off top of tree)\n");*/ |
503 | } |
504 | } |
505 | if (currentfile) { /* new file (we had to skip up a file to |
506 | get here, so we must be dealing with a |
507 | level no lower than the configured |
508 | leaf_level */ |
509 | xhtmlfile *file = xhtml_new_file(sect); |
510 | currentfile->next=file; |
511 | sect->file=file; |
512 | file->parent=currentfile->parent; |
513 | file->is_leaf=(level==conf.leaf_level); |
514 | file->sections=sect; |
515 | /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/ |
516 | currentfile=file; |
517 | currentsect=sect; |
518 | } else { |
519 | fatal(err_whatever, "Ran off the top trying to connect sibling: strange document."); |
520 | } |
521 | } |
522 | } |
523 | } |
524 | } |
525 | topsection = lastsection; /* get correct end of the chain */ |
526 | xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */ |
527 | } |
528 | |
529 | static void xhtml_do_index(); |
530 | static void xhtml_do_file(xhtmlfile *file); |
531 | static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform); |
532 | static void xhtml_do_paras(FILE *fp, paragraph *p); |
533 | static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit); |
534 | static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit); |
535 | static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit); |
536 | static int xhtml_do_contents(FILE *fp, xhtmlfile *file); |
537 | static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file); |
538 | static void xhtml_do_sections(FILE *fp, xhtmlsection *sections); |
539 | |
540 | /* |
541 | * Do all the files in this structure. |
542 | */ |
543 | static void xhtml_do_files(xhtmlfile *file) |
544 | { |
545 | xhtml_do_file(file); |
546 | if (file->child) |
547 | xhtml_do_files(file->child); |
548 | if (file->next) |
549 | xhtml_do_files(file->next); |
550 | } |
551 | |
552 | /* |
553 | * Free up all memory used by the file tree from 'xfile' downwards |
554 | */ |
555 | static void xhtml_free_file(xhtmlfile* xfile) |
556 | { |
557 | if (xfile==NULL) { |
558 | return; |
559 | } |
560 | |
561 | if (xfile->filename) { |
562 | sfree(xfile->filename); |
563 | } |
564 | xhtml_free_file(xfile->child); |
565 | xhtml_free_file(xfile->next); |
566 | sfree(xfile); |
567 | } |
568 | |
569 | /* |
570 | * Main function. |
571 | */ |
572 | void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords, |
573 | indexdata *in_idx) |
574 | { |
575 | /* int i;*/ |
576 | indexentry *ientry; |
577 | int ti; |
578 | xhtmlsection *xsect; |
579 | |
580 | sourceparas = sourceform; |
581 | conf = xhtml_configure(sourceform); |
582 | keywords = in_keywords; |
583 | idx = in_idx; |
584 | |
585 | /* Clear up the index entries backend data pointers */ |
586 | for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) { |
587 | ientry->backend_data=NULL; |
588 | } |
589 | |
590 | xhtml_ponder_layout(sourceform); |
591 | |
592 | /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */ |
593 | /* xhtml_level_0(sourceform); |
594 | for (i=1; i<=conf.leaf_level; i++) |
595 | { |
596 | xhtml_level(sourceform, i); |
597 | }*/ |
598 | |
599 | /* new system ... (writes to *.html, but isn't fully trusted) */ |
600 | xhtml_do_top_file(topfile, sourceform); |
601 | assert(!topfile->next); /* shouldn't have a sibling at all */ |
d2e74722 |
602 | if (topfile->child) { |
603 | xhtml_do_files(topfile->child); |
604 | xhtml_do_index(); |
605 | } |
d7482997 |
606 | |
607 | /* release file, section, index data structures */ |
608 | xsect = topsection; |
609 | while (xsect) { |
610 | xhtmlsection *tmp = xsect->chain; |
611 | if (xsect->fragment) { |
612 | sfree(xsect->fragment); |
613 | } |
614 | sfree(xsect); |
615 | xsect = tmp; |
616 | } |
617 | xhtml_free_file(topfile); |
618 | for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) { |
619 | if (ientry->backend_data!=NULL) { |
620 | xhtmlindex *xi = (xhtmlindex*) ientry->backend_data; |
621 | if (xi->sections!=NULL) { |
622 | sfree(xi->sections); |
623 | } |
624 | sfree(xi); |
625 | } |
626 | ientry->backend_data = NULL; |
627 | } |
628 | } |
629 | |
630 | static int xhtml_para_level(paragraph *p) |
631 | { |
632 | switch (p->type) |
633 | { |
634 | case para_UnnumberedChapter: |
635 | case para_Chapter: |
636 | case para_Appendix: |
d2e74722 |
637 | case para_Title: |
d7482997 |
638 | return 1; |
639 | break; |
640 | /* case para_BiblioCited: |
641 | return 2; |
642 | break;*/ |
643 | case para_Heading: |
644 | case para_Subsect: |
645 | return p->aux+2; |
646 | break; |
647 | default: |
648 | return -1; |
649 | break; |
650 | } |
651 | } |
652 | |
653 | static char* xhtml_index_filename = "IndexPage.html"; |
654 | |
655 | /* Output the nav links for the current file. |
656 | * file == NULL means we're doing the index |
657 | */ |
658 | static void xhtml_donavlinks(FILE *fp, xhtmlfile *file) |
659 | { |
660 | xhtmlfile *xhtml_next_file = NULL; |
661 | fprintf(fp, "<p"); |
662 | if (conf.nav_attrs!=NULL) { |
663 | fprintf(fp, " %ls>", conf.nav_attrs); |
664 | } else { |
665 | fprintf(fp, ">"); |
666 | } |
667 | if (xhtml_last_file==NULL) { |
668 | fprintf(fp, "Previous | "); |
669 | } else { |
670 | fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename); |
671 | } |
672 | fprintf(fp, "<a href='Contents.html'>Contents</a> | "); |
673 | if (file != NULL) { /* otherwise we're doing nav links for the index */ |
674 | if (xhtml_next_file==NULL) |
675 | xhtml_next_file = file->child; |
676 | if (xhtml_next_file==NULL) |
677 | xhtml_next_file = file->next; |
678 | if (xhtml_next_file==NULL) |
679 | xhtml_next_file = file->parent->next; |
680 | } |
681 | if (xhtml_next_file==NULL) { |
682 | if (file==NULL) { /* index, so no next file */ |
683 | fprintf(fp, "Next "); |
684 | } else { |
685 | fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename); |
686 | } |
687 | } else { |
688 | fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename); |
689 | } |
690 | fprintf(fp, "</p>\n"); |
691 | } |
692 | |
693 | /* Write out the index file */ |
d2e74722 |
694 | static void xhtml_do_index_body(FILE *fp) |
d7482997 |
695 | { |
d7482997 |
696 | indexentry *y; |
697 | int ti; |
d7482997 |
698 | |
d2e74722 |
699 | if (count234(idx->entries) == 0) |
700 | return; /* don't write anything at all */ |
d7482997 |
701 | |
702 | fprintf(fp, "<dl>\n"); |
703 | /* iterate over idx->entries using the tree functions and display everything */ |
704 | for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) { |
705 | if (y->backend_data) { |
706 | int i; |
707 | xhtmlindex *xi; |
708 | |
709 | fprintf(fp, "<dt>"); |
710 | xhtml_para(fp, y->text); |
711 | fprintf(fp, "</dt>\n<dd>"); |
712 | |
713 | xi = (xhtmlindex*) y->backend_data; |
714 | for (i=0; i<xi->nsection; i++) { |
715 | xhtmlsection *sect = xi->sections[i]; |
716 | if (sect) { |
717 | fprintf(fp, "<a href='%s#%s'>", sect->file->filename, sect->fragment); |
718 | if (sect->para->kwtext) { |
719 | xhtml_para(fp, sect->para->kwtext); |
720 | } else if (sect->para->words) { |
721 | xhtml_para(fp, sect->para->words); |
722 | } |
723 | fprintf(fp, "</a>"); |
724 | if (i+1<xi->nsection) { |
725 | fprintf(fp, ", "); |
726 | } |
727 | } |
728 | } |
729 | fprintf(fp, "</dd>\n"); |
730 | } |
731 | } |
732 | fprintf(fp, "</dl>\n"); |
d2e74722 |
733 | } |
734 | static void xhtml_do_index() |
735 | { |
736 | word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", { NULL, 0, 0} }; |
737 | FILE *fp = fopen(xhtml_index_filename, "w"); |
738 | |
739 | if (fp==NULL) |
740 | fatal(err_cantopenw, xhtml_index_filename); |
741 | xhtml_doheader(fp, &temp_word); |
742 | xhtml_donavlinks(fp, NULL); |
743 | |
744 | xhtml_do_index_body(fp); |
d7482997 |
745 | |
746 | xhtml_donavlinks(fp, NULL); |
747 | xhtml_dofooter(fp); |
748 | fclose(fp); |
749 | } |
750 | |
751 | /* Output the given file. This includes whatever contents at beginning and end, etc. etc. */ |
752 | static void xhtml_do_file(xhtmlfile *file) |
753 | { |
754 | FILE *fp = fopen(file->filename, "w"); |
755 | if (fp==NULL) |
756 | fatal(err_cantopenw, file->filename); |
757 | |
758 | if (file->sections->para->words) { |
759 | xhtml_doheader(fp, file->sections->para->words); |
760 | } else if (file->sections->para->kwtext) { |
761 | xhtml_doheader(fp, file->sections->para->kwtext); |
762 | } else { |
763 | xhtml_doheader(fp, NULL); |
764 | } |
765 | |
766 | xhtml_donavlinks(fp, file); |
767 | |
d2e74722 |
768 | if (file->is_leaf && conf.leaf_contains_contents && |
769 | xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents) |
d7482997 |
770 | xhtml_do_contents(fp, file); |
771 | xhtml_do_sections(fp, file->sections); |
772 | if (!file->is_leaf) |
773 | xhtml_do_naked_contents(fp, file); |
774 | |
775 | xhtml_donavlinks(fp, file); |
776 | |
777 | xhtml_dofooter(fp); |
778 | fclose(fp); |
779 | |
780 | xhtml_last_file = file; |
781 | } |
782 | |
783 | /* Output the top-level file. */ |
784 | static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform) |
785 | { |
786 | paragraph *p; |
787 | int done=FALSE; |
788 | FILE *fp = fopen(file->filename, "w"); |
789 | if (fp==NULL) |
790 | fatal(err_cantopenw, file->filename); |
791 | |
792 | /* Do the title -- only one allowed */ |
793 | for (p = sourceform; p && !done; p = p->next) |
794 | { |
795 | if (p->type == para_Title) |
796 | { |
797 | xhtml_doheader(fp, p->words); |
798 | done=TRUE; |
799 | } |
800 | } |
801 | if (!done) |
802 | xhtml_doheader(fp, NULL /* Eek! */); |
803 | |
d2e74722 |
804 | /* |
805 | * Display the title. |
806 | */ |
807 | for (p = sourceform; p; p = p->next) |
808 | { |
809 | if (p->type == para_Title) { |
810 | xhtml_heading(fp, p); |
811 | break; |
812 | } |
813 | } |
814 | |
d7482997 |
815 | /* Do the preamble and copyright */ |
816 | for (p = sourceform; p; p = p->next) |
817 | { |
818 | if (p->type == para_Preamble) |
819 | { |
820 | fprintf(fp, "<p>"); |
821 | xhtml_para(fp, p->words); |
822 | fprintf(fp, "</p>\n"); |
823 | } |
824 | } |
825 | for (p = sourceform; p; p = p->next) |
826 | { |
827 | if (p->type == para_Copyright) |
828 | { |
829 | fprintf(fp, "<p>"); |
830 | xhtml_para(fp, p->words); |
831 | fprintf(fp, "</p>\n"); |
832 | } |
833 | } |
834 | |
835 | xhtml_do_contents(fp, file); |
836 | xhtml_do_sections(fp, file->sections); |
d2e74722 |
837 | |
838 | if (count234(idx->entries) > 0) { |
839 | fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n"); |
840 | xhtml_do_index_body(fp); |
841 | } |
842 | |
d7482997 |
843 | xhtml_dofooter(fp); |
844 | fclose(fp); |
845 | } |
846 | |
/*
 * Convert the Unicode string 'in' to a newly allocated ASCII string
 * stored in '*out'. Characters outside the range 32..126 are replaced
 * by '?'. The caller is responsible for freeing the result.
 */
static void xhtml_utostr(wchar_t *in, char **out)
{
    int len = ustrlen(in);
    char *buf = smalloc(len + 1);
    int k = 0;

    while (k < len) {
        wchar_t wc = in[k];

        buf[k] = (wc < 32 || wc > 126) ? '?' : (char)wc;
        k++;
    }
    buf[k] = 0;
    *out = buf;
}
864 | |
865 | /* |
866 | * Write contents for the given file, and subfiles, down to |
867 | * the appropriate contents depth. Returns the number of |
868 | * entries written. |
869 | */ |
870 | static int xhtml_do_contents(FILE *fp, xhtmlfile *file) |
871 | { |
872 | int level, limit, start_level, count = 0; |
873 | if (!file) |
874 | return 0; |
875 | |
876 | level = (file->sections)?(file->sections->level):(0); |
877 | limit = conf.contents_depth[(level>5)?(5):(level)]; |
878 | start_level = (file->is_leaf) ? (level-1) : (level); |
879 | last_level = start_level; |
880 | |
881 | count += xhtml_do_contents_section_limit(fp, file->sections, limit); |
882 | count += xhtml_do_contents_limit(fp, file->child, limit); |
883 | if (fp!=NULL) { |
884 | while (last_level > start_level) { |
885 | last_level--; |
886 | fprintf(fp, "</ul>\n"); |
887 | } |
888 | } |
889 | return count; |
890 | } |
891 | |
892 | /* As above, but doesn't do anything in the current file */ |
893 | static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file) |
894 | { |
895 | int level, limit, start_level, count = 0; |
896 | if (!file) |
897 | return 0; |
898 | |
899 | level = (file->sections)?(file->sections->level):(0); |
900 | limit = conf.contents_depth[(level>5)?(5):(level)]; |
901 | start_level = (file->is_leaf) ? (level-1) : (level); |
902 | last_level = start_level; |
903 | |
904 | count = xhtml_do_contents_limit(fp, file->child, limit); |
905 | if (fp!=NULL) { |
906 | while (last_level > start_level) { |
907 | last_level--; |
908 | fprintf(fp, "</ul>\n"); |
909 | } |
910 | } |
911 | return count; |
912 | } |
913 | |
914 | /* |
915 | * Write contents for the given file, children, and siblings, down to |
916 | * given limit contents depth. |
917 | */ |
918 | static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit) |
919 | { |
920 | int count = 0; |
921 | while (file) { |
922 | count += xhtml_do_contents_section_limit(fp, file->sections, limit); |
923 | count += xhtml_do_contents_limit(fp, file->child, limit); |
924 | file = file->next; |
925 | } |
926 | return count; |
927 | } |
928 | |
929 | /* |
930 | * Write contents entries for the given section tree, down to the |
931 | * limit contents depth. |
932 | */ |
933 | static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit) |
934 | { |
935 | int count = 0; |
936 | while (section) { |
937 | if (!xhtml_add_contents_entry(fp, section, limit)) |
938 | return 0; |
939 | else |
940 | count++; |
941 | count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); |
942 | section = section->next; |
943 | } |
944 | return count; |
945 | } |
946 | |
947 | /* |
948 | * Write contents entries for the given section tree, down to the |
949 | * limit contents depth. |
950 | */ |
951 | static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit) |
952 | { |
953 | int count = 0; |
954 | if (!section) |
955 | return 0; |
956 | xhtml_add_contents_entry(fp, section, limit); |
957 | count=1; |
958 | count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); |
959 | /* section=section->child; |
960 | while (section && xhtml_add_contents_entry(fp, section, limit)) { |
961 | section = section->next; |
962 | }*/ |
963 | return count; |
964 | } |
965 | |
966 | /* |
967 | * Add a section entry, unless we're exceeding the limit, in which |
968 | * case return FALSE (otherwise return TRUE). |
969 | */ |
970 | static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit) |
971 | { |
972 | if (!section || section->level > limit) |
973 | return FALSE; |
d2e74722 |
974 | if (fp==NULL || !section->parent) |
d7482997 |
975 | return TRUE; |
976 | while (last_level > section->level) { |
977 | last_level--; |
978 | fprintf(fp, "</ul>\n"); |
979 | } |
980 | while (last_level < section->level) { |
981 | last_level++; |
982 | fprintf(fp, "<ul>\n"); |
983 | } |
984 | fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment); |
985 | if (section->para->kwtext) { |
986 | xhtml_para(fp, section->para->kwtext); |
987 | if (section->para->words) { |
988 | fprintf(fp, ": "); |
989 | } |
990 | } |
991 | if (section->para->words) { |
992 | xhtml_para(fp, section->para->words); |
993 | } |
994 | fprintf(fp, "</a></li>\n"); |
995 | return TRUE; |
996 | } |
997 | |
998 | /* |
999 | * Write all the sections in this file. Do all paragraphs in this section, then all |
1000 | * children (recursively), then go on to the next one (tail recursively). |
1001 | */ |
1002 | static void xhtml_do_sections(FILE *fp, xhtmlsection *sections) |
1003 | { |
1004 | while (sections) { |
1005 | currentsection = sections; |
1006 | xhtml_do_paras(fp, sections->para); |
1007 | xhtml_do_sections(fp, sections->child); |
1008 | sections = sections->next; |
1009 | } |
1010 | } |
1011 | |
1012 | /* Write this list of paragraphs. Close off all lists at the end. */ |
1013 | static void xhtml_do_paras(FILE *fp, paragraph *p) |
1014 | { |
1015 | int last_type = -1, first=TRUE; |
1016 | if (!p) |
1017 | return; |
1018 | |
1019 | /* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/ |
1020 | for (; p && (xhtml_para_level(p)==-1 || first); p=p->next) { |
1021 | first=FALSE; |
1022 | switch (p->type) |
1023 | { |
1024 | /* |
1025 | * Things we ignore because we've already processed them or |
1026 | * aren't going to touch them in this pass. |
1027 | */ |
1028 | case para_IM: |
1029 | case para_BR: |
1030 | case para_Biblio: /* only touch BiblioCited */ |
1031 | case para_VersionID: |
1032 | case para_Copyright: |
1033 | case para_Preamble: |
1034 | case para_NoCite: |
1035 | case para_Title: |
1036 | break; |
1037 | |
1038 | /* |
1039 | * Chapter titles. |
1040 | */ |
1041 | case para_Chapter: |
1042 | case para_Appendix: |
1043 | case para_UnnumberedChapter: |
1044 | xhtml_heading(fp, p); |
1045 | break; |
1046 | |
1047 | case para_Heading: |
1048 | case para_Subsect: |
1049 | xhtml_heading(fp, p); |
1050 | break; |
1051 | |
1052 | case para_Rule: |
1053 | fprintf(fp, "\n<hr />\n"); |
1054 | break; |
1055 | |
1056 | case para_Normal: |
1057 | fprintf(fp, "\n<p>"); |
1058 | xhtml_para(fp, p->words); |
1059 | fprintf(fp, "</p>\n"); |
1060 | break; |
1061 | |
1062 | case para_Bullet: |
1063 | case para_NumberedList: |
1064 | case para_BiblioCited: |
1065 | if (last_type!=p->type) { |
1066 | /* start up list if necessary */ |
1067 | if (p->type == para_Bullet) { |
1068 | fprintf(fp, "<ul>\n"); |
1069 | } else if (p->type == para_NumberedList) { |
1070 | fprintf(fp, "<ol>\n"); |
1071 | } else if (p->type == para_BiblioCited) { |
1072 | fprintf(fp, "<dl>\n"); |
1073 | } |
1074 | } |
1075 | if (p->type == para_Bullet || p->type == para_NumberedList) |
1076 | fprintf(fp, "<li>"); |
1077 | else if (p->type == para_BiblioCited) { |
1078 | fprintf(fp, "<dt>"); |
1079 | xhtml_para(fp, p->kwtext); |
1080 | fprintf(fp, "</dt>\n<dd>"); |
1081 | } |
1082 | xhtml_para(fp, p->words); |
1083 | if (p->type == para_BiblioCited) { |
1084 | fprintf(fp, "</dd>\n"); |
1085 | } else if (p->type == para_Bullet || p->type == para_NumberedList) { |
1086 | fprintf(fp, "</li>"); |
1087 | } |
1088 | if (p->type == para_Bullet || p->type == para_NumberedList || p->type == para_BiblioCited) |
1089 | /* close off list if necessary */ |
1090 | { |
1091 | paragraph *p2 = p->next; |
1092 | int close_off=FALSE; |
1093 | /* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/ |
1094 | if (p2 && xhtml_para_level(p2)==-1) { |
1095 | if (p2->type != p->type) |
1096 | close_off=TRUE; |
1097 | } else { |
1098 | close_off=TRUE; |
1099 | } |
1100 | if (close_off) { |
1101 | if (p->type == para_Bullet) { |
1102 | fprintf(fp, "</ul>\n"); |
1103 | } else if (p->type == para_NumberedList) { |
1104 | fprintf(fp, "</ol>\n"); |
1105 | } else if (p->type == para_BiblioCited) { |
1106 | fprintf(fp, "</dl>\n"); |
1107 | } |
1108 | } |
1109 | } |
1110 | break; |
1111 | |
1112 | case para_Code: |
1113 | xhtml_codepara(fp, p->words); |
1114 | break; |
1115 | } |
1116 | last_type = p->type; |
1117 | } |
1118 | } |
1119 | |
1120 | /* |
1121 | * Output a header for this XHTML file. |
1122 | */ |
1123 | static void xhtml_doheader(FILE *fp, word *title) |
1124 | { |
1125 | fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"); |
1126 | fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"); |
1127 | fprintf(fp, "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>"); |
1128 | if (title==NULL) |
1129 | fprintf(fp, "The thing with no name!"); |
1130 | else |
1131 | xhtml_para(fp, title); |
1132 | fprintf(fp, "</title>\n"); |
1133 | fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version); |
1134 | if (conf.author) |
1135 | fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author); |
1136 | if (conf.description) |
1137 | fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description); |
1138 | if (conf.head_end) |
1139 | fprintf(fp, "%ls\n", conf.head_end); |
1140 | fprintf(fp, "</head>\n\n"); |
1141 | if (conf.body) |
1142 | fprintf(fp, "%ls\n", conf.body); |
1143 | else |
1144 | fprintf(fp, "<body>\n"); |
1145 | if (conf.body_start) |
1146 | fprintf(fp, "%ls\n", conf.body_start); |
1147 | } |
1148 | |
1149 | /* |
1150 | * Output a footer for this XHTML file. |
1151 | */ |
1152 | static void xhtml_dofooter(FILE *fp) |
1153 | { |
1154 | fprintf(fp, "\n<hr />\n\n"); |
1155 | if (conf.body_end) |
1156 | fprintf(fp, "%ls\n", conf.body_end); |
1157 | if (!conf.suppress_address) { |
1158 | fprintf(fp,"<address>\n"); |
1159 | if (conf.address_start) |
1160 | fprintf(fp, "%ls\n", conf.address_start); |
1161 | /* Do the version ID */ |
1162 | if (conf.include_version_id) { |
1163 | paragraph *p; |
1164 | int started = 0; |
1165 | for (p = sourceparas; p; p = p->next) |
1166 | if (p->type == para_VersionID) { |
1167 | xhtml_versionid(fp, p->words, started); |
1168 | started = 1; |
1169 | } |
1170 | } |
1171 | if (conf.address_end) |
1172 | fprintf(fp, "%ls\n", conf.address_end); |
1173 | fprintf(fp, "</address>\n"); |
1174 | } |
1175 | fprintf(fp, "</body>\n\n</html>\n"); |
1176 | } |
1177 | |
1178 | /* |
1179 | * Output the versionid paragraph. Typically this is a version control |
1180 | * ID string (such as $Id...$ in RCS). |
1181 | */ |
1182 | static void xhtml_versionid(FILE *fp, word *text, int started) |
1183 | { |
1184 | rdstringc t = { 0, 0, NULL }; |
1185 | |
1186 | rdaddc(&t, '['); /* FIXME: configurability */ |
1187 | xhtml_rdaddwc(&t, text, NULL); |
1188 | rdaddc(&t, ']'); /* FIXME: configurability */ |
1189 | |
1190 | if (started) |
1191 | fprintf(fp, "<br>\n"); |
1192 | fprintf(fp, "%s\n", t.text); |
1193 | sfree(t.text); |
1194 | } |
1195 | |
1196 | /* Is this an XHTML reserved character? */ |
1197 | static int xhtml_reservedchar(int c) |
1198 | { |
1199 | if (c=='&' || c=='<' || c=='>' || c=='"') |
1200 | return TRUE; |
1201 | else |
1202 | return FALSE; |
1203 | } |
1204 | |
1205 | /* |
1206 | * Convert a wide string into valid XHTML: Anything outside ASCII will |
1207 | * be fixed up as an entity. Currently we don't worry about constraining the |
1208 | * encoded character set, which we should probably do at some point (we can |
1209 | * still fix up and return FALSE - see the last comment here). We also don't |
1210 | * currently |
1211 | * |
1212 | * Because this is only used for words, spaces are HARD spaces (any other |
1213 | * spaces will be word_Whitespace not word_Normal). So they become |
1214 | * Unless hard_spaces is FALSE, of course (code paragraphs break the above |
1215 | * rule). |
1216 | * |
1217 | * If `result' is non-NULL, mallocs the resulting string and stores a pointer to |
1218 | * it in `*result'. If `result' is NULL, merely checks whether all |
1219 | * characters in the string are feasible. |
1220 | * |
1221 | * Return is nonzero if all characters are OK. If not all |
1222 | * characters are OK but `result' is non-NULL, a result _will_ |
1223 | * still be generated! |
1224 | */ |
1225 | static int xhtml_convert(wchar_t *s, char **result, int hard_spaces) { |
1226 | int doing = (result != 0); |
1227 | int ok = TRUE; |
1228 | char *p = NULL; |
1229 | int plen = 0, psize = 0; |
1230 | |
1231 | for (; *s; s++) { |
1232 | wchar_t c = *s; |
1233 | |
1234 | #define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); } |
1235 | |
1236 | if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) { |
1237 | /* Char is OK. */ |
1238 | if (doing) |
1239 | { |
1240 | ensure_size(plen); |
1241 | p[plen++] = (char)c; |
1242 | } |
1243 | } else { |
1244 | /* Char needs fixing up. */ |
1245 | /* ok = FALSE; -- currently we never return FALSE; we |
1246 | * might want to when considering a character set for the |
1247 | * encoded document. |
1248 | */ |
1249 | if (doing) |
1250 | { |
1251 | if (c==32) { /* a space in a word is a hard space */ |
1252 | ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */ |
1253 | sprintf(p+plen, " "); |
1254 | plen+=6; |
1255 | } else { |
1256 | /* FIXME: entity names! */ |
1257 | ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */ |
1258 | plen+=sprintf(p+plen, "&#%04i;", (int)c); |
1259 | } |
1260 | } |
1261 | } |
1262 | } |
1263 | if (doing) { |
1264 | p = resize(p, plen+1); |
1265 | p[plen] = '\0'; |
1266 | *result = p; |
1267 | } |
1268 | return ok; |
1269 | } |
1270 | |
1271 | /* |
1272 | * This formats the given words as XHTML. |
1273 | */ |
1274 | static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end) { |
1275 | char *c; |
1276 | keyword *kwl; |
1277 | xhtmlsection *sect; |
1278 | indextag *itag; |
1279 | int ti; |
1280 | |
1281 | for (; text && text != end; text = text->next) { |
1282 | switch (text->type) { |
1283 | case word_HyperLink: |
1284 | xhtml_utostr(text->text, &c); |
1285 | rdaddsc(rs, "<a href=\""); |
1286 | rdaddsc(rs, c); |
1287 | rdaddsc(rs, "\">"); |
1288 | sfree(c); |
1289 | break; |
1290 | |
1291 | case word_UpperXref: |
1292 | case word_LowerXref: |
1293 | kwl = kw_lookup(keywords, text->text); |
1294 | if (kwl) { |
1295 | sect=xhtml_find_section(kwl->para); |
1296 | if (sect) { |
1297 | rdaddsc(rs, "<a href=\""); |
1298 | rdaddsc(rs, sect->file->filename); |
1299 | rdaddc(rs, '#'); |
1300 | rdaddsc(rs, sect->fragment); |
1301 | rdaddsc(rs, "\">"); |
1302 | } else { |
1303 | rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->"); |
1304 | error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)"); |
1305 | } |
1306 | } else { |
1307 | rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->"); |
1308 | error(err_whatever, "Couldn't locate cross-reference! (Wasn't in source file.)"); |
1309 | } |
1310 | break; |
1311 | |
1312 | case word_IndexRef: /* in theory we could make an index target here */ |
1313 | /* rdaddsc(rs, "<a name=\"idx-"); |
1314 | xhtml_utostr(text->text, &c); |
1315 | rdaddsc(rs, c); |
1316 | sfree(c); |
1317 | rdaddsc(rs, "\"></a>");*/ |
1318 | /* what we _do_ need to do is to fix up the backend data |
1319 | * for any indexentry this points to. |
1320 | */ |
1321 | for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) { |
1322 | /* FIXME: really ustricmp() and not ustrcmp()? */ |
1323 | if (ustricmp(itag->name, text->text)==0) { |
1324 | break; |
1325 | } |
1326 | } |
1327 | if (itag!=NULL) { |
1328 | if (itag->refs!=NULL) { |
1329 | int i; |
1330 | for (i=0; i<itag->nrefs; i++) { |
1331 | xhtmlindex *idx_ref; |
1332 | indexentry *ientry; |
1333 | |
1334 | ientry = itag->refs[i]; |
1335 | if (ientry->backend_data==NULL) { |
1336 | idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex)); |
1337 | if (idx_ref==NULL) |
1338 | fatal(err_nomemory); |
1339 | idx_ref->nsection = 0; |
1340 | idx_ref->size = 4; |
1341 | idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*)); |
1342 | if (idx_ref->sections==NULL) |
1343 | fatal(err_nomemory); |
1344 | ientry->backend_data = idx_ref; |
1345 | } else { |
1346 | idx_ref = ientry->backend_data; |
1347 | if (idx_ref->nsection+1 > idx_ref->size) { |
1348 | int new_size = idx_ref->size * 2; |
1349 | idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection)); |
1350 | if (idx_ref->sections==NULL) { |
1351 | fatal(err_nomemory); |
1352 | } |
1353 | idx_ref->size = new_size; |
1354 | } |
1355 | } |
1356 | idx_ref->sections[idx_ref->nsection++] = currentsection; |
1357 | #if 0 |
1358 | #endif |
1359 | } |
1360 | } else { |
1361 | fatal(err_whatever, "Index tag had no entries!"); |
1362 | } |
1363 | } else { |
1364 | fprintf(stderr, "Looking for index entry '%ls'\n", text->text); |
1365 | fatal(err_whatever, "Couldn't locate index entry! (Wasn't in index.)"); |
1366 | } |
1367 | break; |
1368 | |
1369 | case word_HyperEnd: |
1370 | case word_XrefEnd: |
1371 | rdaddsc(rs, "</a>"); |
1372 | break; |
1373 | |
1374 | case word_Normal: |
1375 | case word_Emph: |
1376 | case word_Code: |
1377 | case word_WeakCode: |
1378 | case word_WhiteSpace: |
1379 | case word_EmphSpace: |
1380 | case word_CodeSpace: |
1381 | case word_WkCodeSpace: |
1382 | case word_Quote: |
1383 | case word_EmphQuote: |
1384 | case word_CodeQuote: |
1385 | case word_WkCodeQuote: |
1386 | assert(text->type != word_CodeQuote && |
1387 | text->type != word_WkCodeQuote); |
1388 | if (towordstyle(text->type) == word_Emph && |
1389 | (attraux(text->aux) == attr_First || |
1390 | attraux(text->aux) == attr_Only)) |
1391 | rdaddsc(rs, "<em>"); |
1392 | else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && |
1393 | (attraux(text->aux) == attr_First || |
1394 | attraux(text->aux) == attr_Only)) |
1395 | rdaddsc(rs, "<code>"); |
1396 | |
1397 | if (removeattr(text->type) == word_Normal) { |
1398 | if (xhtml_convert(text->text, &c, TRUE)) /* spaces in the word are hard */ |
1399 | rdaddsc(rs, c); |
1400 | else |
1401 | xhtml_rdaddwc(rs, text->alt, NULL); |
1402 | sfree(c); |
1403 | } else if (removeattr(text->type) == word_WhiteSpace) { |
1404 | rdaddc(rs, ' '); |
1405 | } else if (removeattr(text->type) == word_Quote) { |
1406 | rdaddsc(rs, """); |
1407 | } |
1408 | |
1409 | if (towordstyle(text->type) == word_Emph && |
1410 | (attraux(text->aux) == attr_Last || |
1411 | attraux(text->aux) == attr_Only)) |
1412 | rdaddsc(rs, "</em>"); |
1413 | else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && |
1414 | (attraux(text->aux) == attr_Last || |
1415 | attraux(text->aux) == attr_Only)) |
1416 | rdaddsc(rs, "</code>"); |
1417 | break; |
1418 | } |
1419 | } |
1420 | } |
1421 | |
1422 | /* Output a heading, formatted as XHTML. |
1423 | */ |
1424 | static void xhtml_heading(FILE *fp, paragraph *p) |
1425 | { |
1426 | rdstringc t = { 0, 0, NULL }; |
1427 | word *tprefix = p->kwtext; |
1428 | word *nprefix = p->kwtext2; |
1429 | word *text = p->words; |
1430 | int level = xhtml_para_level(p); |
1431 | xhtmlsection *sect = xhtml_find_section(p); |
1432 | char *fragment; |
1433 | if (sect) { |
1434 | fragment = sect->fragment; |
1435 | } else { |
d2e74722 |
1436 | if (p->type == para_Title) |
1437 | fragment = "title"; |
1438 | else { |
1439 | fragment = ""; /* FIXME: what else can we do? */ |
1440 | error(err_whatever, "Couldn't locate heading cross-reference!"); |
1441 | } |
d7482997 |
1442 | } |
1443 | |
1444 | if (level>2 && nprefix) { /* FIXME: configurability on the level thing */ |
1445 | xhtml_rdaddwc(&t, nprefix, NULL); |
1446 | rdaddc(&t, ' '); /* FIXME: as below */ |
1447 | } else if (tprefix) { |
1448 | xhtml_rdaddwc(&t, tprefix, NULL); |
1449 | rdaddsc(&t, ": "); /* FIXME: configurability */ |
1450 | } |
1451 | xhtml_rdaddwc(&t, text, NULL); |
1452 | fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level); |
1453 | sfree(t.text); |
1454 | } |
1455 | |
1456 | /* Output a paragraph. Styles are handled by xhtml_rdaddwc(). |
1457 | * This looks pretty simple; I may have missed something ... |
1458 | */ |
1459 | static void xhtml_para(FILE *fp, word *text) |
1460 | { |
1461 | rdstringc out = { 0, 0, NULL }; |
1462 | xhtml_rdaddwc(&out, text, NULL); |
1463 | fprintf(fp, "%s", out.text); |
1464 | sfree(out.text); |
1465 | } |
1466 | |
1467 | /* Output a code paragraph. I'm treating this as preformatted, which |
1468 | * may not be entirely correct. See xhtml_para() for my worries about |
1469 | * this being overly-simple; however I think that most of the complexity |
1470 | * of the text backend came entirely out of word wrapping anyway. |
1471 | */ |
1472 | static void xhtml_codepara(FILE *fp, word *text) |
1473 | { |
1474 | fprintf(fp, "<pre>"); |
1475 | for (; text; text = text->next) if (text->type == word_WeakCode) { |
1476 | char *c; |
1477 | xhtml_convert(text->text, &c, FALSE); |
1478 | fprintf(fp, "%s\n", c); |
1479 | sfree(c); |
1480 | } |
1481 | fprintf(fp, "</pre>\n"); |
1482 | } |