From db662ca1b30d08ecf779c42f8c642c2694966279 Mon Sep 17 00:00:00 2001
From: simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Date: Fri, 23 Apr 2004 17:20:07 +0000
Subject: [PATCH] Enhance the text backend to support configurable quote
 characters, configurable emphasis characters, various other configurable bits
 which have been marked FIXME in the code for a while, and also to warn when a
 code paragraph line is too long (because that was the only other thing
 labelled FIXME). Fallback options are implemented, and defaults set
 accordingly. A UTF-8 text output file now looks like proper UTF-8.

git-svn-id: svn://svn.tartarus.org/sgt/halibut@4128 cda61777-01e9-0310-a592-d414129be87e
---
 bk_text.c      | 355 +++++++++++++++++++++++++++++++++++++--------------------
 doc/index.but  |   6 +
 doc/output.but | 125 +++++++++++++++++---
 error.c        |   9 ++
 halibut.h      |   1 +
 input.c        |   7 +-
 6 files changed, 367 insertions(+), 136 deletions(-)

diff --git a/bk_text.c b/bk_text.c
index 8fa7340..3d8646c 100644
--- a/bk_text.c
+++ b/bk_text.c
@@ -11,7 +11,7 @@ typedef enum { LEFT, LEFTPLUS, CENTRE } alignment;
 typedef struct {
     alignment align;
     int just_numbers;
-    wchar_t underline;
+    wchar_t *underline;
     wchar_t *number_suffix;
 } alignstruct;
 
@@ -25,7 +25,9 @@ typedef struct {
     int indent_preambles;
     int charset;
     word bullet;
+    wchar_t *lquote, *rquote, *rule;
     char *filename;
+    wchar_t *listsuffix, *startemph, *endemph;
 } textconfig;
 
 typedef struct {
@@ -35,11 +37,12 @@ typedef struct {
 } textfile;
 
 static void text_heading(textfile *, word *, word *, word *, alignstruct,
-			 int,int);
-static void text_rule(textfile *, int, int);
-static void text_para(textfile *, word *, wchar_t *, word *, int, int, int);
+			 int, int, textconfig *);
+static void text_rule(textfile *, int, int, textconfig *);
+static void text_para(textfile *, word *, wchar_t *, word *, int, int, int,
+		      textconfig *);
 static void text_codepara(textfile *, word *, int, int);
-static void text_versionid(textfile *, word *);
+static void text_versionid(textfile *, word *, textconfig *);
 
 static void text_output(textfile *, const wchar_t *);
 static void text_output_many(textfile *, int, wchar_t);
@@ -54,6 +57,8 @@ static alignment utoalign(wchar_t *p) {
 
 static textconfig text_configure(paragraph *source) {
     textconfig ret;
+    paragraph *p;
+    int n;
 
     /*
      * Non-negotiables.
@@ -72,56 +77,82 @@ static textconfig text_configure(paragraph *source) {
     ret.listindentafter = 3;
     ret.width = 68;
     ret.atitle.align = CENTRE;
-    ret.atitle.underline = L'=';
+    ret.atitle.underline = L"\x2550\0=\0\0";
     ret.achapter.align = LEFT;
     ret.achapter.just_numbers = FALSE;
     ret.achapter.number_suffix = L": ";
-    ret.achapter.underline = L'-';
+    ret.achapter.underline = L"\x203E\0-\0\0";
     ret.nasect = 1;
     ret.asect = mknewa(alignstruct, ret.nasect);
     ret.asect[0].align = LEFTPLUS;
     ret.asect[0].just_numbers = TRUE;
     ret.asect[0].number_suffix = L" ";
-    ret.asect[0].underline = L'\0';
+    ret.asect[0].underline = L"\0";
     ret.include_version_id = TRUE;
     ret.indent_preambles = FALSE;
-    ret.bullet.text = L"-";
+    ret.bullet.text = L"\x2022\0-\0\0";
+    ret.rule = L"\x2500\0-\0\0";
     ret.filename = dupstr("output.txt");
+    ret.startemph = L"_\0_\0\0";
+    ret.endemph = uadv(ret.startemph);
+    ret.listsuffix = L".";
     ret.charset = CS_ASCII;
+    /* Quote/emphasis defaults are NUL-separated lists of pairs, walked by
+     * the fallback code with uadv(). Quotes: Unicode matched single
+     * quotes, falling back to the TeXlike `'. */
+    ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
+    ret.rquote = uadv(ret.lquote);
 
-    for (; source; source = source->next) {
-	if (source->type == para_Config) {
-	    if (!ustricmp(source->keyword, L"text-indent")) {
-		ret.indent = utoi(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-charset")) {
-		char *csname = utoa_dup(uadv(source->keyword), CS_ASCII);
+    /*
+     * Two-pass configuration so that we can pick up global config
+     * (e.g. `quotes') before having it overridden by specific
+     * config (`text-quotes'), irrespective of the order in which
+     * they occur.
+     */
+    for (p = source; p; p = p->next) {
+	if (p->type == para_Config) {
+	    if (!ustricmp(p->keyword, L"quotes")) {
+		if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+		    ret.lquote = uadv(p->keyword);
+		    ret.rquote = uadv(ret.lquote);
+		}
+	    }
+	}
+    }
+
+    for (p = source; p; p = p->next) {
+	if (p->type == para_Config) {
+	    if (!ustricmp(p->keyword, L"text-indent")) {
+		ret.indent = utoi(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-charset")) {
+		char *csname = utoa_dup(uadv(p->keyword), CS_ASCII);
 		ret.charset = charset_from_localenc(csname);
 		sfree(csname);
-	    } else if (!ustricmp(source->keyword, L"text-filename")) {
+	    } else if (!ustricmp(p->keyword, L"text-filename")) {
 		sfree(ret.filename);
-		ret.filename = dupstr(adv(source->origkeyword));
-	    } else if (!ustricmp(source->keyword, L"text-indent-code")) {
-		ret.indent_code = utoi(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-width")) {
-		ret.width = utoi(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-list-indent")) {
-		ret.listindentbefore = utoi(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-listitem-indent")) {
-		ret.listindentafter = utoi(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-chapter-align")) {
-		ret.achapter.align = utoalign(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-chapter-underline")) {
-		ret.achapter.underline = *uadv(source->keyword);
-	    } else if (!ustricmp(source->keyword, L"text-chapter-numeric")) {
-		ret.achapter.just_numbers = utob(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-chapter-suffix")) {
-		ret.achapter.number_suffix = uadv(source->keyword);
-	    } else if (!ustricmp(source->keyword, L"text-section-align")) {
-		wchar_t *p = uadv(source->keyword);
+		ret.filename = dupstr(adv(p->origkeyword));
+	    } else if (!ustricmp(p->keyword, L"text-indent-code")) {
+		ret.indent_code = utoi(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-width")) {
+		ret.width = utoi(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-list-indent")) {
+		ret.listindentbefore = utoi(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-listitem-indent")) {
+		ret.listindentafter = utoi(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-chapter-align")) {
+		ret.achapter.align = utoalign(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-chapter-underline")) {
+		ret.achapter.underline = uadv(p->keyword);
+	    } else if (!ustricmp(p->keyword, L"text-chapter-numeric")) {
+		ret.achapter.just_numbers = utob(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-chapter-suffix")) {
+		ret.achapter.number_suffix = uadv(p->keyword);
+	    } else if (!ustricmp(p->keyword, L"text-section-align")) {
+		wchar_t *q = uadv(p->keyword);
 		int n = 0;
-		if (uisdigit(*p)) {
-		    n = utoi(p);
-		    p = uadv(p);
+		if (uisdigit(*q)) {
+		    n = utoi(q);
+		    q = uadv(q);
 		}
 		if (n >= ret.nasect) {
 		    int i;
@@ -130,13 +161,13 @@ static textconfig text_configure(paragraph *source) {
 			ret.asect[i] = ret.asect[ret.nasect-1];
 		    ret.nasect = n+1;
 		}
-		ret.asect[n].align = utoalign(p);
-	    } else if (!ustricmp(source->keyword, L"text-section-underline")) {
-		wchar_t *p = uadv(source->keyword);
+		ret.asect[n].align = utoalign(q);
+	    } else if (!ustricmp(p->keyword, L"text-section-underline")) {
+		wchar_t *q = uadv(p->keyword);
 		int n = 0;
-		if (uisdigit(*p)) {
-		    n = utoi(p);
-		    p = uadv(p);
+		if (uisdigit(*q)) {
+		    n = utoi(q);
+		    q = uadv(q);
 		}
 		if (n >= ret.nasect) {
 		    int i;
@@ -145,13 +176,13 @@ static textconfig text_configure(paragraph *source) {
 			ret.asect[i] = ret.asect[ret.nasect-1];
 		    ret.nasect = n+1;
 		}
-		ret.asect[n].underline = *p;
-	    } else if (!ustricmp(source->keyword, L"text-section-numeric")) {
-		wchar_t *p = uadv(source->keyword);
+		ret.asect[n].underline = q;
+	    } else if (!ustricmp(p->keyword, L"text-section-numeric")) {
+		wchar_t *q = uadv(p->keyword);
 		int n = 0;
-		if (uisdigit(*p)) {
-		    n = utoi(p);
-		    p = uadv(p);
+		if (uisdigit(*q)) {
+		    n = utoi(q);
+		    q = uadv(q);
 		}
 		if (n >= ret.nasect) {
 		    int i;
@@ -160,13 +191,13 @@ static textconfig text_configure(paragraph *source) {
 			ret.asect[i] = ret.asect[ret.nasect-1];
 		    ret.nasect = n+1;
 		}
-		ret.asect[n].just_numbers = utob(p);
-	    } else if (!ustricmp(source->keyword, L"text-section-suffix")) {
-		wchar_t *p = uadv(source->keyword);
+		ret.asect[n].just_numbers = utob(q);
+	    } else if (!ustricmp(p->keyword, L"text-section-suffix")) {
+		wchar_t *q = uadv(p->keyword);
 		int n = 0;
-		if (uisdigit(*p)) {
-		    n = utoi(p);
-		    p = uadv(p);
+		if (uisdigit(*q)) {
+		    n = utoi(q);
+		    q = uadv(q);
 		}
 		if (n >= ret.nasect) {
 		    int i;
@@ -176,21 +207,75 @@ static textconfig text_configure(paragraph *source) {
 		    }
 		    ret.nasect = n+1;
 		}
-		ret.asect[n].number_suffix = p;
-	    } else if (!ustricmp(source->keyword, L"text-title-align")) {
-		ret.atitle.align = utoalign(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-title-underline")) {
-		ret.atitle.underline = *uadv(source->keyword);
-	    } else if (!ustricmp(source->keyword, L"text-versionid")) {
-		ret.include_version_id = utob(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-indent-preamble")) {
-		ret.indent_preambles = utob(uadv(source->keyword));
-	    } else if (!ustricmp(source->keyword, L"text-bullet")) {
-		ret.bullet.text = uadv(source->keyword);
+		ret.asect[n].number_suffix = q;
+	    } else if (!ustricmp(p->keyword, L"text-title-align")) {
+		ret.atitle.align = utoalign(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-title-underline")) {
+		ret.atitle.underline = uadv(p->keyword);
+	    } else if (!ustricmp(p->keyword, L"text-versionid")) {
+		ret.include_version_id = utob(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-indent-preamble")) {
+		ret.indent_preambles = utob(uadv(p->keyword));
+	    } else if (!ustricmp(p->keyword, L"text-bullet")) {
+		ret.bullet.text = uadv(p->keyword);
+	    } else if (!ustricmp(p->keyword, L"text-rule")) {
+		ret.rule = uadv(p->keyword);
+	    } else if (!ustricmp(p->keyword, L"text-list-suffix")) {
+		ret.listsuffix = uadv(p->keyword);
+	    } else if (!ustricmp(p->keyword, L"text-emphasis")) {
+		if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+		    ret.startemph = uadv(p->keyword);
+		    ret.endemph = uadv(ret.startemph);
+		}
+	    } else if (!ustricmp(p->keyword, L"text-quotes")) {
+		if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
+		    ret.lquote = uadv(p->keyword);
+		    ret.rquote = uadv(ret.lquote);
+		}
 	    }
 	}
     }
 
+    /*
+     * Now process fallbacks on quote characters, underlines, the
+     * rule character, the emphasis characters, and bullets.
+     */
+    while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
+	   (!cvt_ok(ret.charset, ret.lquote) ||
+	    !cvt_ok(ret.charset, ret.rquote))) {
+	ret.lquote = uadv(ret.rquote);
+	ret.rquote = uadv(ret.lquote);
+    }
+
+    while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
+	   (!cvt_ok(ret.charset, ret.startemph) ||
+	    !cvt_ok(ret.charset, ret.endemph))) {
+	ret.startemph = uadv(ret.endemph);
+	ret.endemph = uadv(ret.startemph);
+    }
+
+    while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
+	   !cvt_ok(ret.charset, ret.atitle.underline))
+	ret.atitle.underline = uadv(ret.atitle.underline);
+    
+    while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
+	   !cvt_ok(ret.charset, ret.achapter.underline))
+	ret.achapter.underline = uadv(ret.achapter.underline);
+
+    for (n = 0; n < ret.nasect; n++) {
+	while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
+	       !cvt_ok(ret.charset, ret.asect[n].underline))
+	    ret.asect[n].underline = uadv(ret.asect[n].underline);
+    }
+    
+    while (*ret.bullet.text && *uadv(ret.bullet.text) &&
+	   !cvt_ok(ret.charset, ret.bullet.text))
+	ret.bullet.text = uadv(ret.bullet.text);
+
+    while (*ret.rule && *uadv(ret.rule) &&
+	   !cvt_ok(ret.charset, ret.rule))
+	ret.rule = uadv(ret.rule);
+
     return ret;
 }
 
@@ -231,7 +316,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
     for (p = sourceform; p; p = p->next)
 	if (p->type == para_Title)
 	    text_heading(&tf, NULL, NULL, p->words,
-			 conf.atitle, conf.indent, conf.width);
+			 conf.atitle, conf.indent, conf.width, &conf);
 
     nestindent = conf.listindentbefore + conf.listindentafter;
     nesting = (conf.indent_preambles ? 0 : -conf.indent);
@@ -274,7 +359,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
       case para_Appendix:
       case para_UnnumberedChapter:
 	text_heading(&tf, p->kwtext, p->kwtext2, p->words,
-		     conf.achapter, conf.indent, conf.width);
+		     conf.achapter, conf.indent, conf.width, &conf);
 	nesting = 0;
 	break;
 
@@ -282,11 +367,11 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
       case para_Subsect:
 	text_heading(&tf, p->kwtext, p->kwtext2, p->words,
 		     conf.asect[p->aux>=conf.nasect ? conf.nasect-1 : p->aux],
-		     conf.indent, conf.width);
+		     conf.indent, conf.width, &conf);
 	break;
 
       case para_Rule:
-	text_rule(&tf, conf.indent + nesting, conf.width - nesting);
+	text_rule(&tf, conf.indent + nesting, conf.width - nesting, &conf);
 	break;
 
       case para_Normal:
@@ -303,7 +388,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
 	    indenta = conf.listindentafter;
 	} else if (p->type == para_NumberedList) {
 	    prefix = p->kwtext;
-	    prefixextra = L".";	       /* FIXME: configurability */
+	    prefixextra = conf.listsuffix;
 	    indentb = conf.listindentbefore;
 	    indenta = conf.listindentafter;
 	} else if (p->type == para_Description) {
@@ -330,7 +415,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
 	}
 	text_para(&tf, prefix, prefixextra, body,
 		  conf.indent + nesting + indentb, indenta,
-		  conf.width - nesting - indentb - indenta);
+		  conf.width - nesting - indentb - indenta, &conf);
 	if (wp) {
 	    wp->next = NULL;
 	    free_word_list(body);
@@ -348,7 +433,7 @@ void text_backend(paragraph *sourceform, keywordlist *keywords,
     if (conf.include_version_id) {
 	for (p = sourceform; p; p = p->next)
 	    if (p->type == para_VersionID)
- 		text_versionid(&tf, p->words);
+ 		text_versionid(&tf, p->words, &conf);
     }
 
     /*
@@ -392,7 +477,7 @@ static void text_output_many(textfile *tf, int n, wchar_t c)
 	text_output(tf, s);
 }
 
-static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
+static void text_rdaddw(rdstring *rs, word *text, word *end, textconfig *cfg) {
     for (; text && text != end; text = text->next) switch (text->type) {
       case word_HyperLink:
       case word_HyperEnd:
@@ -419,30 +504,30 @@ static void text_rdaddw(int charset, rdstring *rs, word *text, word *end) {
 	if (towordstyle(text->type) == word_Emph &&
 	    (attraux(text->aux) == attr_First ||
 	     attraux(text->aux) == attr_Only))
-	    rdadd(rs, L'_');	       /* FIXME: configurability */
+	    rdadds(rs, cfg->startemph);
 	else if (towordstyle(text->type) == word_Code &&
 		 (attraux(text->aux) == attr_First ||
 		  attraux(text->aux) == attr_Only))
-	    rdadd(rs, L'`');	       /* FIXME: configurability */
+	    rdadds(rs, cfg->lquote);
 	if (removeattr(text->type) == word_Normal) {
-	    if (cvt_ok(charset, text->text) || !text->alt)
+	    if (cvt_ok(cfg->charset, text->text) || !text->alt)
 		rdadds(rs, text->text);
 	    else
-		text_rdaddw(charset, rs, text->alt, NULL);
+		text_rdaddw(rs, text->alt, NULL, cfg);
 	} else if (removeattr(text->type) == word_WhiteSpace) {
 	    rdadd(rs, L' ');
 	} else if (removeattr(text->type) == word_Quote) {
-	    rdadd(rs, quoteaux(text->aux) == quote_Open ? L'`' : L'\'');
-				       /* FIXME: configurability */
+	    rdadds(rs, quoteaux(text->aux) == quote_Open ?
+		   cfg->lquote : cfg->rquote);
 	}
 	if (towordstyle(text->type) == word_Emph &&
 	    (attraux(text->aux) == attr_Last ||
 	     attraux(text->aux) == attr_Only))
-	    rdadd(rs, L'_');	       /* FIXME: configurability */
+	    rdadds(rs, cfg->endemph);
 	else if (towordstyle(text->type) == word_Code &&
 		 (attraux(text->aux) == attr_Last ||
 		  attraux(text->aux) == attr_Only))
-	    rdadd(rs, L'\'');	       /* FIXME: configurability */
+	    rdadds(rs, cfg->rquote);
 	break;
     }
 }
@@ -459,7 +544,9 @@ static int text_width_list(void *ctx, word *text) {
 }
 
 static int text_width(void *ctx, word *text) {
-    int charset = * (int *) ctx;
+    textconfig *cfg = (textconfig *)ctx;
+    int wid;
+    int attr;
 
     switch (text->type) {
       case word_HyperLink:
@@ -469,19 +556,35 @@ static int text_width(void *ctx, word *text) {
       case word_XrefEnd:
       case word_IndexRef:
 	return 0;
+    }
+
+    assert(text->type < word_internal_endattrs);
+
+    wid = 0;
+    attr = towordstyle(text->type);
+    if (attr == word_Emph || attr == word_Code) {   /* opening marker */
+	if (attraux(text->aux) == attr_Only ||
+	    attraux(text->aux) == attr_First)
+	    wid += ustrwid(attr == word_Emph ? cfg->startemph : cfg->lquote,
+			   cfg->charset);
+    }
+    if (attr == word_Emph || attr == word_Code) {   /* closing marker */
+	if (attraux(text->aux) == attr_Only ||
+	    attraux(text->aux) == attr_Last)
+	    wid += ustrwid(attr == word_Emph ? cfg->endemph : cfg->rquote,
+			   cfg->charset);
+    }
 
+    switch (text->type) {
       case word_Normal:
       case word_Emph:
       case word_Code:
       case word_WeakCode:
-	return (((text->type == word_Emph ||
-		  text->type == word_Code)
-		 ? (attraux(text->aux) == attr_Only ? 2 :
-		    attraux(text->aux) == attr_Always ? 0 : 1)
-		 : 0) +
-		(cvt_ok(charset, text->text) || !text->alt ?
-		 ustrwid(text->text, charset) :
-		 text_width_list(ctx, text->alt)));
+	if (cvt_ok(cfg->charset, text->text) || !text->alt)
+	    wid += ustrwid(text->text, cfg->charset);
+	else
+	    wid += text_width_list(ctx, text->alt);
+	return wid;
 
       case word_WhiteSpace:
       case word_EmphSpace:
@@ -493,31 +596,34 @@ static int text_width(void *ctx, word *text) {
       case word_WkCodeQuote:
 	assert(text->type != word_CodeQuote &&
 	       text->type != word_WkCodeQuote);
-	return (((towordstyle(text->type) == word_Emph ||
-		  towordstyle(text->type) == word_Code)
-		 ? (attraux(text->aux) == attr_Only ? 2 :
-		    attraux(text->aux) == attr_Always ? 0 : 1)
-		 : 0) + 1);
+	if (removeattr(text->type) == word_Quote) {
+	    if (quoteaux(text->aux) == quote_Open)
+		wid += ustrwid(cfg->lquote, cfg->charset);
+	    else
+		wid += ustrwid(cfg->rquote, cfg->charset);
+	} else
+	    wid++;		       /* space */
     }
-    return 0;			       /* should never happen */
+
+    return wid;
 }
 
 static void text_heading(textfile *tf, word *tprefix, word *nprefix,
 			 word *text, alignstruct align,
-			 int indent, int width) {
+			 int indent, int width, textconfig *cfg) {
     rdstring t = { 0, 0, NULL };
     int margin, length;
     int firstlinewidth, wrapwidth;
     wrappedline *wrapping, *p;
 
     if (align.just_numbers && nprefix) {
-	text_rdaddw(tf->charset, &t, nprefix, NULL);
+	text_rdaddw(&t, nprefix, NULL, cfg);
 	rdadds(&t, align.number_suffix);
     } else if (!align.just_numbers && tprefix) {
-	text_rdaddw(tf->charset, &t, tprefix, NULL);
+	text_rdaddw(&t, tprefix, NULL, cfg);
 	rdadds(&t, align.number_suffix);
     }
-    margin = length = t.pos;
+    margin = length = ustrwid(t.text ? t.text : L"", cfg->charset);
 
     if (align.align == LEFTPLUS) {
 	margin = indent - margin;
@@ -531,10 +637,10 @@ static void text_heading(textfile *tf, word *tprefix, word *nprefix,
     }
 
     wrapping = wrap_para(text, firstlinewidth, wrapwidth,
-			 text_width, &tf->charset, 0);
+			 text_width, cfg, 0);
     for (p = wrapping; p; p = p->next) {
-	text_rdaddw(tf->charset, &t, p->begin, p->end);
-	length = t.pos;
+	text_rdaddw(&t, p->begin, p->end, cfg);
+	length = ustrwid(t.text ? t.text : L"", cfg->charset);
 	if (align.align == CENTRE) {
 	    margin = (indent + width - length)/2;
 	    if (margin < 0) margin = 0;
@@ -542,9 +648,12 @@ static void text_heading(textfile *tf, word *tprefix, word *nprefix,
 	text_output_many(tf, margin, L' ');
 	text_output(tf, t.text);
 	text_output(tf, L"\n");
-	if (align.underline != L'\0') {
+	if (*align.underline) {
 	    text_output_many(tf, margin, L' ');
-	    text_output_many(tf, length, align.underline);
+	    while (length > 0) {
+		text_output(tf, align.underline);
+		length -= ustrwid(align.underline, cfg->charset);
+	    }
 	    text_output(tf, L"\n");
 	}
 	if (align.align == LEFTPLUS)
@@ -560,27 +669,31 @@ static void text_heading(textfile *tf, word *tprefix, word *nprefix,
     sfree(t.text);
 }
 
-static void text_rule(textfile *tf, int indent, int width) {
+static void text_rule(textfile *tf, int indent, int width, textconfig *cfg) {
     text_output_many(tf, indent, L' ');
-    text_output_many(tf, width, L'-');     /* FIXME: configurability! */
+    while (width > 0 && ustrwid(cfg->rule, cfg->charset) > 0) {
+	text_output(tf, cfg->rule);    /* zero-width rule: skip, don't spin */
+	width -= ustrwid(cfg->rule, cfg->charset);
+    }
     text_output_many(tf, 2, L'\n');
 }
 
 static void text_para(textfile *tf, word *prefix, wchar_t *prefixextra,
-		      word *text, int indent, int extraindent, int width) {
+		      word *text, int indent, int extraindent, int width,
+		      textconfig *cfg) {
     wrappedline *wrapping, *p;
     rdstring pfx = { 0, 0, NULL };
     int e;
     int firstlinewidth = width;
 
     if (prefix) {
-	text_rdaddw(tf->charset, &pfx, prefix, NULL);
+	text_rdaddw(&pfx, prefix, NULL, cfg);
 	if (prefixextra)
 	    rdadds(&pfx, prefixextra);
 	text_output_many(tf, indent, L' ');
 	text_output(tf, pfx.text);
 	/* If the prefix is too long, shorten the first line to fit. */
-	e = extraindent - pfx.pos;
+	e = extraindent - ustrwid(pfx.text ? pfx.text : L"", cfg->charset);
 	if (e < 0) {
 	    firstlinewidth += e;       /* this decreases it, since e < 0 */
 	    if (firstlinewidth < 0) {
@@ -595,10 +708,10 @@ static void text_para(textfile *tf, word *prefix, wchar_t *prefixextra,
 	e = indent + extraindent;
 
     wrapping = wrap_para(text, firstlinewidth, width,
-			 text_width, &tf->charset, 0);
+			 text_width, cfg, 0);
     for (p = wrapping; p; p = p->next) {
 	rdstring t = { 0, 0, NULL };
-	text_rdaddw(tf->charset, &t, p->begin, p->end);
+	text_rdaddw(&t, p->begin, p->end, cfg);
 	text_output_many(tf, e, L' ');
 	text_output(tf, t.text);
 	text_output(tf, L"\n");
@@ -611,9 +724,9 @@ static void text_para(textfile *tf, word *prefix, wchar_t *prefixextra,
 
 static void text_codepara(textfile *tf, word *text, int indent, int width) {
     for (; text; text = text->next) if (text->type == word_WeakCode) {
-	if (ustrlen(text->text) > width) {
-	    /* FIXME: warn */
-	}
+	int wid = ustrwid(text->text, tf->charset);
+	if (wid > width)
+	    error(err_text_codeline, &text->fpos, wid, width);
 	text_output_many(tf, indent, L' ');
 	text_output(tf, text->text);
 	text_output(tf, L"\n");
@@ -622,12 +735,12 @@ static void text_codepara(textfile *tf, word *text, int indent, int width) {
     text_output(tf, L"\n");
 }
 
-static void text_versionid(textfile *tf, word *text) {
+static void text_versionid(textfile *tf, word *text, textconfig *cfg) {
     rdstring t = { 0, 0, NULL };
 
-    rdadd(&t, L'[');		       /* FIXME: configurability */
-    text_rdaddw(tf->charset, &t, text, NULL);
-    rdadd(&t, L']');		       /* FIXME: configurability */
+    rdadd(&t, L'[');
+    text_rdaddw(&t, text, NULL, cfg);
+    rdadd(&t, L']');
     rdadd(&t, L'\n');
 
     text_output(tf, t.text);
diff --git a/doc/index.but b/doc/index.but
index 2cf1ebc..c51a85a 100644
--- a/doc/index.but
+++ b/doc/index.but
@@ -118,6 +118,12 @@ directive
 \IM{\\cfg\{text-bullet\}} \c{text-bullet} configuration directive
 \IM{\\cfg\{text-bullet\}} \cw{\\cfg\{text-bullet\}}
 
+\IM{\\cfg\{text-rule\}} \c{text-rule} configuration directive
+\IM{\\cfg\{text-rule\}} \cw{\\cfg\{text-rule\}}
+
+\IM{\\cfg\{text-quotes\}} \c{text-quotes} configuration directive
+\IM{\\cfg\{text-quotes\}} \cw{\\cfg\{text-quotes\}}
+
 \IM{\\cfg\{xhtml-leaf-level\}} \c{xhtml-leaf-level} configuration
 directive
 \IM{\\cfg\{xhtml-leaf-level\}} \cw{\\cfg\{xhtml-leaf-level\}}
diff --git a/doc/output.but b/doc/output.but
index 986f53d..dbc9b70 100644
--- a/doc/output.but
+++ b/doc/output.but
@@ -107,11 +107,17 @@ left of that (so that it goes in the margin if there is room).
 
 Also, several of the directives below specify how a title should be
 \I{underlining}underlined. The parameter to one of these directives
-should be either blank (\cw{\{\}}) or a single character. In the
-latter case, that character will be used to underline the title. So
-you might want to specify, for example,
-\cw{\\text-title-underline\{=\}} but
-\cw{\\text-chapter-underline\{-\}}.
+should be either blank (\cw{\{\}}) or a piece of text which will be
+repeated to produce the underline. So you might want to specify, for
+example, \cw{\\cfg\{text-title-underline\}\{=\}} but
+\cw{\\cfg\{text-chapter-underline\}\{\-\}}.
+
+You can also specify more than one underline setting, and Halibut
+will choose the first one that the output character set supports.
+So, for example, you could write
+\cw{\\cfg\{text-chapter-underline\}\{\\u203e\}\{\-\}}, and Halibut would use
+the Unicode \q{OVERLINE} character where possible and fall back to
+the ASCII minus sign otherwise.
 
 \dt \I{\cw{\\cfg\{text-title-align\}}}\cw{\\cfg\{text-title-align\}\{}\e{alignment}\cw{\}}
 
@@ -171,8 +177,98 @@ be numeric only (if \c{true}).
 be appended to section numbers at a particular level, before
 displaying the section title.
 
+\S{output-text-characters} Configuring the characters used
+
+\dt \I{\cw{\\cfg\{text-bullet\}}}\cw{\\cfg\{text-bullet\}\{}\e{text}\cw{\}}[\cw{\{}\e{text}...\cw{\}}]
+
+\dd This specifies the text which should be used as the \i{bullet}
+in bulleted lists. It can be one character
+(\cw{\\cfg\{text-bullet\}\{-\}}), or more than one
+(\cw{\\cfg\{text-bullet\}\{(*)\}}).
+
+\lcont{
+
+You can specify multiple possible options (each in their own pair of
+braces) after this command, and Halibut will choose the first one
+which the output character set supports. (This is to allow you to
+configure the bullet character once, generate output in several
+different character sets, and have Halibut constantly adapt to make
+the best use of the current encoding.) For example, you might write
+\cw{\\cfg\{text-bullet\}\{\\u2022\}\{\\u00b7\}\{*\}}, in which case
+Halibut would use the Unicode \q{BULLET} character where possible,
+fall back to the ISO-8859-1 \q{MIDDLE DOT} if that wasn't available,
+and resort to the ASCII asterisk if all else failed.
+
+}
+
+\dt \I{\cw{\\cfg\{text-rule\}}}\cw{\\cfg\{text-rule\}\{}\e{text}\cw{\}}[\cw{\{}\e{text}...\cw{\}}]
+
+\dd This specifies the text which should be used for drawing
+\i{horizontal rules} (generated by \i\c{\\rule}; see
+\k{input-rule}). It can be one character, or more than one. The
+string you specify will be repeated to reach the required width, so
+you can specify something like \q{\cw{-=}} to get a rule that looks
+like \cw{-=-=-=}.
+
+\lcont{
+
+Like \cw{\\cfg\{text-bullet\}}, you can specify multiple fallback
+options in this command.
+
+}
+
+\dt \I{\cw{\\cfg\{text-quotes\}}}\cw{\\cfg\{text-quotes\}\{}\e{open-quote}\cw{\}\{}\e{close-quote}\cw{\}}[\cw{\{}\e{open-quote}\cw{\}\{}\e{close-quote}...\cw{\}}]
+
+\dd This specifies the quote characters which should be used in
+response to the \c{\\q} command (see \k{input-quotes}). These quotes
+will also be used to mark text enclosed in the \c{\\c} command (see
+\k{input-code}).
+
+\lcont{
+
+You should separately specify the open and close quote characters,
+each of which can be more than one character if you want. Also, like
+\cw{\\cfg\{text-bullet\}}, you can specify multiple fallback options
+in this command (a pair of open and close quotes, then another pair,
+then another if you like); Halibut will always use a matching pair.
+For example, you might write
+
+\c \cfg{text-quotes}{\u201c}{\u201d}{"}{"}
+
+and Halibut would use the Unicode matched double quote characters if
+possible, and fall back to ASCII double quotes otherwise. If the
+output character set were to contain U+201C but not U+201D, then
+Halibut would fall back to using the ASCII double quote character as
+\e{both} open and close quotes. (No known character set is that
+silly; I mention it only as an example.)
+
+}
+
+\dt \I{\cw{\\cfg\{text-emphasis\}}}\cw{\\cfg\{text-emphasis\}\{}\e{start-emph}\cw{\}\{}\e{end-emph}\cw{\}}[\cw{\{}\e{start-emph}\cw{\}\{}\e{end-emph}...\cw{\}}]
+
+\dd This specifies the text which should be used to surround
+emphasised text (written using the \c{\\e} command; see
+\k{input-emph}).
+
+\lcont{
+
+You should separately specify the start-emphasis and end-emphasis
+text, each of which can be more than one character if you want.
+Also, like \cw{\\cfg\{text-quotes\}}, you can specify multiple pairs
+of fallback options in this command, and Halibut will always use a
+matching pair.
+
+}
+
 \S{output-text-misc} Miscellaneous configuration options
 
+\dt \I{\cw{\\cfg\{text-list-suffix\}}}\cw{\\cfg\{text-list-suffix\}\{}\e{text}\cw{\}}
+
+\dd This text is appended to the number on a \i{numbered list} item
+(see \k{input-list-number}). So if you want to label your lists as
+\q{1)}, \q{2)} and so on, then you would write
+\cw{\\cfg\{text-list-suffix\}\{)\}}.
+
 \dt \I{\cw{\\cfg\{text-versionid\}}}\cw{\\cfg\{text-versionid\}\{}\e{boolean}\cw{\}}
 
 \dd If this is set to \c{true}, \i{version ID paragraphs} (defined
@@ -180,13 +276,6 @@ using the \i\c{\\versionid} command - see \k{input-blurb}) will be
 included at the bottom of the text file. If it is set to \c{false},
 they will be omitted completely.
 
-\dt \I{\cw{\\cfg\{text-bullet\}}}\cw{\\cfg\{text-bullet\}\{}\e{text}\cw{\}}
-
-\dd This specifies the text which should be used as the \i{bullet}
-in bulletted lists. It can be one character
-(\cw{\\cfg\{text-bullet\}\{-\}}), or more than one
-(\cw{\\cfg\{text-bullet\}\{(*)\}}).
-
 \# FIXME: code indentation is configurable, therefore \quote
 \# indentation probably ought to be as well.
 
@@ -206,10 +295,10 @@ The \i{default settings} for Halibut's plain text output format are:
 \c \cfg{text-indent-preamble}{false}
 \c
 \c \cfg{text-title-align}{centre}
-\c \cfg{text-title-underline}{=}
+\c \cfg{text-title-underline}{\u2550}{=}
 \c
 \c \cfg{text-chapter-align}{left}
-\c \cfg{text-chapter-underline}{-}
+\c \cfg{text-chapter-underline}{\u203e}{-}
 \c \cfg{text-chapter-numeric}{false}
 \c \cfg{text-chapter-suffix}{: }
 \c
@@ -225,6 +314,14 @@ The \i{default settings} for Halibut's plain text output format are:
 \c
 \c ... and so on for all section levels below this ...
 \e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+\c
+\c \cfg{text-bullet}{\u2022}{-}
+\c \cfg{text-rule}{\u2500}{-}
+\c \cfg{text-quotes}{\u2018}{\u2019}{`}{'}
+\c \cfg{text-emphasis}{_}{_}
+\c
+\c \cfg{text-list-suffix}{.}
+\c \cfg{text-versionid}{true}
 
 \H{output-html} HTML
 
diff --git a/error.c b/error.c
index 42eea37..0e90f0c 100644
--- a/error.c
+++ b/error.c
@@ -16,6 +16,7 @@
 static void do_error(int code, va_list ap) {
     char error[1024];
     char c;
+    int i, j;
     char *sp, *sp2;
     wchar_t *wsp;
     filepos fpos, fpos2;
@@ -221,6 +222,14 @@ static void do_error(int code, va_list ap) {
 		" node names; removing", c);
 	flags = FILEPOS;
 	break;
+      case err_text_codeline:
+	fpos = *va_arg(ap, filepos *);
+	i = va_arg(ap, int);
+	j = va_arg(ap, int);
+	sprintf(error, "warning: code paragraph line is %d chars wide, wider"
+		" than body width %d", i, j);
+	flags = FILEPOS;
+	break;
       case err_whatever:
 	sp = va_arg(ap, char *);
         vsprintf(error, sp, ap);
diff --git a/halibut.h b/halibut.h
index 3205e46..6a0e944 100644
--- a/halibut.h
+++ b/halibut.h
@@ -237,6 +237,7 @@ enum {
     err_sectmarkerinblock,	       /* section marker appeared in block */
     err_infodirentry,		       /* \cfg{info-dir-entry} missing param */
     err_infonodechar,		       /* colon/comma in node name in info */
+    err_text_codeline,		       /* \c line too long in text backend */
     err_whatever                       /* random error of another type */
 };
 
diff --git a/input.c b/input.c
index 1187e94..7531d62 100644
--- a/input.c
+++ b/input.c
@@ -904,11 +904,16 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 			   t.type == tok_word || 
 			   t.type == tok_white ||
 			   (t.type == tok_cmd && t.cmd == c__nbsp) ||
-			   (t.type == tok_cmd && t.cmd == c__escaped)) {
+			   (t.type == tok_cmd && t.cmd == c__escaped) ||
+			   (t.type == tok_cmd && t.cmd == c_u)) {
 			if (t.type == tok_white ||
 			    (t.type == tok_cmd && t.cmd == c__nbsp)) {
 			    rdadd(&rs, ' ');
 			    rdaddc(&rsc, ' ');
+			} else if (t.type == tok_cmd && t.cmd == c_u) {
+			    rdadd(&rs, t.aux);
+			    rdaddc(&rsc, '\\');
+			    rdaddsc(&rsc, t.origtext);
 			} else {
 			    rdadds(&rs, t.text);
 			    rdaddsc(&rsc, t.origtext);
-- 
2.11.0