Infrastructure changes for character set support. ustrtoa,

author simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Tue, 20 Apr 2004 17:50:41 +0000 (17:50 +0000)

committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>

Tue, 20 Apr 2004 17:50:41 +0000 (17:50 +0000)
author simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Tue, 20 Apr 2004 17:50:41 +0000 (17:50 +0000)
committer simon <simon@cda61777-01e9-0310-a592-d414129be87e>
Tue, 20 Apr 2004 17:50:41 +0000 (17:50 +0000)
diff --git a/bk_info.c b/bk_info.c

index f7f83fb..fc1b292 100644 (file)
--- a/bk_info.c
+++ b/bk_info.c
@@ -80,7 +80,7 @@ static infoconfig info_configure(paragraph *source) {
         if (source->type == para_Config) {
             if (!ustricmp(source->keyword, L"info-filename")) {
                 sfree(ret.filename);
-               ret.filename = utoa_dup(uadv(source->keyword));
+               ret.filename = dupstr(adv(source->origkeyword));
             } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
                 ret.maxfilesize = utoi(uadv(source->keyword));
             }
@@ -92,30 +92,7 @@ static infoconfig info_configure(paragraph *source) {
  
  paragraph *info_config_filename(char *filename)
  {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"info-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"info-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("info-filename", filename, NULL);
  }
  
  void info_backend(paragraph *sourceform, keywordlist *keywords,
@@ -235,11 +212,11 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
             }
  
             rdaddsc(&intro_text, "INFO-DIR-SECTION ");
-           s = utoa_dup(section);
+           s = utoa_dup(section, CS_FIXME);
             rdaddsc(&intro_text, s);
             sfree(s);
             rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
-           s = utoa_dup(shortname);
+           s = utoa_dup(shortname, CS_FIXME);
             rdaddsc(&intro_text, s);
             sfree(s);
             rdaddsc(&intro_text, ": (");
@@ -257,7 +234,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
                 }
             }
             rdaddsc(&intro_text, ".   ");
-           s = utoa_dup(longname);
+           s = utoa_dup(longname, CS_FIXME);
             rdaddsc(&intro_text, s);
             sfree(s);
             rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
diff --git a/bk_man.c b/bk_man.c

index d03f6b7..0854e58 100644 (file)
--- a/bk_man.c
+++ b/bk_man.c
@@ -48,7 +48,7 @@ static manconfig man_configure(paragraph *source) {
                 ret.mindepth = utoi(uadv(source->keyword));
             } else if (!ustricmp(source->keyword, L"man-filename")) {
                 sfree(ret.filename);
-               ret.filename = utoa_dup(uadv(source->keyword));
+               ret.filename = dupstr(adv(source->origkeyword));
             }
         }
      }
@@ -64,30 +64,7 @@ static void man_conf_cleanup(manconfig cf)
  
  paragraph *man_config_filename(char *filename)
  {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"man-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"man-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("man-filename", filename, NULL);
  }
  
  #define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */
diff --git a/bk_paper.c b/bk_paper.c

index e1721ae..81050cb 100644 (file)
--- a/bk_paper.c
+++ b/bk_paper.c
@@ -510,7 +510,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
  
         for (page = pages; page; page = page->next) {
             sprintf(buf, "%d", ++pagenum);
-           page->number = ufroma_dup(buf);
+           page->number = ufroma_dup(buf, CS_ASCII);
         }
  
         if (has_index) {
@@ -524,7 +524,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
  
             /* And don't forget the as-yet-uncreated index. */
             sprintf(buf, "%d", ++pagenum);
-           first_index_page->number = ufroma_dup(buf);
+           first_index_page->number = ufroma_dup(buf, CS_ASCII);
         }
      }
  
@@ -683,7 +683,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
         for (page = ipages->next; page; page = page->next) {
             char buf[40];
             sprintf(buf, "%d", ++pagenum);
-           page->number = ufroma_dup(buf);
+           page->number = ufroma_dup(buf, CS_ASCII);
         }
  
         /*
@@ -1682,7 +1682,7 @@ static int render_text(page_data *page, para_data *pdata, line_data *ldata,
  
             if (text->type == word_HyperLink) {
                 dest.type = URL;
-               dest.url = utoa_dup(text->text);
+               dest.url = utoa_dup(text->text, CS_ASCII);
                 dest.page = NULL;
             } else if (text->type == word_PageXref) {
                 dest.type = PAGE;
diff --git a/bk_pdf.c b/bk_pdf.c

index f6babe5..e9904e7 100644 (file)
--- a/bk_pdf.c
+++ b/bk_pdf.c
@@ -10,30 +10,7 @@
  
  paragraph *pdf_config_filename(char *filename)
  {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"pdf-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"pdf-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("pdf-filename", filename, NULL);
  }
  
  typedef struct object_Tag object;
@@ -88,7 +65,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords,
         if (p->type == para_Config && p->parent) {
             if (!ustricmp(p->keyword, L"pdf-filename")) {
                 sfree(filename);
-               filename = utoa_dup(uadv(p->keyword));
+               filename = dupstr(adv(p->origkeyword));
             }
         }
      }
@@ -742,7 +719,7 @@ static int pdf_versionid(FILE *fp, word *words)
  
         switch (type) {
           case word_Normal:
-           text = utoa_dup(words->text);
+           text = utoa_dup(words->text, CS_ASCII);
             break;
           case word_WhiteSpace:
             text = dupstr(" ");
diff --git a/bk_ps.c b/bk_ps.c

index 7a59ac0..358571e 100644 (file)
--- a/bk_ps.c
+++ b/bk_ps.c
@@ -10,30 +10,7 @@ static void ps_versionid(FILE *fp, word *words);
  
  paragraph *ps_config_filename(char *filename)
  {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"ps-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"ps-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("ps-filename", filename, NULL);
  }
  
  void ps_backend(paragraph *sourceform, keywordlist *keywords,
@@ -55,7 +32,7 @@ void ps_backend(paragraph *sourceform, keywordlist *keywords,
         if (p->type == para_Config && p->parent) {
             if (!ustricmp(p->keyword, L"ps-filename")) {
                 sfree(filename);
-               filename = utoa_dup(uadv(p->keyword));
+               filename = dupstr(adv(p->origkeyword));
             }
         }
      }
@@ -247,7 +224,7 @@ static void ps_versionid(FILE *fp, word *words)
  
         switch (type) {
           case word_Normal:
-           text = utoa_dup(words->text);
+           text = utoa_dup(words->text, CS_ASCII);
             break;
           case word_WhiteSpace:
             text = dupstr(" ");
diff --git a/bk_text.c b/bk_text.c

index 499f232..ce4492d 100644 (file)
--- a/bk_text.c
+++ b/bk_text.c
@@ -85,7 +85,7 @@ static textconfig text_configure(paragraph *source) {
                 ret.indent = utoi(uadv(source->keyword));
             } else if (!ustricmp(source->keyword, L"text-filename")) {
                 sfree(ret.filename);
-               ret.filename = utoa_dup(uadv(source->keyword));
+               ret.filename = dupstr(adv(source->origkeyword));
             } else if (!ustricmp(source->keyword, L"text-indent-code")) {
                 ret.indent_code = utoi(uadv(source->keyword));
             } else if (!ustricmp(source->keyword, L"text-width")) {
@@ -182,30 +182,7 @@ static textconfig text_configure(paragraph *source) {
  
  paragraph *text_config_filename(char *filename)
  {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"text-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"text-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("text-filename", filename, NULL);
  }
  
  void text_backend(paragraph *sourceform, keywordlist *keywords,
diff --git a/bk_whlp.c b/bk_whlp.c

index 7738c71..c8a6524 100644 (file)
--- a/bk_whlp.c
+++ b/bk_whlp.c
@@ -45,30 +45,7 @@ static void whlp_contents_write(struct bk_whlp_state *state,
      
  paragraph *whlp_config_filename(char *filename)
  {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"winhelp-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"winhelp-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("winhelp-filename", filename, NULL);
  }
  
  void whlp_backend(paragraph *sourceform, keywordlist *keywords,
@@ -129,7 +106,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
                 p->parent->private_data = topicname;
             } else if (!ustricmp(p->keyword, L"winhelp-filename")) {
                 sfree(filename);
-               filename = utoa_dup(uadv(p->keyword));
+               filename = dupstr(adv(p->origkeyword));
             }
         }
      }
@@ -152,7 +129,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
             filename = newf;
             len = strlen(newf);
         }
-       cntname = mknewa(char, len);
+       cntname = mknewa(char, len+1);
         sprintf(cntname, "%.*s.cnt", len-4, filename);
      }
  
@@ -671,7 +648,7 @@ static void whlp_rdaddwc(rdstringc *rs, word *text) {
         assert(text->type != word_CodeQuote &&
                text->type != word_WkCodeQuote);
         if (removeattr(text->type) == word_Normal) {
-           if (whlp_convert(text->text, 0, &c, FALSE))
+           if (whlp_convert(text->text, 0, &c, FALSE) || !text->alt)
                 rdaddsc(rs, c);
             else
                 whlp_rdaddwc(rs, text->alt);
diff --git a/bk_xhtml.c b/bk_xhtml.c

index 4b9b98c..4d5069a 100644 (file)
--- a/bk_xhtml.c
+++ b/bk_xhtml.c
@@ -192,19 +192,19 @@ static xhtmlconfig xhtml_configure(paragraph *source)
      {
        if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
         sfree(ret.contents_filename);
-       ret.contents_filename = utoa_dup(uadv(source->keyword));
+       ret.contents_filename = dupstr(adv(source->origkeyword));
        } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
         sfree(ret.single_filename);
-       ret.single_filename = utoa_dup(uadv(source->keyword));
+       ret.single_filename = dupstr(adv(source->origkeyword));
        } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
         sfree(ret.index_filename);
-       ret.index_filename = utoa_dup(uadv(source->keyword));
+       ret.index_filename = dupstr(adv(source->origkeyword));
        } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
         sfree(ret.template_filename);
-       ret.template_filename = utoa_dup(uadv(source->keyword));
+       ret.template_filename = dupstr(adv(source->origkeyword));
        } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
         sfree(ret.template_fragment);
-       ret.template_fragment = utoa_dup(uadv(source->keyword));
+       ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII);
        } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
          ret.contents_depth[0] = utoi(uadv(source->keyword));
        } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
@@ -304,45 +304,12 @@ paragraph *xhtml_config_filename(char *filename)
       * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
       * wants their output _in that file_.
       */
+    paragraph *p, *q;
  
-    paragraph *p[2];
-    int i, len;
-    wchar_t *ufilename, *up;
-
-    for (i = 0; i < 2; i++) {
-       p[i] = mknew(paragraph);
-       memset(p[i], 0, sizeof(*p[i]));
-       p[i]->type = para_Config;
-       p[i]->next = NULL;
-       p[i]->fpos.filename = "<command line>";
-       p[i]->fpos.line = p[i]->fpos.col = -1;
-    }
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
-    p[0]->keyword = mknewa(wchar_t, len);
-    up = p[0]->keyword;
-    ustrcpy(up, L"xhtml-single-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p[0]->keyword < len);
-    sfree(ufilename);
-
-    len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
-    p[1]->keyword = mknewa(wchar_t, len);
-    up = p[1]->keyword;
-    ustrcpy(up, L"xhtml-leaf-level");
-    up = uadv(up);
-    ustrcpy(up, L"0");
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p[1]->keyword < len);
-
-    p[0]->next = p[1];
-
-    return p[0];
+    p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL);
+    q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL);
+    p->next = q;
+    return p;
  }
  
  static xhtmlsection *xhtml_new_section(xhtmlsection *last)
diff --git a/error.c b/error.c

index 5f5cbda..abd8d61 100644 (file)
--- a/error.c
+++ b/error.c
@@ -82,7 +82,7 @@ static void do_error(int code, va_list ap) {
         break;
        case err_badparatype:
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         fpos = *va_arg(ap, filepos *);
         sprintf(error, "command `%.200s' unrecognised at start of"
                 " paragraph", sp);
@@ -90,7 +90,7 @@ static void do_error(int code, va_list ap) {
         break;
        case err_badmidcmd:
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         fpos = *va_arg(ap, filepos *);
         sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
         flags = FILEPOS;
@@ -138,20 +138,20 @@ static void do_error(int code, va_list ap) {
        case err_nosuchkw:
         fpos = *va_arg(ap, filepos *);
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
         flags = FILEPOS;
         break;
        case err_multiBR:
         fpos = *va_arg(ap, filepos *);
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
         flags = FILEPOS;
         break;
        case err_nosuchidxtag:
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
         flags = 0;
         /* FIXME: need to get a filepos to here somehow */
@@ -164,7 +164,7 @@ static void do_error(int code, va_list ap) {
        case err_macroexists:
         fpos = *va_arg(ap, filepos *);
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         sprintf(error, "macro `%.200s' already defined", sp);
         flags = FILEPOS;
         break;
@@ -185,7 +185,7 @@ static void do_error(int code, va_list ap) {
         fpos = *va_arg(ap, filepos *);
         fpos2 = *va_arg(ap, filepos *);
         wsp = va_arg(ap, wchar_t *);
-       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+       sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
         sprintf(error, "paragraph keyword `%.200s' already defined at ", sp);
         sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line);
         flags = FILEPOS;
diff --git a/halibut.h b/halibut.h

index 9aa2c59..94493d4 100644 (file)
--- a/halibut.h
+++ b/halibut.h
@@ -27,6 +27,12 @@
  #include "tree234.h"
  
  /*
+ * FIXME: Charset temporary workarounds
+ */
+#define CS_FIXME CS_ISO8859_1
+#define CS_LOCAL CS_ISO8859_1
+
+/*
   * Structure tags
   */
  typedef struct input_Tag input;
@@ -72,6 +78,7 @@ struct input_Tag {
      charset_state csstate;
      wchar_t wc[16];                   /* wide chars from input conversion */
      int nwc, wcpos;                   /* size of, and position in, wc[] */
+    char *pushback_chars;             /* used to save input-encoding data */
  };
  
  /*
@@ -82,6 +89,7 @@ struct paragraph_Tag {
      paragraph *next;
      int type;
      wchar_t *keyword;                 /* for most special paragraphs */
+    char *origkeyword;                /* same again in original charset */
      word *words;                      /* list of words in paragraph */
      int aux;                          /* number, in a numbered paragraph
                                          * or subsection level
@@ -266,11 +274,14 @@ char *dupstr(char *s);
  /*
   * ustring.c
   */
-wchar_t *ustrdup(wchar_t *s);
-char *ustrtoa(wchar_t *s, char *outbuf, int size);
-wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size);
-char *utoa_dup(wchar_t *s);
-wchar_t *ufroma_dup(char *s);
+wchar_t *ustrdup(wchar_t const *s);
+char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset);
+char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset);
+wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset);
+char *utoa_dup(wchar_t const *s, int charset);
+char *utoa_dup_len(wchar_t const *s, int charset, int *len);
+char *utoa_careful_dup(wchar_t const *s, int charset);
+wchar_t *ufroma_dup(char const *s, int charset);
  int ustrlen(wchar_t const *s);
  wchar_t *uadv(wchar_t *s);
  wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
@@ -304,6 +315,8 @@ const char *const version;
  /*
   * misc.c
   */
+char *adv(char *s);
+
  typedef struct stackTag *stack;
  stack stk_new(void);
  void stk_free(stack);
@@ -343,6 +356,9 @@ struct tagWrappedLine {
  };
  wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int);
  void wrap_free(wrappedline *);
+void cmdline_cfg_add(paragraph *cfg, char *string);
+paragraph *cmdline_cfg_new(void);
+paragraph *cmdline_cfg_simple(char *string, ...);
  
  /*
   * input.c
diff --git a/input.c b/input.c

index d607e86..1187e94 100644 (file)
--- a/input.c
+++ b/input.c
@@ -86,7 +86,7 @@ static void input_configure(input *in, paragraph *cfg) {
      assert(cfg->type == para_Config);
  
      if (!ustricmp(cfg->keyword, L"input-charset")) {
-       char *csname = utoa_dup(uadv(cfg->keyword));
+       char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
         in->charset = charset_from_localenc(csname);
         sfree(csname);
      }
@@ -95,7 +95,7 @@ static void input_configure(input *in, paragraph *cfg) {
  /*
   * Can return EOF
   */
-static int get(input *in, filepos *pos) {
+static int get(input *in, filepos *pos, rdstringc *rsc) {
      int pushbackpt = in->stack ? in->stack->npushback : 0;
      if (in->npushback > pushbackpt) {
         --in->npushback;
@@ -123,6 +123,10 @@ static int get(input *in, filepos *pos) {
                 in->currfp = NULL;
                 return EOF;
             }
+
+           if (rsc)
+               rdaddc(rsc, c);
+
             /* Track line numbers, for error reporting */
             if (pos)
                 *pos = in->pos;
@@ -182,6 +186,7 @@ struct token_Tag {
      int type;
      int cmd, aux;
      wchar_t *text;
+    char *origtext;
      filepos pos;
  };
  enum {
@@ -373,31 +378,48 @@ static void match_kw(token *tok) {
  token get_token(input *in) {
      int c;
      int nls;
+    int prevpos;
      token ret;
      rdstring rs = { 0, 0, NULL };
+    rdstringc rsc = { 0, 0, NULL };
      filepos cpos;
  
      ret.text = NULL;                  /* default */
-    c = get(in, &cpos);
+    ret.origtext = NULL;              /* default */
+    if (in->pushback_chars) {
+       rdaddsc(&rsc, in->pushback_chars);
+       sfree(in->pushback_chars);
+       in->pushback_chars = NULL;
+    }
+    c = get(in, &cpos, &rsc);
      ret.pos = cpos;
      if (iswhite(c)) {                 /* tok_white or tok_eop */
         nls = 0;
+       prevpos = 0;
         do {
             if (isnl(c))
                 nls++;
-       } while ((c = get(in, &cpos)) != EOF && iswhite(c));
+           prevpos = rsc.pos;
+       } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
         if (c == EOF) {
             ret.type = tok_eof;
+           sfree(rsc.text);
             return ret;
         }
+       if (rsc.text) {
+           in->pushback_chars = dupstr(rsc.text + prevpos);
+           sfree(rsc.text);
+       }
         unget(in, c, &cpos);
         ret.type = (nls > 1 ? tok_eop : tok_white);
         return ret;
      } else if (c == EOF) {            /* tok_eof */
         ret.type = tok_eof;
+       sfree(rsc.text);
         return ret;
      } else if (c == '\\') {           /* tok_cmd */
-       c = get(in, &cpos);
+       rsc.pos = prevpos = 0;
+       c = get(in, &cpos, &rsc);
         if (c == '-' || c == '\\' || c == '_' ||
             c == '#' || c == '{' || c == '}' || c == '.') {
             /* single-char command */
@@ -407,13 +429,15 @@ token get_token(input *in) {
             do {
                 rdadd(&rs, c);
                 len++;
-               c = get(in, &cpos);
+               prevpos = rsc.pos;
+               c = get(in, &cpos, &rsc);
             } while (ishex(c) && len < 5);
             unget(in, c, &cpos);
         } else if (iscmd(c)) {
             do {
                 rdadd(&rs, c);
-               c = get(in, &cpos);
+               prevpos = rsc.pos;
+               c = get(in, &cpos, &rsc);
             } while (iscmd(c));
             unget(in, c, &cpos);
         }
@@ -423,14 +447,24 @@ token get_token(input *in) {
          */
         ret.type = tok_cmd;
         ret.text = ustrdup(rs.text);
+       if (rsc.text) {
+           in->pushback_chars = dupstr(rsc.text + prevpos);
+           rsc.text[prevpos] = '\0';
+           ret.origtext = dupstr(rsc.text);
+       } else {
+           ret.origtext = dupstr("");
+       }
         match_kw(&ret);
         sfree(rs.text);
+       sfree(rsc.text);
         return ret;
      } else if (c == '{') {            /* tok_lbrace */
         ret.type = tok_lbrace;
+       sfree(rsc.text);
         return ret;
      } else if (c == '}') {            /* tok_rbrace */
         ret.type = tok_rbrace;
+       sfree(rsc.text);
         return ret;
      } else {                          /* tok_word */
         /*
@@ -442,6 +476,7 @@ token get_token(input *in) {
          * a hyphen.
          */
         ret.aux = FALSE;               /* assumed for now */
+       prevpos = 0;
         while (1) {
             if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
                 /* Put back the character that caused termination */
@@ -450,15 +485,25 @@ token get_token(input *in) {
             } else {
                 rdadd(&rs, c);
                 if (c == '-') {
+                   prevpos = rsc.pos;
                     ret.aux = TRUE;
                     break;             /* hyphen terminates word */
                 }
             }
-           c = get(in, &cpos);
+           prevpos = rsc.pos;
+           c = get(in, &cpos, &rsc);
         }
         ret.type = tok_word;
         ret.text = ustrdup(rs.text);
+       if (rsc.text) {
+           in->pushback_chars = dupstr(rsc.text + prevpos);
+           rsc.text[prevpos] = '\0';
+           ret.origtext = dupstr(rsc.text);
+       } else {
+           ret.origtext = dupstr("");
+       }
         sfree(rs.text);
+       sfree(rsc.text);
         return ret;
      }
  }
@@ -472,7 +517,7 @@ int isbrace(input *in) {
      int c;
      filepos cpos;
  
-    c = get(in, &cpos);
+    c = get(in, &cpos, NULL);
      unget(in, c, &cpos);
      return (c == '{');
  }
@@ -488,15 +533,16 @@ token get_codepar_token(input *in) {
      filepos cpos;
  
      ret.type = tok_word;
-    c = get(in, &cpos);                       /* expect (and discard) one space */
+    ret.origtext = NULL;
+    c = get(in, &cpos, NULL);         /* expect (and discard) one space */
      ret.pos = cpos;
      if (c == ' ') {
-       c = get(in, &cpos);
+       c = get(in, &cpos, NULL);
         ret.pos = cpos;
      }
      while (!isnl(c) && c != EOF) {
         int c2 = c;
-       c = get(in, &cpos);
+       c = get(in, &cpos, NULL);
         /* Discard \r just before \n. */
         if (c2 != 13 || !isnl(c))
             rdadd(&rs, c2);
@@ -538,7 +584,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
   * Destructor before token is reassigned; should catch most memory
   * leaks
   */
-#define dtor(t) ( sfree(t.text) )
+#define dtor(t) ( sfree(t.text), sfree(t.origtext) )
  
  /*
   * Reads a single file (ie until get() returns EOF)
@@ -581,6 +627,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
      wchar_t uchr;
  
      t.text = NULL;
+    t.origtext = NULL;
      macros = newtree234(macrocmp);
      already = FALSE;
  
@@ -593,6 +640,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
         int start_cmd = c__invalid;
         par.words = NULL;
         par.keyword = NULL;
+       par.origkeyword = NULL;
         whptr = &par.words;
  
         /*
@@ -840,6 +888,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
  
             if (needkw > 0) {
                 rdstring rs = { 0, 0, NULL };
+               rdstringc rsc = { 0, 0, NULL };
                 int nkeys = 0;
                 filepos fp;
  
@@ -857,20 +906,25 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                            (t.type == tok_cmd && t.cmd == c__nbsp) ||
                            (t.type == tok_cmd && t.cmd == c__escaped)) {
                         if (t.type == tok_white ||
-                           (t.type == tok_cmd && t.cmd == c__nbsp))
+                           (t.type == tok_cmd && t.cmd == c__nbsp)) {
                             rdadd(&rs, ' ');
-                       else
+                           rdaddc(&rsc, ' ');
+                       } else {
                             rdadds(&rs, t.text);
+                           rdaddsc(&rsc, t.origtext);
+                       }
                     }
                     if (t.type != tok_rbrace) {
                         error(err_kwunclosed, &t.pos);
                         continue;
                     }
                     rdadd(&rs, 0);     /* add string terminator */
+                   rdaddc(&rsc, 0);   /* add string terminator */
                     dtor(t), t = get_token(in); /* eat right brace */
                 }
  
-               rdadd(&rs, 0);     /* add string terminator */
+               rdadd(&rs, 0);         /* add string terminator */
+               rdaddc(&rsc, 0);       /* add string terminator */
  
                 /* See whether we have the right number of keywords. */
                 if ((needkw & 48) && nkeys > 0)
@@ -901,6 +955,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
                 }
  
                 par.keyword = rdtrim(&rs);
+               par.origkeyword = rdtrimc(&rsc);
  
                 /* Move to EOP in case of needkw==8 or 16 (no body) */
                 if (needkw & 24) {
@@ -1464,6 +1519,8 @@ paragraph *read_input(input *in, indexdata *idx) {
             setpos(in, in->filenames[in->currindex]);
             in->charset = in->defcharset;
             in->csstate = charset_init_state;
+           in->wcpos = in->nwc = 0;
+           in->pushback_chars = NULL;
             read_file(&hptr, in, idx);
         }
         in->currindex++;
diff --git a/main.c b/main.c

index 182cbec..d18166e 100644 (file)
--- a/main.c
+++ b/main.c
@@ -181,36 +181,26 @@ int main(int argc, char **argv) {
                         * into a config paragraph.
                         */
                        {
-                           wchar_t *keywords;
-                           char *q;
-                           wchar_t *u;
+                           char *s = dupstr(p), *q, *r;
                            paragraph *para;
  
-                           keywords = mknewa(wchar_t, 2+strlen(p));
-
-                           u = keywords;
-                           q = p;
+                           para = cmdline_cfg_new();
  
+                           q = r = s;
                            while (*q) {
                                if (*q == ':') {
-                                   *u++ = L'\0';
+                                   *r = '\0';
+                                   cmdline_cfg_add(para, s);
+                                   r = s;
                                } else {
                                    if (*q == '\\' && q[1])
                                        q++;
-                                   /* FIXME: lacks charset flexibility */
-                                   *u++ = *q;
+                                   *r++ = *q;
                                }
                                q++;
                            }
-                           *u = L'\0';
-
-                           para = mknew(paragraph);
-                           memset(para, 0, sizeof(*para));
-                           para->type = para_Config;
-                           para->keyword = keywords;
-                           para->next = NULL;
-                           para->fpos.filename = "<command line>";
-                           para->fpos.line = para->fpos.col = -1;
+                           *r = '\0';  /* terminate the last component: s still holds stale bytes from earlier, longer components */
+                           cmdline_cfg_add(para, s);
  
                            if (cfg_tail)
                                cfg_tail->next = para;
diff --git a/misc.c b/misc.c

index 6f4ddd4..304cb1f 100644 (file)
--- a/misc.c
+++ b/misc.c
@@ -2,8 +2,13 @@
   * misc.c: miscellaneous useful items
   */
  
+#include <stdarg.h>
  #include "halibut.h"
  
+/*
+ * adv: step over one NUL-terminated string and its terminator,
+ * returning a pointer to the byte immediately after that NUL.
+ */
+char *adv(char *s) {
+    size_t skip = strlen(s) + 1;      /* the string plus its NUL */
+
+    return s + skip;
+}
+
  struct stackTag {
      void **data;
      int sp;
@@ -479,3 +484,65 @@ void wrap_free(wrappedline *w) {
         w = t;
      }
  }
+
+/*
+ * cmdline_cfg_add: append `string' as one more component of a
+ * command-line configuration paragraph.
+ *
+ * cfg->keyword (wide) and cfg->origkeyword (narrow) each hold a run
+ * of NUL-terminated strings ended by an empty string. The new
+ * component is appended to both, and both buffers are resized to fit.
+ */
+void cmdline_cfg_add(paragraph *cfg, char *string)
+{
+    wchar_t *wide;
+    int uend, unew, end, newlen;
+
+    /* Locate the empty string that ends each existing list. */
+    for (uend = 0; cfg->keyword[uend]; )
+       uend += 1 + ustrlen(cfg->keyword + uend);
+    for (end = 0; cfg->origkeyword[end]; )
+       end += 1 + strlen(cfg->origkeyword + end);
+
+    wide = ufroma_dup(string, CS_FIXME);
+
+    /* Two extra slots: the component's own NUL and the list-ending NUL. */
+    unew = uend + 2 + ustrlen(wide);
+    cfg->keyword = resize(cfg->keyword, unew);
+    ustrcpy(cfg->keyword + uend, wide);
+    cfg->keyword[unew - 1] = L'\0';
+
+    /* Same again for the narrow copy. */
+    newlen = end + 2 + strlen(string);
+    cfg->origkeyword = resize(cfg->origkeyword, newlen);
+    strcpy(cfg->origkeyword + end, string);
+    cfg->origkeyword[newlen - 1] = '\0';
+
+    sfree(wide);
+}
+
+/*
+ * cmdline_cfg_new: allocate a zeroed para_Config paragraph whose
+ * file position reads "<command line>" with line and column -1, and
+ * whose keyword and origkeyword lists both start out empty.
+ */
+paragraph *cmdline_cfg_new(void)
+{
+    paragraph *cfg = mknew(paragraph);
+
+    memset(cfg, 0, sizeof(*cfg));
+
+    cfg->type = para_Config;
+    cfg->next = NULL;
+
+    /* Attribute the paragraph to the command line, not to a file. */
+    cfg->fpos.filename = "<command line>";
+    cfg->fpos.line = cfg->fpos.col = -1;
+
+    /* Empty lists: just the terminating empty string in each. */
+    cfg->keyword = ustrdup(L"\0");
+    cfg->origkeyword = dupstr("\0");
+
+    return cfg;
+}
+
+/*
+ * cmdline_cfg_simple: build a command-line configuration paragraph
+ * from `string' followed by further char * arguments. The argument
+ * list must be terminated by a NULL pointer.
+ */
+paragraph *cmdline_cfg_simple(char *string, ...)
+{
+    paragraph *cfg = cmdline_cfg_new();
+    va_list args;
+    char *component;
+
+    cmdline_cfg_add(cfg, string);
+
+    va_start(args, string);
+    for (component = va_arg(args, char *);
+        component != NULL;
+        component = va_arg(args, char *))
+       cmdline_cfg_add(cfg, component);
+    va_end(args);
+
+    return cfg;
+}
diff --git a/ustring.c b/ustring.c

index 51c279b..169a377 100644 (file)
--- a/ustring.c
+++ b/ustring.c
@@ -6,7 +6,7 @@
  #include <time.h>
  #include "halibut.h"
  
-wchar_t *ustrdup(wchar_t *s) {
+wchar_t *ustrdup(wchar_t const *s) {
      wchar_t *r;
      if (s) {
         r = mknewa(wchar_t, 1+ustrlen(s));
@@ -18,59 +18,145 @@ wchar_t *ustrdup(wchar_t *s) {
      return r;
  }
  
-char *ustrtoa(wchar_t *s, char *outbuf, int size) {
-    char *p;
+/*
+ * ustrtoa_internal: convert the wide string `s' into `charset',
+ * writing at most `size' bytes (terminating NUL included) to `outbuf'.
+ *
+ * A NULL `s' stores an empty string and returns `outbuf'. When
+ * `careful' is nonzero an error flag is handed to
+ * charset_from_unicode(), and NULL is returned as soon as that flag
+ * is set. A zero return from charset_from_unicode() ends the
+ * conversion early (presumably the output is full -- confirm against
+ * the charset library).
+ *
+ * NOTE(review): `outbuf' is advanced as output is produced, so on the
+ * non-NULL-input paths the value returned points at the terminating
+ * NUL just written, not at the start of the buffer as the removed
+ * implementation did. Confirm no caller relies on the old behaviour.
+ */
+static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
+                             int charset, int careful) {
+    int len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
+
      if (!s) {
         *outbuf = '\0';
         return outbuf;
      }
-    for (p = outbuf; *s && p < outbuf+size; p++,s++)
-       *p = *s;
-    if (p < outbuf+size)
-       *p = '\0';
-    else
-       outbuf[size-1] = '\0';
+
+    len = ustrlen(s);
+    size--;                           /* leave room for terminating NUL */
+    *outbuf = '\0';
+    /* Keep the output NUL-terminated after every partial conversion. */
+    while (len > 0) {
+       err = 0;
+       ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state,
+                                  (careful ? &err : NULL));
+       if (err)
+           return NULL;
+       if (!ret)
+           return outbuf;
+       size -= ret;
+       outbuf += ret;
+       *outbuf = '\0';
+    }
+    /*
+     * Clean up: one final call with no input (presumably lets the
+     * encoder emit any closing sequence held in `state').
+     */
+    ret = charset_from_unicode(NULL, 0, outbuf, size, charset, &state, NULL);
+    size -= ret;
+    outbuf += ret;
+    *outbuf = '\0';
      return outbuf;
  }
  
-wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) {
-    wchar_t *p;
+/*
+ * ustrtoa: convert `s' into `charset' in the caller's buffer, without
+ * asking ustrtoa_internal() to report conversion errors.
+ */
+char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
+    char *result;
+
+    result = ustrtoa_internal(s, outbuf, size, charset, FALSE);
+    return result;
+}
+
+/*
+ * ustrtoa_careful: as ustrtoa(), but in careful mode, where
+ * ustrtoa_internal() returns NULL if the conversion reports an error.
+ */
+char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
+    char *result;
+
+    result = ustrtoa_internal(s, outbuf, size, charset, TRUE);
+    return result;
+}
+
+/*
+ * ustrfroma: convert the NUL-terminated string `s', encoded in
+ * `charset', to wide characters in `outbuf', which has room for
+ * `size' wchar_t including the terminating L'\0'.
+ *
+ * A NULL `s' stores an empty wide string and returns `outbuf'. A zero
+ * return from charset_to_unicode() ends the conversion early
+ * (presumably the output is full -- confirm against the charset
+ * library).
+ *
+ * NOTE(review): `outbuf' is advanced as output is produced, so for
+ * non-NULL input the returned pointer addresses the terminating
+ * L'\0', not the start of the buffer.
+ */
+wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {
+    int len, ret;
+    charset_state state = CHARSET_INIT_STATE;
+
      if (!s) {
         *outbuf = L'\0';
         return outbuf;
      }
-    for (p = outbuf; *s && p < outbuf+size; p++,s++)
-       *p = *s;
-    if (p < outbuf+size)
-       *p = '\0';
-    else
-       outbuf[size-1] = '\0';
+
+    len = strlen(s);
+    size--;                           /* allow for terminating NUL */
+    *outbuf = L'\0';
+    /* Keep the output terminated after every partial conversion. */
+    while (len > 0) {
+       ret = charset_to_unicode(&s, &len, outbuf, size,
+                                charset, &state, NULL, 0);
+       if (!ret)
+           return outbuf;
+       outbuf += ret;
+       size -= ret;
+       *outbuf = L'\0';
+    }
      return outbuf;
  }
  
-char *utoa_dup(wchar_t *s) {
-    int len;
-    char *buf = NULL;
+/*
+ * utoa_internal_dup: convert the wide string `s' into `charset' and
+ * return the result in a newly allocated, NUL-terminated buffer.
+ *
+ *  - A NULL `s' yields a freshly duplicated empty string.
+ *  - If `lenp' is non-NULL it receives the number of bytes produced,
+ *    not counting the terminating NUL (0 for a NULL `s').
+ *  - If `careful' is nonzero an error flag is handed to
+ *    charset_from_unicode(); when it is set the buffer is freed and
+ *    NULL is returned.
+ *
+ * The caller owns the returned buffer (presumably released with
+ * sfree() -- confirm against the allocator's conventions).
+ */
+char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
+{
+    char *outbuf;
+    int outpos, outlen, len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
  
-    len = ustrlen(s) + 1;
-    do {
-       buf = resize(buf, len);
-       ustrtoa(s, buf, len);
-       len = (3 * len) / 2 + 1;       /* this guarantees a strict increase */
-    } while ((int)strlen(buf) >= len-1);
+    if (!s) {
+       if (lenp)
+           *lenp = 0;                 /* keep the length output defined */
+       return dupstr("");
+    }
  
-    buf = resize(buf, strlen(buf)+1);
-    return buf;
+    len = ustrlen(s);
+
+    outlen = len + 10;
+    outbuf = mknewa(char, outlen);
+
+    outpos = 0;
+    outbuf[outpos] = '\0';
+
+    while (len > 0) {
+       err = 0;
+       /* One byte is always held back for the terminating NUL. */
+       ret = charset_from_unicode(&s, &len,
+                                  outbuf + outpos, outlen - outpos - 1,
+                                  charset, &state, (careful ? &err : NULL));
+       if (err) {
+           sfree(outbuf);
+           return NULL;
+       }
+       if (!ret) {
+           /* Nothing was written: enlarge the buffer and try again. */
+           outlen = outlen * 3 / 2;
+           outbuf = resize(outbuf, outlen);
+       }
+       outpos += ret;
+       outbuf[outpos] = '\0';
+    }
+    /*
+     * Clean up: one final call with no input (presumably lets the
+     * encoder emit any closing sequence held in `state'). As in the
+     * loop above, the space offered excludes the byte reserved for
+     * the terminating NUL, so neither the conversion nor the NUL
+     * store below can run past the end of the buffer.
+     */
+    outlen = outpos + 32;
+    outbuf = resize(outbuf, outlen);
+    ret = charset_from_unicode(NULL, 0,
+                              outbuf + outpos, outlen - outpos - 1,
+                              charset, &state, NULL);
+    outpos += ret;
+    outbuf[outpos] = '\0';
+    if (lenp)
+       *lenp = outpos;
+    return outbuf;
  }
  
-wchar_t *ufroma_dup(char *s) {
+/*
+ * utoa_dup: convert `s' into `charset' in a newly allocated buffer,
+ * without careful mode and without reporting the output length.
+ */
+char *utoa_dup(wchar_t const *s, int charset)
+{
+    int *const no_length = NULL;
+
+    return utoa_internal_dup(s, charset, no_length, FALSE);
+}
+
+/*
+ * utoa_dup_len: as utoa_dup(), but the output length is also passed
+ * back through `len' by utoa_internal_dup().
+ */
+char *utoa_dup_len(wchar_t const *s, int charset, int *len)
+{
+    char *converted;
+
+    converted = utoa_internal_dup(s, charset, len, FALSE);
+    return converted;
+}
+
+/*
+ * utoa_careful_dup: as utoa_dup(), but in careful mode, where
+ * utoa_internal_dup() returns NULL if the conversion reports an error.
+ */
+char *utoa_careful_dup(wchar_t const *s, int charset)
+{
+    int *const no_length = NULL;
+
+    return utoa_internal_dup(s, charset, no_length, TRUE);
+}
+
+wchar_t *ufroma_dup(char const *s, int charset) {
      int len;
      wchar_t *buf = NULL;
  
      len = strlen(s) + 1;
      do {
         buf = resize(buf, len);
-       ustrfroma(s, buf, len);
+       ustrfroma(s, buf, len, charset);
         len = (3 * len) / 2 + 1;       /* this guarantees a strict increase */
      } while (ustrlen(buf) >= len-1);
  
@@ -183,6 +269,12 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
      size_t len;
  
      /*
+     * FIXME: really we ought to copy non-% parts of the format
+     * ourselves, and only resort to strftime for % parts. Also we
+     * should use wcsftime if it's present.
+     */
+
+    /*
       * strftime has the entertaining property that it returns 0
       * _either_ on out-of-space _or_ on successful generation of
       * the empty string. Hence we must ensure our format can never
@@ -192,7 +284,7 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
      if (wfmt) {
         len = ustrlen(wfmt);
         fmt = mknewa(char, 2+len);
-       ustrtoa(wfmt, fmt+1, len+1);
+       ustrtoa(wfmt, fmt+1, len+1, CS_ASCII);   /* CS_FIXME? */
         fmt[0] = ' ';
      } else
         fmt = " %c";
author	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Tue, 20 Apr 2004 17:50:41 +0000 (17:50 +0000)
committer	simon <simon@cda61777-01e9-0310-a592-d414129be87e>
	Tue, 20 Apr 2004 17:50:41 +0000 (17:50 +0000)
bk_info.c		patch \| blob \| blame \| history
bk_man.c		patch \| blob \| blame \| history
bk_paper.c		patch \| blob \| blame \| history
bk_pdf.c		patch \| blob \| blame \| history
bk_ps.c		patch \| blob \| blame \| history
bk_text.c		patch \| blob \| blame \| history
bk_whlp.c		patch \| blob \| blame \| history
bk_xhtml.c		patch \| blob \| blame \| history
error.c		patch \| blob \| blame \| history
halibut.h		patch \| blob \| blame \| history
input.c		patch \| blob \| blame \| history
main.c		patch \| blob \| blame \| history
misc.c		patch \| blob \| blame \| history
ustring.c		patch \| blob \| blame \| history