- After discussion with Simon, change the default input charset back to ASCII,
rather than trying to work it out from the locale, for the sake of promoting
.but file portability.
- Add a new command-line option "--input-charset=csname", which overrides the
ASCII default for all input files (since there's no other way to use a
non-ASCII-compatible input file).
- Output a warning when -Cinput-charset:foo is specified, saying that it has no effect.
- Update the docs to match all this. Also try to clarify some other things in
this area that caught me out.
git-svn-id: svn://svn.tartarus.org/sgt/halibut@5332 cda61777-01e9-0310-a592-d414129be87e
all recognised, \c{GB2312} and \c{EUC-CN} both work, and so on.
This directive takes effect immediately after the \c{\\cfg} command.
all recognised, \c{GB2312} and \c{EUC-CN} both work, and so on.
This directive takes effect immediately after the \c{\\cfg} command.
-All text after that in the file is expected to be in the new
-character set. You can even change character set several times
-within a file if you really want to.
+All text after that until the end of the input file is expected to be
+in the new character set. You can even change character set several
+times within a file if you really want to.
When Halibut reads the input file, everything you type will be
converted into \i{Unicode} from the character set you specify here,
When Halibut reads the input file, everything you type will be
converted into \i{Unicode} from the character set you specify here,
\c \cfg{appendix}{Appendix}
\c \cfg{input-charset}{ASCII}
\c \cfg{appendix}{Appendix}
\c \cfg{input-charset}{ASCII}
-(The default settings for \cw{\\cfg\{quotes\}} are backend-specific;
-see \k{output}.)
+The default for \cw{\\cfg\{input-charset\}} can be changed with the
+\cw{--input-charset} option; see \k{running-options}. The default
+settings for \cw{\\cfg\{quotes\}} are backend-specific; see
+\k{output}.
\H{input-macro} Defining \i{macros}
\H{input-macro} Defining \i{macros}
extra input file to the command line which contains the directive
\cw{\\cfg\{}\e{word}\cw{\}\{}\e{word}\cw{\}\{}\e{word}...\cw{\}}.
extra input file to the command line which contains the directive
\cw{\\cfg\{}\e{word}\cw{\}\{}\e{word}\cw{\}\{}\e{word}...\cw{\}}.
+\dt \cw{--input-charset}\cw{=}\e{charset}
+
+\dd Changes the assumed character set for input files from the
+default of ASCII.
+
\dt \cw{--precise}
\dd Makes Halibut report the column number as well as the line
\dt \cw{--precise}
\dd Makes Halibut report the column number as well as the line
is not part of Halibut's own behaviour, and it cannot do anything
about it.)
is not part of Halibut's own behaviour, and it cannot do anything
about it.)
+Configuration directives created in this way take effect after all
+other input has been processed. (In most cases, this has the effect of
+overriding any other instances of the directive in the input.)
+
}
The options which set the output file names actually work by
}
The options which set the output file names actually work by
In addition to these, there are also a few other options:
In addition to these, there are also a few other options:
+\dt \i\cw{--input-charset}\cw{=}\e{charset}
+
+\dd Changes the default assumed character set for all input files from
+ASCII to something else. (\cw{-Cinput-charset} cannot be used for
+this, as \cw{-C} directives are processed after all other input, so
+wouldn't affect any files.)
+
+\lcont{
+
+Any \cw{\\cfg\{input-charset\}} directives within input files override
+this option.
+
+See \k{input-config} for more information about the input character set.
+
+}
+
\dt \i\cw{--help}
\dd Print a brief help message and exit immediately. (Don't confuse
\dt \i\cw{--help}
\dd Print a brief help message and exit immediately. (Don't confuse
sprintf(error, "unrecognised option `-%.200s'", sp);
flags = PREFIX;
break;
sprintf(error, "unrecognised option `-%.200s'", sp);
flags = PREFIX;
break;
+ case err_cmdcharset:
+ sp = va_arg(ap, char *);
+ sprintf(error, "character set `%.200s' not recognised", sp);
+ flags = PREFIX;
+ break;
+ case err_futileopt:
+ sp = va_arg(ap, char *);
+ sp2 = va_arg(ap, char *);
+ sprintf(error, "warning: option `-%s' has no effect%s", sp, sp2);
+ flags = PREFIX;
+ break;
case err_noinput: /* no arguments */
sprintf(error, "no input files");
flags = PREFIX;
case err_noinput: /* no arguments */
sprintf(error, "no input files");
flags = PREFIX;
err_nomemory, /* out of memory */
err_optnoarg, /* option `-%s' requires an argument */
err_nosuchopt, /* unrecognised option `-%s' */
err_nomemory, /* out of memory */
err_optnoarg, /* option `-%s' requires an argument */
err_nosuchopt, /* unrecognised option `-%s' */
+ err_cmdcharset, /* unrecognised charset %s (cmdline) */
+ err_futileopt, /* futile option `-%s'%s */
err_noinput, /* no input files */
err_cantopen, /* unable to open input file `%s' */
err_nodata, /* no data in input files */
err_noinput, /* no input files */
err_cantopen, /* unable to open input file `%s' */
err_nodata, /* no data in input files */
" --ps[=filename] generate PostScript output",
" --pdf[=filename] generate PDF output",
" -Cfoo:bar:baz append \\cfg{foo}{bar}{baz} to input",
" --ps[=filename] generate PostScript output",
" --pdf[=filename] generate PDF output",
" -Cfoo:bar:baz append \\cfg{foo}{bar}{baz} to input",
+ " --input-charset=cs change default input file charset",
" --precise report column numbers in error messages",
" --help display this text",
" --version display version number",
" --precise report column numbers in error messages",
" --help display this text",
" --version display version number",
};
static char *usagetext[] = {
};
static char *usagetext[] = {
- "usage: halibut [--format[=filename]] [-Cconfig...] file.but [file.but...]",
+ "usage: halibut [--format[=filename]] [options] file.but [file.but...]",
int nogo;
int errs;
int reportcols;
int nogo;
int errs;
int reportcols;
int debug;
int backendbits, prebackbits;
int k, b;
int debug;
int backendbits, prebackbits;
int k, b;
nfiles = 0;
nogo = errs = FALSE;
reportcols = 0;
nfiles = 0;
nogo = errs = FALSE;
reportcols = 0;
+ input_charset = CS_ASCII;
debug = 0;
backendbits = 0;
cfg = cfg_tail = NULL;
debug = 0;
backendbits = 0;
cfg = cfg_tail = NULL;
}
if (k < (int)lenof(backends)) {
/* do nothing */;
}
if (k < (int)lenof(backends)) {
/* do nothing */;
+ } else if (!strcmp(opt, "-input-charset")) {
+ if (!val) {
+ errs = TRUE, error(err_optnoarg, opt);
+ } else {
+ int charset = charset_from_localenc(val);
+ if (charset == CS_NONE) {
+ errs = TRUE, error(err_cmdcharset, val);
+ } else {
+ input_charset = charset;
+ }
+ }
} else if (!strcmp(opt, "-help")) {
help();
nogo = TRUE;
} else if (!strcmp(opt, "-help")) {
help();
nogo = TRUE;
while (*q) {
if (*q == ':') {
*r = '\0';
while (*q) {
if (*q == ':') {
*r = '\0';
+ /* XXX ad-hoc diagnostic */
+ if (!strcmp(s, "input-charset"))
+ error(err_futileopt, "Cinput-charset",
+ "; use --input-charset");
cmdline_cfg_add(para, s);
r = s;
} else {
cmdline_cfg_add(para, s);
r = s;
} else {
in.pushback = NULL;
in.reportcols = reportcols;
in.stack = NULL;
in.pushback = NULL;
in.reportcols = reportcols;
in.stack = NULL;
- in.defcharset = charset_from_locale();
+ in.defcharset = input_charset;