+/*
+ * winutils.c: miscellaneous Windows utilities
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) /* element count of x; x must be a real array, not a pointer */
+
+#ifdef TESTMODE
+/* Definitions to allow this module to be compiled standalone for testing.
+ * Under TESTMODE, smalloc is simply an alias for the C library's malloc.
+ * NOTE(review): nothing visible in this file calls smalloc; the
+ * allocations below use malloc directly - confirm which is intended. */
+#define smalloc malloc
+#endif
+
+/*
+ * Split a complete command line into argc/argv, attempting to do
+ * it exactly the same way Windows itself would do it (so that
+ * console utilities, which receive argc and argv from Windows,
+ * will have their command lines processed in the same way as GUI
+ * utilities which get a whole command line and must break it
+ * themselves).
+ *
+ * Does not modify the input command line (just in case).
+ *
+ * Parameters:
+ *   cmdline  NUL-terminated command line to split. Must not be NULL.
+ *   argc     if non-NULL, receives the number of words found.
+ *   argv     if non-NULL, receives a malloc'ed array of that many
+ *            pointers to the words.
+ *
+ * Ownership: all the words live in a single malloc'ed buffer whose
+ * start is (*argv)[0]. When at least one word is returned, the
+ * caller releases everything with free((*argv)[0]) followed by
+ * free(*argv).
+ *
+ * If the command line contains no words, or if memory allocation
+ * fails, *argv is set to NULL and *argc to 0 and nothing is left
+ * allocated. If argv is NULL, only the count is reported and all
+ * working storage is freed before returning.
+ */
+void split_into_argv(const char *cmdline, int *argc, char ***argv)
+{
+    const char *p;
+    char *outputline, *q;
+    char **outputargv;
+    int outputargc;
+    size_t cmdlen;
+
+    /*
+     * At first glance the rules appeared to be:
+     *
+     *  - Single quotes are not special characters.
+     *
+     *  - Double quotes are removed, but within them spaces cease
+     *    to be special.
+     *
+     *  - Backslashes are _only_ special when a sequence of them
+     *    appear just before a double quote. In this situation,
+     *    they are treated like C backslashes: so \" just gives a
+     *    literal quote, \\" gives a literal backslash and then
+     *    opens or closes a double-quoted segment, \\\" gives a
+     *    literal backslash and then a literal quote, \\\\" gives
+     *    two literal backslashes and then opens/closes a
+     *    double-quoted segment, and so forth. Note that this
+     *    behaviour is identical inside and outside double quotes.
+     *
+     *  - Two successive double quotes become one literal double
+     *    quote, but only _inside_ a double-quoted segment.
+     *    Outside, they just form an empty double-quoted segment
+     *    (which may cause an empty argument word).
+     *
+     *  - That only leaves the interesting question of what happens
+     *    when one or more backslashes precedes two or more double
+     *    quotes, starting inside a double-quoted string. And the
+     *    answer to that appears somewhat bizarre. Here I tabulate
+     *    number of backslashes (across the top) against number of
+     *    quotes (down the left), and indicate how many backslashes
+     *    are output, how many quotes are output, and whether a
+     *    quoted segment is open at the end of the sequence:
+     *
+     *                      backslashes
+     *
+     *               0         1      2      3      4
+     *
+     *         0   0,0,y  |  1,0,y  2,0,y  3,0,y  4,0,y
+     *            --------+-----------------------------
+     *         1   0,0,n  |  0,1,y  1,0,n  1,1,y  2,0,n
+     *    q    2   0,1,n  |  0,1,n  1,1,n  1,1,n  2,1,n
+     *    u    3   0,1,y  |  0,2,n  1,1,y  1,2,n  2,1,y
+     *    o    4   0,1,n  |  0,2,y  1,1,n  1,2,y  2,1,n
+     *    t    5   0,2,n  |  0,2,n  1,2,n  1,2,n  2,2,n
+     *    e    6   0,2,y  |  0,3,n  1,2,y  1,3,n  2,2,y
+     *    s    7   0,2,n  |  0,3,y  1,2,n  1,3,y  2,2,n
+     *         8   0,3,n  |  0,3,n  1,3,n  1,3,n  2,3,n
+     *         9   0,3,y  |  0,4,n  1,3,y  1,4,n  2,3,y
+     *        10   0,3,n  |  0,4,y  1,3,n  1,4,y  2,3,n
+     *        11   0,4,n  |  0,4,n  1,4,n  1,4,n  2,4,n
+     *
+     *
+     *      [Test fragment was of the form "a\\\"""b c" d.]
+     *
+     * There is very weird mod-3 behaviour going on here in the
+     * number of quotes, and it even applies when there aren't any
+     * backslashes! How ghastly.
+     *
+     * With a bit of thought, this extremely odd diagram suddenly
+     * coalesced itself into a coherent, if still ghastly, model of
+     * how things work:
+     *
+     *  - As before, backslashes are only special when one or more
+     *    of them appear contiguously before at least one double
+     *    quote. In this situation the backslashes do exactly what
+     *    you'd expect: each one quotes the next thing in front of
+     *    it, so you end up with n/2 literal backslashes (if n is
+     *    even) or (n-1)/2 literal backslashes and a literal quote
+     *    (if n is odd). In the latter case the double quote
+     *    character right after the backslashes is used up.
+     *
+     *  - After that, any remaining double quotes are processed. A
+     *    string of contiguous unescaped double quotes has a mod-3
+     *    behaviour:
+     *
+     *     * inside a quoted segment, a quote ends the segment.
+     *     * _immediately_ after ending a quoted segment, a quote
+     *       simply produces a literal quote.
+     *     * otherwise, outside a quoted segment, a quote begins a
+     *       quoted segment.
+     *
+     *    So, for example, if we started inside a quoted segment
+     *    then two contiguous quotes would close the segment and
+     *    produce a literal quote; three would close the segment,
+     *    produce a literal quote, and open a new segment. If we
+     *    started outside a quoted segment, then two contiguous
+     *    quotes would open and then close a segment, producing no
+     *    output (but potentially creating a zero-length argument);
+     *    but three quotes would open and close a segment and then
+     *    produce a literal quote.
+     */
+
+    /*
+     * Size the two work areas from the input length.
+     *
+     * The output text is never longer than the input: every output
+     * character uses up at least one input character, and each
+     * word's terminating NUL takes the slot of the whitespace (or
+     * the final NUL) that ended the word. This buffer is not shrunk
+     * afterwards, because the argv pointers point into it.
+     *
+     * Every word uses at least one input character and words are
+     * separated by at least one whitespace character, so there are
+     * at most (len+1)/2 words; len/2 + 1 covers that and is never
+     * zero. calloc is used so the element-count multiplication is
+     * overflow-checked.
+     */
+    cmdlen = strlen(cmdline);
+    outputline = malloc(cmdlen + 1);
+    outputargv = calloc(cmdlen / 2 + 1, sizeof *outputargv);
+    if (!outputline || !outputargv) {
+        free(outputline);
+        free(outputargv);
+        if (argc) *argc = 0;
+        if (argv) *argv = NULL;
+        return;
+    }
+
+    p = cmdline; q = outputline; outputargc = 0;
+
+    while (*p) {
+        int quote;
+
+        /*
+         * Skip whitespace searching for start of argument. The
+         * cast keeps isspace() defined for characters with the top
+         * bit set on platforms where plain char is signed.
+         */
+        while (*p && isspace((unsigned char)*p)) p++;
+        if (!*p) break;
+
+        /* We have an argument; start it. */
+        outputargv[outputargc++] = q;
+        quote = 0;
+
+        /* Copy data into the argument until it's finished. */
+        while (*p) {
+            if (!quote && isspace((unsigned char)*p))
+                break;                 /* argument is finished */
+
+            if (*p == '"' || *p == '\\') {
+                /*
+                 * We have a sequence of zero or more backslashes
+                 * followed by a sequence of zero or more quotes.
+                 * Count up how many of each, and then deal with
+                 * them as appropriate.
+                 */
+                int i, slashes = 0, quotes = 0;
+                while (*p == '\\') slashes++, p++;
+                while (*p == '"') quotes++, p++;
+
+                if (!quotes) {
+                    /*
+                     * Special case: if there are no quotes,
+                     * slashes are not special at all, so just copy
+                     * n slashes to the output string.
+                     */
+                    while (slashes--) *q++ = '\\';
+                } else {
+                    /* Slashes annihilate in pairs. */
+                    while (slashes >= 2) slashes -= 2, *q++ = '\\';
+
+                    /* One remaining slash takes out the first quote. */
+                    if (slashes) quotes--, *q++ = '"';
+
+                    if (quotes > 0) {
+                        /* Outside a quote segment, a quote starts one. */
+                        if (!quote) quotes--, quote = 1;
+
+                        /* Now we produce (n+1)/3 literal quotes... */
+                        for (i = 3; i <= quotes+1; i += 3) *q++ = '"';
+
+                        /* ... and end in a quote segment iff 3 divides n. */
+                        quote = (quotes % 3 == 0);
+                    }
+                }
+            } else {
+                *q++ = *p++;
+            }
+        }
+
+        /* At the end of an argument, just append a trailing NUL. */
+        *q++ = '\0';
+    }
+
+    if (outputargc == 0 || !argv) {
+        /*
+         * Either there is nothing to return or the caller only
+         * wants the count. Release both buffers rather than leak
+         * them, and avoid calling realloc with a size of zero.
+         */
+        free(outputline);
+        free(outputargv);
+        outputargv = NULL;
+    } else {
+        /*
+         * Trim the pointer array to the number of words actually
+         * found. If the shrink fails the original array is still
+         * valid, so keep using it.
+         */
+        char **shrunk = realloc(outputargv, sizeof *outputargv * outputargc);
+        if (shrunk) outputargv = shrunk;
+    }
+
+    if (argc) *argc = outputargc;
+    if (argv) *argv = outputargv;
+}
+
+#ifdef TESTMODE
+
+const struct argv_test {
+ const char *cmdline; /* command line handed to split_into_argv() */
+ const char *argv[10]; /* expected words, in order, terminated by NULL */
+} argv_tests[] = {
+ /*
+ * We generate this set of tests by invoking ourself with
+ * `-generate'.
+ *
+ * Each entry pairs a command line with the words it is expected
+ * to split into. The entries follow the order of the `-generate'
+ * loops in main(): first the command lines that begin with `a'
+ * outside any quoted segment, then those that begin with an
+ * opening double quote; within each half, 0 to 4 backslashes,
+ * and for each of those 0 to 8 double quotes, are placed between
+ * the `a' and the `b'.
+ */
+ {"ab c\" d", {"ab", "c d", NULL}},
+ {"a\"b c\" d", {"ab c", "d", NULL}},
+ {"a\"\"b c\" d", {"ab", "c d", NULL}},
+ {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
+ {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
+ {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
+ {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
+ {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"a\\b c\" d", {"a\\b", "c d", NULL}},
+ {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
+ {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
+ {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
+ {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
+ {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
+ {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
+ {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
+ {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
+ {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
+ {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
+ {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
+ {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
+ {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
+ {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
+ {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
+ {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
+ {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
+ {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
+ {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
+ {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
+ {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
+ {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
+ {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
+ {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
+ {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
+ {"\"ab c\" d", {"ab c", "d", NULL}},
+ {"\"a\"b c\" d", {"ab", "c d", NULL}},
+ {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
+ {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
+ {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
+ {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
+ {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
+ {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
+ {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
+ {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
+ {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
+ {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
+ {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
+ {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
+ {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
+ {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
+ {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
+ {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
+ {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
+ {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
+ {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
+ {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
+ {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
+ {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
+ {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
+ {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
+ {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
+ {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
+ {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
+ {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
+ {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
+ {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
+ {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
+ {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
+ {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
+ {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
+ {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
+};
+
+/*
+ * Test driver, built only under TESTMODE.
+ *
+ * With no arguments, runs split_into_argv() over every entry of
+ * argv_tests and prints a line for each mismatch. The exit status
+ * is 0 whether or not any test failed.
+ *
+ * With `-splat', `-split' or `-generate' as the first argument,
+ * helps to produce the argv_tests table (see the comment inside).
+ * Any other first argument is reported as unrecognised and the
+ * exit status is 1, as it is if memory runs out.
+ */
+int main(int argc, char **argv)
+{
+    int i, j;
+
+    if (argc > 1) {
+        /*
+         * Generation of tests.
+         *
+         * Given `-splat <args>', we print out a C-style
+         * representation of each argument (in the form "a", "b",
+         * NULL), backslash-escaping each backslash and double
+         * quote.
+         *
+         * Given `-split <string>', we first doctor `string' by
+         * turning forward slashes into backslashes, single quotes
+         * into double quotes and underscores into spaces; and then
+         * we feed the resulting string to ourself with `-splat'.
+         *
+         * Given `-generate', we concoct a variety of fun test
+         * cases, encode them in quote-safe form (mapping \, " and
+         * space to /, ' and _ respectively) and feed each one to
+         * `-split'.
+         */
+        if (!strcmp(argv[1], "-splat")) {
+            char *p;
+            for (i = 2; i < argc; i++) {
+                putchar('"');
+                for (p = argv[i]; *p; p++) {
+                    if (*p == '\\' || *p == '"')
+                        putchar('\\');
+                    putchar(*p);
+                }
+                printf("\", ");
+            }
+            printf("NULL");
+            return 0;
+        }
+
+        if (!strcmp(argv[1], "-split") && argc > 2) {
+            /*
+             * Room for our own name, " -splat ", the doctored
+             * string (same length as argv[2]) and the NUL.
+             */
+            char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
+            char *p, *q;
+
+            if (!str) {
+                fprintf(stderr, "out of memory\n");
+                return 1;
+            }
+
+            q = str + sprintf(str, "%s -splat ", argv[0]);
+            printf(" {\"");
+            for (p = argv[2]; *p; p++, q++) {
+                switch (*p) {
+                  case '/': printf("\\\\"); *q = '\\'; break;
+                  case '\'': printf("\\\""); *q = '"'; break;
+                  case '_': printf(" "); *q = ' '; break;
+                  default: putchar(*p); *q = *p; break;
+                }
+            }
+            /*
+             * Terminate the command we have been building. (This
+             * must be q, the write pointer into str: p is sitting
+             * on the NUL that already ends argv[2].)
+             */
+            *q = '\0';
+            printf("\", {");
+            /* Flush our half of the line before the child writes its half. */
+            fflush(stdout);
+
+            system(str);
+            free(str);
+
+            printf("}},\n");
+
+            return 0;
+        }
+
+        if (!strcmp(argv[1], "-generate")) {
+            char *teststr, *p;
+            int initialquote, backslashes, quotes;
+
+            teststr = malloc(200 + strlen(argv[0]));
+            if (!teststr) {
+                fprintf(stderr, "out of memory\n");
+                return 1;
+            }
+
+            for (initialquote = 0; initialquote <= 1; initialquote++) {
+                for (backslashes = 0; backslashes < 5; backslashes++) {
+                    for (quotes = 0; quotes < 9; quotes++) {
+                        p = teststr + sprintf(teststr, "%s -split ", argv[0]);
+                        if (initialquote) *p++ = '\'';
+                        *p++ = 'a';
+                        for (i = 0; i < backslashes; i++) *p++ = '/';
+                        for (i = 0; i < quotes; i++) *p++ = '\'';
+                        *p++ = 'b';
+                        *p++ = '_';
+                        *p++ = 'c';
+                        *p++ = '\'';
+                        *p++ = '_';
+                        *p++ = 'd';
+                        *p = '\0';
+
+                        system(teststr);
+                    }
+                }
+            }
+            free(teststr);
+            return 0;
+        }
+
+        fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
+        return 1;
+    }
+
+    /*
+     * If we get here, we were invoked with no arguments, so just
+     * run the tests.
+     */
+
+    for (i = 0; i < (int)lenof(argv_tests); i++) {
+        int ac;
+        char **av;
+
+        split_into_argv(argv_tests[i].cmdline, &ac, &av);
+
+        /* Compare word by word until either list runs out. */
+        for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
+            if (strcmp(av[j], argv_tests[i].argv[j])) {
+                printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
+                       i, argv_tests[i].cmdline,
+                       j, av[j], argv_tests[i].argv[j]);
+            }
+#ifdef VERBOSE
+            else {
+                printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
+                       i, argv_tests[i].cmdline,
+                       j, av[j], argv_tests[i].argv[j]);
+            }
+#endif
+        }
+        /* Here j is the number of expected words that were matched up. */
+        if (j < ac)
+            printf("failed test %d (|%s|): %d args returned, should be %d\n",
+                   i, argv_tests[i].cmdline, ac, j);
+        if (argv_tests[i].argv[j])
+            printf("failed test %d (|%s|): %d args returned, should be more\n",
+                   i, argv_tests[i].cmdline, ac);
+
+        /*
+         * The words all live in one buffer which starts at the
+         * first word, so freeing av[0] releases the text.
+         */
+        if (ac > 0)
+            free(av[0]);
+        free(av);
+    }
+
+    return 0;
+}
+
+#endif
\ No newline at end of file