Revamp of command-line handling. Most command line options should
[u/mdw/putty] / winutils.c
1 /*
2 * winutils.c: miscellaneous Windows utilities
3 */
4
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
7
/* Number of elements in an array object (do not use on a pointer). */
#define lenof(x) ( sizeof(x) / sizeof((x)[0]) )

#ifdef TESTMODE
/*
 * When this file is compiled on its own as a test program, supply
 * smalloc in terms of the standard malloc.
 */
#define smalloc malloc
#endif
14
/*
 * Split a complete command line into argc/argv, attempting to do
 * it exactly the same way Windows itself would do it (so that
 * console utilities, which receive argc and argv from Windows,
 * will have their command lines processed in the same way as GUI
 * utilities which get a whole command line and must break it
 * themselves).
 *
 * Does not modify the input command line (just in case).
 *
 * Parameters:
 *
 *  - cmdline: the NUL-terminated command line to split.
 *  - argc: if non-NULL, receives the number of words found.
 *  - argv: if non-NULL, receives a newly allocated array holding
 *    that many word pointers followed by a terminating NULL.
 *
 * Ownership: all the words are stored back to back in one heap
 * block, and the first word always sits at the start of that
 * block. So when at least one word was found the caller releases
 * the result with free((*argv)[0]) followed by free(*argv); when
 * no words were found only free(*argv) is needed. If argv is NULL
 * nothing is left allocated.
 *
 * If memory cannot be allocated, *argc is set to 0 and *argv to
 * NULL.
 */
void split_into_argv(const char *cmdline, int *argc, char ***argv)
{
    const char *p;
    char *outputline, *q;
    char **outputargv, **shrunk;
    int outputargc;
    size_t cmdlen, maxargs;

    /*
     * At first glance the rules appeared to be:
     *
     *  - Single quotes are not special characters.
     *
     *  - Double quotes are removed, but within them spaces cease
     *    to be special.
     *
     *  - Backslashes are _only_ special when a sequence of them
     *    appear just before a double quote. In this situation,
     *    they are treated like C backslashes: so \" just gives a
     *    literal quote, \\" gives a literal backslash and then
     *    opens or closes a double-quoted segment, \\\" gives a
     *    literal backslash and then a literal quote, \\\\" gives
     *    two literal backslashes and then opens/closes a
     *    double-quoted segment, and so forth. Note that this
     *    behaviour is identical inside and outside double quotes.
     *
     *  - Two successive double quotes become one literal double
     *    quote, but only _inside_ a double-quoted segment.
     *    Outside, they just form an empty double-quoted segment
     *    (which may cause an empty argument word).
     *
     *  - That only leaves the interesting question of what happens
     *    when one or more backslashes precedes two or more double
     *    quotes, starting inside a double-quoted string. And the
     *    answer to that appears somewhat bizarre. Here I tabulate
     *    number of backslashes (across the top) against number of
     *    quotes (down the left), and indicate how many backslashes
     *    are output, how many quotes are output, and whether a
     *    quoted segment is open at the end of the sequence:
     *
     *                      backslashes
     *
     *               0         1      2      3      4
     *
     *         0   0,0,y  |  1,0,y  2,0,y  3,0,y  4,0,y
     *            --------+-----------------------------
     *         1   0,0,n  |  0,1,y  1,0,n  1,1,y  2,0,n
     *    q    2   0,1,n  |  0,1,n  1,1,n  1,1,n  2,1,n
     *    u    3   0,1,y  |  0,2,n  1,1,y  1,2,n  2,1,y
     *    o    4   0,1,n  |  0,2,y  1,1,n  1,2,y  2,1,n
     *    t    5   0,2,n  |  0,2,n  1,2,n  1,2,n  2,2,n
     *    e    6   0,2,y  |  0,3,n  1,2,y  1,3,n  2,2,y
     *    s    7   0,2,n  |  0,3,y  1,2,n  1,3,y  2,2,n
     *         8   0,3,n  |  0,3,n  1,3,n  1,3,n  2,3,n
     *         9   0,3,y  |  0,4,n  1,3,y  1,4,n  2,3,y
     *        10   0,3,n  |  0,4,y  1,3,n  1,4,y  2,3,n
     *        11   0,4,n  |  0,4,n  1,4,n  1,4,n  2,4,n
     *
     *
     *      [Test fragment was of the form "a\\\"""b c" d.]
     *
     * There is very weird mod-3 behaviour going on here in the
     * number of quotes, and it even applies when there aren't any
     * backslashes! How ghastly.
     *
     * With a bit of thought, this extremely odd diagram suddenly
     * coalesced itself into a coherent, if still ghastly, model of
     * how things work:
     *
     *  - As before, backslashes are only special when one or more
     *    of them appear contiguously before at least one double
     *    quote. In this situation the backslashes do exactly what
     *    you'd expect: each one quotes the next thing in front of
     *    it, so you end up with n/2 literal backslashes (if n is
     *    even) or (n-1)/2 literal backslashes and a literal quote
     *    (if n is odd). In the latter case the double quote
     *    character right after the backslashes is used up.
     *
     *  - After that, any remaining double quotes are processed. A
     *    string of contiguous unescaped double quotes has a mod-3
     *    behaviour:
     *
     *     * inside a quoted segment, a quote ends the segment.
     *     * _immediately_ after ending a quoted segment, a quote
     *       simply produces a literal quote.
     *     * otherwise, outside a quoted segment, a quote begins a
     *       quoted segment.
     *
     *    So, for example, if we started inside a quoted segment
     *    then two contiguous quotes would close the segment and
     *    produce a literal quote; three would close the segment,
     *    produce a literal quote, and open a new segment. If we
     *    started outside a quoted segment, then two contiguous
     *    quotes would open and then close a segment, producing no
     *    output (but potentially creating a zero-length argument);
     *    but three quotes would open and close a segment and then
     *    produce a literal quote.
     */

    /*
     * Size the buffers for the worst case. The output text is
     * never longer than the input, because every word's
     * terminating NUL replaces a separator or the input's own
     * NUL. Every word consumes at least one input character and
     * consecutive words are separated by at least one whitespace
     * character, so there are at most (len+1)/2 words; one more
     * slot holds the terminating NULL pointer.
     */
    cmdlen = strlen(cmdline);
    maxargs = (cmdlen + 1) / 2;
    outputline = malloc(cmdlen + 1);
    outputargv = malloc(sizeof(*outputargv) * (maxargs + 1));
    if (!outputline || !outputargv) {
        free(outputline);
        free(outputargv);
        if (argc) *argc = 0;
        if (argv) *argv = NULL;
        return;
    }

    p = cmdline; q = outputline; outputargc = 0;

    while (*p) {
        int quote;

        /*
         * Skip whitespace searching for start of argument. The
         * cast keeps isspace() well defined for characters with
         * the top bit set when plain char is signed.
         */
        while (*p && isspace((unsigned char)*p)) p++;
        if (!*p) break;

        /* We have an argument; start it. */
        outputargv[outputargc++] = q;
        quote = 0;

        /* Copy data into the argument until it's finished. */
        while (*p) {
            if (!quote && isspace((unsigned char)*p))
                break;                 /* argument is finished */

            if (*p == '"' || *p == '\\') {
                /*
                 * We have a sequence of zero or more backslashes
                 * followed by a sequence of zero or more quotes.
                 * Count up how many of each, and then deal with
                 * them as appropriate.
                 */
                int i, slashes = 0, quotes = 0;
                while (*p == '\\') slashes++, p++;
                while (*p == '"') quotes++, p++;

                if (!quotes) {
                    /*
                     * Special case: if there are no quotes,
                     * slashes are not special at all, so just copy
                     * n slashes to the output string.
                     */
                    while (slashes--) *q++ = '\\';
                } else {
                    /* Slashes annihilate in pairs. */
                    while (slashes >= 2) slashes -= 2, *q++ = '\\';

                    /* One remaining slash takes out the first quote. */
                    if (slashes) quotes--, *q++ = '"';

                    if (quotes > 0) {
                        /* Outside a quote segment, a quote starts one. */
                        if (!quote) quotes--, quote = 1;

                        /* Now we produce (n+1)/3 literal quotes... */
                        for (i = 3; i <= quotes+1; i += 3) *q++ = '"';

                        /* ... and end in a quote segment iff 3 divides n. */
                        quote = (quotes % 3 == 0);
                    }
                }
            } else {
                *q++ = *p++;
            }
        }

        /* At the end of an argument, just append a trailing NUL. */
        *q++ = '\0';
    }

    /* Terminate the vector in the conventional argv style. */
    outputargv[outputargc] = NULL;

    /*
     * With no words at all, nothing points into the text buffer,
     * so nobody could ever free it: release it here.
     */
    if (outputargc == 0) {
        free(outputline);
        outputline = NULL;
    }

    if (argv) {
        /*
         * Trim the pointer array to the size actually used. The
         * request is never zero bytes thanks to the terminator
         * slot, and if shrinking fails the original array is
         * still valid, so keep it.
         */
        shrunk = realloc(outputargv,
                         sizeof(*outputargv) * ((size_t)outputargc + 1));
        if (shrunk)
            outputargv = shrunk;
        *argv = outputargv;
    } else {
        /* The caller only wanted the count; keep nothing. */
        free(outputline);
        free(outputargv);
    }

    if (argc) *argc = outputargc;
}
198
199 #ifdef TESTMODE
200
/*
 * One test case: a raw command line together with the words it is
 * expected to split into, terminated by a NULL entry.
 */
struct argv_test {
    const char *cmdline;
    const char *argv[10];
};

/*
 * We generate this set of tests by invoking ourself with
 * `-generate'. The rows run through every combination of: an
 * optional opening quote, then `a', then 0-4 backslashes, then
 * 0-8 double quotes, then a fixed tail.
 */
const struct argv_test argv_tests[] = {
    /* No opening quote, no backslashes. */
    {"ab c\" d", {"ab", "c d", NULL}},
    {"a\"b c\" d", {"ab c", "d", NULL}},
    {"a\"\"b c\" d", {"ab", "c d", NULL}},
    {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},

    /* No opening quote, one backslash. */
    {"a\\b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},

    /* No opening quote, two backslashes. */
    {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
    {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},

    /* No opening quote, three backslashes. */
    {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
    {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},

    /* No opening quote, four backslashes. */
    {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
    {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},

    /* Opening quote, no backslashes. */
    {"\"ab c\" d", {"ab c", "d", NULL}},
    {"\"a\"b c\" d", {"ab", "c d", NULL}},
    {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},

    /* Opening quote, one backslash. */
    {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
    {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},

    /* Opening quote, two backslashes. */
    {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
    {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
    {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},

    /* Opening quote, three backslashes. */
    {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},

    /* Opening quote, four backslashes. */
    {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
};
300
301 int main(int argc, char **argv)
302 {
303 int i, j;
304
305 if (argc > 1) {
306 /*
307 * Generation of tests.
308 *
309 * Given `-splat <args>', we print out a C-style
310 * representation of each argument (in the form "a", "b",
311 * NULL), backslash-escaping each backslash and double
312 * quote.
313 *
314 * Given `-split <string>', we first doctor `string' by
315 * turning forward slashes into backslashes, single quotes
316 * into double quotes and underscores into spaces; and then
317 * we feed the resulting string to ourself with `-splat'.
318 *
319 * Given `-generate', we concoct a variety of fun test
320 * cases, encode them in quote-safe form (mapping \, " and
321 * space to /, ' and _ respectively) and feed each one to
322 * `-split'.
323 */
324 if (!strcmp(argv[1], "-splat")) {
325 int i;
326 char *p;
327 for (i = 2; i < argc; i++) {
328 putchar('"');
329 for (p = argv[i]; *p; p++) {
330 if (*p == '\\' || *p == '"')
331 putchar('\\');
332 putchar(*p);
333 }
334 printf("\", ");
335 }
336 printf("NULL");
337 return 0;
338 }
339
340 if (!strcmp(argv[1], "-split") && argc > 2) {
341 char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
342 char *p, *q;
343
344 q = str + sprintf(str, "%s -splat ", argv[0]);
345 printf(" {\"");
346 for (p = argv[2]; *p; p++, q++) {
347 switch (*p) {
348 case '/': printf("\\\\"); *q = '\\'; break;
349 case '\'': printf("\\\""); *q = '"'; break;
350 case '_': printf(" "); *q = ' '; break;
351 default: putchar(*p); *q = *p; break;
352 }
353 }
354 *p = '\0';
355 printf("\", {");
356 fflush(stdout);
357
358 system(str);
359
360 printf("}},\n");
361
362 return 0;
363 }
364
365 if (!strcmp(argv[1], "-generate")) {
366 char *teststr, *p;
367 int i, initialquote, backslashes, quotes;
368
369 teststr = malloc(200 + strlen(argv[0]));
370
371 for (initialquote = 0; initialquote <= 1; initialquote++) {
372 for (backslashes = 0; backslashes < 5; backslashes++) {
373 for (quotes = 0; quotes < 9; quotes++) {
374 p = teststr + sprintf(teststr, "%s -split ", argv[0]);
375 if (initialquote) *p++ = '\'';
376 *p++ = 'a';
377 for (i = 0; i < backslashes; i++) *p++ = '/';
378 for (i = 0; i < quotes; i++) *p++ = '\'';
379 *p++ = 'b';
380 *p++ = '_';
381 *p++ = 'c';
382 *p++ = '\'';
383 *p++ = '_';
384 *p++ = 'd';
385 *p = '\0';
386
387 system(teststr);
388 }
389 }
390 }
391 return 0;
392 }
393
394 fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
395 return 1;
396 }
397
398 /*
399 * If we get here, we were invoked with no arguments, so just
400 * run the tests.
401 */
402
403 for (i = 0; i < lenof(argv_tests); i++) {
404 int ac;
405 char **av;
406
407 split_into_argv(argv_tests[i].cmdline, &ac, &av);
408
409 for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
410 if (strcmp(av[j], argv_tests[i].argv[j])) {
411 printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
412 i, argv_tests[i].cmdline,
413 j, av[j], argv_tests[i].argv[j]);
414 }
415 #ifdef VERBOSE
416 else {
417 printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
418 i, argv_tests[i].cmdline,
419 j, av[j], argv_tests[i].argv[j]);
420 }
421 #endif
422 }
423 if (j < ac)
424 printf("failed test %d (|%s|): %d args returned, should be %d\n",
425 i, argv_tests[i].cmdline, ac, j);
426 if (argv_tests[i].argv[j])
427 printf("failed test %d (|%s|): %d args returned, should be more\n",
428 i, argv_tests[i].cmdline, ac);
429 }
430
431 return 0;
432 }
433
434 #endif