Introduced wrapper macros snew(), snewn() and sresize() for the
[u/mdw/putty] / winutils.c
CommitLineData
/*
 * winutils.c: miscellaneous Windows utilities
 */
4
5#include <stdio.h>
6#include <stdlib.h>
d3a1a808 7#include <ctype.h>
c0a81592 8
d3a1a808 9#include "misc.h"
c0a81592 10
11#ifdef TESTMODE
12/* Definitions to allow this module to be compiled standalone for testing. */
13#define smalloc malloc
5a71c4ea 14#define srealloc realloc
15#define sfree free
c0a81592 16#endif
17
18/*
19 * Split a complete command line into argc/argv, attempting to do
20 * it exactly the same way Windows itself would do it (so that
21 * console utilities, which receive argc and argv from Windows,
22 * will have their command lines processed in the same way as GUI
23 * utilities which get a whole command line and must break it
24 * themselves).
25 *
d3a1a808 26 * Does not modify the input command line.
27 *
28 * The final parameter (argstart) is used to return a second array
29 * of char * pointers, the same length as argv, each one pointing
30 * at the start of the corresponding element of argv in the
31 * original command line. So if you get half way through processing
32 * your command line in argc/argv form and then decide you want to
33 * treat the rest as a raw string, you can. If you don't want to,
34 * `argstart' can be safely left NULL.
c0a81592 35 */
d3a1a808 36void split_into_argv(char *cmdline, int *argc, char ***argv,
37 char ***argstart)
c0a81592 38{
d3a1a808 39 char *p;
c0a81592 40 char *outputline, *q;
d3a1a808 41 char **outputargv, **outputargstart;
c0a81592 42 int outputargc;
43
44 /*
45 * At first glance the rules appeared to be:
46 *
47 * - Single quotes are not special characters.
48 *
49 * - Double quotes are removed, but within them spaces cease
50 * to be special.
51 *
52 * - Backslashes are _only_ special when a sequence of them
53 * appear just before a double quote. In this situation,
54 * they are treated like C backslashes: so \" just gives a
55 * literal quote, \\" gives a literal backslash and then
56 * opens or closes a double-quoted segment, \\\" gives a
57 * literal backslash and then a literal quote, \\\\" gives
58 * two literal backslashes and then opens/closes a
59 * double-quoted segment, and so forth. Note that this
60 * behaviour is identical inside and outside double quotes.
61 *
62 * - Two successive double quotes become one literal double
63 * quote, but only _inside_ a double-quoted segment.
64 * Outside, they just form an empty double-quoted segment
65 * (which may cause an empty argument word).
66 *
67 * - That only leaves the interesting question of what happens
68 * when one or more backslashes precedes two or more double
69 * quotes, starting inside a double-quoted string. And the
70 * answer to that appears somewhat bizarre. Here I tabulate
71 * number of backslashes (across the top) against number of
72 * quotes (down the left), and indicate how many backslashes
73 * are output, how many quotes are output, and whether a
74 * quoted segment is open at the end of the sequence:
75 *
76 * backslashes
77 *
78 * 0 1 2 3 4
79 *
80 * 0 0,0,y | 1,0,y 2,0,y 3,0,y 4,0,y
81 * --------+-----------------------------
82 * 1 0,0,n | 0,1,y 1,0,n 1,1,y 2,0,n
83 * q 2 0,1,n | 0,1,n 1,1,n 1,1,n 2,1,n
84 * u 3 0,1,y | 0,2,n 1,1,y 1,2,n 2,1,y
85 * o 4 0,1,n | 0,2,y 1,1,n 1,2,y 2,1,n
86 * t 5 0,2,n | 0,2,n 1,2,n 1,2,n 2,2,n
87 * e 6 0,2,y | 0,3,n 1,2,y 1,3,n 2,2,y
88 * s 7 0,2,n | 0,3,y 1,2,n 1,3,y 2,2,n
89 * 8 0,3,n | 0,3,n 1,3,n 1,3,n 2,3,n
90 * 9 0,3,y | 0,4,n 1,3,y 1,4,n 2,3,y
91 * 10 0,3,n | 0,4,y 1,3,n 1,4,y 2,3,n
92 * 11 0,4,n | 0,4,n 1,4,n 1,4,n 2,4,n
93 *
94 *
95 * [Test fragment was of the form "a\\\"""b c" d.]
96 *
97 * There is very weird mod-3 behaviour going on here in the
98 * number of quotes, and it even applies when there aren't any
99 * backslashes! How ghastly.
100 *
101 * With a bit of thought, this extremely odd diagram suddenly
102 * coalesced itself into a coherent, if still ghastly, model of
103 * how things work:
104 *
105 * - As before, backslashes are only special when one or more
106 * of them appear contiguously before at least one double
107 * quote. In this situation the backslashes do exactly what
108 * you'd expect: each one quotes the next thing in front of
109 * it, so you end up with n/2 literal backslashes (if n is
110 * even) or (n-1)/2 literal backslashes and a literal quote
111 * (if n is odd). In the latter case the double quote
112 * character right after the backslashes is used up.
113 *
114 * - After that, any remaining double quotes are processed. A
115 * string of contiguous unescaped double quotes has a mod-3
116 * behaviour:
117 *
118 * * inside a quoted segment, a quote ends the segment.
119 * * _immediately_ after ending a quoted segment, a quote
120 * simply produces a literal quote.
121 * * otherwise, outside a quoted segment, a quote begins a
122 * quoted segment.
123 *
124 * So, for example, if we started inside a quoted segment
125 * then two contiguous quotes would close the segment and
126 * produce a literal quote; three would close the segment,
127 * produce a literal quote, and open a new segment. If we
128 * started outside a quoted segment, then two contiguous
129 * quotes would open and then close a segment, producing no
130 * output (but potentially creating a zero-length argument);
131 * but three quotes would open and close a segment and then
132 * produce a literal quote.
133 */
134
135 /*
5a71c4ea 136 * First deal with the simplest of all special cases: if there
137 * aren't any arguments, return 0,NULL,NULL.
138 */
139 while (*cmdline && isspace(*cmdline)) cmdline++;
140 if (!*cmdline) {
141 if (argc) *argc = 0;
142 if (argv) *argv = NULL;
143 if (argstart) *argstart = NULL;
144 return;
145 }
146
147 /*
c0a81592 148 * This will guaranteeably be big enough; we can realloc it
149 * down later.
150 */
3d88e64d 151 outputline = snewn(1+strlen(cmdline), char);
152 outputargv = snewn(strlen(cmdline)+1 / 2, char *);
153 outputargstart = snewn(strlen(cmdline)+1 / 2, char *);
c0a81592 154
155 p = cmdline; q = outputline; outputargc = 0;
156
157 while (*p) {
158 int quote;
159
160 /* Skip whitespace searching for start of argument. */
161 while (*p && isspace(*p)) p++;
162 if (!*p) break;
163
164 /* We have an argument; start it. */
d3a1a808 165 outputargv[outputargc] = q;
166 outputargstart[outputargc] = p;
167 outputargc++;
c0a81592 168 quote = 0;
169
170 /* Copy data into the argument until it's finished. */
171 while (*p) {
172 if (!quote && isspace(*p))
173 break; /* argument is finished */
174
175 if (*p == '"' || *p == '\\') {
176 /*
177 * We have a sequence of zero or more backslashes
178 * followed by a sequence of zero or more quotes.
179 * Count up how many of each, and then deal with
180 * them as appropriate.
181 */
182 int i, slashes = 0, quotes = 0;
183 while (*p == '\\') slashes++, p++;
184 while (*p == '"') quotes++, p++;
185
186 if (!quotes) {
187 /*
188 * Special case: if there are no quotes,
189 * slashes are not special at all, so just copy
190 * n slashes to the output string.
191 */
192 while (slashes--) *q++ = '\\';
193 } else {
194 /* Slashes annihilate in pairs. */
195 while (slashes >= 2) slashes -= 2, *q++ = '\\';
196
197 /* One remaining slash takes out the first quote. */
198 if (slashes) quotes--, *q++ = '"';
199
200 if (quotes > 0) {
201 /* Outside a quote segment, a quote starts one. */
202 if (!quote) quotes--, quote = 1;
203
204 /* Now we produce (n+1)/3 literal quotes... */
205 for (i = 3; i <= quotes+1; i += 3) *q++ = '"';
206
207 /* ... and end in a quote segment iff 3 divides n. */
208 quote = (quotes % 3 == 0);
209 }
210 }
211 } else {
212 *q++ = *p++;
213 }
214 }
215
216 /* At the end of an argument, just append a trailing NUL. */
217 *q++ = '\0';
218 }
219
3d88e64d 220 outputargv = sresize(outputargv, outputargc, char *);
221 outputargstart = sresize(outputargstart, outputargc, char *);
c0a81592 222
223 if (argc) *argc = outputargc;
d3a1a808 224 if (argv) *argv = outputargv; else sfree(outputargv);
225 if (argstart) *argstart = outputargstart; else sfree(outputargstart);
c0a81592 226}
227
228#ifdef TESTMODE
229
/*
 * One regression-test vector for split_into_argv(): a raw command
 * line, and the words it is expected to split into (terminated by
 * a NULL entry).
 */
struct argv_test {
    const char *cmdline;
    const char *argv[10];
};

/*
 * The table of test vectors itself. Don't edit this by hand: the
 * entries are machine-generated, by running this test program with
 * the `-generate' option.
 */
const struct argv_test argv_tests[] = {
    { "ab c\" d", { "ab", "c d", NULL } },
    { "a\"b c\" d", { "ab c", "d", NULL } },
    { "a\"\"b c\" d", { "ab", "c d", NULL } },
    { "a\"\"\"b c\" d", { "a\"b", "c d", NULL } },
    { "a\"\"\"\"b c\" d", { "a\"b c", "d", NULL } },
    { "a\"\"\"\"\"b c\" d", { "a\"b", "c d", NULL } },
    { "a\"\"\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "a\"\"\"\"\"\"\"b c\" d", { "a\"\"b c", "d", NULL } },
    { "a\"\"\"\"\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "a\\b c\" d", { "a\\b", "c d", NULL } },
    { "a\\\"b c\" d", { "a\"b", "c d", NULL } },
    { "a\\\"\"b c\" d", { "a\"b c", "d", NULL } },
    { "a\\\"\"\"b c\" d", { "a\"b", "c d", NULL } },
    { "a\\\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "a\\\"\"\"\"\"b c\" d", { "a\"\"b c", "d", NULL } },
    { "a\\\"\"\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "a\\\"\"\"\"\"\"\"b c\" d", { "a\"\"\"b", "c d", NULL } },
    { "a\\\"\"\"\"\"\"\"\"b c\" d", { "a\"\"\"b c", "d", NULL } },
    { "a\\\\b c\" d", { "a\\\\b", "c d", NULL } },
    { "a\\\\\"b c\" d", { "a\\b c", "d", NULL } },
    { "a\\\\\"\"b c\" d", { "a\\b", "c d", NULL } },
    { "a\\\\\"\"\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "a\\\\\"\"\"\"b c\" d", { "a\\\"b c", "d", NULL } },
    { "a\\\\\"\"\"\"\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "a\\\\\"\"\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "a\\\\\"\"\"\"\"\"\"b c\" d", { "a\\\"\"b c", "d", NULL } },
    { "a\\\\\"\"\"\"\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "a\\\\\\b c\" d", { "a\\\\\\b", "c d", NULL } },
    { "a\\\\\\\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "a\\\\\\\"\"b c\" d", { "a\\\"b c", "d", NULL } },
    { "a\\\\\\\"\"\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "a\\\\\\\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "a\\\\\\\"\"\"\"\"b c\" d", { "a\\\"\"b c", "d", NULL } },
    { "a\\\\\\\"\"\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "a\\\\\\\"\"\"\"\"\"\"b c\" d", { "a\\\"\"\"b", "c d", NULL } },
    { "a\\\\\\\"\"\"\"\"\"\"\"b c\" d", { "a\\\"\"\"b c", "d", NULL } },
    { "a\\\\\\\\b c\" d", { "a\\\\\\\\b", "c d", NULL } },
    { "a\\\\\\\\\"b c\" d", { "a\\\\b c", "d", NULL } },
    { "a\\\\\\\\\"\"b c\" d", { "a\\\\b", "c d", NULL } },
    { "a\\\\\\\\\"\"\"b c\" d", { "a\\\\\"b", "c d", NULL } },
    { "a\\\\\\\\\"\"\"\"b c\" d", { "a\\\\\"b c", "d", NULL } },
    { "a\\\\\\\\\"\"\"\"\"b c\" d", { "a\\\\\"b", "c d", NULL } },
    { "a\\\\\\\\\"\"\"\"\"\"b c\" d", { "a\\\\\"\"b", "c d", NULL } },
    { "a\\\\\\\\\"\"\"\"\"\"\"b c\" d", { "a\\\\\"\"b c", "d", NULL } },
    { "a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", { "a\\\\\"\"b", "c d", NULL } },
    { "\"ab c\" d", { "ab c", "d", NULL } },
    { "\"a\"b c\" d", { "ab", "c d", NULL } },
    { "\"a\"\"b c\" d", { "a\"b", "c d", NULL } },
    { "\"a\"\"\"b c\" d", { "a\"b c", "d", NULL } },
    { "\"a\"\"\"\"b c\" d", { "a\"b", "c d", NULL } },
    { "\"a\"\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "\"a\"\"\"\"\"\"b c\" d", { "a\"\"b c", "d", NULL } },
    { "\"a\"\"\"\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "\"a\"\"\"\"\"\"\"\"b c\" d", { "a\"\"\"b", "c d", NULL } },
    { "\"a\\b c\" d", { "a\\b c", "d", NULL } },
    { "\"a\\\"b c\" d", { "a\"b c", "d", NULL } },
    { "\"a\\\"\"b c\" d", { "a\"b", "c d", NULL } },
    { "\"a\\\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "\"a\\\"\"\"\"b c\" d", { "a\"\"b c", "d", NULL } },
    { "\"a\\\"\"\"\"\"b c\" d", { "a\"\"b", "c d", NULL } },
    { "\"a\\\"\"\"\"\"\"b c\" d", { "a\"\"\"b", "c d", NULL } },
    { "\"a\\\"\"\"\"\"\"\"b c\" d", { "a\"\"\"b c", "d", NULL } },
    { "\"a\\\"\"\"\"\"\"\"\"b c\" d", { "a\"\"\"b", "c d", NULL } },
    { "\"a\\\\b c\" d", { "a\\\\b c", "d", NULL } },
    { "\"a\\\\\"b c\" d", { "a\\b", "c d", NULL } },
    { "\"a\\\\\"\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "\"a\\\\\"\"\"b c\" d", { "a\\\"b c", "d", NULL } },
    { "\"a\\\\\"\"\"\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "\"a\\\\\"\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "\"a\\\\\"\"\"\"\"\"b c\" d", { "a\\\"\"b c", "d", NULL } },
    { "\"a\\\\\"\"\"\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "\"a\\\\\"\"\"\"\"\"\"\"b c\" d", { "a\\\"\"\"b", "c d", NULL } },
    { "\"a\\\\\\b c\" d", { "a\\\\\\b c", "d", NULL } },
    { "\"a\\\\\\\"b c\" d", { "a\\\"b c", "d", NULL } },
    { "\"a\\\\\\\"\"b c\" d", { "a\\\"b", "c d", NULL } },
    { "\"a\\\\\\\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "\"a\\\\\\\"\"\"\"b c\" d", { "a\\\"\"b c", "d", NULL } },
    { "\"a\\\\\\\"\"\"\"\"b c\" d", { "a\\\"\"b", "c d", NULL } },
    { "\"a\\\\\\\"\"\"\"\"\"b c\" d", { "a\\\"\"\"b", "c d", NULL } },
    { "\"a\\\\\\\"\"\"\"\"\"\"b c\" d", { "a\\\"\"\"b c", "d", NULL } },
    { "\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", { "a\\\"\"\"b", "c d", NULL } },
    { "\"a\\\\\\\\b c\" d", { "a\\\\\\\\b c", "d", NULL } },
    { "\"a\\\\\\\\\"b c\" d", { "a\\\\b", "c d", NULL } },
    { "\"a\\\\\\\\\"\"b c\" d", { "a\\\\\"b", "c d", NULL } },
    { "\"a\\\\\\\\\"\"\"b c\" d", { "a\\\\\"b c", "d", NULL } },
    { "\"a\\\\\\\\\"\"\"\"b c\" d", { "a\\\\\"b", "c d", NULL } },
    { "\"a\\\\\\\\\"\"\"\"\"b c\" d", { "a\\\\\"\"b", "c d", NULL } },
    { "\"a\\\\\\\\\"\"\"\"\"\"b c\" d", { "a\\\\\"\"b c", "d", NULL } },
    { "\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", { "a\\\\\"\"b", "c d", NULL } },
    { "\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", { "a\\\\\"\"\"b", "c d", NULL } },
};
329
330int main(int argc, char **argv)
331{
332 int i, j;
333
334 if (argc > 1) {
335 /*
336 * Generation of tests.
337 *
338 * Given `-splat <args>', we print out a C-style
339 * representation of each argument (in the form "a", "b",
340 * NULL), backslash-escaping each backslash and double
341 * quote.
342 *
343 * Given `-split <string>', we first doctor `string' by
344 * turning forward slashes into backslashes, single quotes
345 * into double quotes and underscores into spaces; and then
346 * we feed the resulting string to ourself with `-splat'.
347 *
348 * Given `-generate', we concoct a variety of fun test
349 * cases, encode them in quote-safe form (mapping \, " and
350 * space to /, ' and _ respectively) and feed each one to
351 * `-split'.
352 */
353 if (!strcmp(argv[1], "-splat")) {
354 int i;
355 char *p;
356 for (i = 2; i < argc; i++) {
357 putchar('"');
358 for (p = argv[i]; *p; p++) {
359 if (*p == '\\' || *p == '"')
360 putchar('\\');
361 putchar(*p);
362 }
363 printf("\", ");
364 }
365 printf("NULL");
366 return 0;
367 }
368
369 if (!strcmp(argv[1], "-split") && argc > 2) {
370 char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
371 char *p, *q;
372
373 q = str + sprintf(str, "%s -splat ", argv[0]);
374 printf(" {\"");
375 for (p = argv[2]; *p; p++, q++) {
376 switch (*p) {
377 case '/': printf("\\\\"); *q = '\\'; break;
378 case '\'': printf("\\\""); *q = '"'; break;
379 case '_': printf(" "); *q = ' '; break;
380 default: putchar(*p); *q = *p; break;
381 }
382 }
383 *p = '\0';
384 printf("\", {");
385 fflush(stdout);
386
387 system(str);
388
389 printf("}},\n");
390
391 return 0;
392 }
393
394 if (!strcmp(argv[1], "-generate")) {
395 char *teststr, *p;
396 int i, initialquote, backslashes, quotes;
397
398 teststr = malloc(200 + strlen(argv[0]));
399
400 for (initialquote = 0; initialquote <= 1; initialquote++) {
401 for (backslashes = 0; backslashes < 5; backslashes++) {
402 for (quotes = 0; quotes < 9; quotes++) {
403 p = teststr + sprintf(teststr, "%s -split ", argv[0]);
404 if (initialquote) *p++ = '\'';
405 *p++ = 'a';
406 for (i = 0; i < backslashes; i++) *p++ = '/';
407 for (i = 0; i < quotes; i++) *p++ = '\'';
408 *p++ = 'b';
409 *p++ = '_';
410 *p++ = 'c';
411 *p++ = '\'';
412 *p++ = '_';
413 *p++ = 'd';
414 *p = '\0';
415
416 system(teststr);
417 }
418 }
419 }
420 return 0;
421 }
422
423 fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
424 return 1;
425 }
426
427 /*
428 * If we get here, we were invoked with no arguments, so just
429 * run the tests.
430 */
431
432 for (i = 0; i < lenof(argv_tests); i++) {
433 int ac;
434 char **av;
435
436 split_into_argv(argv_tests[i].cmdline, &ac, &av);
437
438 for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
439 if (strcmp(av[j], argv_tests[i].argv[j])) {
440 printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
441 i, argv_tests[i].cmdline,
442 j, av[j], argv_tests[i].argv[j]);
443 }
444#ifdef VERBOSE
445 else {
446 printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
447 i, argv_tests[i].cmdline,
448 j, av[j], argv_tests[i].argv[j]);
449 }
450#endif
451 }
452 if (j < ac)
453 printf("failed test %d (|%s|): %d args returned, should be %d\n",
454 i, argv_tests[i].cmdline, ac, j);
455 if (argv_tests[i].argv[j])
456 printf("failed test %d (|%s|): %d args returned, should be more\n",
457 i, argv_tests[i].cmdline, ac);
458 }
459
460 return 0;
461}
462
5a71c4ea 463#endif