c0a81592 |
1 | /* |
2 | * winutils.c: miscellaneous Windows utilities |
3 | */ |
4 | |
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
7 | |
8 | #define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) |
9 | |
10 | #ifdef TESTMODE |
11 | /* Definitions to allow this module to be compiled standalone for testing. */ |
12 | #define smalloc malloc |
13 | #endif |
14 | |
/*
 * Split a complete command line into argc/argv, attempting to do
 * it exactly the same way Windows itself would do it (so that
 * console utilities, which receive argc and argv from Windows,
 * will have their command lines processed in the same way as GUI
 * utilities which get a whole command line and must break it
 * themselves).
 *
 * Does not modify the input command line (just in case).
 *
 * Parameters:
 *   cmdline  NUL-terminated command line to split. NULL is treated
 *            as a failure (see below).
 *   argc     if non-NULL, receives the number of arguments found.
 *   argv     if non-NULL, receives a newly allocated vector of
 *            argument strings, terminated by a NULL pointer at
 *            index argc.
 *
 * Ownership: all argument strings live in one allocation whose
 * address is (*argv)[0]. To release everything the caller does
 * free((*argv)[0]) followed by free(*argv); this is safe when no
 * arguments were found, because (*argv)[0] is then NULL.
 *
 * Failure: if cmdline is NULL or memory cannot be allocated, *argc
 * is set to 0 and *argv to NULL.
 */
void split_into_argv(const char *cmdline, int *argc, char ***argv)
{
    const char *p;
    char *outputline, *q;
    char **outputargv;
    int outputargc;
    size_t cmdlen, maxargs;

    /*
     * At first glance the rules appeared to be:
     *
     *  - Single quotes are not special characters.
     *
     *  - Double quotes are removed, but within them spaces cease
     *    to be special.
     *
     *  - Backslashes are _only_ special when a sequence of them
     *    appear just before a double quote. In this situation,
     *    they are treated like C backslashes: so \" just gives a
     *    literal quote, \\" gives a literal backslash and then
     *    opens or closes a double-quoted segment, \\\" gives a
     *    literal backslash and then a literal quote, \\\\" gives
     *    two literal backslashes and then opens/closes a
     *    double-quoted segment, and so forth. Note that this
     *    behaviour is identical inside and outside double quotes.
     *
     *  - Two successive double quotes become one literal double
     *    quote, but only _inside_ a double-quoted segment.
     *    Outside, they just form an empty double-quoted segment
     *    (which may cause an empty argument word).
     *
     *  - That only leaves the interesting question of what happens
     *    when one or more backslashes precedes two or more double
     *    quotes, starting inside a double-quoted string. And the
     *    answer to that appears somewhat bizarre. Here I tabulate
     *    number of backslashes (across the top) against number of
     *    quotes (down the left), and indicate how many backslashes
     *    are output, how many quotes are output, and whether a
     *    quoted segment is open at the end of the sequence:
     *
     *                      backslashes
     *
     *               0         1      2      3      4
     *
     *         0   0,0,y  |  1,0,y  2,0,y  3,0,y  4,0,y
     *            --------+-----------------------------
     *         1   0,0,n  |  0,1,y  1,0,n  1,1,y  2,0,n
     *    q    2   0,1,n  |  0,1,n  1,1,n  1,1,n  2,1,n
     *    u    3   0,1,y  |  0,2,n  1,1,y  1,2,n  2,1,y
     *    o    4   0,1,n  |  0,2,y  1,1,n  1,2,y  2,1,n
     *    t    5   0,2,n  |  0,2,n  1,2,n  1,2,n  2,2,n
     *    e    6   0,2,y  |  0,3,n  1,2,y  1,3,n  2,2,y
     *    s    7   0,2,n  |  0,3,y  1,2,n  1,3,y  2,2,n
     *         8   0,3,n  |  0,3,n  1,3,n  1,3,n  2,3,n
     *         9   0,3,y  |  0,4,n  1,3,y  1,4,n  2,3,y
     *        10   0,3,n  |  0,4,y  1,3,n  1,4,y  2,3,n
     *        11   0,4,n  |  0,4,n  1,4,n  1,4,n  2,4,n
     *
     *
     *      [Test fragment was of the form "a\\\"""b c" d.]
     *
     * There is very weird mod-3 behaviour going on here in the
     * number of quotes, and it even applies when there aren't any
     * backslashes! How ghastly.
     *
     * With a bit of thought, this extremely odd diagram suddenly
     * coalesced itself into a coherent, if still ghastly, model of
     * how things work:
     *
     *  - As before, backslashes are only special when one or more
     *    of them appear contiguously before at least one double
     *    quote. In this situation the backslashes do exactly what
     *    you'd expect: each one quotes the next thing in front of
     *    it, so you end up with n/2 literal backslashes (if n is
     *    even) or (n-1)/2 literal backslashes and a literal quote
     *    (if n is odd). In the latter case the double quote
     *    character right after the backslashes is used up.
     *
     *  - After that, any remaining double quotes are processed. A
     *    string of contiguous unescaped double quotes has a mod-3
     *    behaviour:
     *
     *     * inside a quoted segment, a quote ends the segment.
     *     * _immediately_ after ending a quoted segment, a quote
     *       simply produces a literal quote.
     *     * otherwise, outside a quoted segment, a quote begins a
     *       quoted segment.
     *
     *    So, for example, if we started inside a quoted segment
     *    then two contiguous quotes would close the segment and
     *    produce a literal quote; three would close the segment,
     *    produce a literal quote, and open a new segment. If we
     *    started outside a quoted segment, then two contiguous
     *    quotes would open and then close a segment, producing no
     *    output (but potentially creating a zero-length argument);
     *    but three quotes would open and close a segment and then
     *    produce a literal quote.
     */

    /* Report "nothing" until a complete result has been built. */
    if (argc) *argc = 0;
    if (argv) *argv = NULL;
    if (!cmdline)
        return;

    /*
     * These sizes are guaranteed to be big enough. Every output
     * character (including each argument's trailing NUL) is paid
     * for by at least one input character, so the text fits in
     * strlen+1 bytes. Every argument needs at least one character
     * plus a separating space, so there are at most (strlen+1)/2
     * of them; one extra slot holds the terminating NULL pointer.
     * The vector is shrunk to its real size at the end.
     */
    cmdlen = strlen(cmdline);
    maxargs = (cmdlen + 1) / 2;

    outputline = malloc(cmdlen + 1);
    outputargv = malloc(sizeof(char *) * (maxargs + 1));
    if (!outputline || !outputargv) {
        free(outputline);
        free(outputargv);
        return;
    }

    p = cmdline; q = outputline; outputargc = 0;

    while (*p) {
        int quote;

        /*
         * Skip whitespace searching for start of argument. The
         * cast keeps isspace() defined for characters with the
         * top bit set when plain char is signed.
         */
        while (*p && isspace((unsigned char)*p)) p++;
        if (!*p) break;

        /* We have an argument; start it. */
        outputargv[outputargc++] = q;
        quote = 0;

        /* Copy data into the argument until it's finished. */
        while (*p) {
            if (!quote && isspace((unsigned char)*p))
                break;                 /* argument is finished */

            if (*p == '"' || *p == '\\') {
                /*
                 * We have a sequence of zero or more backslashes
                 * followed by a sequence of zero or more quotes.
                 * Count up how many of each, and then deal with
                 * them as appropriate.
                 */
                int i, slashes = 0, quotes = 0;
                while (*p == '\\') slashes++, p++;
                while (*p == '"') quotes++, p++;

                if (!quotes) {
                    /*
                     * Special case: if there are no quotes,
                     * slashes are not special at all, so just copy
                     * n slashes to the output string.
                     */
                    while (slashes--) *q++ = '\\';
                } else {
                    /* Slashes annihilate in pairs. */
                    while (slashes >= 2) slashes -= 2, *q++ = '\\';

                    /* One remaining slash takes out the first quote. */
                    if (slashes) quotes--, *q++ = '"';

                    if (quotes > 0) {
                        /* Outside a quote segment, a quote starts one. */
                        if (!quote) quotes--, quote = 1;

                        /* Now we produce (n+1)/3 literal quotes... */
                        for (i = 3; i <= quotes+1; i += 3) *q++ = '"';

                        /* ... and end in a quote segment iff 3 divides n. */
                        quote = (quotes % 3 == 0);
                    }
                }
            } else {
                *q++ = *p++;
            }
        }

        /* At the end of an argument, just append a trailing NUL. */
        *q++ = '\0';
    }

    /* Terminate the vector the way a real argv is terminated. */
    outputargv[outputargc] = NULL;

    /*
     * If there are no arguments, or the caller does not want the
     * vector, nothing the caller can reach points into the text
     * buffer, so release it here rather than leak it.
     */
    if (outputargc == 0 || !argv)
        free(outputline);

    if (!argv) {
        free(outputargv);
    } else {
        /*
         * Shrink the vector to fit. The new size is never zero, and
         * if realloc fails the original (larger) block is still
         * valid, so hand that back instead.
         */
        char **shrunk = realloc(outputargv,
                                sizeof(char *) * ((size_t)outputargc + 1));
        *argv = shrunk ? shrunk : outputargv;
    }

    if (argc) *argc = outputargc;
}
198 | |
199 | #ifdef TESTMODE |
200 | |
/*
 * One test case: a raw command line, and the NULL-terminated list of
 * arguments that splitting it is expected to produce.
 */
struct argv_test {
    const char *cmdline;
    const char *argv[10];
};

/*
 * We generate this set of tests by invoking ourself with
 * `-generate'.
 *
 * Every command line has the shape  ["]a<backslashes><quotes>b c" d
 * and the entries are ordered by: optional leading quote, then the
 * number of backslashes (0-4), then the number of quotes (0-8).
 */
const struct argv_test argv_tests[] = {

    /* ---- No leading quote ---- */

    /* 0 backslashes, 0..8 quotes */
    {"ab c\" d", {"ab", "c d", NULL}},
    {"a\"b c\" d", {"ab c", "d", NULL}},
    {"a\"\"b c\" d", {"ab", "c d", NULL}},
    {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},

    /* 1 backslash, 0..8 quotes */
    {"a\\b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},

    /* 2 backslashes, 0..8 quotes */
    {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
    {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},

    /* 3 backslashes, 0..8 quotes */
    {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
    {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},

    /* 4 backslashes, 0..8 quotes */
    {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
    {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},

    /* ---- Leading quote ---- */

    /* 0 backslashes, 0..8 quotes */
    {"\"ab c\" d", {"ab c", "d", NULL}},
    {"\"a\"b c\" d", {"ab", "c d", NULL}},
    {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},

    /* 1 backslash, 0..8 quotes */
    {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
    {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},

    /* 2 backslashes, 0..8 quotes */
    {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
    {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
    {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},

    /* 3 backslashes, 0..8 quotes */
    {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},

    /* 4 backslashes, 0..8 quotes */
    {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
};
300 | |
301 | int main(int argc, char **argv) |
302 | { |
303 | int i, j; |
304 | |
305 | if (argc > 1) { |
306 | /* |
307 | * Generation of tests. |
308 | * |
309 | * Given `-splat <args>', we print out a C-style |
310 | * representation of each argument (in the form "a", "b", |
311 | * NULL), backslash-escaping each backslash and double |
312 | * quote. |
313 | * |
314 | * Given `-split <string>', we first doctor `string' by |
315 | * turning forward slashes into backslashes, single quotes |
316 | * into double quotes and underscores into spaces; and then |
317 | * we feed the resulting string to ourself with `-splat'. |
318 | * |
319 | * Given `-generate', we concoct a variety of fun test |
320 | * cases, encode them in quote-safe form (mapping \, " and |
321 | * space to /, ' and _ respectively) and feed each one to |
322 | * `-split'. |
323 | */ |
324 | if (!strcmp(argv[1], "-splat")) { |
325 | int i; |
326 | char *p; |
327 | for (i = 2; i < argc; i++) { |
328 | putchar('"'); |
329 | for (p = argv[i]; *p; p++) { |
330 | if (*p == '\\' || *p == '"') |
331 | putchar('\\'); |
332 | putchar(*p); |
333 | } |
334 | printf("\", "); |
335 | } |
336 | printf("NULL"); |
337 | return 0; |
338 | } |
339 | |
340 | if (!strcmp(argv[1], "-split") && argc > 2) { |
341 | char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2])); |
342 | char *p, *q; |
343 | |
344 | q = str + sprintf(str, "%s -splat ", argv[0]); |
345 | printf(" {\""); |
346 | for (p = argv[2]; *p; p++, q++) { |
347 | switch (*p) { |
348 | case '/': printf("\\\\"); *q = '\\'; break; |
349 | case '\'': printf("\\\""); *q = '"'; break; |
350 | case '_': printf(" "); *q = ' '; break; |
351 | default: putchar(*p); *q = *p; break; |
352 | } |
353 | } |
354 | *p = '\0'; |
355 | printf("\", {"); |
356 | fflush(stdout); |
357 | |
358 | system(str); |
359 | |
360 | printf("}},\n"); |
361 | |
362 | return 0; |
363 | } |
364 | |
365 | if (!strcmp(argv[1], "-generate")) { |
366 | char *teststr, *p; |
367 | int i, initialquote, backslashes, quotes; |
368 | |
369 | teststr = malloc(200 + strlen(argv[0])); |
370 | |
371 | for (initialquote = 0; initialquote <= 1; initialquote++) { |
372 | for (backslashes = 0; backslashes < 5; backslashes++) { |
373 | for (quotes = 0; quotes < 9; quotes++) { |
374 | p = teststr + sprintf(teststr, "%s -split ", argv[0]); |
375 | if (initialquote) *p++ = '\''; |
376 | *p++ = 'a'; |
377 | for (i = 0; i < backslashes; i++) *p++ = '/'; |
378 | for (i = 0; i < quotes; i++) *p++ = '\''; |
379 | *p++ = 'b'; |
380 | *p++ = '_'; |
381 | *p++ = 'c'; |
382 | *p++ = '\''; |
383 | *p++ = '_'; |
384 | *p++ = 'd'; |
385 | *p = '\0'; |
386 | |
387 | system(teststr); |
388 | } |
389 | } |
390 | } |
391 | return 0; |
392 | } |
393 | |
394 | fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]); |
395 | return 1; |
396 | } |
397 | |
398 | /* |
399 | * If we get here, we were invoked with no arguments, so just |
400 | * run the tests. |
401 | */ |
402 | |
403 | for (i = 0; i < lenof(argv_tests); i++) { |
404 | int ac; |
405 | char **av; |
406 | |
407 | split_into_argv(argv_tests[i].cmdline, &ac, &av); |
408 | |
409 | for (j = 0; j < ac && argv_tests[i].argv[j]; j++) { |
410 | if (strcmp(av[j], argv_tests[i].argv[j])) { |
411 | printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n", |
412 | i, argv_tests[i].cmdline, |
413 | j, av[j], argv_tests[i].argv[j]); |
414 | } |
415 | #ifdef VERBOSE |
416 | else { |
417 | printf("test %d (|%s|) arg %d: |%s| == |%s|\n", |
418 | i, argv_tests[i].cmdline, |
419 | j, av[j], argv_tests[i].argv[j]); |
420 | } |
421 | #endif |
422 | } |
423 | if (j < ac) |
424 | printf("failed test %d (|%s|): %d args returned, should be %d\n", |
425 | i, argv_tests[i].cmdline, ac, j); |
426 | if (argv_tests[i].argv[j]) |
427 | printf("failed test %d (|%s|): %d args returned, should be more\n", |
428 | i, argv_tests[i].cmdline, ac); |
429 | } |
430 | |
431 | return 0; |
432 | } |
433 | |
434 | #endif |