debian/rules: Use `git' potty wrapper.
[qmail] / token822.c
1 #include "stralloc.h"
2 #include "alloc.h"
3 #include "str.h"
4 #include "token822.h"
5 #include "gen_allocdefs.h"
6
/*
 * Ready-made comma token.  token822_addrlist() appends a pointer to it to
 * its output list (through the FLUSHCOMMA macro) right after an address has
 * been flushed.  Only the first member is initialized explicitly.
 */
7 static struct token822 comma = { TOKEN822_COMMA };
8
9 void token822_reverse(ta)
10 token822_alloc *ta;
11 {
12 int i;
13 int n;
14 struct token822 temp;
15
16 n = ta->len - 1;
17 for (i = 0;i + i < n;++i)
18 {
19 temp = ta->t[i];
20 ta->t[i] = ta->t[n - i];
21 ta->t[n - i] = temp;
22 }
23 }
24
/*
 * Instantiate the allocation helpers for token822_alloc: token822_ready,
 * token822_readyplus and token822_append.  The GEN_ALLOC_* macros are
 * presumably provided by gen_allocdefs.h -- confirm.
 * Elsewhere in this file all three are treated as returning zero on failure.
 * NOTE(review): the meaning of the constant 30 is fixed by the macro
 * definitions, which are not visible here -- confirm before changing it.
 */
25 GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready)
26 GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus)
27 GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append)
28
29 static int needspace(t1,t2)
30 int t1;
31 int t2;
32 {
33 if (!t1) return 0;
34 if (t1 == TOKEN822_COLON) return 1;
35 if (t1 == TOKEN822_COMMA) return 1;
36 if (t2 == TOKEN822_LEFT) return 1;
37 switch(t1)
38 {
39 case TOKEN822_ATOM: case TOKEN822_LITERAL:
40 case TOKEN822_QUOTE: case TOKEN822_COMMENT:
41 switch(t2)
42 {
43 case TOKEN822_ATOM: case TOKEN822_LITERAL:
44 case TOKEN822_QUOTE: case TOKEN822_COMMENT:
45 return 1;
46 }
47 }
48 return 0;
49 }
50
/*
 * atomok: tell whether ch may continue an atom while scanning.
 *
 * Returns 0 for whitespace (space, tab, CR, LF), for the characters that
 * open a comment, domain literal or quoted string, and for the punctuation
 * characters that are tokens of their own; returns 1 for every other
 * character, including NUL.
 */
static int atomok(ch)
char ch;
{
  static const char stoppers[] = " \t\r\n([\"<>;:@,.";
  int k;

  /* the loop stops at the terminating NUL, so NUL itself never matches */
  for (k = 0;stoppers[k];++k)
    if (stoppers[k] == ch)
      return 0;

  return 1;
}
64
65 static void atomcheck(t)
66 struct token822 *t;
67 {
68 int i;
69 char ch;
70 for (i = 0;i < t->slen;++i)
71 {
72 ch = t->s[i];
73 if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\'))
74 {
75 t->type = TOKEN822_QUOTE;
76 return;
77 }
78 }
79 }
80
81 int token822_unparse(sa,ta,linelen)
82 stralloc *sa;
83 token822_alloc *ta;
84 unsigned int linelen;
85 {
86 struct token822 *t;
87 int len;
88 int ch;
89 int i;
90 int j;
91 int lasttype;
92 int newtype;
93 char *s;
94 char *lineb;
95 char *linee;
96
97 len = 0;
98 lasttype = 0;
99 for (i = 0;i < ta->len;++i)
100 {
101 t = ta->t + i;
102 newtype = t->type;
103 if (needspace(lasttype,newtype))
104 ++len;
105 lasttype = newtype;
106 switch(newtype)
107 {
108 case TOKEN822_COMMA:
109 len += 3; break;
110 case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT:
111 case TOKEN822_SEMI: case TOKEN822_COLON:
112 ++len; break;
113 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
114 if (t->type != TOKEN822_ATOM) len += 2;
115 for (j = 0;j < t->slen;++j)
116 switch(ch = t->s[j])
117 {
118 case '"': case '[': case ']': case '(': case ')':
119 case '\\': case '\r': case '\n': ++len;
120 default: ++len;
121 }
122 break;
123 }
124 }
125 len += 2;
126
127 if (!stralloc_ready(sa,len))
128 return -1;
129
130 s = sa->s;
131 lineb = s;
132 linee = 0;
133
134 lasttype = 0;
135 for (i = 0;i < ta->len;++i)
136 {
137 t = ta->t + i;
138 newtype = t->type;
139 if (needspace(lasttype,newtype))
140 *s++ = ' ';
141 lasttype = newtype;
142 switch(newtype)
143 {
144 case TOKEN822_COMMA:
145 *s++ = ',';
146 #define NSUW \
147 s[0] = '\n'; s[1] = ' '; \
148 if (linee && (!linelen || (s - lineb <= linelen))) \
149 { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \
150 else { if (linee) lineb = linee + 1; linee = s; s += 2; }
151 NSUW
152 break;
153 case TOKEN822_AT: *s++ = '@'; break;
154 case TOKEN822_DOT: *s++ = '.'; break;
155 case TOKEN822_LEFT: *s++ = '<'; break;
156 case TOKEN822_RIGHT: *s++ = '>'; break;
157 case TOKEN822_SEMI: *s++ = ';'; break;
158 case TOKEN822_COLON: *s++ = ':'; break;
159 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
160 if (t->type == TOKEN822_QUOTE) *s++ = '"';
161 if (t->type == TOKEN822_LITERAL) *s++ = '[';
162 if (t->type == TOKEN822_COMMENT) *s++ = '(';
163 for (j = 0;j < t->slen;++j)
164 switch(ch = t->s[j])
165 {
166 case '"': case '[': case ']': case '(': case ')':
167 case '\\': case '\r': case '\n': *s++ = '\\';
168 default: *s++ = ch;
169 }
170 if (t->type == TOKEN822_QUOTE) *s++ = '"';
171 if (t->type == TOKEN822_LITERAL) *s++ = ']';
172 if (t->type == TOKEN822_COMMENT) *s++ = ')';
173 break;
174 }
175 }
176 NSUW
177 --s;
178 sa->len = s - sa->s;
179 return 1;
180 }
181
182 int token822_unquote(sa,ta)
183 stralloc *sa;
184 token822_alloc *ta;
185 {
186 struct token822 *t;
187 int len;
188 int i;
189 int j;
190 char *s;
191
192 len = 0;
193 for (i = 0;i < ta->len;++i)
194 {
195 t = ta->t + i;
196 switch(t->type)
197 {
198 case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT:
199 case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON:
200 ++len; break;
201 case TOKEN822_LITERAL:
202 len += 2;
203 case TOKEN822_ATOM: case TOKEN822_QUOTE:
204 len += t->slen;
205 }
206 }
207
208 if (!stralloc_ready(sa,len))
209 return -1;
210
211 s = sa->s;
212
213 for (i = 0;i < ta->len;++i)
214 {
215 t = ta->t + i;
216 switch(t->type)
217 {
218 case TOKEN822_COMMA: *s++ = ','; break;
219 case TOKEN822_AT: *s++ = '@'; break;
220 case TOKEN822_DOT: *s++ = '.'; break;
221 case TOKEN822_LEFT: *s++ = '<'; break;
222 case TOKEN822_RIGHT: *s++ = '>'; break;
223 case TOKEN822_SEMI: *s++ = ';'; break;
224 case TOKEN822_COLON: *s++ = ':'; break;
225 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
226 if (t->type == TOKEN822_LITERAL) *s++ = '[';
227 for (j = 0;j < t->slen;++j)
228 *s++ = t->s[j];
229 if (t->type == TOKEN822_LITERAL) *s++ = ']';
230 break;
231 case TOKEN822_COMMENT: break;
232 }
233 }
234 sa->len = s - sa->s;
235 return 1;
236 }
237
238 int token822_parse(ta,sa,buf)
239 token822_alloc *ta;
240 stralloc *sa;
241 stralloc *buf;
242 {
243 int i;
244 int salen;
245 int level;
246 struct token822 *t;
247 int numtoks;
248 int numchars;
249 char *cbuf;
250
251 salen = sa->len;
252
253 numchars = 0;
254 numtoks = 0;
255 for (i = 0;i < salen;++i)
256 switch(sa->s[i])
257 {
258 case '.': case ',': case '@': case '<': case '>': case ':': case ';':
259 ++numtoks; break;
260 case ' ': case '\t': case '\r': case '\n': break;
261 case ')': case ']': return 0;
262 /* other control chars and non-ASCII chars are also bad, in theory */
263 case '(':
264 level = 1;
265 while (level)
266 {
267 if (++i >= salen) return 0;
268 switch(sa->s[i])
269 {
270 case '(': ++level; break;
271 case ')': --level; break;
272 case '\\': if (++i >= salen) return 0;
273 default: ++numchars;
274 }
275 }
276 ++numtoks;
277 break;
278 case '"':
279 level = 1;
280 while (level)
281 {
282 if (++i >= salen) return 0;
283 switch(sa->s[i])
284 {
285 case '"': --level; break;
286 case '\\': if (++i >= salen) return 0;
287 default: ++numchars;
288 }
289 }
290 ++numtoks;
291 break;
292 case '[':
293 level = 1;
294 while (level)
295 {
296 if (++i >= salen) return 0;
297 switch(sa->s[i])
298 {
299 case ']': --level; break;
300 case '\\': if (++i >= salen) return 0;
301 default: ++numchars;
302 }
303 }
304 ++numtoks;
305 break;
306 default:
307 do
308 {
309 if (sa->s[i] == '\\') if (++i >= salen) break;
310 ++numchars;
311 if (++i >= salen)
312 break;
313 }
314 while (atomok(sa->s[i]));
315 --i;
316 ++numtoks;
317 }
318
319 if (!token822_ready(ta,numtoks))
320 return -1;
321 if (!stralloc_ready(buf,numchars))
322 return -1;
323 cbuf = buf->s;
324 ta->len = numtoks;
325
326 t = ta->t;
327 for (i = 0;i < salen;++i)
328 switch(sa->s[i])
329 {
330 case '.': t->type = TOKEN822_DOT; ++t; break;
331 case ',': t->type = TOKEN822_COMMA; ++t; break;
332 case '@': t->type = TOKEN822_AT; ++t; break;
333 case '<': t->type = TOKEN822_LEFT; ++t; break;
334 case '>': t->type = TOKEN822_RIGHT; ++t; break;
335 case ':': t->type = TOKEN822_COLON; ++t; break;
336 case ';': t->type = TOKEN822_SEMI; ++t; break;
337 case ' ': case '\t': case '\r': case '\n': break;
338 case '(':
339 t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0;
340 level = 1;
341 while (level)
342 {
343 ++i; /* assert: < salen */
344 switch(sa->s[i])
345 {
346 case '(': ++level; break;
347 case ')': --level; break;
348 case '\\': ++i; /* assert: < salen */
349 default: *cbuf++ = sa->s[i]; ++t->slen;
350 }
351 }
352 ++t;
353 break;
354 case '"':
355 t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0;
356 level = 1;
357 while (level)
358 {
359 ++i; /* assert: < salen */
360 switch(sa->s[i])
361 {
362 case '"': --level; break;
363 case '\\': ++i; /* assert: < salen */
364 default: *cbuf++ = sa->s[i]; ++t->slen;
365 }
366 }
367 ++t;
368 break;
369 case '[':
370 t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0;
371 level = 1;
372 while (level)
373 {
374 ++i; /* assert: < salen */
375 switch(sa->s[i])
376 {
377 case ']': --level; break;
378 case '\\': ++i; /* assert: < salen */
379 default: *cbuf++ = sa->s[i]; ++t->slen;
380 }
381 }
382 ++t;
383 break;
384 default:
385 t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0;
386 do
387 {
388 if (sa->s[i] == '\\') if (++i >= salen) break;
389 *cbuf++ = sa->s[i]; ++t->slen;
390 if (++i >= salen)
391 break;
392 }
393 while (atomok(sa->s[i]));
394 atomcheck(t);
395 --i;
396 ++t;
397 }
398 return 1;
399 }
400
401 static int gotaddr(taout,taaddr,callback)
402 token822_alloc *taout;
403 token822_alloc *taaddr;
404 int (*callback)();
405 {
406 int i;
407
408 if (callback(taaddr) != 1)
409 return 0;
410
411 if (!token822_readyplus(taout,taaddr->len))
412 return 0;
413
414 for (i = 0;i < taaddr->len;++i)
415 taout->t[taout->len++] = taaddr->t[i];
416
417 taaddr->len = 0;
418 return 1;
419 }
420
421 int token822_addrlist(taout,taaddr,ta,callback)
422 token822_alloc *taout;
423 token822_alloc *taaddr;
424 token822_alloc *ta;
425 int (*callback)();
426 {
427 struct token822 *t;
428 struct token822 *beginning;
429 int ingroup;
430 int wordok;
431
432 taout->len = 0;
433 taaddr->len = 0;
434
435 if (!token822_readyplus(taout,1)) return -1;
436 if (!token822_readyplus(taaddr,1)) return -1;
437
438 ingroup = 0;
439 wordok = 1;
440
441 beginning = ta->t + 2;
442 t = ta->t + ta->len - 1;
443
444 /* rfc 822 address lists are easy to parse from right to left */
445
446 #define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1;
447 #define FLUSHCOMMA if (taaddr->len) { \
448 if (!gotaddr(taout,taaddr,callback)) return -1; \
449 if (!token822_append(taout,&comma)) return -1; }
450 #define ADDRLEFT if (!token822_append(taaddr,t--)) return -1;
451 #define OUTLEFT if (!token822_append(taout,t--)) return -1;
452
453 while (t >= beginning)
454 {
455 switch(t->type)
456 {
457 case TOKEN822_SEMI:
458 FLUSHCOMMA
459 if (ingroup) return 0;
460 ingroup = 1;
461 wordok = 1;
462 break;
463 case TOKEN822_COLON:
464 FLUSH
465 if (!ingroup) return 0;
466 ingroup = 0;
467 while ((t >= beginning) && (t->type != TOKEN822_COMMA))
468 OUTLEFT
469 if (t >= beginning)
470 OUTLEFT
471 wordok = 1;
472 continue;
473 case TOKEN822_RIGHT:
474 FLUSHCOMMA
475 OUTLEFT
476 while ((t >= beginning) && (t->type != TOKEN822_LEFT))
477 ADDRLEFT
478 /* important to use address here even if it's empty: <> */
479 if (!gotaddr(taout,taaddr,callback)) return -1;
480 if (t < beginning) return 0;
481 OUTLEFT
482 while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT)))
483 OUTLEFT
484 wordok = 0;
485 continue;
486 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
487 if (!wordok)
488 FLUSHCOMMA
489 wordok = 0;
490 ADDRLEFT
491 continue;
492 case TOKEN822_COMMENT:
493 /* comment is lexically a space; shouldn't affect wordok */
494 break;
495 case TOKEN822_COMMA:
496 FLUSH
497 wordok = 1;
498 break;
499 default:
500 wordok = 1;
501 ADDRLEFT
502 continue;
503 }
504 OUTLEFT
505 }
506 FLUSH
507 ++t;
508 while (t > ta->t)
509 if (!token822_append(taout,--t)) return -1;
510
511 token822_reverse(taout);
512 return 1;
513 }