Import ezmlm-idx 0.40
[ezmlm] / ezmlm-archive.c
1 /*$Id: ezmlm-archive.c,v 1.13 1999/11/28 20:13:32 lindberg Exp $*/
2 /*$Name: ezmlm-idx-040 $*/
3
4 #include "alloc.h"
5 #include "error.h"
6 #include "stralloc.h"
7 #include "str.h"
8 #include "sig.h"
9 #include "getconf.h"
10 #include "strerr.h"
11 #include "getln.h"
12 #include "substdio.h"
13 #include "readwrite.h"
14 #include "fmt.h"
15 #include "sgetopt.h"
16 #include "idxthread.h"
17 #include "makehash.h"
18 #include "idx.h"
19 #include "errtxt.h"
20
21 #define FATAL "ezmlm-archive: fatal: "
22 #define WARNING "ezmlm-archive: warning: inconsistent index: "
23
/* File-scope I/O state shared by close_proper(), write_threads() and main(). */
24 substdio ssin; /* reader over an existing thread/subject/author file */
25 char inbuf[1024]; /* buffer backing ssin */
26 substdio ssout; /* writer for the replacement file (the name held in fnn) */
27 char outbuf[1024]; /* buffer backing ssout */
28 substdio ssnum; /* writer for "archnumn" in main() */
29 char numbuf[16]; /* buffer backing ssnum */
30
31 stralloc line = {0}; /* line just read from an old file, or being assembled for output */
32 stralloc num = {0}; /* contents of "num", later reused for "archnum" */
33 stralloc fn = {0}; /* NUL-terminated name of the file being replaced */
34 stralloc fnn = {0}; /* fn with "n" appended: the temporary file that is written */
35
36 char strnum[FMT_ULONG]; /* scratch space for fmt_ulong()/fmt_uint() output */
37 int flagerror = 0; /* set to a negative code when an inconsistency is seen; reported at the end of main() */
38 int flagsync = 1; /* fsync() new files in close_proper(); cleared by -c, -f and -t, forced back on by -s */
39 char *dir; /* list directory taken from the command line; used in error messages */
40
/* A pointer-plus-length pair, with one file-scope instance named `ca`. */
/* NOTE(review): neither the type nor the variable is referenced in the visible source - confirm before removing. */
41 struct ca {
42 char *s; /* start */
43 unsigned int l; /* length */
44 } ca;
45
/* Report the command-line synopsis through strerr_die1x() with status 100. */
void die_usage()
{
  char *synopsis =
    "ezmlm-archive: usage: "
    "ezmlm-archive [-cCFsSTvV] [-f min_msg] [-t max_msg] dir";

  strerr_die1x(100,synopsis);
}
51
52 void die_nomem() { strerr_die2x(111,FATAL,ERR_NOMEM); }
53
54 void close_proper(ss,s,sn)
55 /* flush,sync,close,move sn->s) */
56 substdio *ss;
57 char *s, *sn;
58 {
59 if (substdio_flush(ss) == -1)
60 strerr_die6sys(111,FATAL,ERR_FLUSH,dir,"/",s,": ");
61 if (flagsync)
62 if (fsync(ss->fd) == -1)
63 strerr_die6sys(111,FATAL,ERR_SYNC,dir,"/",s,": ");
64 if (close(ss->fd) == -1)
65 strerr_die6sys(111,FATAL,ERR_CLOSE,dir,"/",s,": ");
66 if (rename(sn,s) == -1)
67 strerr_die6sys(111,FATAL,ERR_MOVE,dir,"/",sn,": ");
68 }
69
70 void write_threads(msgtable,subtable,authtable,datetable,from,to)
71 /* Add the current threading data to the thread database without dups */
72 /* Writes the subject index first, then processes the individual files */
73 msgentry *msgtable; subentry *subtable; authentry *authtable;
74 dateentry *datetable;
75 unsigned long from,to;
76 {
77 msgentry *pmsgt;
78 subentry *psubt,*psubtm, *psubtlast;
79 subentry *presubt = (subentry *)0;
80 authentry *pautht;
81 dateentry *pdatet;
82 char *cp,*cp1;
83 unsigned long msg;
84 unsigned long ulmsginthread;
85 unsigned long subnum;
86 unsigned long authnum;
87 unsigned long msgnum;
88 unsigned int pos,l;
89 unsigned int startdate,nextdate;
90 unsigned int startmsg,nextmsg;
91 int fd = -1;
92 int fdn = -1;
93 int match;
94 int ffound;
95 int lineno;
96 int res;
97
98 psubtm = subtable; /* now for new threads */
99 pdatet = datetable;
100 nextmsg = 0L;
101 nextdate = pdatet->date;
102 while (psubtm->sub) { /* these are in msgnum order */
103 if (!presubt) /* for rewind */
104 if (psubtm->lastmsg >= nextmsg)
105 presubt = psubtm; /* this thread extends beyond current month */
106 if (psubtm->firstmsg >= nextmsg) { /* done with this month */
107 if (fdn != -1) close_proper(&ssout,fn.s,fnn.s);
108 psubtlast = psubtm; /* last thread done */
109 if (presubt) /* need to rewind? */
110 psubtm = presubt; /* do it */
111 psubt = psubtm; /* tmp pointer to reset done flag */
112 presubt = (subentry *)0; /* reset rewind pointer */
113 pdatet++; /* next month */
114 startdate = nextdate; /* startdate */
115 nextdate = pdatet->date; /* end date */
116 startmsg = nextmsg; /* first message in month */
117 nextmsg = pdatet->msg; /* first message in next month */
118 if (!stralloc_copys(&fn,"archive/threads/")) die_nomem();
119 if (!stralloc_catb(&fn,strnum,fmt_uint(strnum,startdate))) die_nomem();
120 if (!stralloc_copy(&fnn,&fn)) die_nomem();
121 if (!stralloc_0(&fn)) die_nomem();
122 if (!stralloc_cats(&fnn,"n")) die_nomem();
123 if (!stralloc_0(&fnn)) die_nomem();
124 if ((fdn = open_trunc(fnn.s)) == -1)
125 strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fnn.s,": ");
126 substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf));
127 if ((fd = open_read(fn.s)) == -1) {
128 if (errno != error_noent)
129 strerr_die6sys(111,FATAL,ERR_OPEN,dir,"/",fn.s,": ");
130 } else {
131 substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
132 for (;;) {
133 if (getln(&ssin,&line,&match,'\n') == -1)
134 strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": ");
135 if (!match) break;
136 pos = scan_ulong(line.s,&msgnum);
137 pos++; /* skip ':' */
138 if (msgnum >= from)
139 continue; /* ignore entries from threading range */
140 if (line.len < pos + HASHLEN) {
141 flagerror = -1; /* and bad ones */
142 continue;
143 }
144 psubt = subtable;
145 cp = line.s + pos;
146 ffound = 0; /* search among already known subjects */
147 for (;;) {
148 res = str_diffn(psubt->sub,cp,HASHLEN);
149 if (res < 0) {
150 if (psubt->higher)
151 psubt = psubt->higher;
152 else
153 break;
154 } else if (res > 0) {
155 if (psubt->lower)
156 psubt = psubt->lower;
157 else
158 break;
159 } else {
160 ffound = 1;
161 break;
162 }
163 }
164 if (!ffound) {
165 if (substdio_put(&ssout,line.s,line.len) == -1)
166 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
167 } else { /* new # of msg in thread */
168 cp += HASHLEN; /* HASHLEN [#] Subject always \n at end */
169 if (*(cp++) == ' ' && *(cp++) == '[') {
170 cp += scan_ulong(cp,&ulmsginthread);
171 if (*cp == ']') {
172 psubt->msginthread += (unsigned char) (ulmsginthread & 0xff);
173 }
174 } else
175 flagerror = -5;
176 }
177 }
178 close(fd);
179 }
180 continue;
181 }
182
183 if (psubtm->firstmsg < nextmsg && psubtm->lastmsg >= startmsg) {
184 if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,psubtm->lastmsg)))
185 die_nomem();
186 if (!stralloc_cats(&line,":")) die_nomem();
187 if (!stralloc_catb(&line,psubtm->sub,HASHLEN)) die_nomem();
188 if (!stralloc_cats(&line," [")) die_nomem();
189 if (!stralloc_catb(&line,strnum,
190 fmt_ulong(strnum,(unsigned long) psubtm->msginthread)))
191 die_nomem();
192 if (!stralloc_cats(&line,"]")) die_nomem();
193 if (!stralloc_catb(&line,psubtm->sub + HASHLEN,psubtm->sublen - HASHLEN))
194 die_nomem(); /* has \n */
195 if (substdio_put(&ssout,line.s,line.len) == -1)
196 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
197 }
198 psubtm++;
199 }
200 if (fdn != -1)
201 close_proper(&ssout,fn.s,fnn.s);
202
203 psubt = subtable;
204 while (psubt->sub) { /* now the threads */
205 if (!stralloc_copys(&fn,"archive/subjects/")) die_nomem();
206 if (!stralloc_catb(&fn,psubt->sub,2)) die_nomem();
207 if (!stralloc_0(&fn)) die_nomem();
208 if (mkdir(fn.s,0755) == -1)
209 if (errno != error_exist)
210 strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fn.s,": ");
211 fn.s[fn.len - 1] = '/';
212 if (!stralloc_catb(&fn,psubt->sub+2,HASHLEN-2)) die_nomem();
213 if (!stralloc_copy(&fnn,&fn)) die_nomem();
214 if (!stralloc_cats(&fnn,"n")) die_nomem();
215 if (!stralloc_0(&fn)) die_nomem();
216 if (!stralloc_0(&fnn)) die_nomem();
217 if ((fdn = open_trunc(fnn.s)) == -1)
218 strerr_die4sys(111,FATAL,ERR_CREATE,fnn.s,": ");
219 substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf));
220 if ((fd = open_read(fn.s)) == -1) {
221 if (errno != error_noent)
222 strerr_die4sys(111,FATAL,ERR_OPEN,fn.s,": ");
223 if (substdio_puts(&ssout,psubt->sub) == -1) /* write subject */
224 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
225 } else { /* copy data */
226 substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
227 lineno = 0;
228 for (;;) {
229 if (getln(&ssin,&line,&match,'\n') == -1)
230 strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": ");
231 if (!match) break;
232 if (!lineno) { /* write subject */
233 if (line.len < HASHLEN + 1 || line.s[HASHLEN] != ' ')
234 flagerror = -3;
235 if (substdio_put(&ssout,line.s,line.len) == -1)
236 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
237 lineno = 1;
238 continue;
239 }
240 (void) scan_ulong(line.s,&msgnum);
241 if (msgnum >= from) break;
242 if (substdio_put(&ssout,line.s,line.len) == -1)
243 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
244 }
245 (void) close(fd); /* close old index */
246 }
247
248 subnum = (unsigned long) (psubt - subtable + 1); /* idx of this subj */
249 pmsgt = msgtable + psubt->firstmsg - from; /* first message entry */
250 for (msg = psubt->firstmsg; msg <= psubt->lastmsg; msg++) {
251 if (pmsgt->subnum == subnum) {
252 if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msg))) die_nomem();
253 if (!stralloc_cats(&line,":")) die_nomem();
254 if (!stralloc_catb(&line,strnum,fmt_uint(strnum,pmsgt->date)))
255 die_nomem();
256 if (!stralloc_cats(&line,":")) die_nomem();
257 if (pmsgt->authnum) {
258 pautht = authtable + pmsgt->authnum - 1;
259 cp = pautht->auth;
260 cp1 = cp + str_chr(cp,' ');
261 if (cp + HASHLEN != cp1)
262 strerr_die1x(100,ERR_BAD_INDEX);
263 if (!stralloc_cats(&line,cp))
264 die_nomem(); /* hash */
265 } else
266 if (!stralloc_cats(&line,"\n")) die_nomem();
267 if (substdio_put(&ssout,line.s,line.len) == -1)
268 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
269 }
270 pmsgt++;
271 }
272 close_proper(&ssout,fn.s,fnn.s);
273 psubt++;
274 }
275
276 /* (no master author index) */
277 pautht = authtable;
278 while (pautht->auth) { /* now the authors */
279 if (!stralloc_copys(&fn,"archive/authors/")) die_nomem();
280 if (!stralloc_catb(&fn,pautht->auth,2)) die_nomem();
281 if (!stralloc_0(&fn)) die_nomem();
282 if (mkdir(fn.s,0755) == -1)
283 if (errno != error_exist)
284 strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fn.s,": ");
285 fn.s[fn.len - 1] = '/';
286 if (!stralloc_catb(&fn,pautht->auth+2,HASHLEN-2)) die_nomem();
287 if (!stralloc_copy(&fnn,&fn)) die_nomem();
288 if (!stralloc_cats(&fnn,"n")) die_nomem();
289 if (!stralloc_0(&fn)) die_nomem();
290 if (!stralloc_0(&fnn)) die_nomem();
291 if ((fdn = open_trunc(fnn.s)) == -1)
292 strerr_die4sys(111,FATAL,ERR_CREATE,fnn.s,": ");
293 substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf));
294 if ((fd = open_read(fn.s)) == -1) {
295 if (errno != error_noent)
296 strerr_die4sys(111,FATAL,ERR_OPEN,fn.s,": ");
297 else { /* didn't exist before: write author */
298 if (substdio_put(&ssout,pautht->auth,pautht->authlen) == -1)
299 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
300 }
301 } else { /* copy data */
302 substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
303 lineno = 0;
304 for (;;) {
305 if (getln(&ssin,&line,&match,'\n') == -1)
306 strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": ");
307 if (!match) break;
308 if (!lineno) { /* write author */
309 if (line.len < HASHLEN + 1 || line.s[HASHLEN] != ' ')
310 flagerror = - 4;
311 if (substdio_put(&ssout,line.s,line.len) == -1)
312 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
313 lineno = 1;
314 continue;
315 }
316 (void) scan_ulong(line.s,&msgnum);
317 if (msgnum >= from) break;
318 if (substdio_put(&ssout,line.s,line.len) == -1)
319 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
320 }
321 (void) close(fd); /* close old index */
322 }
323
324 authnum = (unsigned long) (pautht - authtable + 1); /* idx of this auth */
325 pmsgt = msgtable + pautht->firstmsg - from; /* first message entry */
326 for (msg = pautht->firstmsg; msg <= to; msg++) {
327 if (pmsgt->authnum == authnum) {
328 if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msg))) die_nomem();
329 if (!stralloc_cats(&line,":")) die_nomem();
330 if (!stralloc_catb(&line,strnum,fmt_uint(strnum,pmsgt->date)))
331 die_nomem();
332 if (!stralloc_cats(&line,":")) die_nomem();
333 if (pmsgt->subnum) {
334 psubt = subtable + pmsgt->subnum - 1;
335 if (!stralloc_catb(&line,psubt->sub,psubt->sublen))
336 die_nomem();
337 }
338 if (substdio_put(&ssout,line.s,line.len) == -1)
339 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
340 }
341 pmsgt++;
342 }
343 close_proper(&ssout,fn.s,fnn.s);
344 pautht++;
345 }
346 }
347
348 int main(argc,argv)
349 int argc;
350 char **argv;
351 {
352 unsigned long archnum = 0L;
353 unsigned long to = 0L;
354 unsigned long max;
355 int fd;
356 int fdlock;
357 int flagcreate = 0;
358 int flagsyncall = 0;
359 int opt;
360 msgentry *msgtable;
361 subentry *subtable;
362 authentry *authtable;
363 dateentry *datetable;
364
365 (void) umask(022);
366 sig_pipeignore();
367
368 while ((opt = getopt(argc,argv,"cCf:FsSt:TvV")) != opteof)
369 switch (opt) {
370 case 'c': flagcreate = 1;
371 flagsync = 0;
372 break; /* start at beginning of archive */
373 case 'C': flagcreate = 0;
374 break; /* Do only archnum+1 => num */
375 case 'f': if (optarg) {
376 (void) scan_ulong(optarg,&archnum);
377 archnum = (archnum / 100) * 100;
378 }
379 flagsync = 0;
380 break;
381 case 'F': archnum = 0; break;
382 case 's': flagsyncall = 1; break;
383 case 'S': flagsyncall = 0; break;
384 case 't': if (optarg) {
385 (void) scan_ulong(optarg,&to);
386 }
387 flagsync = 0;
388 break;
389 case 'T': to = 0; break;
390 case 'v':
391 case 'V': strerr_die2x(0,"ezmlm-archive version: ",EZIDX_VERSION);
392 default:
393 die_usage();
394 }
395
396 if (flagsyncall) flagsync = 1; /* overrides */
397 dir = argv[optind++];
398 if (!dir) die_usage();
399 if (chdir(dir) == -1)
400 strerr_die4sys(111,FATAL,ERR_SWITCH,dir,": ");
401
402 if (mkdir("archive/threads",0755) == -1)
403 if (errno != error_exist)
404 strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/threads: ");
405 if (mkdir("archive/subjects",0755) == -1)
406 if (errno != error_exist)
407 strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/subjects: ");
408 if (mkdir("archive/authors",0755) == -1)
409 if (errno != error_exist)
410 strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/authors: ");
411
412 /* Lock list to assure that no ezmlm-send is working on it */
413 /* and that the "num" message is final */
414 fdlock = open_append("lock");
415 if (fdlock == -1)
416 strerr_die2sys(111,FATAL,ERR_OPEN_LOCK);
417 if (lock_ex(fdlock) == -1) {
418 (void) close(fdlock);
419 strerr_die2sys(111,FATAL,ERR_OBTAIN_LOCK);
420 }
421 /* get num */
422 if (!getconf_line(&num,"num",0,FATAL,dir))
423 strerr_die1x(100,ERR_EMPTY_LIST);
424 (void) close(fdlock);
425
426 if (!stralloc_0(&num)) die_nomem(); /* parse num */
427 (void) scan_ulong(num.s,&max);
428 if (!to || to > max) to = max;
429
430 fdlock = open_append("archive/lock"); /* lock index */
431 if (fdlock == -1)
432 strerr_die4sys(111,FATAL,ERR_OPEN,dir,"/archive/lock: ");
433 if (lock_ex(fdlock) == -1) {
434 (void) close(fdlock);
435 strerr_die4sys(111,FATAL,ERR_OBTAIN,dir,"/archive/lock: ");
436 }
437 if (!flagcreate && !archnum) { /* adjust archnum (from) / to */
438 if (getconf_line(&num,"archnum",0,FATAL,dir)) {
439 if (!stralloc_0(&num)) die_nomem();
440 (void) scan_ulong(num.s,&archnum);
441 archnum++;
442 }
443 }
444
445 if (archnum > to)
446 _exit(0); /* nothing to do */
447
448 /* do the subject threading */
449 idx_mkthreads(&msgtable,&subtable,&authtable,&datetable,
450 archnum,to,max,0,FATAL);
451 /* update the index */
452 write_threads(msgtable,subtable,authtable,datetable,archnum,to);
453 /* update archnum */
454 if ((fd = open_trunc("archnumn")) == -1)
455 strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archnumn: ");
456 substdio_fdbuf(&ssnum,write,fd,numbuf,sizeof(numbuf));
457 if (substdio_put(&ssnum,strnum,fmt_ulong(strnum,to)) == -1)
458 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
459 if (substdio_puts(&ssnum,"\n") == -1)
460 strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
461 close_proper(&ssnum,"archnum","archnumn");
462 switch (flagerror) {
463 case 0:
464 _exit(0); /* go bye-bye */
465 case -1:
466 strerr_die2x(99,WARNING,"threads entry with illegal format");
467 case -2:
468 strerr_die2x(99,WARNING,"thread in index, but threadfile missing");
469 case -3:
470 strerr_die2x(99,WARNING,"a subject file lacks subject");
471 case -4:
472 strerr_die2x(99,WARNING,"an author file lacks author/hash");
473 case -5:
474 strerr_die2x(99,WARNING,"threads entry lacks message count");
475 default:
476 strerr_die2x(99,WARNING,"something happened that isn't quite right");
477 }
478 }
479