X-Git-Url: https://git.distorted.org.uk/~mdw/ezmlm/blobdiff_plain/5b62e993b0af39700031c2875d7f6654e6a02850..f8beb284087c279acfb30506f5bb32baa4949b44:/ezmlm-archive.c diff --git a/ezmlm-archive.c b/ezmlm-archive.c new file mode 100644 index 0000000..9026789 --- /dev/null +++ b/ezmlm-archive.c @@ -0,0 +1,479 @@ +/*$Id: ezmlm-archive.c,v 1.13 1999/11/28 20:13:32 lindberg Exp $*/ +/*$Name: ezmlm-idx-040 $*/ + +#include "alloc.h" +#include "error.h" +#include "stralloc.h" +#include "str.h" +#include "sig.h" +#include "getconf.h" +#include "strerr.h" +#include "getln.h" +#include "substdio.h" +#include "readwrite.h" +#include "fmt.h" +#include "sgetopt.h" +#include "idxthread.h" +#include "makehash.h" +#include "idx.h" +#include "errtxt.h" + +#define FATAL "ezmlm-archive: fatal: " +#define WARNING "ezmlm-archive: warning: inconsistent index: " + +substdio ssin; +char inbuf[1024]; +substdio ssout; +char outbuf[1024]; +substdio ssnum; +char numbuf[16]; + +stralloc line = {0}; +stralloc num = {0}; +stralloc fn = {0}; +stralloc fnn = {0}; + +char strnum[FMT_ULONG]; +int flagerror = 0; +int flagsync = 1; /* sync() by default, not for -c or -f or -t */ +char *dir; + +struct ca { + char *s; /* start */ + unsigned int l; /* length */ +} ca; + +void die_usage() { + strerr_die1x(100, + "ezmlm-archive: usage: " + "ezmlm-archive [-cCFsSTvV] [-f min_msg] [-t max_msg] dir"); +} + +void die_nomem() { strerr_die2x(111,FATAL,ERR_NOMEM); } + +void close_proper(ss,s,sn) +/* flush,sync,close,move sn->s) */ +substdio *ss; +char *s, *sn; +{ + if (substdio_flush(ss) == -1) + strerr_die6sys(111,FATAL,ERR_FLUSH,dir,"/",s,": "); + if (flagsync) + if (fsync(ss->fd) == -1) + strerr_die6sys(111,FATAL,ERR_SYNC,dir,"/",s,": "); + if (close(ss->fd) == -1) + strerr_die6sys(111,FATAL,ERR_CLOSE,dir,"/",s,": "); + if (rename(sn,s) == -1) + strerr_die6sys(111,FATAL,ERR_MOVE,dir,"/",sn,": "); +} + +void write_threads(msgtable,subtable,authtable,datetable,from,to) +/* Add the current threading data to the thread database without dups */ +/* Writes the subject index first, then processes the individual files */ +msgentry *msgtable; subentry *subtable; authentry *authtable; +dateentry *datetable; +unsigned long from,to; +{ + msgentry *pmsgt; + subentry *psubt,*psubtm, *psubtlast; + subentry *presubt = (subentry *)0; + authentry *pautht; + dateentry *pdatet; + char *cp,*cp1; + unsigned long msg; + unsigned long ulmsginthread; + unsigned long subnum; + unsigned long authnum; + unsigned long msgnum; + unsigned int pos,l; + unsigned int startdate,nextdate; + unsigned int startmsg,nextmsg; + int fd = -1; + int fdn = -1; + int match; + int ffound; + int lineno; + int res; + + psubtm = subtable; /* now for new threads */ + pdatet = datetable; + nextmsg = 0L; + nextdate = pdatet->date; + while (psubtm->sub) { /* these are in msgnum order */ + if (!presubt) /* for rewind */ + if (psubtm->lastmsg >= nextmsg) + presubt = psubtm; /* this thread extends beyond current month */ + if (psubtm->firstmsg >= nextmsg) { /* done with this month */ + if (fdn != -1) close_proper(&ssout,fn.s,fnn.s); + psubtlast = psubtm; /* last thread done */ + if (presubt) /* need to rewind? */ + psubtm = presubt; /* do it */ + psubt = psubtm; /* tmp pointer to reset done flag */ + presubt = (subentry *)0; /* reset rewind pointer */ + pdatet++; /* next month */ + startdate = nextdate; /* startdate */ + nextdate = pdatet->date; /* end date */ + startmsg = nextmsg; /* first message in month */ + nextmsg = pdatet->msg; /* first message in next month */ + if (!stralloc_copys(&fn,"archive/threads/")) die_nomem(); + if (!stralloc_catb(&fn,strnum,fmt_uint(strnum,startdate))) die_nomem(); + if (!stralloc_copy(&fnn,&fn)) die_nomem(); + if (!stralloc_0(&fn)) die_nomem(); + if (!stralloc_cats(&fnn,"n")) die_nomem(); + if (!stralloc_0(&fnn)) die_nomem(); + if ((fdn = open_trunc(fnn.s)) == -1) + strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fnn.s,": "); + substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf)); + if ((fd = open_read(fn.s)) == -1) { + if (errno != error_noent) + strerr_die6sys(111,FATAL,ERR_OPEN,dir,"/",fn.s,": "); + } else { + substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf)); + for (;;) { + if (getln(&ssin,&line,&match,'\n') == -1) + strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": "); + if (!match) break; + pos = scan_ulong(line.s,&msgnum); + pos++; /* skip ':' */ + if (msgnum >= from) + continue; /* ignore entries from threading range */ + if (line.len < pos + HASHLEN) { + flagerror = -1; /* and bad ones */ + continue; + } + psubt = subtable; + cp = line.s + pos; + ffound = 0; /* search among already known subjects */ + for (;;) { + res = str_diffn(psubt->sub,cp,HASHLEN); + if (res < 0) { + if (psubt->higher) + psubt = psubt->higher; + else + break; + } else if (res > 0) { + if (psubt->lower) + psubt = psubt->lower; + else + break; + } else { + ffound = 1; + break; + } + } + if (!ffound) { + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } else { /* new # of msg in thread */ + cp += HASHLEN; /* HASHLEN [#] Subject always \n at end */ + if (*(cp++) == ' ' && *(cp++) == '[') { + cp += scan_ulong(cp,&ulmsginthread); + if (*cp == ']') { + psubt->msginthread += (unsigned char) (ulmsginthread & 0xff); + } + } else + flagerror = -5; + } + } + close(fd); + } + continue; + } + + if (psubtm->firstmsg < nextmsg && psubtm->lastmsg >= startmsg) { + if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,psubtm->lastmsg))) + die_nomem(); + if (!stralloc_cats(&line,":")) die_nomem(); + if (!stralloc_catb(&line,psubtm->sub,HASHLEN)) die_nomem(); + if (!stralloc_cats(&line," [")) die_nomem(); + if (!stralloc_catb(&line,strnum, + fmt_ulong(strnum,(unsigned long) psubtm->msginthread))) + die_nomem(); + if (!stralloc_cats(&line,"]")) die_nomem(); + if (!stralloc_catb(&line,psubtm->sub + HASHLEN,psubtm->sublen - HASHLEN)) + die_nomem(); /* has \n */ + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } + psubtm++; + } + if (fdn != -1) + close_proper(&ssout,fn.s,fnn.s); + + psubt = subtable; + while (psubt->sub) { /* now the threads */ + if (!stralloc_copys(&fn,"archive/subjects/")) die_nomem(); + if (!stralloc_catb(&fn,psubt->sub,2)) die_nomem(); + if (!stralloc_0(&fn)) die_nomem(); + if (mkdir(fn.s,0755) == -1) + if (errno != error_exist) + strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fn.s,": "); + fn.s[fn.len - 1] = '/'; + if (!stralloc_catb(&fn,psubt->sub+2,HASHLEN-2)) die_nomem(); + if (!stralloc_copy(&fnn,&fn)) die_nomem(); + if (!stralloc_cats(&fnn,"n")) die_nomem(); + if (!stralloc_0(&fn)) die_nomem(); + if (!stralloc_0(&fnn)) die_nomem(); + if ((fdn = open_trunc(fnn.s)) == -1) + strerr_die4sys(111,FATAL,ERR_CREATE,fnn.s,": "); + substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf)); + if ((fd = open_read(fn.s)) == -1) { + if (errno != error_noent) + strerr_die4sys(111,FATAL,ERR_OPEN,fn.s,": "); + if (substdio_puts(&ssout,psubt->sub) == -1) /* write subject */ + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } else { /* copy data */ + substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf)); + lineno = 0; + for (;;) { + if (getln(&ssin,&line,&match,'\n') == -1) + strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": "); + if (!match) break; + if (!lineno) { /* write subject */ + if (line.len < HASHLEN + 1 || line.s[HASHLEN] != ' ') + flagerror = -3; + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + lineno = 1; + continue; + } + (void) scan_ulong(line.s,&msgnum); + if (msgnum >= from) break; + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } + (void) close(fd); /* close old index */ + } + + subnum = (unsigned long) (psubt - subtable + 1); /* idx of this subj */ + pmsgt = msgtable + psubt->firstmsg - from; /* first message entry */ + for (msg = psubt->firstmsg; msg <= psubt->lastmsg; msg++) { + if (pmsgt->subnum == subnum) { + if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msg))) die_nomem(); + if (!stralloc_cats(&line,":")) die_nomem(); + if (!stralloc_catb(&line,strnum,fmt_uint(strnum,pmsgt->date))) + die_nomem(); + if (!stralloc_cats(&line,":")) die_nomem(); + if (pmsgt->authnum) { + pautht = authtable + pmsgt->authnum - 1; + cp = pautht->auth; + cp1 = cp + str_chr(cp,' '); + if (cp + HASHLEN != cp1) + strerr_die1x(100,ERR_BAD_INDEX); + if (!stralloc_cats(&line,cp)) + die_nomem(); /* hash */ + } else + if (!stralloc_cats(&line,"\n")) die_nomem(); + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } + pmsgt++; + } + close_proper(&ssout,fn.s,fnn.s); + psubt++; + } + + /* (no master author index) */ + pautht = authtable; + while (pautht->auth) { /* now the authors */ + if (!stralloc_copys(&fn,"archive/authors/")) die_nomem(); + if (!stralloc_catb(&fn,pautht->auth,2)) die_nomem(); + if (!stralloc_0(&fn)) die_nomem(); + if (mkdir(fn.s,0755) == -1) + if (errno != error_exist) + strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fn.s,": "); + fn.s[fn.len - 1] = '/'; + if (!stralloc_catb(&fn,pautht->auth+2,HASHLEN-2)) die_nomem(); + if (!stralloc_copy(&fnn,&fn)) die_nomem(); + if (!stralloc_cats(&fnn,"n")) die_nomem(); + if (!stralloc_0(&fn)) die_nomem(); + if (!stralloc_0(&fnn)) die_nomem(); + if ((fdn = open_trunc(fnn.s)) == -1) + strerr_die4sys(111,FATAL,ERR_CREATE,fnn.s,": "); + substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf)); + if ((fd = open_read(fn.s)) == -1) { + if (errno != error_noent) + strerr_die4sys(111,FATAL,ERR_OPEN,fn.s,": "); + else { /* didn't exist before: write author */ + if (substdio_put(&ssout,pautht->auth,pautht->authlen) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } + } else { /* copy data */ + substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf)); + lineno = 0; + for (;;) { + if (getln(&ssin,&line,&match,'\n') == -1) + strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": "); + if (!match) break; + if (!lineno) { /* write author */ + if (line.len < HASHLEN + 1 || line.s[HASHLEN] != ' ') + flagerror = - 4; + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + lineno = 1; + continue; + } + (void) scan_ulong(line.s,&msgnum); + if (msgnum >= from) break; + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } + (void) close(fd); /* close old index */ + } + + authnum = (unsigned long) (pautht - authtable + 1); /* idx of this auth */ + pmsgt = msgtable + pautht->firstmsg - from; /* first message entry */ + for (msg = pautht->firstmsg; msg <= to; msg++) { + if (pmsgt->authnum == authnum) { + if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msg))) die_nomem(); + if (!stralloc_cats(&line,":")) die_nomem(); + if (!stralloc_catb(&line,strnum,fmt_uint(strnum,pmsgt->date))) + die_nomem(); + if (!stralloc_cats(&line,":")) die_nomem(); + if (pmsgt->subnum) { + psubt = subtable + pmsgt->subnum - 1; + if (!stralloc_catb(&line,psubt->sub,psubt->sublen)) + die_nomem(); + } + if (substdio_put(&ssout,line.s,line.len) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + } + pmsgt++; + } + close_proper(&ssout,fn.s,fnn.s); + pautht++; + } +} + +int main(argc,argv) +int argc; +char **argv; +{ + unsigned long archnum = 0L; + unsigned long to = 0L; + unsigned long max; + int fd; + int fdlock; + int flagcreate = 0; + int flagsyncall = 0; + int opt; + msgentry *msgtable; + subentry *subtable; + authentry *authtable; + dateentry *datetable; + + (void) umask(022); + sig_pipeignore(); + + while ((opt = getopt(argc,argv,"cCf:FsSt:TvV")) != opteof) + switch (opt) { + case 'c': flagcreate = 1; + flagsync = 0; + break; /* start at beginning of archive */ + case 'C': flagcreate = 0; + break; /* Do only archnum+1 => num */ + case 'f': if (optarg) { + (void) scan_ulong(optarg,&archnum); + archnum = (archnum / 100) * 100; + } + flagsync = 0; + break; + case 'F': archnum = 0; break; + case 's': flagsyncall = 1; break; + case 'S': flagsyncall = 0; break; + case 't': if (optarg) { + (void) scan_ulong(optarg,&to); + } + flagsync = 0; + break; + case 'T': to = 0; break; + case 'v': + case 'V': strerr_die2x(0,"ezmlm-archive version: ",EZIDX_VERSION); + default: + die_usage(); + } + + if (flagsyncall) flagsync = 1; /* overrides */ + dir = argv[optind++]; + if (!dir) die_usage(); + if (chdir(dir) == -1) + strerr_die4sys(111,FATAL,ERR_SWITCH,dir,": "); + + if (mkdir("archive/threads",0755) == -1) + if (errno != error_exist) + strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/threads: "); + if (mkdir("archive/subjects",0755) == -1) + if (errno != error_exist) + strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/subjects: "); + if (mkdir("archive/authors",0755) == -1) + if (errno != error_exist) + strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/authors: "); + + /* Lock list to assure that no ezmlm-send is working on it */ + /* and that the "num" message is final */ + fdlock = open_append("lock"); + if (fdlock == -1) + strerr_die2sys(111,FATAL,ERR_OPEN_LOCK); + if (lock_ex(fdlock) == -1) { + (void) close(fdlock); + strerr_die2sys(111,FATAL,ERR_OBTAIN_LOCK); + } + /* get num */ + if (!getconf_line(&num,"num",0,FATAL,dir)) + strerr_die1x(100,ERR_EMPTY_LIST); + (void) close(fdlock); + + if (!stralloc_0(&num)) die_nomem(); /* parse num */ + (void) scan_ulong(num.s,&max); + if (!to || to > max) to = max; + + fdlock = open_append("archive/lock"); /* lock index */ + if (fdlock == -1) + strerr_die4sys(111,FATAL,ERR_OPEN,dir,"/archive/lock: "); + if (lock_ex(fdlock) == -1) { + (void) close(fdlock); + strerr_die4sys(111,FATAL,ERR_OBTAIN,dir,"/archive/lock: "); + } + if (!flagcreate && !archnum) { /* adjust archnum (from) / to */ + if (getconf_line(&num,"archnum",0,FATAL,dir)) { + if (!stralloc_0(&num)) die_nomem(); + (void) scan_ulong(num.s,&archnum); + archnum++; + } + } + + if (archnum > to) + _exit(0); /* nothing to do */ + + /* do the subject threading */ + idx_mkthreads(&msgtable,&subtable,&authtable,&datetable, + archnum,to,max,0,FATAL); + /* update the index */ + write_threads(msgtable,subtable,authtable,datetable,archnum,to); + /* update archnum */ + if ((fd = open_trunc("archnumn")) == -1) + strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archnumn: "); + substdio_fdbuf(&ssnum,write,fd,numbuf,sizeof(numbuf)); + if (substdio_put(&ssnum,strnum,fmt_ulong(strnum,to)) == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + if (substdio_puts(&ssnum,"\n") == -1) + strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": "); + close_proper(&ssnum,"archnum","archnumn"); + switch (flagerror) { + case 0: + _exit(0); /* go bye-bye */ + case -1: + strerr_die2x(99,WARNING,"threads entry with illegal format"); + case -2: + strerr_die2x(99,WARNING,"thread in index, but threadfile missing"); + case -3: + strerr_die2x(99,WARNING,"a subject file lacks subject"); + case -4: + strerr_die2x(99,WARNING,"an author file lacks author/hash"); + case -5: + strerr_die2x(99,WARNING,"threads entry lacks message count"); + default: + strerr_die2x(99,WARNING,"something happened that isn't quite right"); + } +} +