Import ezmlm-idx 0.40
[ezmlm] / ezmlm-archive.c
diff --git a/ezmlm-archive.c b/ezmlm-archive.c
new file mode 100644 (file)
index 0000000..9026789
--- /dev/null
@@ -0,0 +1,479 @@
+/*$Id: ezmlm-archive.c,v 1.13 1999/11/28 20:13:32 lindberg Exp $*/
+/*$Name: ezmlm-idx-040 $*/
+
+#include "alloc.h"
+#include "error.h"
+#include "stralloc.h"
+#include "str.h"
+#include "sig.h"
+#include "getconf.h"
+#include "strerr.h"
+#include "getln.h"
+#include "substdio.h"
+#include "readwrite.h"
+#include "fmt.h"
+#include "sgetopt.h"
+#include "idxthread.h"
+#include "makehash.h"
+#include "idx.h"
+#include "errtxt.h"
+
+#define FATAL "ezmlm-archive: fatal: "         /* prefix passed to the strerr_die* calls */
+#define WARNING "ezmlm-archive: warning: inconsistent index: " /* prefix for the flagerror reports in main() */
+
+substdio ssin;         /* reader set up over an existing index file */
+char inbuf[1024];      /* buffer handed to ssin */
+substdio ssout;        /* writer set up over the replacement ("...n") index file */
+char outbuf[1024];     /* buffer handed to ssout */
+substdio ssnum;        /* writer set up over "archnumn" in main() */
+char numbuf[16];       /* buffer handed to ssnum */
+
+stralloc line = {0};   /* line just read from, or being assembled for, an index file */
+stralloc num = {0};    /* contents of "num" / "archnum" as read by getconf_line() */
+stralloc fn = {0};     /* name of the index file being updated */
+stralloc fnn = {0};    /* name of its replacement: fn with "n" appended */
+
+char strnum[FMT_ULONG];        /* scratch space for fmt_ulong()/fmt_uint() output */
+int flagerror = 0;     /* 0, or a negative code for an index inconsistency; reported by main() */
+int flagsync = 1;      /* sync() by default, not for -c or -f or -t */
+char *dir;             /* list directory, taken from the command line */
+
+struct ca {            /* NOTE(review): not referenced anywhere else in the visible source */
+  char *s;             /* start */
+  unsigned int l;      /* length */
+} ca;
+
+void die_usage() {     /* hand the usage line to strerr_die1x() with code 100 */
+  static char usage[] =
+    "ezmlm-archive: usage: ezmlm-archive [-cCFsSTvV] [-f min_msg] [-t max_msg] dir";
+  strerr_die1x(100,usage);
+}
+
+void die_nomem() { strerr_die2x(111,FATAL,ERR_NOMEM); }        /* out-of-memory handler: code 111 with ERR_NOMEM */
+
+void close_proper(ss,s,sn)
+/* Commit a rewritten file: flush ss, fsync it when flagsync is set, close it, then rename sn to s. */
+substdio *ss;          /* writer whose fd is the new file */
+char *s, *sn;          /* final name, and the name the new file was written under */
+{
+  if (substdio_flush(ss) == -1) {
+    strerr_die6sys(111,FATAL,ERR_FLUSH,dir,"/",s,": ");
+  }
+  if (flagsync && fsync(ss->fd) == -1)
+    strerr_die6sys(111,FATAL,ERR_SYNC,dir,"/",s,": ");
+  if (close(ss->fd) == -1)
+    strerr_die6sys(111,FATAL,ERR_CLOSE,dir,"/",s,": ");
+  if (rename(sn,s) == -1)
+    strerr_die6sys(111,FATAL,ERR_MOVE,dir,"/",sn,": ");
+}
+
+void write_threads(msgtable,subtable,authtable,datetable,from,to)
+/* Add the threading data for messages from..to to the thread database without dups: */
+/* first the per-month thread indexes, then one file per subject and one file per author. */
+msgentry *msgtable; subentry *subtable; authentry *authtable; /* msgtable[0] is message "from"; the other two end at a null ->sub / ->auth */
+dateentry *datetable;          /* one entry per month: ->date names the threads file, ->msg starts the month */
+unsigned long from,to;         /* message range being (re)indexed; old entries >= from are dropped */
+{
+  msgentry *pmsgt;
+  subentry *psubt,*psubtm, *psubtlast;         /* NOTE(review): psubtlast is assigned but never read */
+  subentry *presubt = (subentry *)0;
+  authentry *pautht;
+  dateentry *pdatet;
+  char *cp,*cp1;
+  unsigned long msg;
+  unsigned long ulmsginthread;
+  unsigned long subnum;
+  unsigned long authnum;
+  unsigned long msgnum;
+  unsigned int pos,l;          /* NOTE(review): l is never used */
+  unsigned int startdate,nextdate;
+  unsigned int startmsg,nextmsg;
+  int fd = -1;
+  int fdn = -1;
+  int match;
+  int ffound;
+  int lineno;
+  int res;
+  /* Part 1: archive/threads/<date>. Each file is rebuilt as <name>n, then renamed by close_proper(). */
+  psubtm = subtable;           /* now for new threads */
+  pdatet = datetable;
+  nextmsg = 0L;                        /* presumably makes the first entry take the "new month" branch */
+  nextdate = pdatet->date;
+  while (psubtm->sub) {                /* these are in msgnum order */
+    if (!presubt)              /* for rewind */
+      if (psubtm->lastmsg >= nextmsg)
+       presubt = psubtm;       /* this thread extends beyond current month */
+    if (psubtm->firstmsg >= nextmsg) { /* done with this month */
+      if (fdn != -1) close_proper(&ssout,fn.s,fnn.s);
+      psubtlast = psubtm;              /* last thread done */
+      if (presubt)                     /* need to rewind? */
+       psubtm = presubt;               /* do it */
+      psubt = psubtm;                  /* tmp pointer to reset done flag */
+      presubt = (subentry *)0;         /* reset rewind pointer */
+      pdatet++;                                /* next month */
+      startdate = nextdate;            /* startdate */
+      nextdate = pdatet->date;         /* end date */
+      startmsg = nextmsg;              /* first message in month */
+      nextmsg = pdatet->msg;           /* first message in next month */
+      if (!stralloc_copys(&fn,"archive/threads/")) die_nomem();
+      if (!stralloc_catb(&fn,strnum,fmt_uint(strnum,startdate))) die_nomem();
+      if (!stralloc_copy(&fnn,&fn)) die_nomem();
+      if (!stralloc_0(&fn)) die_nomem();
+      if (!stralloc_cats(&fnn,"n")) die_nomem();
+      if (!stralloc_0(&fnn)) die_nomem();
+      if ((fdn = open_trunc(fnn.s)) == -1)
+       strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fnn.s,": ");
+      substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf));
+      if ((fd = open_read(fn.s)) == -1) {      /* a missing old file is fine; any other error is fatal */
+      if (errno != error_noent)
+             strerr_die6sys(111,FATAL,ERR_OPEN,dir,"/",fn.s,": ");
+      } else {
+       substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
+      for (;;) {               /* carry over the old month file line by line */
+      if (getln(&ssin,&line,&match,'\n') == -1)
+             strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": ");
+      if (!match) break;
+      pos = scan_ulong(line.s,&msgnum);
+      pos++;                   /* skip ':' */
+      if (msgnum >= from)
+       continue;               /* ignore entries from threading range */
+      if (line.len < pos + HASHLEN) {
+       flagerror = -1;         /* and bad ones */
+       continue;
+      }
+      psubt = subtable;
+      cp = line.s + pos;
+      ffound = 0;              /* search among already known subjects */
+      for (;;) {               /* follow ->higher / ->lower links, comparing HASHLEN bytes */
+       res = str_diffn(psubt->sub,cp,HASHLEN);
+       if (res < 0) {
+         if (psubt->higher)
+           psubt = psubt->higher;
+        else
+          break;
+       } else if (res > 0) {
+         if (psubt->lower)
+           psubt = psubt->lower;
+         else
+           break;
+       } else {
+         ffound = 1;
+         break;
+       }
+      }
+      if (!ffound) {           /* subject not in subtable: copy the old line unchanged */
+       if (substdio_put(&ssout,line.s,line.len) == -1)
+         strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+      } else {                 /* new # of msg in thread */
+       cp += HASHLEN;          /* HASHLEN [#] Subject always \n at end */
+       if (*(cp++) == ' ' && *(cp++) == '[') {
+         cp += scan_ulong(cp,&ulmsginthread);
+         if (*cp == ']') {     /* only the low 8 bits of the old count are added */
+           psubt->msginthread += (unsigned char) (ulmsginthread & 0xff);
+         }
+       } else
+         flagerror = -5;       /* old line has no " [" after the hash */
+      }
+    }
+    close(fd);
+  }
+  continue;                    /* re-test the (possibly rewound) entry against the new month */
+  }
+    /* entry overlaps [startmsg,nextmsg): write "lastmsg:hash [count]" followed by the rest of ->sub */
+    if (psubtm->firstmsg < nextmsg && psubtm->lastmsg >= startmsg) {
+    if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,psubtm->lastmsg)))
+               die_nomem();
+    if (!stralloc_cats(&line,":")) die_nomem();
+    if (!stralloc_catb(&line,psubtm->sub,HASHLEN)) die_nomem();
+    if (!stralloc_cats(&line," [")) die_nomem();
+    if (!stralloc_catb(&line,strnum,
+       fmt_ulong(strnum,(unsigned long) psubtm->msginthread)))
+               die_nomem();
+    if (!stralloc_cats(&line,"]")) die_nomem();
+    if (!stralloc_catb(&line,psubtm->sub + HASHLEN,psubtm->sublen - HASHLEN))
+                        die_nomem();   /* has \n */
+    if (substdio_put(&ssout,line.s,line.len) == -1)
+       strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+    }
+  psubtm++;
+  }
+  if (fdn != -1)               /* commit the last month file, if one was opened */
+    close_proper(&ssout,fn.s,fnn.s);
+  /* Part 2: one file per subject, archive/subjects/<first 2 chars of ->sub>/<next HASHLEN-2 chars> */
+  psubt = subtable;
+  while (psubt->sub) {         /* now the threads */
+    if (!stralloc_copys(&fn,"archive/subjects/")) die_nomem();
+    if (!stralloc_catb(&fn,psubt->sub,2)) die_nomem();
+    if (!stralloc_0(&fn)) die_nomem();
+    if (mkdir(fn.s,0755) == -1)        /* an already existing directory is not an error */
+    if (errno != error_exist)
+      strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fn.s,": ");
+    fn.s[fn.len - 1] = '/';    /* overwrite the byte added by stralloc_0() and keep building the path */
+    if (!stralloc_catb(&fn,psubt->sub+2,HASHLEN-2)) die_nomem();
+    if (!stralloc_copy(&fnn,&fn)) die_nomem();
+    if (!stralloc_cats(&fnn,"n")) die_nomem();
+    if (!stralloc_0(&fn)) die_nomem();
+    if (!stralloc_0(&fnn)) die_nomem();
+    if ((fdn = open_trunc(fnn.s)) == -1)
+      strerr_die4sys(111,FATAL,ERR_CREATE,fnn.s,": ");
+    substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf));
+    if ((fd = open_read(fn.s)) == -1) {
+      if (errno != error_noent)
+         strerr_die4sys(111,FATAL,ERR_OPEN,fn.s,": ");
+      if (substdio_puts(&ssout,psubt->sub) == -1)      /* write subject */
+            strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+    } else {                                   /* copy data */
+       substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
+       lineno = 0;
+       for (;;) {
+         if (getln(&ssin,&line,&match,'\n') == -1)
+             strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": ");
+          if (!match) break;
+         if (!lineno) {                        /* write subject */
+           if (line.len < HASHLEN + 1 || line.s[HASHLEN] != ' ')
+               flagerror = -3;                 /* first line is not HASHLEN chars followed by ' ' */
+           if (substdio_put(&ssout,line.s,line.len) == -1)
+              strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+           lineno = 1;
+           continue;
+         }
+         (void) scan_ulong(line.s,&msgnum);
+         if (msgnum >= from) break;            /* stop copying at the first entry in the re-indexed range */
+         if (substdio_put(&ssout,line.s,line.len) == -1)
+            strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+       }
+       (void) close(fd);       /* close old index */
+      }
+    /* append one "msg:date:" + author entry line for each message carrying this subject */
+    subnum = (unsigned long) (psubt - subtable + 1);   /* idx of this subj */
+    pmsgt = msgtable + psubt->firstmsg - from; /* first message entry */
+    for (msg = psubt->firstmsg; msg <= psubt->lastmsg; msg++) {
+      if (pmsgt->subnum == subnum) {
+        if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msg))) die_nomem();
+        if (!stralloc_cats(&line,":")) die_nomem();
+       if (!stralloc_catb(&line,strnum,fmt_uint(strnum,pmsgt->date)))
+               die_nomem();
+       if (!stralloc_cats(&line,":")) die_nomem();
+        if (pmsgt->authnum) {
+         pautht = authtable + pmsgt->authnum - 1;
+         cp = pautht->auth;
+         cp1 = cp + str_chr(cp,' ');
+         if (cp + HASHLEN != cp1)              /* first ' ' must come right after HASHLEN chars */
+           strerr_die1x(100,ERR_BAD_INDEX);
+         if (!stralloc_cats(&line,cp))
+               die_nomem();                            /* hash */
+       } else
+          if (!stralloc_cats(&line,"\n")) die_nomem(); /* no author: just end the line */
+       if (substdio_put(&ssout,line.s,line.len) == -1)
+         strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+      }
+      pmsgt++;
+    }
+    close_proper(&ssout,fn.s,fnn.s);
+    psubt++;
+  }
+  /* Part 3: one file per author, archive/authors/<first 2 chars of ->auth>/<next HASHLEN-2 chars> */
+                                       /* (no master author index) */
+  pautht = authtable;
+  while (pautht->auth) {               /* now the authors */
+    if (!stralloc_copys(&fn,"archive/authors/")) die_nomem();
+    if (!stralloc_catb(&fn,pautht->auth,2)) die_nomem();
+    if (!stralloc_0(&fn)) die_nomem();
+    if (mkdir(fn.s,0755) == -1)        /* an already existing directory is not an error */
+    if (errno != error_exist)
+      strerr_die6sys(111,FATAL,ERR_CREATE,dir,"/",fn.s,": ");
+    fn.s[fn.len - 1] = '/';    /* overwrite the byte added by stralloc_0() and keep building the path */
+    if (!stralloc_catb(&fn,pautht->auth+2,HASHLEN-2)) die_nomem();
+    if (!stralloc_copy(&fnn,&fn)) die_nomem();
+    if (!stralloc_cats(&fnn,"n")) die_nomem();
+    if (!stralloc_0(&fn)) die_nomem();
+    if (!stralloc_0(&fnn)) die_nomem();
+    if ((fdn = open_trunc(fnn.s)) == -1)
+      strerr_die4sys(111,FATAL,ERR_CREATE,fnn.s,": ");
+    substdio_fdbuf(&ssout,write,fdn,outbuf,sizeof(outbuf));
+      if ((fd = open_read(fn.s)) == -1) {
+       if (errno != error_noent)
+         strerr_die4sys(111,FATAL,ERR_OPEN,fn.s,": ");
+        else {                 /* didn't exist before: write author */
+          if (substdio_put(&ssout,pautht->auth,pautht->authlen) == -1)
+            strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+       }
+      } else {                                 /* copy data */
+       substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
+       lineno = 0;
+       for (;;) {
+         if (getln(&ssin,&line,&match,'\n') == -1)
+             strerr_die6sys(111,FATAL,ERR_READ,dir,"/",fn.s,": ");
+          if (!match) break;
+         if (!lineno) {                        /* write author */
+           if (line.len < HASHLEN + 1 || line.s[HASHLEN] != ' ')
+               flagerror = - 4;                /* first line is not HASHLEN chars followed by ' ' */
+           if (substdio_put(&ssout,line.s,line.len) == -1)
+              strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+           lineno = 1;
+           continue;
+         }
+         (void) scan_ulong(line.s,&msgnum);
+         if (msgnum >= from) break;            /* stop copying at the first entry in the re-indexed range */
+         if (substdio_put(&ssout,line.s,line.len) == -1)
+            strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+       }
+       (void) close(fd);                       /* close old index */
+      }
+    /* append one "msg:date:" + subject entry line for each message by this author, scanning up to "to" */
+    authnum = (unsigned long) (pautht - authtable + 1);        /* idx of this auth */
+    pmsgt = msgtable + pautht->firstmsg - from;        /* first message entry */
+    for (msg = pautht->firstmsg; msg <= to; msg++) {
+      if (pmsgt->authnum == authnum) {
+        if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msg))) die_nomem();
+        if (!stralloc_cats(&line,":")) die_nomem();
+       if (!stralloc_catb(&line,strnum,fmt_uint(strnum,pmsgt->date)))
+               die_nomem();
+       if (!stralloc_cats(&line,":")) die_nomem();
+        if (pmsgt->subnum) {   /* NOTE(review): with subnum 0 no "\n" is appended, unlike the author-less case above - confirm */
+         psubt = subtable + pmsgt->subnum - 1;
+          if (!stralloc_catb(&line,psubt->sub,psubt->sublen))
+               die_nomem();
+       }
+       if (substdio_put(&ssout,line.s,line.len) == -1)
+         strerr_die6sys(111,FATAL,ERR_WRITE,dir,"/",fnn.s,": ");
+      }
+      pmsgt++;
+    }
+    close_proper(&ssout,fn.s,fnn.s);
+    pautht++;
+  }
+}
+
+int main(argc,argv)    /* index messages archnum..to of the list in "dir", then record "to" in archnum */
+int argc;
+char **argv;
+{
+  unsigned long archnum = 0L;  /* first message to index ("from"); 0 = take it from the archnum file */
+  unsigned long to = 0L;       /* last message to index; 0 = up to "num" */
+  unsigned long max;           /* message number parsed from "num" */
+  int fd;
+  int fdlock;
+  int flagcreate = 0;          /* -c: ignore the archnum file and start from the beginning */
+  int flagsyncall = 0;         /* -s: force flagsync back on whatever other options cleared it */
+  int opt;
+  msgentry *msgtable;
+  subentry *subtable;
+  authentry *authtable;
+  dateentry *datetable;
+
+  (void) umask(022);
+  sig_pipeignore();
+
+  while ((opt = getopt(argc,argv,"cCf:FsSt:TvV")) != opteof)
+    switch (opt) {
+      case 'c':        flagcreate = 1;
+               flagsync = 0;
+               break;                  /* start at beginning of archive */
+      case 'C': flagcreate = 0;
+               break;  /* Do only archnum+1 => num */
+      case 'f': if (optarg) {
+                 (void) scan_ulong(optarg,&archnum);
+                 archnum = (archnum / 100) * 100;      /* round down to a multiple of 100 */
+               }
+               flagsync = 0;
+               break;
+      case 'F': archnum = 0; break;
+      case 's': flagsyncall = 1; break;
+      case 'S': flagsyncall = 0; break;
+      case 't': if (optarg) {
+                 (void) scan_ulong(optarg,&to);
+               }
+               flagsync = 0;
+               break;
+      case 'T': to = 0; break;
+      case 'v':
+      case 'V': strerr_die2x(0,"ezmlm-archive version: ",EZIDX_VERSION);
+      default:
+        die_usage();
+    }
+
+  if (flagsyncall) flagsync = 1;       /* overrides */
+  dir = argv[optind++];
+  if (!dir) die_usage();
+  if (chdir(dir) == -1)                /* all later paths are relative to the list directory */
+    strerr_die4sys(111,FATAL,ERR_SWITCH,dir,": ");
+
+  if (mkdir("archive/threads",0755) == -1)     /* existing directories are fine */
+    if (errno != error_exist)
+      strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/threads: ");
+  if (mkdir("archive/subjects",0755) == -1)
+    if (errno != error_exist)
+      strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/subjects: ");
+  if (mkdir("archive/authors",0755) == -1)
+    if (errno != error_exist)
+      strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archive/authors: ");
+
+       /* Lock list to assure that no ezmlm-send is working on it */
+       /* and that the "num" message is final */
+  fdlock = open_append("lock");
+  if (fdlock == -1)
+    strerr_die2sys(111,FATAL,ERR_OPEN_LOCK);
+  if (lock_ex(fdlock) == -1) {
+    (void) close(fdlock);
+    strerr_die2sys(111,FATAL,ERR_OBTAIN_LOCK);
+  }
+                                       /* get num */
+  if (!getconf_line(&num,"num",0,FATAL,dir))
+    strerr_die1x(100,ERR_EMPTY_LIST);
+  (void) close(fdlock);                /* list lock is only needed while reading num */
+
+  if (!stralloc_0(&num)) die_nomem();  /* parse num */
+  (void) scan_ulong(num.s,&max);
+  if (!to || to > max) to = max;       /* no -t, or -t beyond the archive: stop at num */
+
+  fdlock = open_append("archive/lock");        /* lock index */
+  if (fdlock == -1)
+    strerr_die4sys(111,FATAL,ERR_OPEN,dir,"/archive/lock: ");
+  if (lock_ex(fdlock) == -1) {
+    (void) close(fdlock);
+    strerr_die4sys(111,FATAL,ERR_OBTAIN,dir,"/archive/lock: ");
+  }                                    /* this fd is never closed explicitly below */
+  if (!flagcreate && !archnum) {       /* adjust archnum (from) / to */
+    if (getconf_line(&num,"archnum",0,FATAL,dir)) {
+      if (!stralloc_0(&num)) die_nomem();
+      (void) scan_ulong(num.s,&archnum);
+      archnum++;                       /* continue after the last message already indexed */
+    }
+  }
+
+  if (archnum > to)
+    _exit(0);                          /* nothing to do */
+
+                                       /* do the subject threading */
+  idx_mkthreads(&msgtable,&subtable,&authtable,&datetable,
+       archnum,to,max,0,FATAL);
+                                       /* update the index */
+  write_threads(msgtable,subtable,authtable,datetable,archnum,to);
+                                       /* update archnum */
+  if ((fd = open_trunc("archnumn")) == -1)
+    strerr_die4sys(111,FATAL,ERR_CREATE,dir,"/archnumn: ");
+  substdio_fdbuf(&ssnum,write,fd,numbuf,sizeof(numbuf));
+  if (substdio_put(&ssnum,strnum,fmt_ulong(strnum,to)) == -1)
+     strerr_die4sys(111,FATAL,ERR_WRITE,dir,"/archnumn: ");    /* name the file being written, not the stale fnn */
+  if (substdio_puts(&ssnum,"\n") == -1)
+     strerr_die4sys(111,FATAL,ERR_WRITE,dir,"/archnumn: ");
+  close_proper(&ssnum,"archnum","archnumn");
+  switch (flagerror) {                 /* index was written; report any inconsistency with code 99 */
+    case 0:
+       _exit(0);                               /* go bye-bye */
+    case -1:
+       strerr_die2x(99,WARNING,"threads entry with illegal format");
+    case -2:
+       strerr_die2x(99,WARNING,"thread in index, but threadfile missing");
+    case -3:
+       strerr_die2x(99,WARNING,"a subject file lacks subject");
+    case -4:
+       strerr_die2x(99,WARNING,"an author file lacks author/hash");
+    case -5:
+       strerr_die2x(99,WARNING,"threads entry lacks message count");
+    default:
+       strerr_die2x(99,WARNING,"something happened that isn't quite right");
+  }
+}
+