Import ezmlm-idx 0.40
[ezmlm] / ezmlm-idx.c
diff --git a/ezmlm-idx.c b/ezmlm-idx.c
new file mode 100644 (file)
index 0000000..c552357
--- /dev/null
@@ -0,0 +1,337 @@
+/*$Id: ezmlm-idx.c,v 1.29 1999/10/29 02:49:14 lindberg Exp $*/
+/*$Name: ezmlm-idx-040 $*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "stralloc.h"
+#include "subfd.h"
+#include "strerr.h"
+#include "error.h"
+#include "lock.h"
+#include "slurp.h"
+#include "open.h"
+#include "getln.h"
+#include "sgetopt.h"
+#include "case.h"
+#include "scan.h"
+#include "str.h"
+#include "fmt.h"
+#include "readwrite.h"
+#include "exit.h"
+#include "substdio.h"
+#include "idx.h"
+#include "mime.h"
+#include "errtxt.h"
+#include "getconf.h"
+#include "makehash.h"
+
+#define FATAL "ezmlm-idx: fatal: "     /* prefix for every fatal error message */
+
+char strnum[FMT_ULONG];        /* scratch for fmt_ulong()/fmt_uint0() output */
+char hash[HASHLEN];            /* output of makehash()/mkauthhash() */
+/* path names rebuilt in main() for every block of 100 messages */
+stralloc fnadir = {0};         /* "archive/<n>" directory */
+stralloc fnif = {0};           /* "archive/<n>/index", the rename() target */
+stralloc fnifn = {0};          /* "archive/<n>/indexn", the file being written */
+stralloc fnaf = {0};           /* "archive/<n>/<nn>", the current message file */
+/* scratch strallocs shared by main() and idx_get_trimsubject() */
+stralloc line = {0};           /* current header line, later the index entry */
+stralloc lines = {0};          /* output of concatHDR()/unfoldHDR() */
+stralloc dummy = {0};          /* discarded unfoldHDR() output for the prefix */
+
+int fdindexn;                  /* descriptor of the open "indexn" file */
+int fdlock;                    /* descriptor of the "lock" file */
+int fd;                        /* current message; reused for "indexed" */
+int flagdate = 0;      /* use 'Received:' header by default, =1 -> 'Date:' */
+
+       /* input stream and buffer used here to read each message header */
+static substdio ssin;
+static char inbuf[1024];
+/* output stream and buffer for the "indexn" file */
+substdio ssindex;
+char indexbuf[1024];
+
+struct stat st;                /* fstat() result for the current message */
+/* header data collected for each message, plus list configuration */
+stralloc subject = {0};
+stralloc author = {0};
+stralloc authmail = {0};       /* not referenced in the code visible here */
+stralloc received = {0};       /* date text from 'Received:' or 'Date:' */
+stralloc prefix = {0};         /* "prefix" config line, decoded in main() */
+stralloc charset = {0};        /* "charset" config line, 0-terminated in main() */
+
+struct strerr index_err;       /* not referenced in the code visible here */
+
+stralloc num = {0};            /* contents of the "num" file */
+
+char buf0[256];                /* buffer for ss0 */
+substdio ss0 = SUBSTDIO_FDBUF(read,0,buf0,sizeof(buf0));       /* reader on descriptor 0; not used in the code visible here */
+
+/* Report the command line synopsis through strerr_die1x() with code 100. */
+void die_usage()
+{
+  strerr_die1x(100,
+    "ezmlm-idx: usage: ezmlm-idx [-dDF] [-f msg] dir");
+}
+
+/* Report ERR_NOMEM, prefixed with FATAL, through strerr_die2x() with   */
+/* code 100. Called whenever a stralloc operation returns 0.            */
+void die_nomem()
+{
+  strerr_die2x(100,
+               FATAL,
+               ERR_NOMEM);
+}
+
+int idx_get_trimsubject()
+
+/* Scans the header of the message open on the global descriptor 'fd'    */
+/* (up to the first empty line or end of input) and fills the globals:    */
+/*   subject  - text after the first 'Subject:' plus its continuation     */
+/*              lines, then run through concatHDR/decodeHDR/unfoldHDR;    */
+/*              set to length 0 if the message has no subject.            */
+/*   author   - raw text after the first 'From:' plus its continuation    */
+/*              lines (line terminators are still included).              */
+/*   received - if flagdate is 0: the text following "; " on the first    */
+/*              'Received:' line, without the final newline;              */
+/*              if flagdate is 1: 'dd Mon yyyy hh:mm:ss zone' rebuilt     */
+/*              from the first well-formed 'Date:' line (2-digit years    */
+/*              are expanded to 19yy for 70-99 and 20yy otherwise).       */
+/* author and received are not cleared here; the caller does that.        */
+/* Read errors are fatal (exit 111), as is running out of memory.         */
+/* returns the result of unfoldHDR() for the subject, or 0 if none:       */
+/*          0 - no reply no prefix */
+/*          1 - reply no prefix */
+/*          2 - prefix no reply */
+/*          3 - reply & prefix */
+{
+int foundsubject = 0;
+int issubject = 0;             /* previous header line was 'Subject:' */
+int isfrom = 0;                /* previous header line was 'From:' */
+int foundreceived = 0;
+int foundfrom = 0;
+int match;
+int r;
+unsigned int pos,pos1;
+
+  substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
+  for (;;) {
+    if (getln(&ssin,&line,&match,'\n') == -1)
+      strerr_die2x(111,FATAL,ERR_READ_INPUT);
+    if (match) {
+      if (line.len == 1)               /* empty line: end of header */
+        break;
+      if (*line.s == ' ' || *line.s == '\t') {
+                               /* continuation */
+        if (issubject) {
+          if (!stralloc_cat(&subject,&line)) die_nomem();
+        } else if (isfrom)
+          if (!stralloc_cat(&author,&line)) die_nomem();
+      } else {
+        issubject = 0;
+        isfrom = 0;
+        if (!foundsubject && case_startb(line.s,line.len,"Subject:")) {
+          if (!stralloc_copyb(&subject,line.s+8,line.len-8)) die_nomem();
+          foundsubject = 1;
+          issubject = 1;
+        } else if (!foundfrom && case_startb(line.s,line.len,"From:")) {
+          if (!stralloc_copyb(&author,line.s+5,line.len-5)) die_nomem();
+          foundfrom = 1;
+          isfrom = 1;
+        } else if (!flagdate && !foundreceived &&
+            case_startb(line.s,line.len,"Received:")) {
+          pos = byte_chr(line.s,line.len,';');
+                       /* Copy what follows "; " minus the final '\n'.   */
+                       /* The bound also covers "no ';' found" and keeps */
+                       /* the unsigned length from wrapping around when  */
+                       /* fewer than 2 bytes follow the ';' on this line */
+          if (pos + 3 <= line.len)
+            if (!stralloc_copyb(&received,line.s+pos+2,line.len - pos - 3))
+              die_nomem();
+          foundreceived = 1;
+        } else if (flagdate && !foundreceived &&
+            case_startb(line.s,line.len,"Date:")) {
+          if (line.len < 22) continue;                         /* illegal */
+                       /* skip optional 'Day,' in front of the date */
+          pos = 6 + byte_chr(line.s+6,line.len-6,',');
+          if (pos == line.len)
+            pos = 5;
+          ++pos;
+          while (line.s[pos] == ' ' || line.s[pos] == '\t') ++pos;     /* dd */
+          pos1 = pos + 3;
+          while (++pos1 < line.len && line.s[pos1] != ' ');            /* mo */
+          ++pos1;
+                       /* a short line can push pos1 past the end; clamp */
+                       /* so the copy below never reads beyond the line. */
+                       /* Such a line fails the year test and is scrapped */
+          if (pos1 > line.len) pos1 = line.len;
+          if (!stralloc_copyb(&received,line.s+pos,pos1 - pos))
+              die_nomem();                                     /* '01 Jun ' */
+          if (pos1 + 2 < line.len) {
+            if (line.s[pos1 + 2] == ' ') {                     /* 2-digit */
+              if (line.s[pos1] >= '7') {                       /* >= 70 */
+                if (!stralloc_cats(&received,"19")) die_nomem();
+              } else if (!stralloc_cats(&received,"20")) die_nomem();
+              pos = pos1 + 3;                                  /* 2 digit */
+            } else
+              pos = pos1 + 5;                                  /* 4 digit */
+            if (pos < line.len) {
+              pos += byte_chr(line.s+pos,line.len-pos,' ');    /* after time */
+              if (pos < line.len) {
+                ++pos;                                         /* zone */
+                while (line.s[pos] != ' ' && line.s[pos] != '\n') ++pos;
+              } else
+                pos = line.len - 1;    /* no zone. Illegal; better than 0 */
+              if (!stralloc_catb(&received,line.s+pos1,pos - pos1))
+                       die_nomem();
+              foundreceived = 1;
+              continue;
+            }
+          }
+          received.len = 0;            /* bad format - scrap */
+        }
+      }
+    } else                             /* end of input without newline */
+      break;
+  }
+
+  if (foundsubject) {
+    concatHDR(subject.s,subject.len,&lines,FATAL);     /* make 1 line */
+    decodeHDR(lines.s,lines.len,&line,charset.s,FATAL);        /* decode mime */
+    r= unfoldHDR(line.s,line.len,&subject,charset.s,&prefix,1,FATAL);
+                                                /* trim mime */
+  }
+  else {
+    r = 0;
+    subject.len = 0;
+  }
+  return r;
+}
+
+/* ezmlm-idx [-dDF] [-f msg] dir                                          */
+/* Builds the per-100-message index files "archive/<n>/index" of the list */
+/* directory 'dir' from the archived messages "archive/<n>/<nn>".         */
+/*   -d      take the date from 'Date:' instead of 'Received:'            */
+/*   -D      take the date from 'Received:' (default)                     */
+/*   -f msg  start with the block of 100 that contains message 'msg'      */
+/*   -F      start from the first message (default)                       */
+/*   -v, -V  print the version and exit 0                                 */
+/* Each index is written as "indexn", flushed, synced and then renamed to */
+/* "index". One entry per readable message with the owner-execute bit:    */
+/*   "<num>: <subject hash> [Re: ]<subject>\n"                            */
+/*   "\t<date>;<author hash> <author>\n"                                  */
+/* The file "lock" is held exclusively while working, and the file        */
+/* "indexed" is created on success. Exits 0 on success.                   */
+int main(argc,argv)
+int argc;
+char **argv;
+{
+  char *dir,*cp;
+  unsigned long msgnum = 0L;
+  unsigned long msgmax;
+  unsigned int authlen;
+  int opt,r;
+
+  while ((opt = getopt(argc,argv,"dDf:FvV")) != opteof)
+    switch (opt) {
+      case 'd': flagdate = 1; break;
+      case 'D': flagdate = 0; break;
+      case 'f': if (optarg) (void) scan_ulong(optarg,&msgnum); break;
+                       /* must not fall through into the version exit */
+      case 'F': msgnum = 0L; break;
+      case 'v':
+      case 'V': strerr_die2x(0,"ezmlm-idx version: ",EZIDX_VERSION);
+      default: die_usage();
+  }
+  dir = argv[optind];
+  if (!dir) die_usage();
+
+  if (chdir(dir) == -1)
+    strerr_die4sys(100,FATAL,ERR_SWITCH,dir,": ");
+
+  (void) umask(022);
+  sig_pipeignore();
+                       /* obtain lock to write index files */
+  fdlock = open_append("lock");
+  if (fdlock == -1)
+    strerr_die2sys(100,FATAL,ERR_OPEN_LOCK);
+  if (lock_ex(fdlock) == -1)
+    strerr_die2sys(100,FATAL,ERR_OBTAIN_LOCK);
+
+  getconf_line(&charset,"charset",0,FATAL,dir);
+  if (!stralloc_0(&charset)) die_nomem();
+
+  getconf_line(&prefix,"prefix",0,FATAL,dir);
+                                       /* support rfc2047-encoded prefix */
+  decodeHDR(prefix.s,prefix.len,&line,charset.s,FATAL);
+  unfoldHDR(line.s,line.len,&prefix,charset.s,&dummy,0,FATAL);
+                                       /* need only decoded one */
+
+                       /* Get message number */
+  switch(slurp("num",&num,32)) {
+    case -1:
+      strerr_die4sys(100,FATAL,ERR_READ,dir,"/num: ");
+    case 0:
+      strerr_die4x(100,FATAL,dir,"/num",ERR_NOEXIST);
+  }
+  if (!stralloc_0(&num)) die_nomem();
+  scan_ulong(num.s,&msgmax);
+  if (msgnum > msgmax) _exit(0);
+                       /* Back up to just before the start of the block of */
+                       /* 100, so that the pre-increment below lands on it. */
+                       /* For block 0 the unsigned value wraps and the     */
+                       /* increment brings it back to 0.                   */
+  if (msgnum) {
+    msgnum = (msgnum / 100) * 100 - 1;
+  }
+  while (++msgnum <= msgmax) {
+                       /* first message of a block: start a new index file */
+    if (msgnum == 1 || !(msgnum % 100)) {
+      if (!stralloc_copys(&fnadir,"archive/")) die_nomem();
+      if (!stralloc_catb(&fnadir,strnum,fmt_ulong(strnum,msgnum / 100)))
+       die_nomem();
+      if (!stralloc_copy(&fnifn,&fnadir)) die_nomem();
+      if (!stralloc_copy(&fnif,&fnadir)) die_nomem();
+      if (!stralloc_cats(&fnif,"/index")) die_nomem();
+      if (!stralloc_cats(&fnifn,"/indexn")) die_nomem();
+      if (!stralloc_0(&fnadir)) die_nomem();
+      if (!stralloc_0(&fnifn)) die_nomem();
+      if (!stralloc_0(&fnif)) die_nomem();
+
+                       /* May not exist, so be nice and make it */
+      if (mkdir(fnadir.s,0755) == -1)
+       if (errno != error_exist)
+         strerr_die4sys(100,FATAL,ERR_CREATE,fnadir.s,": ");
+
+                       /* Open index */
+      fdindexn = open_trunc(fnifn.s);
+      if (fdindexn == -1)
+        strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s,": ");
+
+                       /* set up buffers for index */
+      substdio_fdbuf(&ssindex,write,fdindexn,indexbuf,sizeof(indexbuf));
+    }
+
+                       /* archive/<n>/<nn>: the message file itself */
+    if (!stralloc_copys(&fnaf,fnadir.s)) die_nomem();
+    if (!stralloc_cats(&fnaf,"/")) die_nomem();
+    if (!stralloc_catb(&fnaf,strnum,
+       fmt_uint0(strnum,(unsigned int) (msgnum % 100),2))) die_nomem();
+    if (!stralloc_0(&fnaf)) die_nomem();
+    fd = open_read(fnaf.s);
+    if (fd == -1) {
+                       /* a missing message is simply left out */
+      if (errno != error_noent)
+        strerr_die4sys(100,FATAL,ERR_READ,fnaf.s,": ");
+    } else if (fstat(fd,&st) == -1 || (!(st.st_mode & 0100)))
+                       /* no owner-execute bit: message is not indexed */
+        close(fd);
+    else {
+      subject.len = 0;         /* clear in case they're missing in msg */
+      author.len = 0;
+      received.len = 0;
+      r = idx_get_trimsubject();
+      close(fd);
+                       /* first entry line: number, subject hash, subject */
+      if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msgnum))) die_nomem();
+      if (!stralloc_cats(&line,": ")) die_nomem();
+      makehash(subject.s,subject.len,hash);
+      if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem();
+      if (!stralloc_cats(&line," ")) die_nomem();
+      if (r & 1)       /* reply */
+       if (!stralloc_cats(&line,"Re: ")) die_nomem();
+      if (!stralloc_cat(&line,&subject)) die_nomem();
+                       /* second entry line: date, author hash, author */
+      if (!stralloc_cats(&line,"\n\t")) die_nomem();
+      if (!stralloc_cat(&line,&received)) die_nomem();
+      if (!stralloc_cats(&line,";")) die_nomem();
+
+      concatHDR(author.s,author.len,&lines,FATAL);
+      mkauthhash(lines.s,lines.len,hash);
+      if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem();
+
+                       /* author_name() sets cp; it has to run before cp  */
+                       /* is read, and C leaves the evaluation order of   */
+                       /* function arguments unspecified                  */
+      authlen = author_name(&cp,lines.s,lines.len);
+      decodeHDR(cp,authlen,&author,charset.s,FATAL);
+      (void) unfoldHDR(author.s,author.len,&lines,charset.s,&prefix,0,FATAL);
+
+      if (!stralloc_cats(&line," ")) die_nomem();
+      if (!stralloc_cat(&line,&lines)) die_nomem();
+      if (!stralloc_cats(&line,"\n")) die_nomem();
+      if (substdio_put(&ssindex,line.s,line.len) == -1)
+          strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": ");
+    }
+
+    if (!((msgnum + 1) % 100) ||
+               (msgnum == msgmax)) {   /* last in this set */
+      if (substdio_flush(&ssindex) == -1)
+        strerr_die4sys(100,FATAL,ERR_FLUSH,fnifn.s, ": ");
+      if (fsync(fdindexn) == -1)
+        strerr_die4sys(100,FATAL,ERR_SYNC,fnifn.s, ": ");
+      if (fchmod(fdindexn,MODE_ARCHIVE | 0700) == -1)
+        strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": ");
+      if (close(fdindexn) == -1)
+        strerr_die4sys(100,FATAL,ERR_CLOSE,fnifn.s,": ");
+                       /* put the finished index in place; report errno */
+      if (rename(fnifn.s,fnif.s) == -1)
+        strerr_die4sys(111,FATAL,ERR_MOVE,fnifn.s,": ");
+    }
+  }
+                       /* leave the "indexed" marker file behind */
+  fd = open_append("indexed");
+  if (fd == -1)
+    strerr_die4sys(100,FATAL,ERR_CREATE,dir,"/indexed: ");
+  close(fd);
+  close(fdlock);
+  _exit(0);
+}
+