| 1 | /*$Id: ezmlm-idx.c,v 1.29 1999/10/29 02:49:14 lindberg Exp $*/ |
| 2 | /*$Name: ezmlm-idx-040 $*/ |
| 3 | |
| 4 | #include <sys/types.h> |
| 5 | #include <sys/stat.h> |
| 6 | #include "stralloc.h" |
| 7 | #include "subfd.h" |
| 8 | #include "strerr.h" |
| 9 | #include "error.h" |
| 10 | #include "lock.h" |
| 11 | #include "slurp.h" |
| 12 | #include "open.h" |
| 13 | #include "getln.h" |
| 14 | #include "sgetopt.h" |
| 15 | #include "case.h" |
| 16 | #include "scan.h" |
| 17 | #include "str.h" |
| 18 | #include "fmt.h" |
| 19 | #include "readwrite.h" |
| 20 | #include "exit.h" |
| 21 | #include "substdio.h" |
| 22 | #include "idx.h" |
| 23 | #include "mime.h" |
| 24 | #include "errtxt.h" |
| 25 | #include "getconf.h" |
| 26 | #include "makehash.h" |
| 27 | |
/* prefix of every fatal error message printed by this program */
#define FATAL "ezmlm-idx: fatal: "

/* scratch buffer for fmt_ulong()/fmt_uint0() output */
char strnum[FMT_ULONG];
/* output buffer for makehash()/mkauthhash(); HASHLEN bytes are copied out */
char hash[HASHLEN];

/* "archive/<msgnum / 100>": directory of the current set of messages */
stralloc fnadir = {0};
/* fnadir + "/index": final name of the index file */
stralloc fnif = {0};
/* fnadir + "/indexn": index while being written; renamed to fnif when done */
stralloc fnifn = {0};
/* fnadir + "/" + two-digit (msgnum % 100): message file being read */
stralloc fnaf = {0};

/* current header line while parsing; later the index entry being built */
stralloc line = {0};
/* scratch output of concatHDR()/unfoldHDR() */
stralloc lines = {0};
/* placeholder handed to unfoldHDR() when the prefix itself is unfolded */
stralloc dummy = {0};

/* descriptor of the "indexn" file currently being written */
int fdindexn;
/* descriptor of the "lock" file; held with lock_ex() while indexing */
int fdlock;
/* descriptor of the message being read; reused to create "indexed" */
int fd;
int flagdate = 0; /* use 'Received:' header by default, =1 -> 'Date:' */

/* buffered reader over 'fd'; (re)initialized by idx_get_trimsubject() */
static substdio ssin;
static char inbuf[1024];

/* buffered writer over 'fdindexn' */
substdio ssindex;
char indexbuf[1024];

/* fstat() result for the message file */
struct stat st;

/* per-message header data filled in by idx_get_trimsubject() */
stralloc subject = {0};
stralloc author = {0};
/* NOTE(review): not referenced in the code visible here */
stralloc authmail = {0};
stralloc received = {0};
/* contents of the "prefix" and "charset" config files (see main()) */
stralloc prefix = {0};
stralloc charset = {0};

/* NOTE(review): not referenced in the code visible here */
struct strerr index_err;

/* contents of the "num" file (highest message number) */
stralloc num = {0};

/* reader on descriptor 0; NOTE(review): not used in the code visible here */
char buf0[256];
substdio ss0 = SUBSTDIO_FDBUF(read,0,buf0,sizeof(buf0));
| 69 | |
/* Reports the command-line synopsis through strerr_die1x() with exit */
/* code 100. Callers rely on this not returning. */
void die_usage()
{
  strerr_die1x(100,"ezmlm-idx: usage: ezmlm-idx [-dDF] [-f msg] dir");

}
| 75 | |
/* Reports ERR_NOMEM, prefixed with FATAL, through strerr_die2x() with */
/* exit code 100. Called whenever a stralloc operation fails. */
void die_nomem()
{
  strerr_die2x(100,FATAL,ERR_NOMEM);
}
| 80 | |
| 81 | int idx_get_trimsubject() |
| 82 | |
| 83 | /* reads an open message from 'fd', extracts the subject (if any), and */ |
| 84 | /* returns the subject in 'sub', the author in 'author', and the received */ |
| 85 | /* rfc822 date to 'received'. 'fatal' is a program-specific error string. */ |
| 86 | /* returns: 0 - no reply no prefix */ |
| 87 | /* 1 - reply no prefix */ |
| 88 | /* 2 - prefix no reply */ |
| 89 | /* 3 - reply & prefix */ |
| 90 | /* No terminal '\n' in any of the strallocs! */ |
| 91 | { |
| 92 | char *cp; |
| 93 | int foundsubject = 0; |
| 94 | int issubject = 0; |
| 95 | int isfrom = 0; |
| 96 | int foundreceived = 0; |
| 97 | int foundfrom = 0; |
| 98 | int match; |
| 99 | int r; |
| 100 | unsigned int pos,pos1; |
| 101 | |
| 102 | substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf)); |
| 103 | for (;;) { |
| 104 | if (getln(&ssin,&line,&match,'\n') == -1) |
| 105 | strerr_die2x(111,FATAL,ERR_READ_INPUT); |
| 106 | if (match) { |
| 107 | if (line.len == 1) |
| 108 | break; |
| 109 | if (*line.s == ' ' || *line.s == '\t') { |
| 110 | /* continuation */ |
| 111 | if (issubject) { |
| 112 | if (!stralloc_cat(&subject,&line)) die_nomem(); |
| 113 | } else if (isfrom) |
| 114 | if (!stralloc_cat(&author,&line)) die_nomem(); |
| 115 | } else { |
| 116 | issubject = 0; |
| 117 | isfrom = 0; |
| 118 | if (!foundsubject && case_startb(line.s,line.len,"Subject:")) { |
| 119 | if (!stralloc_copyb(&subject,line.s+8,line.len-8)) die_nomem(); |
| 120 | foundsubject = 1; |
| 121 | issubject = 1; |
| 122 | } else if (!foundfrom && case_startb(line.s,line.len,"From:")) { |
| 123 | if (!stralloc_copyb(&author,line.s+5,line.len-5)) die_nomem(); |
| 124 | foundfrom = 1; |
| 125 | isfrom = 1; |
| 126 | } else if (!flagdate && !foundreceived && |
| 127 | case_startb(line.s,line.len,"Received:")) { |
| 128 | pos = byte_chr(line.s,line.len,';'); |
| 129 | if (pos != line.len) |
| 130 | if (!stralloc_copyb(&received,line.s+pos+2,line.len - pos - 3)) |
| 131 | die_nomem(); |
| 132 | foundreceived = 1; |
| 133 | } else if (flagdate && !foundreceived && |
| 134 | case_startb(line.s,line.len,"Date:")) { |
| 135 | if (line.len < 22) continue; /* illegal */ |
| 136 | pos = 6 + byte_chr(line.s+6,line.len-6,','); |
| 137 | if (pos == line.len) |
| 138 | pos = 5; |
| 139 | ++pos; |
| 140 | while (line.s[pos] == ' ' || line.s[pos] == '\t') ++pos; /* dd */ |
| 141 | pos1 = pos + 3; |
| 142 | while (++pos1 < line.len && line.s[pos1] != ' '); /* mo */ |
| 143 | ++pos1; |
| 144 | if (!stralloc_copyb(&received,line.s+pos,pos1 - pos)) |
| 145 | die_nomem(); /* '01 Jun ' */ |
| 146 | if (pos1 + 2 < line.len) { |
| 147 | if (line.s[pos1 + 2] == ' ') { /* 2-digit */ |
| 148 | if (line.s[pos1] >= '7') { /* >= 70 */ |
| 149 | if (!stralloc_cats(&received,"19")) die_nomem(); |
| 150 | } else if (!stralloc_cats(&received,"20")) die_nomem(); |
| 151 | pos = pos1 + 3; /* 2 digit */ |
| 152 | } else |
| 153 | pos = pos1 + 5; /* 4 digit */ |
| 154 | if (pos < line.len) { |
| 155 | pos += byte_chr(line.s+pos,line.len-pos,' '); /* after time */ |
| 156 | if (pos < line.len) { |
| 157 | ++pos; /* zone */ |
| 158 | while (line.s[pos] != ' ' && line.s[pos] != '\n') ++pos; |
| 159 | } else |
| 160 | pos = line.len - 1; /* no zone. Illegal; better than 0 */ |
| 161 | if (!stralloc_catb(&received,line.s+pos1,pos - pos1)) |
| 162 | die_nomem(); |
| 163 | foundreceived = 1; |
| 164 | continue; |
| 165 | } |
| 166 | } |
| 167 | received.len = 0; /* bad format - scrap */ |
| 168 | } |
| 169 | } |
| 170 | } else |
| 171 | break; |
| 172 | } |
| 173 | |
| 174 | if (foundsubject) { |
| 175 | concatHDR(subject.s,subject.len,&lines,FATAL); /* make 1 line */ |
| 176 | decodeHDR(lines.s,lines.len,&line,charset.s,FATAL); /* decode mime */ |
| 177 | r= unfoldHDR(line.s,line.len,&subject,charset.s,&prefix,1,FATAL); |
| 178 | /* trim mime */ |
| 179 | } |
| 180 | else { |
| 181 | r = 0; |
| 182 | subject.len = 0; |
| 183 | } |
| 184 | return r; |
| 185 | } |
| 186 | |
| 187 | int main(argc,argv) |
| 188 | int argc; |
| 189 | char **argv; |
| 190 | { |
| 191 | char *dir,*cp; |
| 192 | unsigned long msgnum = 0L; |
| 193 | unsigned long msgmax; |
| 194 | int opt,r; |
| 195 | |
| 196 | while ((opt = getopt(argc,argv,"dDf:FvV")) != opteof) |
| 197 | switch (opt) { |
| 198 | case 'd': flagdate = 1; break; |
| 199 | case 'D': flagdate = 0; break; |
| 200 | case 'f': if (optarg) (void) scan_ulong(optarg,&msgnum); break; |
| 201 | case 'F': msgnum = 0L; |
| 202 | case 'v': |
| 203 | case 'V': strerr_die2x(0,"ezmlm-archive version: ",EZIDX_VERSION); |
| 204 | default: die_usage(); |
| 205 | } |
| 206 | dir = argv[optind]; |
| 207 | if (!dir) die_usage(); |
| 208 | |
| 209 | if (chdir(dir) == -1) |
| 210 | strerr_die4sys(100,FATAL,ERR_SWITCH,dir,": "); |
| 211 | |
| 212 | (void) umask(022); |
| 213 | sig_pipeignore(); |
| 214 | /* obtain lock to write index files */ |
| 215 | fdlock = open_append("lock"); |
| 216 | if (fdlock == -1) |
| 217 | strerr_die2sys(100,FATAL,ERR_OPEN_LOCK); |
| 218 | if (lock_ex(fdlock) == -1) |
| 219 | strerr_die2sys(100,FATAL,ERR_OBTAIN_LOCK); |
| 220 | |
| 221 | getconf_line(&charset,"charset",0,FATAL,dir); |
| 222 | if (!stralloc_0(&charset)) die_nomem(); |
| 223 | |
| 224 | getconf_line(&prefix,"prefix",0,FATAL,dir); |
| 225 | /* support rfc2047-encoded prefix */ |
| 226 | decodeHDR(prefix.s,prefix.len,&line,charset.s,FATAL); |
| 227 | unfoldHDR(line.s,line.len,&prefix,charset.s,&dummy,0,FATAL); |
| 228 | /* need only decoded one */ |
| 229 | |
| 230 | /* Get message number */ |
| 231 | switch(slurp("num",&num,32)) { |
| 232 | case -1: |
| 233 | strerr_die4sys(100,FATAL,ERR_READ,dir,"/num: "); |
| 234 | case 0: |
| 235 | strerr_die4x(100,FATAL,dir,"/num",ERR_NOEXIST); |
| 236 | } |
| 237 | if (!stralloc_0(&num)) die_nomem(); |
| 238 | scan_ulong(num.s,&msgmax); |
| 239 | if (msgnum > msgmax) _exit(0); |
| 240 | if (msgnum) { |
| 241 | msgnum = (msgnum / 100) * 100 - 1; |
| 242 | } |
| 243 | while (++msgnum <= msgmax) { |
| 244 | if (msgnum == 1 || !(msgnum % 100)) { |
| 245 | if (!stralloc_copys(&fnadir,"archive/")) die_nomem(); |
| 246 | if (!stralloc_catb(&fnadir,strnum,fmt_ulong(strnum,msgnum / 100))) |
| 247 | die_nomem(); |
| 248 | if (!stralloc_copy(&fnifn,&fnadir)) die_nomem(); |
| 249 | if (!stralloc_copy(&fnif,&fnadir)) die_nomem(); |
| 250 | if (!stralloc_cats(&fnif,"/index")) die_nomem(); |
| 251 | if (!stralloc_cats(&fnifn,"/indexn")) die_nomem(); |
| 252 | if (!stralloc_0(&fnadir)) die_nomem(); |
| 253 | if (!stralloc_0(&fnifn)) die_nomem(); |
| 254 | if (!stralloc_0(&fnif)) die_nomem(); |
| 255 | |
| 256 | /* May not exist, so be nice and make it */ |
| 257 | if (mkdir(fnadir.s,0755) == -1) |
| 258 | if (errno != error_exist) |
| 259 | strerr_die4sys(100,FATAL,ERR_CREATE,fnadir.s,": "); |
| 260 | |
| 261 | /* Open index */ |
| 262 | fdindexn = open_trunc(fnifn.s); |
| 263 | if (fdindexn == -1) |
| 264 | strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s,": "); |
| 265 | |
| 266 | /* set up buffers for index */ |
| 267 | substdio_fdbuf(&ssindex,write,fdindexn,indexbuf,sizeof(indexbuf)); |
| 268 | |
| 269 | /* Get subject without the 'Subject: ' */ |
| 270 | /* make sure there is one */ |
| 271 | } |
| 272 | |
| 273 | if (!stralloc_copys(&fnaf,fnadir.s)) die_nomem(); |
| 274 | if (!stralloc_cats(&fnaf,"/")) die_nomem(); |
| 275 | if (!stralloc_catb(&fnaf,strnum, |
| 276 | fmt_uint0(strnum,(unsigned int) (msgnum % 100),2))) die_nomem(); |
| 277 | if (!stralloc_0(&fnaf)) die_nomem(); |
| 278 | fd = open_read(fnaf.s); |
| 279 | if (fd == -1) { |
| 280 | if (errno != error_noent) |
| 281 | strerr_die4sys(100,FATAL,ERR_READ,fnaf.s,": "); |
| 282 | } else if (fstat(fd,&st) == -1 || (!(st.st_mode & 0100))) |
| 283 | close(fd); |
| 284 | else { |
| 285 | subject.len = 0; /* clear in case they're missing in msg */ |
| 286 | author.len = 0; |
| 287 | received.len = 0; |
| 288 | r = idx_get_trimsubject(); |
| 289 | close(fd); |
| 290 | if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msgnum))) die_nomem(); |
| 291 | if (!stralloc_cats(&line,": ")) die_nomem(); |
| 292 | makehash(subject.s,subject.len,hash); |
| 293 | if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem(); |
| 294 | if (!stralloc_cats(&line," ")) die_nomem(); |
| 295 | if (r & 1) /* reply */ |
| 296 | if (!stralloc_cats(&line,"Re: ")) die_nomem(); |
| 297 | if (!stralloc_cat(&line,&subject)) die_nomem(); |
| 298 | if (!stralloc_cats(&line,"\n\t")) die_nomem(); |
| 299 | if (!stralloc_cat(&line,&received)) die_nomem(); |
| 300 | if (!stralloc_cats(&line,";")) die_nomem(); |
| 301 | |
| 302 | concatHDR(author.s,author.len,&lines,FATAL); |
| 303 | mkauthhash(lines.s,lines.len,hash); |
| 304 | if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem(); |
| 305 | |
| 306 | decodeHDR(cp,author_name(&cp,lines.s,lines.len),&author,charset.s,FATAL); |
| 307 | (void) unfoldHDR(author.s,author.len,&lines,charset.s,&prefix,0,FATAL); |
| 308 | |
| 309 | if (!stralloc_cats(&line," ")) die_nomem(); |
| 310 | if (!stralloc_cat(&line,&lines)) die_nomem(); |
| 311 | if (!stralloc_cats(&line,"\n")) die_nomem(); |
| 312 | if (substdio_put(&ssindex,line.s,line.len) == -1) |
| 313 | strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": "); |
| 314 | } |
| 315 | |
| 316 | if (!((msgnum + 1) % 100) || |
| 317 | (msgnum == msgmax)) { /* last in this set */ |
| 318 | if (substdio_flush(&ssindex) == -1) |
| 319 | strerr_die4sys(100,FATAL,ERR_FLUSH,fnifn.s, ": "); |
| 320 | if (fsync(fdindexn) == -1) |
| 321 | strerr_die4sys(100,FATAL,ERR_SYNC,fnifn.s, ": "); |
| 322 | if (fchmod(fdindexn,MODE_ARCHIVE | 0700) == -1) |
| 323 | strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": "); |
| 324 | if (close(fdindexn) == -1) |
| 325 | strerr_die4sys(100,FATAL,ERR_CLOSE,fnifn.s,": "); |
| 326 | if (rename(fnifn.s,fnif.s) == -1) |
| 327 | strerr_die4x(111,FATAL,ERR_MOVE,fnifn.s,": "); |
| 328 | } |
| 329 | } |
| 330 | fd = open_append("indexed"); |
| 331 | if (fd == -1) |
| 332 | strerr_die4sys(100,FATAL,ERR_CREATE,dir,"/indexed: "); |
| 333 | close(fd); |
| 334 | close(fdlock); |
| 335 | _exit(0); |
| 336 | } |
| 337 | |