Import ezmlm-idx 0.40
[ezmlm] / ezmlm-idx.c
1 /*$Id: ezmlm-idx.c,v 1.29 1999/10/29 02:49:14 lindberg Exp $*/
2 /*$Name: ezmlm-idx-040 $*/
3
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include "stralloc.h"
7 #include "subfd.h"
8 #include "strerr.h"
9 #include "error.h"
10 #include "lock.h"
11 #include "slurp.h"
12 #include "open.h"
13 #include "getln.h"
14 #include "sgetopt.h"
15 #include "case.h"
16 #include "scan.h"
17 #include "str.h"
18 #include "fmt.h"
19 #include "readwrite.h"
20 #include "exit.h"
21 #include "substdio.h"
22 #include "idx.h"
23 #include "mime.h"
24 #include "errtxt.h"
25 #include "getconf.h"
26 #include "makehash.h"
27
28 #define FATAL "ezmlm-idx: fatal: " /* prefix handed to the strerr_die* calls in this file */
29
30 char strnum[FMT_ULONG]; /* scratch for fmt_ulong()/fmt_uint0() output */
31 char hash[HASHLEN]; /* filled by makehash()/mkauthhash(), then appended to an index line */
32
33 stralloc fnadir = {0}; /* "archive/" + msgnum/100: directory of the current set */
34 stralloc fnif = {0}; /* fnadir + "/index": name the finished index is renamed to */
35 stralloc fnifn = {0}; /* fnadir + "/indexn": index file being written */
36 stralloc fnaf = {0}; /* fnadir + "/" + 2-digit msgnum%100: message file to read */
37
38 stralloc line = {0}; /* current input line; main() also builds index entries in it */
39 stralloc lines = {0}; /* output of concatHDR() and, for the author, of unfoldHDR() */
40 stralloc dummy = {0}; /* passed to unfoldHDR() in the prefix slot while decoding the prefix itself */
41
42 int fdindexn; /* descriptor of the fnifn file */
43 int fdlock; /* descriptor of the "lock" file, held with lock_ex() */
44 int fd; /* descriptor of the message being read; later of "indexed" */
45 int flagdate = 0; /* use 'Received:' header by default, =1 -> 'Date:' */
46
47 /* input stream and buffer used by idx_get_trimsubject() to read the message on 'fd' */
48 static substdio ssin;
49 static char inbuf[1024];
50
51 substdio ssindex; /* output stream writing to fdindexn */
52 char indexbuf[1024]; /* buffer behind ssindex */
53
54 struct stat st; /* fstat() result for the message file */
55
56 stralloc subject = {0}; /* Subject: field text, trimmed by unfoldHDR() */
57 stralloc author = {0}; /* From: field text */
58 stralloc authmail = {0}; /* NOTE(review): not referenced in the code shown here */
59 stralloc received = {0}; /* date text taken from Received: or Date: */
60 stralloc prefix = {0}; /* "prefix" config line after decodeHDR()/unfoldHDR() */
61 stralloc charset = {0}; /* "charset" config line, 0-terminated */
62
63 struct strerr index_err; /* NOTE(review): not referenced in the code shown here */
64
65 stralloc num = {0}; /* contents of the "num" file, 0-terminated */
66
67 char buf0[256]; /* buffer behind ss0 */
68 substdio ss0 = SUBSTDIO_FDBUF(read,0,buf0,sizeof(buf0)); /* reader on descriptor 0; NOTE(review): not used in the code shown here */
69
69
/* Print the command-line synopsis and terminate with exit code 100. */
void die_usage()
{
  strerr_die1x(100,
               "ezmlm-idx: usage: ezmlm-idx [-dDF] [-f msg] dir");
}
75
76 void die_nomem()
77 {
78 strerr_die2x(100,FATAL,ERR_NOMEM);
79 }
80
81 int idx_get_trimsubject()
82
83 /* reads an open message from 'fd', extracts the subject (if any), and */
84 /* returns the subject in 'sub', the author in 'author', and the received */
85 /* rfc822 date to 'received'. 'fatal' is a program-specific error string. */
86 /* returns: 0 - no reply no prefix */
87 /* 1 - reply no prefix */
88 /* 2 - prefix no reply */
89 /* 3 - reply & prefix */
90 /* No terminal '\n' in any of the strallocs! */
91 {
92 char *cp;
93 int foundsubject = 0;
94 int issubject = 0;
95 int isfrom = 0;
96 int foundreceived = 0;
97 int foundfrom = 0;
98 int match;
99 int r;
100 unsigned int pos,pos1;
101
102 substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf));
103 for (;;) {
104 if (getln(&ssin,&line,&match,'\n') == -1)
105 strerr_die2x(111,FATAL,ERR_READ_INPUT);
106 if (match) {
107 if (line.len == 1)
108 break;
109 if (*line.s == ' ' || *line.s == '\t') {
110 /* continuation */
111 if (issubject) {
112 if (!stralloc_cat(&subject,&line)) die_nomem();
113 } else if (isfrom)
114 if (!stralloc_cat(&author,&line)) die_nomem();
115 } else {
116 issubject = 0;
117 isfrom = 0;
118 if (!foundsubject && case_startb(line.s,line.len,"Subject:")) {
119 if (!stralloc_copyb(&subject,line.s+8,line.len-8)) die_nomem();
120 foundsubject = 1;
121 issubject = 1;
122 } else if (!foundfrom && case_startb(line.s,line.len,"From:")) {
123 if (!stralloc_copyb(&author,line.s+5,line.len-5)) die_nomem();
124 foundfrom = 1;
125 isfrom = 1;
126 } else if (!flagdate && !foundreceived &&
127 case_startb(line.s,line.len,"Received:")) {
128 pos = byte_chr(line.s,line.len,';');
129 if (pos != line.len)
130 if (!stralloc_copyb(&received,line.s+pos+2,line.len - pos - 3))
131 die_nomem();
132 foundreceived = 1;
133 } else if (flagdate && !foundreceived &&
134 case_startb(line.s,line.len,"Date:")) {
135 if (line.len < 22) continue; /* illegal */
136 pos = 6 + byte_chr(line.s+6,line.len-6,',');
137 if (pos == line.len)
138 pos = 5;
139 ++pos;
140 while (line.s[pos] == ' ' || line.s[pos] == '\t') ++pos; /* dd */
141 pos1 = pos + 3;
142 while (++pos1 < line.len && line.s[pos1] != ' '); /* mo */
143 ++pos1;
144 if (!stralloc_copyb(&received,line.s+pos,pos1 - pos))
145 die_nomem(); /* '01 Jun ' */
146 if (pos1 + 2 < line.len) {
147 if (line.s[pos1 + 2] == ' ') { /* 2-digit */
148 if (line.s[pos1] >= '7') { /* >= 70 */
149 if (!stralloc_cats(&received,"19")) die_nomem();
150 } else if (!stralloc_cats(&received,"20")) die_nomem();
151 pos = pos1 + 3; /* 2 digit */
152 } else
153 pos = pos1 + 5; /* 4 digit */
154 if (pos < line.len) {
155 pos += byte_chr(line.s+pos,line.len-pos,' '); /* after time */
156 if (pos < line.len) {
157 ++pos; /* zone */
158 while (line.s[pos] != ' ' && line.s[pos] != '\n') ++pos;
159 } else
160 pos = line.len - 1; /* no zone. Illegal; better than 0 */
161 if (!stralloc_catb(&received,line.s+pos1,pos - pos1))
162 die_nomem();
163 foundreceived = 1;
164 continue;
165 }
166 }
167 received.len = 0; /* bad format - scrap */
168 }
169 }
170 } else
171 break;
172 }
173
174 if (foundsubject) {
175 concatHDR(subject.s,subject.len,&lines,FATAL); /* make 1 line */
176 decodeHDR(lines.s,lines.len,&line,charset.s,FATAL); /* decode mime */
177 r= unfoldHDR(line.s,line.len,&subject,charset.s,&prefix,1,FATAL);
178 /* trim mime */
179 }
180 else {
181 r = 0;
182 subject.len = 0;
183 }
184 return r;
185 }
186
187 int main(argc,argv)
188 int argc;
189 char **argv;
190 {
191 char *dir,*cp;
192 unsigned long msgnum = 0L;
193 unsigned long msgmax;
194 int opt,r;
195
196 while ((opt = getopt(argc,argv,"dDf:FvV")) != opteof)
197 switch (opt) {
198 case 'd': flagdate = 1; break;
199 case 'D': flagdate = 0; break;
200 case 'f': if (optarg) (void) scan_ulong(optarg,&msgnum); break;
201 case 'F': msgnum = 0L;
202 case 'v':
203 case 'V': strerr_die2x(0,"ezmlm-archive version: ",EZIDX_VERSION);
204 default: die_usage();
205 }
206 dir = argv[optind];
207 if (!dir) die_usage();
208
209 if (chdir(dir) == -1)
210 strerr_die4sys(100,FATAL,ERR_SWITCH,dir,": ");
211
212 (void) umask(022);
213 sig_pipeignore();
214 /* obtain lock to write index files */
215 fdlock = open_append("lock");
216 if (fdlock == -1)
217 strerr_die2sys(100,FATAL,ERR_OPEN_LOCK);
218 if (lock_ex(fdlock) == -1)
219 strerr_die2sys(100,FATAL,ERR_OBTAIN_LOCK);
220
221 getconf_line(&charset,"charset",0,FATAL,dir);
222 if (!stralloc_0(&charset)) die_nomem();
223
224 getconf_line(&prefix,"prefix",0,FATAL,dir);
225 /* support rfc2047-encoded prefix */
226 decodeHDR(prefix.s,prefix.len,&line,charset.s,FATAL);
227 unfoldHDR(line.s,line.len,&prefix,charset.s,&dummy,0,FATAL);
228 /* need only decoded one */
229
230 /* Get message number */
231 switch(slurp("num",&num,32)) {
232 case -1:
233 strerr_die4sys(100,FATAL,ERR_READ,dir,"/num: ");
234 case 0:
235 strerr_die4x(100,FATAL,dir,"/num",ERR_NOEXIST);
236 }
237 if (!stralloc_0(&num)) die_nomem();
238 scan_ulong(num.s,&msgmax);
239 if (msgnum > msgmax) _exit(0);
240 if (msgnum) {
241 msgnum = (msgnum / 100) * 100 - 1;
242 }
243 while (++msgnum <= msgmax) {
244 if (msgnum == 1 || !(msgnum % 100)) {
245 if (!stralloc_copys(&fnadir,"archive/")) die_nomem();
246 if (!stralloc_catb(&fnadir,strnum,fmt_ulong(strnum,msgnum / 100)))
247 die_nomem();
248 if (!stralloc_copy(&fnifn,&fnadir)) die_nomem();
249 if (!stralloc_copy(&fnif,&fnadir)) die_nomem();
250 if (!stralloc_cats(&fnif,"/index")) die_nomem();
251 if (!stralloc_cats(&fnifn,"/indexn")) die_nomem();
252 if (!stralloc_0(&fnadir)) die_nomem();
253 if (!stralloc_0(&fnifn)) die_nomem();
254 if (!stralloc_0(&fnif)) die_nomem();
255
256 /* May not exist, so be nice and make it */
257 if (mkdir(fnadir.s,0755) == -1)
258 if (errno != error_exist)
259 strerr_die4sys(100,FATAL,ERR_CREATE,fnadir.s,": ");
260
261 /* Open index */
262 fdindexn = open_trunc(fnifn.s);
263 if (fdindexn == -1)
264 strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s,": ");
265
266 /* set up buffers for index */
267 substdio_fdbuf(&ssindex,write,fdindexn,indexbuf,sizeof(indexbuf));
268
269 /* Get subject without the 'Subject: ' */
270 /* make sure there is one */
271 }
272
273 if (!stralloc_copys(&fnaf,fnadir.s)) die_nomem();
274 if (!stralloc_cats(&fnaf,"/")) die_nomem();
275 if (!stralloc_catb(&fnaf,strnum,
276 fmt_uint0(strnum,(unsigned int) (msgnum % 100),2))) die_nomem();
277 if (!stralloc_0(&fnaf)) die_nomem();
278 fd = open_read(fnaf.s);
279 if (fd == -1) {
280 if (errno != error_noent)
281 strerr_die4sys(100,FATAL,ERR_READ,fnaf.s,": ");
282 } else if (fstat(fd,&st) == -1 || (!(st.st_mode & 0100)))
283 close(fd);
284 else {
285 subject.len = 0; /* clear in case they're missing in msg */
286 author.len = 0;
287 received.len = 0;
288 r = idx_get_trimsubject();
289 close(fd);
290 if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msgnum))) die_nomem();
291 if (!stralloc_cats(&line,": ")) die_nomem();
292 makehash(subject.s,subject.len,hash);
293 if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem();
294 if (!stralloc_cats(&line," ")) die_nomem();
295 if (r & 1) /* reply */
296 if (!stralloc_cats(&line,"Re: ")) die_nomem();
297 if (!stralloc_cat(&line,&subject)) die_nomem();
298 if (!stralloc_cats(&line,"\n\t")) die_nomem();
299 if (!stralloc_cat(&line,&received)) die_nomem();
300 if (!stralloc_cats(&line,";")) die_nomem();
301
302 concatHDR(author.s,author.len,&lines,FATAL);
303 mkauthhash(lines.s,lines.len,hash);
304 if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem();
305
306 decodeHDR(cp,author_name(&cp,lines.s,lines.len),&author,charset.s,FATAL);
307 (void) unfoldHDR(author.s,author.len,&lines,charset.s,&prefix,0,FATAL);
308
309 if (!stralloc_cats(&line," ")) die_nomem();
310 if (!stralloc_cat(&line,&lines)) die_nomem();
311 if (!stralloc_cats(&line,"\n")) die_nomem();
312 if (substdio_put(&ssindex,line.s,line.len) == -1)
313 strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": ");
314 }
315
316 if (!((msgnum + 1) % 100) ||
317 (msgnum == msgmax)) { /* last in this set */
318 if (substdio_flush(&ssindex) == -1)
319 strerr_die4sys(100,FATAL,ERR_FLUSH,fnifn.s, ": ");
320 if (fsync(fdindexn) == -1)
321 strerr_die4sys(100,FATAL,ERR_SYNC,fnifn.s, ": ");
322 if (fchmod(fdindexn,MODE_ARCHIVE | 0700) == -1)
323 strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": ");
324 if (close(fdindexn) == -1)
325 strerr_die4sys(100,FATAL,ERR_CLOSE,fnifn.s,": ");
326 if (rename(fnifn.s,fnif.s) == -1)
327 strerr_die4x(111,FATAL,ERR_MOVE,fnifn.s,": ");
328 }
329 }
330 fd = open_append("indexed");
331 if (fd == -1)
332 strerr_die4sys(100,FATAL,ERR_CREATE,dir,"/indexed: ");
333 close(fd);
334 close(fdlock);
335 _exit(0);
336 }
337