Commit | Line | Data |
---|---|---|
f8beb284 MW |
1 | /*$Id: ezmlm-idx.c,v 1.29 1999/10/29 02:49:14 lindberg Exp $*/ |
2 | /*$Name: ezmlm-idx-040 $*/ | |
3 | ||
4 | #include <sys/types.h> | |
5 | #include <sys/stat.h> | |
6 | #include "stralloc.h" | |
7 | #include "subfd.h" | |
8 | #include "strerr.h" | |
9 | #include "error.h" | |
10 | #include "lock.h" | |
11 | #include "slurp.h" | |
12 | #include "open.h" | |
13 | #include "getln.h" | |
14 | #include "sgetopt.h" | |
15 | #include "case.h" | |
16 | #include "scan.h" | |
17 | #include "str.h" | |
18 | #include "fmt.h" | |
19 | #include "readwrite.h" | |
20 | #include "exit.h" | |
21 | #include "substdio.h" | |
22 | #include "idx.h" | |
23 | #include "mime.h" | |
24 | #include "errtxt.h" | |
25 | #include "getconf.h" | |
26 | #include "makehash.h" | |
27 | ||
28 | #define FATAL "ezmlm-idx: fatal: " | |
29 | ||
30 | char strnum[FMT_ULONG]; | |
31 | char hash[HASHLEN]; | |
32 | ||
33 | stralloc fnadir = {0}; | |
34 | stralloc fnif = {0}; | |
35 | stralloc fnifn = {0}; | |
36 | stralloc fnaf = {0}; | |
37 | ||
38 | stralloc line = {0}; | |
39 | stralloc lines = {0}; | |
40 | stralloc dummy = {0}; | |
41 | ||
42 | int fdindexn; | |
43 | int fdlock; | |
44 | int fd; | |
45 | int flagdate = 0; /* use 'Received:' header by default, =1 -> 'Date:' */ | |
46 | ||
47 | /* for reading index and in ezmlm-idx for reading message */ | |
48 | static substdio ssin; | |
49 | static char inbuf[1024]; | |
50 | ||
51 | substdio ssindex; | |
52 | char indexbuf[1024]; | |
53 | ||
54 | struct stat st; | |
55 | ||
56 | stralloc subject = {0}; | |
57 | stralloc author = {0}; | |
58 | stralloc authmail = {0}; | |
59 | stralloc received = {0}; | |
60 | stralloc prefix = {0}; | |
61 | stralloc charset = {0}; | |
62 | ||
63 | struct strerr index_err; | |
64 | ||
65 | stralloc num = {0}; | |
66 | ||
67 | char buf0[256]; | |
68 | substdio ss0 = SUBSTDIO_FDBUF(read,0,buf0,sizeof(buf0)); | |
69 | ||
/* Print the command synopsis through strerr_die1x() with code 100. */
void die_usage(void)
{
  strerr_die1x(100,
               "ezmlm-idx: usage: ezmlm-idx [-dDF] [-f msg] dir");
}
75 | ||
76 | void die_nomem() | |
77 | { | |
78 | strerr_die2x(100,FATAL,ERR_NOMEM); | |
79 | } | |
80 | ||
81 | int idx_get_trimsubject() | |
82 | ||
83 | /* reads an open message from 'fd', extracts the subject (if any), and */ | |
84 | /* returns the subject in 'sub', the author in 'author', and the received */ | |
85 | /* rfc822 date to 'received'. 'fatal' is a program-specific error string. */ | |
86 | /* returns: 0 - no reply no prefix */ | |
87 | /* 1 - reply no prefix */ | |
88 | /* 2 - prefix no reply */ | |
89 | /* 3 - reply & prefix */ | |
90 | /* No terminal '\n' in any of the strallocs! */ | |
91 | { | |
92 | char *cp; | |
93 | int foundsubject = 0; | |
94 | int issubject = 0; | |
95 | int isfrom = 0; | |
96 | int foundreceived = 0; | |
97 | int foundfrom = 0; | |
98 | int match; | |
99 | int r; | |
100 | unsigned int pos,pos1; | |
101 | ||
102 | substdio_fdbuf(&ssin,read,fd,inbuf,sizeof(inbuf)); | |
103 | for (;;) { | |
104 | if (getln(&ssin,&line,&match,'\n') == -1) | |
105 | strerr_die2x(111,FATAL,ERR_READ_INPUT); | |
106 | if (match) { | |
107 | if (line.len == 1) | |
108 | break; | |
109 | if (*line.s == ' ' || *line.s == '\t') { | |
110 | /* continuation */ | |
111 | if (issubject) { | |
112 | if (!stralloc_cat(&subject,&line)) die_nomem(); | |
113 | } else if (isfrom) | |
114 | if (!stralloc_cat(&author,&line)) die_nomem(); | |
115 | } else { | |
116 | issubject = 0; | |
117 | isfrom = 0; | |
118 | if (!foundsubject && case_startb(line.s,line.len,"Subject:")) { | |
119 | if (!stralloc_copyb(&subject,line.s+8,line.len-8)) die_nomem(); | |
120 | foundsubject = 1; | |
121 | issubject = 1; | |
122 | } else if (!foundfrom && case_startb(line.s,line.len,"From:")) { | |
123 | if (!stralloc_copyb(&author,line.s+5,line.len-5)) die_nomem(); | |
124 | foundfrom = 1; | |
125 | isfrom = 1; | |
126 | } else if (!flagdate && !foundreceived && | |
127 | case_startb(line.s,line.len,"Received:")) { | |
128 | pos = byte_chr(line.s,line.len,';'); | |
129 | if (pos != line.len) | |
130 | if (!stralloc_copyb(&received,line.s+pos+2,line.len - pos - 3)) | |
131 | die_nomem(); | |
132 | foundreceived = 1; | |
133 | } else if (flagdate && !foundreceived && | |
134 | case_startb(line.s,line.len,"Date:")) { | |
135 | if (line.len < 22) continue; /* illegal */ | |
136 | pos = 6 + byte_chr(line.s+6,line.len-6,','); | |
137 | if (pos == line.len) | |
138 | pos = 5; | |
139 | ++pos; | |
140 | while (line.s[pos] == ' ' || line.s[pos] == '\t') ++pos; /* dd */ | |
141 | pos1 = pos + 3; | |
142 | while (++pos1 < line.len && line.s[pos1] != ' '); /* mo */ | |
143 | ++pos1; | |
144 | if (!stralloc_copyb(&received,line.s+pos,pos1 - pos)) | |
145 | die_nomem(); /* '01 Jun ' */ | |
146 | if (pos1 + 2 < line.len) { | |
147 | if (line.s[pos1 + 2] == ' ') { /* 2-digit */ | |
148 | if (line.s[pos1] >= '7') { /* >= 70 */ | |
149 | if (!stralloc_cats(&received,"19")) die_nomem(); | |
150 | } else if (!stralloc_cats(&received,"20")) die_nomem(); | |
151 | pos = pos1 + 3; /* 2 digit */ | |
152 | } else | |
153 | pos = pos1 + 5; /* 4 digit */ | |
154 | if (pos < line.len) { | |
155 | pos += byte_chr(line.s+pos,line.len-pos,' '); /* after time */ | |
156 | if (pos < line.len) { | |
157 | ++pos; /* zone */ | |
158 | while (line.s[pos] != ' ' && line.s[pos] != '\n') ++pos; | |
159 | } else | |
160 | pos = line.len - 1; /* no zone. Illegal; better than 0 */ | |
161 | if (!stralloc_catb(&received,line.s+pos1,pos - pos1)) | |
162 | die_nomem(); | |
163 | foundreceived = 1; | |
164 | continue; | |
165 | } | |
166 | } | |
167 | received.len = 0; /* bad format - scrap */ | |
168 | } | |
169 | } | |
170 | } else | |
171 | break; | |
172 | } | |
173 | ||
174 | if (foundsubject) { | |
175 | concatHDR(subject.s,subject.len,&lines,FATAL); /* make 1 line */ | |
176 | decodeHDR(lines.s,lines.len,&line,charset.s,FATAL); /* decode mime */ | |
177 | r= unfoldHDR(line.s,line.len,&subject,charset.s,&prefix,1,FATAL); | |
178 | /* trim mime */ | |
179 | } | |
180 | else { | |
181 | r = 0; | |
182 | subject.len = 0; | |
183 | } | |
184 | return r; | |
185 | } | |
186 | ||
187 | int main(argc,argv) | |
188 | int argc; | |
189 | char **argv; | |
190 | { | |
191 | char *dir,*cp; | |
192 | unsigned long msgnum = 0L; | |
193 | unsigned long msgmax; | |
194 | int opt,r; | |
195 | ||
196 | while ((opt = getopt(argc,argv,"dDf:FvV")) != opteof) | |
197 | switch (opt) { | |
198 | case 'd': flagdate = 1; break; | |
199 | case 'D': flagdate = 0; break; | |
200 | case 'f': if (optarg) (void) scan_ulong(optarg,&msgnum); break; | |
201 | case 'F': msgnum = 0L; | |
202 | case 'v': | |
203 | case 'V': strerr_die2x(0,"ezmlm-archive version: ",EZIDX_VERSION); | |
204 | default: die_usage(); | |
205 | } | |
206 | dir = argv[optind]; | |
207 | if (!dir) die_usage(); | |
208 | ||
209 | if (chdir(dir) == -1) | |
210 | strerr_die4sys(100,FATAL,ERR_SWITCH,dir,": "); | |
211 | ||
212 | (void) umask(022); | |
213 | sig_pipeignore(); | |
214 | /* obtain lock to write index files */ | |
215 | fdlock = open_append("lock"); | |
216 | if (fdlock == -1) | |
217 | strerr_die2sys(100,FATAL,ERR_OPEN_LOCK); | |
218 | if (lock_ex(fdlock) == -1) | |
219 | strerr_die2sys(100,FATAL,ERR_OBTAIN_LOCK); | |
220 | ||
221 | getconf_line(&charset,"charset",0,FATAL,dir); | |
222 | if (!stralloc_0(&charset)) die_nomem(); | |
223 | ||
224 | getconf_line(&prefix,"prefix",0,FATAL,dir); | |
225 | /* support rfc2047-encoded prefix */ | |
226 | decodeHDR(prefix.s,prefix.len,&line,charset.s,FATAL); | |
227 | unfoldHDR(line.s,line.len,&prefix,charset.s,&dummy,0,FATAL); | |
228 | /* need only decoded one */ | |
229 | ||
230 | /* Get message number */ | |
231 | switch(slurp("num",&num,32)) { | |
232 | case -1: | |
233 | strerr_die4sys(100,FATAL,ERR_READ,dir,"/num: "); | |
234 | case 0: | |
235 | strerr_die4x(100,FATAL,dir,"/num",ERR_NOEXIST); | |
236 | } | |
237 | if (!stralloc_0(&num)) die_nomem(); | |
238 | scan_ulong(num.s,&msgmax); | |
239 | if (msgnum > msgmax) _exit(0); | |
240 | if (msgnum) { | |
241 | msgnum = (msgnum / 100) * 100 - 1; | |
242 | } | |
243 | while (++msgnum <= msgmax) { | |
244 | if (msgnum == 1 || !(msgnum % 100)) { | |
245 | if (!stralloc_copys(&fnadir,"archive/")) die_nomem(); | |
246 | if (!stralloc_catb(&fnadir,strnum,fmt_ulong(strnum,msgnum / 100))) | |
247 | die_nomem(); | |
248 | if (!stralloc_copy(&fnifn,&fnadir)) die_nomem(); | |
249 | if (!stralloc_copy(&fnif,&fnadir)) die_nomem(); | |
250 | if (!stralloc_cats(&fnif,"/index")) die_nomem(); | |
251 | if (!stralloc_cats(&fnifn,"/indexn")) die_nomem(); | |
252 | if (!stralloc_0(&fnadir)) die_nomem(); | |
253 | if (!stralloc_0(&fnifn)) die_nomem(); | |
254 | if (!stralloc_0(&fnif)) die_nomem(); | |
255 | ||
256 | /* May not exist, so be nice and make it */ | |
257 | if (mkdir(fnadir.s,0755) == -1) | |
258 | if (errno != error_exist) | |
259 | strerr_die4sys(100,FATAL,ERR_CREATE,fnadir.s,": "); | |
260 | ||
261 | /* Open index */ | |
262 | fdindexn = open_trunc(fnifn.s); | |
263 | if (fdindexn == -1) | |
264 | strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s,": "); | |
265 | ||
266 | /* set up buffers for index */ | |
267 | substdio_fdbuf(&ssindex,write,fdindexn,indexbuf,sizeof(indexbuf)); | |
268 | ||
269 | /* Get subject without the 'Subject: ' */ | |
270 | /* make sure there is one */ | |
271 | } | |
272 | ||
273 | if (!stralloc_copys(&fnaf,fnadir.s)) die_nomem(); | |
274 | if (!stralloc_cats(&fnaf,"/")) die_nomem(); | |
275 | if (!stralloc_catb(&fnaf,strnum, | |
276 | fmt_uint0(strnum,(unsigned int) (msgnum % 100),2))) die_nomem(); | |
277 | if (!stralloc_0(&fnaf)) die_nomem(); | |
278 | fd = open_read(fnaf.s); | |
279 | if (fd == -1) { | |
280 | if (errno != error_noent) | |
281 | strerr_die4sys(100,FATAL,ERR_READ,fnaf.s,": "); | |
282 | } else if (fstat(fd,&st) == -1 || (!(st.st_mode & 0100))) | |
283 | close(fd); | |
284 | else { | |
285 | subject.len = 0; /* clear in case they're missing in msg */ | |
286 | author.len = 0; | |
287 | received.len = 0; | |
288 | r = idx_get_trimsubject(); | |
289 | close(fd); | |
290 | if (!stralloc_copyb(&line,strnum,fmt_ulong(strnum,msgnum))) die_nomem(); | |
291 | if (!stralloc_cats(&line,": ")) die_nomem(); | |
292 | makehash(subject.s,subject.len,hash); | |
293 | if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem(); | |
294 | if (!stralloc_cats(&line," ")) die_nomem(); | |
295 | if (r & 1) /* reply */ | |
296 | if (!stralloc_cats(&line,"Re: ")) die_nomem(); | |
297 | if (!stralloc_cat(&line,&subject)) die_nomem(); | |
298 | if (!stralloc_cats(&line,"\n\t")) die_nomem(); | |
299 | if (!stralloc_cat(&line,&received)) die_nomem(); | |
300 | if (!stralloc_cats(&line,";")) die_nomem(); | |
301 | ||
302 | concatHDR(author.s,author.len,&lines,FATAL); | |
303 | mkauthhash(lines.s,lines.len,hash); | |
304 | if (!stralloc_catb(&line,hash,HASHLEN)) die_nomem(); | |
305 | ||
306 | decodeHDR(cp,author_name(&cp,lines.s,lines.len),&author,charset.s,FATAL); | |
307 | (void) unfoldHDR(author.s,author.len,&lines,charset.s,&prefix,0,FATAL); | |
308 | ||
309 | if (!stralloc_cats(&line," ")) die_nomem(); | |
310 | if (!stralloc_cat(&line,&lines)) die_nomem(); | |
311 | if (!stralloc_cats(&line,"\n")) die_nomem(); | |
312 | if (substdio_put(&ssindex,line.s,line.len) == -1) | |
313 | strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": "); | |
314 | } | |
315 | ||
316 | if (!((msgnum + 1) % 100) || | |
317 | (msgnum == msgmax)) { /* last in this set */ | |
318 | if (substdio_flush(&ssindex) == -1) | |
319 | strerr_die4sys(100,FATAL,ERR_FLUSH,fnifn.s, ": "); | |
320 | if (fsync(fdindexn) == -1) | |
321 | strerr_die4sys(100,FATAL,ERR_SYNC,fnifn.s, ": "); | |
322 | if (fchmod(fdindexn,MODE_ARCHIVE | 0700) == -1) | |
323 | strerr_die4sys(100,FATAL,ERR_WRITE,fnifn.s, ": "); | |
324 | if (close(fdindexn) == -1) | |
325 | strerr_die4sys(100,FATAL,ERR_CLOSE,fnifn.s,": "); | |
326 | if (rename(fnifn.s,fnif.s) == -1) | |
327 | strerr_die4x(111,FATAL,ERR_MOVE,fnifn.s,": "); | |
328 | } | |
329 | } | |
330 | fd = open_append("indexed"); | |
331 | if (fd == -1) | |
332 | strerr_die4sys(100,FATAL,ERR_CREATE,dir,"/indexed: "); | |
333 | close(fd); | |
334 | close(fdlock); | |
335 | _exit(0); | |
336 | } | |
337 |