1 /*$Id: idxthread.c,v 1.35 1999/11/22 01:47:45 lindberg Exp $*/
2 /*$Name: ezmlm-idx-040 $*/
4 /* idxthread.c contains routines to build, from the ezmlm-idx subject index, */
5 /* a structure of unique subjects as well as a table of messages with */
6 /* pointers to the subject. This leads to information on message threads */
7 /* arranged chronologically within the thread, and with the threads */
8 /* arranged chronologically by the first message within the range. */
9 /* idx_mkthreads() will arrange the author list in a similar manner. This */
10 /* saves some space, and takes a little extra time. It's needed when */
11 /* generating an author index. */
13 #include <sys/types.h>
25 #include "readwrite.h"
30 static stralloc line
= {0}; /* primary input; also used to build index file names */
31 static stralloc authline
= {0}; /* second line of primary input */
/* filled in by init_dummy(); used as a stand-in subject or author string */
/* by the idx_* routines */
32 static stralloc dummyind
= {0};
/* buffered reader, and its buffer, for the index file being scanned */
34 static substdio ssindex
;
35 static char indexbuf
[1024];
/* scratch buffer receiving fmt_ulong() output */
37 static char strnum
[FMT_ULONG
];
40 /* if no data, these may be the entire table, so */
41 /* need to be static */
42 static subentry sdummy
;
43 static authentry adummy
;
/* Out-of-memory handler: calls strerr_die2x() with code 111, the caller's */
/* 'fatal' prefix and ERR_NOMEM. Used after failed allocations in this file. */
/* (strerr_die2x() is presumed not to return - confirm against strerr.h.) */
46 static void die_nomem(fatal
)
49 strerr_die2x(111,fatal
,ERR_NOMEM
);
54 /* NOTE: These do NOT prevent double locking */
/* lockup(): opens the file "lock" with open_append() into fdlock and then */
/* takes an exclusive lock on it with lock_ex(). Failures are reported via */
/* strerr_die2sys() with code 111 and the caller's 'fatal' prefix. */
/* NOTE(review): the test guarding the ERR_OPEN_LOCK call is not visible in */
/* this listing; presumably it checks the open_append() result - confirm. */
55 static void lockup(fatal
)
58 fdlock
= open_append("lock");
60 strerr_die2sys(111,fatal
,ERR_OPEN_LOCK
);
61 if (lock_ex(fdlock
) == -1) {
63 strerr_die2sys(111,fatal
,ERR_OBTAIN_LOCK
);
72 static void newsub(psubt
,subject
,sublen
,msg
,fatal
)
73 /* Initializes subentry pointed to by psubt, adds a '\0' to subject, */
74 /* allocates space and copies in subject, and puts a pointer to it in */
/* psubt->sub (the remainder of this comment is missing from the listing). */
/* Visible effects: higher/lower links cleared, firstmsg = msg, */
/* msginthread = 1, sub = freshly allocated copy, sublen = sublen. */
/* NOTE(review): the visible code allocates exactly sublen bytes and copies */
/* sublen bytes; no store of a terminating '\0' is visible here - confirm */
/* whether callers rely on one. */
82 register char *cpfrom
, *cpto
;
83 register unsigned int cpno
;
/* new node starts as a leaf of the subject tree */
85 psubt
->higher
= (subentry
*) 0;
86 psubt
->lower
= (subentry
*) 0;
87 psubt
->firstmsg
= msg
;
89 psubt
->msginthread
= 1;
90 if (!(psubt
->sub
= alloc ((sublen
) * sizeof(char))))
/* byte-by-byte copy; cpfrom/cpto/cpno are set up on lines not shown here */
95 while (cpno
--) *(cpto
++) = *(cpfrom
++);
96 psubt
->sublen
= sublen
;
99 static void newauth(pautht
,author
,authlen
,msg
,fatal
)
100 /* Allocates space for author of length authlen+1 adding a terminal '\0' */
101 /* and puts the pointer in pautht->auth. Analog to newsub(). */
/* NOTE(review): the visible alloc() call requests authlen bytes, not */
/* authlen+1, and no '\0' store is visible - comment and code disagree. */
102 authentry
*pautht
; /* entry for current message */
103 char *author
; /* pointer to author string (not sz!) */
104 unsigned int authlen
; /* length of author */
106 char *fatal
; /* sz */
109 register char *cpfrom
, *cpto
;
110 register unsigned int cpno
;
/* new node starts as a leaf of the author tree */
/* NOTE(review): these null pointers are cast to (subentry *) although the */
/* same fields are cleared with (authentry *) in idx_mkthreads(). */
112 pautht
->higher
= (subentry
*) 0;
113 pautht
->lower
= (subentry
*) 0;
114 pautht
->firstmsg
= msg
;
115 if (!(pautht
->auth
= alloc ((authlen
) * sizeof(char))))
/* byte-by-byte copy; cpfrom/cpto/cpno are set up on lines not shown here */
120 while (cpno
--) *(cpto
++) = *(cpfrom
++);
121 pautht
->authlen
= authlen
;
/* Prepares the file-scope stralloc 'dummyind': reserves HASHLEN + 1 bytes, */
/* walks the first HASHLEN positions (the loop body is not visible in this */
/* listing), sets the length to HASHLEN and appends a single space. */
/* Allocation failures go to die_nomem(fatal). */
124 static void init_dummy(fatal
)
129 if (!stralloc_ready(&dummyind
,HASHLEN
+ 1)) die_nomem(fatal
);
130 for (i
= 0; i
< HASHLEN
; i
++)
132 dummyind
.len
= HASHLEN
;
133 if (!stralloc_append(&dummyind
," ")) die_nomem(fatal
);
136 void idx_mkthreads(pmsgtable
,psubtable
,pauthtable
,pdatetable
,
137 msg_from
,msg_to
,msg_latest
,locked
,fatal
)
138 /* Threads messages msg_from -> msg_to into pmsgtable & psubtable. When */
139 /* reading the latest index file (containing msg_latest) it locks the */
140 /* directory, unless it is already locked (as in digest creation). */
141 /* msgtable has the subject number 1.. (0 if there is no subject match, */
142 /* which should happen only if the subject index is corrupt.) */
144 /* 19971107 Changed to deal with index files that are missing, or have */
145 /* missing entries, not necessarily reflecting missing archive files. */
146 /* This all to make ezmlm-get more robust to get maximal info out of */
147 /* corrupted archives. */
/* The tables handed back through the pointer arguments are allocated here: */
/* *pmsgtable gets one msgentry per message in the range, *psubtable and */
/* *pauthtable get range+1 entries each, and *pdatetable starts at DATENO */
/* entries and is grown by the same unit with alloc_re(). Index files are */
/* opened as "archive/<n>/index", where <n> is the message number / 100. */
/* NOTE(review): several source lines are absent from this listing (the */
/* embedded line numbers skip); comments cover the visible statements only. */
149 msgentry
**pmsgtable
; /* table of message<->subject */
150 subentry
**psubtable
; /* subject no, len, str char * */
151 authentry
**pauthtable
; /* author no, len, str char * */
152 dateentry
**pdatetable
; /* message per date */
153 unsigned long msg_from
; /* first message in range */
154 unsigned long msg_to
; /* last message in range */
155 unsigned long msg_latest
; /* latest message in archive (for locking) */
156 int locked
; /* if already locked */
157 char *fatal
; /* Program-specific */
160 unsigned long idxlatest
; /* need to lock for this (last) index file */
161 unsigned long msg
; /* current msg number */
162 unsigned long endmsg
; /* max msg in this idx file */
163 unsigned long tmpmsg
; /* index entry's msg number */
164 unsigned long idx
; /* current index file no */
165 unsigned long idxto
; /* index containing end of range */
166 unsigned long ulmrange
; /* total # of messages in range */
167 char *subject
; /* subject on line */
168 unsigned int sublen
; /* length of subject */
170 unsigned int authlen
;
171 unsigned int pos
,posa
;
172 unsigned long submax
; /* max subject num in subtable */
173 subentry
*psubnext
; /* points to next entry in subtable */
174 subentry
*psubt
; /* points to entry in subtable */
175 authentry
*pauthnext
; /* points to next entry in authtable */
176 authentry
*pautht
; /* points to entry in authtable */
177 int fd
; /* index file handle */
178 int flagmissingindex
; /* current index file is missing */
179 int flagauth
; /* read index entry has author info */
180 int hasauth
; /* current msg's entry has author info */
184 unsigned int datepos
,datemax
;
185 unsigned int datetablesize
,datetableunit
;
186 unsigned int lastdate
= 0;
187 unsigned int thisdate
;
188 register msgentry
*x
, *y
;
190 /* a few unnecessary sanity checks */
191 if (msg_to
> msg_latest
)
193 if (msg_to
< msg_from
)
194 strerr_die2x(100,fatal
,"Program error: bad range in idx_mkthreads");
195 ulmrange
= msg_to
- msg_from
+ 1;
196 if (!(*pmsgtable
= (msgentry
*) alloc(ulmrange
* sizeof(msgentry
))))
199 x
= y
+ ulmrange
; /* clear */
205 /* max entries - acceptable waste for now */
206 if (!(*psubtable
= (subentry
*) alloc((ulmrange
+1) * sizeof(subentry
))))
209 if (!(*pauthtable
= (authentry
*) alloc((ulmrange
+1) * sizeof(authentry
))))
/* the date table is sized in bytes and grows one unit (DATENO entries) */
/* at a time */
211 datetableunit
= DATENO
* sizeof(dateentry
);
212 datetablesize
= datetableunit
;
213 if (!(*pdatetable
= (dateentry
*) alloc(datetablesize
)))
216 datemax
= DATENO
- 2; /* entry 0 and end marker */
/* each index file covers 100 message numbers */
219 idxlatest
= msg_latest
/ 100;
220 idxto
= msg_to
/ 100;
222 psubnext
= *psubtable
; /* dummy node to get tree going. Basically, */
223 psubt
= &sdummy
; /* assure that subject > psubt-sub and that */
224 init_dummy(fatal
); /* below ok unless HASHLEN > 40 */
226 psubt
->sublen
= 40; /* there is something to hold psubt->higher */
227 psubt
->higher
= (subentry
*) 0;
228 psubt
->lower
= (subentry
*) 0;
229 pauthnext
= *pauthtable
;
/* the author dummy node shares the subject dummy's string and length */
231 pautht
->auth
= psubt
->sub
;
232 pautht
->authlen
= psubt
->sublen
;
233 pautht
->higher
= (authentry
*) 0;
234 pautht
->lower
= (authentry
*) 0;
235 for (idx
= msg_from
/ 100; idx
<= idxto
; idx
++) {
236 /* make index file name */
237 if (!stralloc_copys(&line
,"archive/")) die_nomem(fatal
);
238 if (!stralloc_catb(&line
,strnum
,fmt_ulong(strnum
,idx
))) die_nomem(fatal
);
239 if (!stralloc_cats(&line
,"/index")) die_nomem(fatal
);
240 if (!stralloc_0(&line
)) die_nomem(fatal
);
241 if (!locked
&& idx
== idxlatest
)
243 flagmissingindex
= 0;
244 fd
= open_read(line
.s
);
246 if (errno
== error_noent
) { /* this means the index is not here */
247 /* but the list is supposedly indexed */
248 flagmissingindex
= 1;
250 strerr_die4sys(111,fatal
,ERR_OPEN
,line
.s
,": ");
252 substdio_fdbuf(&ssindex
,read
,fd
,indexbuf
,sizeof(indexbuf
));
254 msg
= 100L * idx
; /* current msg# */
255 endmsg
= msg
+ 99L; /* max msg in this index */
256 if (!msg
) msg
= 1L; /* for start to make msg > tmpmsg */
257 tmpmsg
= 0L; /* msg number of read index line */
258 if (endmsg
> msg_to
) /* skip non-asked for subjects */
260 for (; msg
<= endmsg
; msg
++) {
/* read a further index entry only while the last one read is behind msg */
261 if (!flagmissingindex
&& (msg
> tmpmsg
)) {
263 if (getln(&ssindex
,&line
,&match
,'\n') == -1)
264 strerr_die3sys(111,fatal
,ERR_READ
,"index: ");
266 flagmissingindex
= 1;
268 pos
= scan_ulong(line
.s
,&tmpmsg
);
/* a ':' after the message number means a second (author) line follows */
269 if (line
.s
[pos
++] == ':') {
270 if (getln(&ssindex
,&authline
,&match
,'\n') == -1)
271 strerr_die3sys(111,fatal
,ERR_READ
,"index: ");
273 flagmissingindex
= 1;
281 if (msg
< msg_from
) /* Nothing before start of range */
284 subject
= line
.s
+ pos
;
285 sublen
= line
.len
- pos
;
286 if (sublen
<= HASHLEN
)
287 strerr_die2x(100,fatal
,ERR_BAD_INDEX
);
/* fall back to the placeholder subject (the guarding condition is on a */
/* line not present in this listing) */
290 subject
= dummyind
.s
;
291 sublen
= dummyind
.len
;
294 for(;;) { /* search among already known subjects */
/* only the first HASHLEN bytes take part in the comparison */
295 res
= str_diffn(psubt
->sub
,subject
,HASHLEN
);
298 psubt
= psubt
->higher
;
300 newsub(psubnext
,subject
,sublen
,msg
,fatal
);
301 psubt
->higher
= psubnext
;
306 } else if (res
> 0) {
308 psubt
= psubt
->lower
;
310 newsub(psubnext
,subject
,sublen
,msg
,fatal
);
311 psubt
->lower
= psubnext
;
317 psubt
->lastmsg
= msg
;
318 (psubt
->msginthread
)++; /* one more message in thread */
322 /* first subnum =1 (=0 is empty for thread) */
323 pmsgt
= *pmsgtable
+ msg
- msg_from
;
324 pmsgt
->subnum
= (unsigned int) (psubt
- *psubtable
+ 1);
325 pmsgt
->date
= lastdate
;
/* advance pos to the next space (or the terminator) in the author line */
328 while (authline
.s
[pos
] && authline
.s
[pos
] != ' ') pos
++;
/* NOTE(review): if the loop above stopped on the terminator, ++pos reads */
/* one byte past it - confirm the author line always contains a space. */
329 if (authline
.s
[++pos
]) {
330 thisdate
= date2yyyymm(authline
.s
+ pos
);
331 if (thisdate
) pmsgt
->date
= thisdate
;
/* a later date than any seen so far opens a new date table entry */
332 if (pmsgt
->date
> lastdate
) {
333 lastdate
= pmsgt
->date
;
334 if (datepos
>= datemax
) { /* more space */
336 if (!(*pdatetable
= (dateentry
*) alloc_re(*pdatetable
,
337 datetablesize
,datetablesize
+datetableunit
)))
340 (*pdatetable
)[datepos
].msg
= msg
; /* first msg this mo */
341 (*pdatetable
)[datepos
].date
= lastdate
;
344 posa
= byte_chr(authline
.s
,authline
.len
,';');
/* NOTE(review): the second test indexes with pos, not posa, whereas the */
/* matching test in idx_mkthread() uses the ';' position - likely meant */
/* authline.s[posa+1]; confirm before changing. */
345 if (authline
.len
> posa
+ HASHLEN
+ 1 && authline
.s
[pos
+1] != ' ') {
346 /* old: "; auth", new: ";hash auth" */
347 auth
= authline
.s
+ posa
+ 1;
348 authlen
= authline
.len
- posa
- 1;
351 authlen
= dummyind
.len
;
354 /* All right! Same procedure, but for author */
355 for (;;) { /* search among already known authors */
356 res
= str_diffn(pautht
->auth
,auth
,HASHLEN
);
359 pautht
= pautht
->higher
;
361 newauth(pauthnext
,auth
,authlen
,msg
,fatal
);
362 pautht
->higher
= pauthnext
;
367 } else if (res
> 0) {
369 pautht
= pautht
->lower
;
371 newauth(pauthnext
,auth
,authlen
,msg
,fatal
);
372 pautht
->lower
= pauthnext
;
380 } /* link from message to this author */
381 pmsgt
->authnum
= (unsigned int) (pautht
- *pauthtable
+ 1);
/* restart the next author search from the first table entry */
382 pautht
= *pauthtable
;
385 psubt
= *psubtable
; /* setup psubt. Done here rather than before */
386 /* the for loop, so that we can start off */
387 /* the dummy node. */
391 if (!locked
&& idx
== idxlatest
)
392 unlock(); /* 'locked' refers to locked before calling */
394 psubnext
->sub
= (char *) 0; /* end of table marker */
395 pauthnext
->auth
= (char *) 0; /* end of table marker */
/* closing date table entry: one past the last message and last date */
396 (*pdatetable
)[datepos
].msg
= msg_to
+ 1;
397 (*pdatetable
)[datepos
].date
= lastdate
+ 1;
401 void idx_mkthread(pmsgtable
,psubtable
,pauthtable
,msg_from
,msg_to
,msg_master
,
402 msg_latest
,locked
,fatal
)
403 /* Works like idx_mkthreads, except that it finds the subject for message */
404 /* msg_master, then identifies messages in the range that have the same */
405 /* subject. msgtable entries with subject 0 do not match, with '1' do match.*/
/* Allocations made here: *pmsgtable gets one msgentry per message in the */
/* range, *psubtable gets 2 entries and *pauthtable gets range+1 entries. */
/* The master subject is read from "archive/<msg_master / 100>/index"; the */
/* range is then scanned index file by index file. */
/* NOTE(review): several source lines are absent from this listing (the */
/* embedded line numbers skip); comments cover the visible statements only. */
407 msgentry
**pmsgtable
; /* pointer to table of message<->subject */
408 subentry
**psubtable
; /* ptr to tbl of subject no, len, str char * */
409 authentry
**pauthtable
;
410 unsigned long msg_from
; /* first message in range */
411 unsigned long msg_to
; /* last message in range */
412 unsigned long msg_latest
; /* latest message in archive (for locking) */
413 unsigned long msg_master
; /* master message for single thread, else 0*/
414 int locked
; /* if already locked */
415 char *fatal
; /* Program-specific */
418 unsigned long idxlatest
; /* need to lock for this (last) index file */
419 unsigned long idxto
; /* index for last msg in range */
420 unsigned long idx
; /* current index file no */
421 unsigned long msg
; /* index entry's msg number */
422 unsigned long ulmrange
; /* total # of messages in range */
423 subentry
*psubt
; /* points to last entry in subtable */
424 int ffound
; /* msg subject was found in subtable */
425 int flagauth
; /* there is author info */
426 int firstfound
= 1; /* = 1 until first message in thread found */
427 int res
; /* comparison result */
429 unsigned int authlen
;
430 authentry
*pauthnext
; /* points to next entry in authtable */
431 authentry
*pautht
; /* points to entry in authtable */
433 int fd
; /* index file handle */
436 register msgentry
*x
,*y
;
/* NOTE(review): ulmrange is unsigned long, so "<= 0" is true only when the */
/* subtraction wraps to exactly 0; msg_to < msg_from is otherwise not */
/* caught here. The message text names idx_mkthreads, not this function. */
438 if ((ulmrange
= msg_to
- msg_from
+1) <= 0)
439 strerr_die2x(100,fatal
,"Program error: bad range in idx_mkthreads");
440 if (!(*pmsgtable
= (msgentry
*) alloc(ulmrange
* sizeof(msgentry
))))
450 if (!(*psubtable
= (subentry
*) alloc(2 * sizeof(subentry
))))
453 if (!(*pauthtable
= (authentry
*) alloc((ulmrange
+ 1) * sizeof(authentry
))))
456 pauthnext
= *pauthtable
;
460 pautht
->authlen
= 21;
461 pautht
->higher
= (authentry
*) 0;
462 pautht
->lower
= (authentry
*) 0;
/* each index file covers 100 message numbers */
463 idxlatest
= msg_latest
/ 100;
464 idxto
= msg_to
/ 100;
465 idx
= msg_master
/ 100; /* index for master subject */
467 /* Get master subject */
468 if (!stralloc_copys(&line
,"archive/")) die_nomem(fatal
);
469 if (!stralloc_catb(&line
,strnum
,fmt_ulong(strnum
,idx
))) die_nomem(fatal
);
470 if (!stralloc_cats(&line
,"/index")) die_nomem(fatal
);
471 if (!stralloc_0(&line
)) die_nomem(fatal
);
473 if (!locked
&& idx
== idxlatest
)
475 fd
= open_read(line
.s
);
478 if (errno
!= error_noent
)
479 strerr_die4sys(111,fatal
,ERR_OPEN
,line
.s
,": ");
481 strerr_die2x(111,fatal
,ERR_NOINDEX
); /* temp - admin can fix! */
483 substdio_fdbuf(&ssindex
,read
,fd
,indexbuf
,sizeof(indexbuf
));
/* NOTE(review): this read failure is reported with ERR_OPEN, while the */
/* range scan further down uses ERR_READ for the same getln() failure. */
485 if (getln(&ssindex
,&line
,&match
,'\n') == -1)
486 strerr_die3sys(111,fatal
,ERR_OPEN
,"index: ");
489 pos
=scan_ulong(line
.s
,&msg
);
490 if (line
.s
[pos
++] == ':') { /* marker for author info */
495 if (msg
== msg_master
) {
496 newsub(psubt
,line
.s
+pos
,line
.len
-pos
,msg
,fatal
);
497 /* need to update msg later! */
501 if (flagauth
) { /* skip author line */
502 if (getln(&ssindex
,&line
,&match
,'\n') == -1)
503 strerr_die3sys(111,fatal
,ERR_OPEN
,"index: ");
510 if (!locked
&& idx
== idxlatest
)
513 strerr_die2x(100,fatal
,ERR_NOINDEX
);
/* second pass: scan every index file overlapping the requested range */
514 for (idx
= msg_from
/ 100; idx
<= idxto
; idx
++) {
515 /* make index file name */
516 if (!stralloc_copys(&line
,"archive/")) die_nomem(fatal
);
517 if (!stralloc_catb(&line
,strnum
,fmt_ulong(strnum
,idx
))) die_nomem(fatal
);
518 if (!stralloc_cats(&line
,"/index")) die_nomem(fatal
);
519 if (!stralloc_0(&line
)) die_nomem(fatal
);
520 if (!locked
&& idx
== idxlatest
)
522 fd
= open_read(line
.s
);
524 if (errno
!= error_noent
)
525 strerr_die4sys(111,fatal
,ERR_OPEN
,line
.s
,": ");
527 substdio_fdbuf(&ssindex
,read
,fd
,indexbuf
,sizeof(indexbuf
));
529 if (getln(&ssindex
,&line
,&match
,'\n') == -1)
530 strerr_die3sys(111,fatal
,ERR_READ
,"index: ");
533 pos
=scan_ulong(line
.s
,&msg
);
/* a ':' after the message number means a second (author) line follows */
534 if (line
.s
[pos
++] == ':') {
537 if (getln(&ssindex
,&authline
,&match
,'\n') == -1)
538 strerr_die3sys(111,fatal
,ERR_READ
,"index: ");
543 if (msg
< msg_from
) /* Nothing before start of range */
545 if (msg
> msg_to
) /* Don't do anything after range */
/* same thread when the first HASHLEN bytes equal the master subject's */
547 if (!str_diffn(psubt
->sub
,line
.s
+pos
,HASHLEN
)) {
548 pmsgt
= *pmsgtable
+ msg
- msg_from
;
549 if (firstfound
) { /* update to first message with this subj */
550 psubt
->firstmsg
= msg
;
553 psubt
->lastmsg
= msg
;
/* the date is parsed starting at the second byte of the author line */
557 pmsgt
->date
= date2yyyymm(authline
.s
+ 1);
558 pos
= byte_chr(authline
.s
,authline
.len
,';');
559 if (authline
.len
> pos
+ HASHLEN
+ 1 && authline
.s
[pos
+1] != ' ') {
560 /* old: "; auth", new: ";hash auth" */
561 auth
= authline
.s
+ pos
+ 1;
562 authlen
= authline
.len
- pos
- 1;
565 authlen
= dummyind
.len
;
567 for (;;) { /* search among already known authors */
568 res
= str_diffn(pautht
->auth
,auth
,HASHLEN
);
571 pautht
= pautht
->higher
;
573 newauth(pauthnext
,auth
,authlen
,msg
,fatal
);
574 pautht
->higher
= pauthnext
;
579 } else if (res
> 0) {
581 pautht
= pautht
->lower
;
583 newauth(pauthnext
,auth
,authlen
,msg
,fatal
);
584 pautht
->lower
= pauthnext
;
592 } /* link from message to this author */
593 pmsgt
->authnum
= (unsigned int) (pautht
- *pauthtable
+ 1);
/* restart the next author search from the first table entry */
594 pautht
= *pauthtable
;
601 if (!locked
&& idx
== idxlatest
)
605 psubt
->sub
= (char *) 0; /* end of table marker */
606 pauthnext
->auth
= (char *) 0; /* end of table marker */
609 void idx_mklist(pmsgtable
,psubtable
,pauthtable
,msg_from
,msg_to
,fatal
)
610 /* Like mkthreads, except that it works without a subject index. The result */
611 /* is just a dummy subject and a sequential list of messages. This to allow */
612 /* use of the same routines when creating digest from lists that have no */
613 /* subject index (for whatever reason). */
/* Allocations made here: *pmsgtable gets one msgentry per message in the */
/* range, *psubtable gets 2 entries and *pauthtable gets a single entry */
/* whose auth pointer is 0. The one subject is created from dummyind with */
/* firstmsg = msg_from and lastmsg = msg_to. */
/* NOTE(review): several source lines are absent from this listing (the */
/* embedded line numbers skip); comments cover the visible statements only. */
614 msgentry
**pmsgtable
; /* pointer to table of message<->subject */
615 subentry
**psubtable
; /* ptr to tbl of subject no, len, str char * */
616 authentry
**pauthtable
;
617 unsigned long msg_from
; /* first message in range */
618 unsigned long msg_to
; /* last message in range */
619 char *fatal
; /* Program-specific */
621 unsigned long ulmrange
;
622 register msgentry
*x
,*y
;
/* NOTE(review): ulmrange is unsigned long, so "<= 0" is true only when the */
/* subtraction wraps to exactly 0; msg_to < msg_from is otherwise not */
/* caught here. The message text names idx_mkthreads, not this function. */
626 if ((ulmrange
= msg_to
- msg_from
+1) <= 0)
627 strerr_die2x(111,fatal
,"bad range in idx_mkthreads :");
629 if (!(*pmsgtable
= (msgentry
*) alloc(ulmrange
* sizeof(msgentry
))))
640 if (!(*psubtable
= (subentry
*) alloc(2 * sizeof(subentry
))))
/* the single placeholder subject spans the whole range */
643 newsub(psubt
,dummyind
.s
,dummyind
.len
,msg_from
,fatal
);
644 psubt
->lastmsg
= msg_to
;
646 psubt
->sub
= (char *) 0;
647 if (!(*pauthtable
= (authentry
*) alloc(sizeof(authentry
))))
648 die_nomem(fatal
); /* nodata. Avoid dangling ptr. */
649 pautht
= *pauthtable
;
650 pautht
->auth
= 0; /* tells app that there are no author data */
651 pautht
->higher
= (authentry
*) 0;
652 pautht
->lower
= (authentry
*) 0;
655 void idx_destroythread(msgtable
,subtable
,authtable
)
656 /* Frees space allocated by idxthread routines. This is needed only if */
657 /* one does several threadings in one program run. Otherwise, exit() */
658 /* should free all allocated memory, which will be faster. */
/* Visible steps: the sub/auth strings reached from subtable and authtable */
/* are released with alloc_free(), then the three tables themselves. The */
/* author walk stops at the entry whose auth pointer is 0. */
/* NOTE(review): the loop headers and pointer increments are only partly */
/* visible in this listing. */
659 msgentry
*msgtable
; subentry
*subtable
; authentry
*authtable
;
664 psubt
= subtable
; /* free subjects */
666 alloc_free(psubt
->sub
);
670 pautht
= authtable
; /* free authors */
671 while(pautht
->auth
) {
672 alloc_free(pautht
->auth
);
676 alloc_free(subtable
); /* free subtable */
677 alloc_free(authtable
); /* free authtable */
678 alloc_free(msgtable
); /* free msgtable */
/* NOTE(review): the three arguments are passed by value, so the */
/* assignments below clear only the local copies; the caller's pointers */
/* still hold the freed addresses. */
679 subtable
= (subentry
*) 0; /* kill pointers */
680 authtable
= (authentry
*) 0;
681 msgtable
= (msgentry
*) 0;