Initial commit of a basically working but severely unpolished
[sgt/agedu] / trie.h
1 /*
2 * trie.h: functions to build and search a trie-like structure
3 * mapping pathnames to file records.
4 */
5
6 #include <sys/types.h>
7
8 /*
9 * An entry in the trie file describing an actual file.
10 */
11 struct trie_file {
12 unsigned long long blocks;
13 unsigned long long atime;
14 };
15
16 /* ----------------------------------------------------------------------
17 * Functions which can be passed a list of pathnames and file data
18 * in strict order, and will build up a trie and write it out to a
19 * file.
20 */
21
22 typedef struct triebuild triebuild;
23
24 /*
25 * Create a new trie-building context given a writable file
26 * descriptor, which should point to the start of an empty file.
27 */
28 triebuild *triebuild_new(int fd);
29
30 /*
31 * Add a trie_file record to the trie. The pathnames should appear
32 * in trie collation order (i.e. strict ASCII sorting order except
33 * that / is moved so that it sorts before any other non-NUL
34 * character), on penalty of assertion failure.
35 */
36 void triebuild_add(triebuild *tb, const char *pathname,
37 const struct trie_file *file);
38
39 /*
40 * Put the finishing touches to the contents of the output file
41 * once all the trie_file records are present. Returns the total
42 * number of elements in the trie.
43 */
44 int triebuild_finish(triebuild *tb);
45
46 /*
47 * Free the context. (Does not close the file, but may leave the
48 * file pointer in an arbitrary place.)
49 */
50 void triebuild_free(triebuild *tb);
51
52 /* ----------------------------------------------------------------------
53 * Functions to query a trie given a pointer to the start of the
54 * memory-mapped file.
55 */
56
57 /*
58 * Return the length of the longest pathname stored in the trie,
59 * including its trailing NUL.
60 */
61 size_t trie_maxpathlen(const void *t);
62
63 /*
64 * Determine the total number of entries in the trie which sort
65 * strictly before the given pathname (irrespective of whether the
66 * pathname actually exists in the trie).
67 */
68 unsigned long trie_before(const void *t, const char *pathname);
69
70 /*
71 * Return the pathname for the nth entry in the trie, written into
72 * the supplied buffer (which must be large enough, i.e. at least
73 * trie_maxpathlen(t) bytes).
74 */
75 void trie_getpath(const void *t, unsigned long n, char *buf);
76
77 /*
78 * Return the total number of entries in the whole trie.
79 */
80 unsigned long trie_count(const void *t);
81
82 /*
83 * Enumerate all the trie_file records in the trie, in sequence,
84 * and return pointers to their trie_file structures. Returns NULL
85 * at end of list, naturally.
86 *
87 * Optionally returns the pathnames as well: if "buf" is non-NULL
88 * then it is expected to point to a buffer large enough to store
89 * all the pathnames in the trie (i.e. at least trie_maxpathlen(t)
90 * bytes). This buffer is also expected to remain unchanged
91 * between calls to triewalk_next(), or else the returned
92 * pathnames will be corrupted.
93 */
94 typedef struct triewalk triewalk;
95 triewalk *triewalk_new(const void *t);
96 const struct trie_file *triewalk_next(triewalk *tw, char *buf);
97 void triewalk_free(triewalk *tw);
98
99 /* ----------------------------------------------------------------------
100 * The trie file also contains an index managed by index.h, so we
101 * must be able to ask about that too.
102 */
103 void trie_set_index_offset(void *t, off_t ptr);
104 off_t trie_get_index_offset(const void *t);