| 1 | /* |
| 2 | * trie.h: functions to build and search a trie-like structure |
| 3 | * mapping pathnames to file records. |
| 4 | */ |
| 5 | |
| 6 | #include <sys/types.h> |
| 7 | |
| 8 | /* |
| 9 | * An entry in the trie file describing an actual file. |
| 10 | */ |
| 11 | struct trie_file { |
| 12 | unsigned long long size; |
| 13 | unsigned long long atime; |
| 14 | }; |
| 15 | |
| 16 | /* ---------------------------------------------------------------------- |
| 17 | * Functions which can be passed a list of pathnames and file data |
| 18 | * in strict order, and will build up a trie and write it out to a |
| 19 | * file. |
| 20 | */ |
| 21 | |
| 22 | typedef struct triebuild triebuild; |
| 23 | |
| 24 | /* |
| 25 | * Create a new trie-building context given a writable file |
| 26 | * descriptor, which should point to the start of an empty file. |
| 27 | */ |
| 28 | triebuild *triebuild_new(int fd); |
| 29 | |
| 30 | /* |
| 31 | * Add a trie_file record to the trie. The pathnames should appear |
| 32 | * in trie collation order (i.e. strict ASCII sorting order except |
| 33 | * that / is moved so that it sorts before any other non-NUL |
| 34 | * character), on penalty of assertion failure. |
| 35 | */ |
| 36 | void triebuild_add(triebuild *tb, const char *pathname, |
| 37 | const struct trie_file *file); |
| 38 | |
| 39 | /* |
| 40 | * Put the finishing touches to the contents of the output file |
| 41 | * once all the trie_file records are present. Returns the total |
| 42 | * number of elements in the trie. |
| 43 | */ |
| 44 | int triebuild_finish(triebuild *tb); |
| 45 | |
| 46 | /* |
| 47 | * Free the context. (Does not close the file, but may leave the |
| 48 | * file pointer in an arbitrary place.) |
| 49 | */ |
| 50 | void triebuild_free(triebuild *tb); |
| 51 | |
| 52 | /* ---------------------------------------------------------------------- |
| 53 | * Anomalous function which modifies a trie after it's memory-mapped. |
| 54 | */ |
| 55 | |
| 56 | /* |
| 57 | * Invent new fake atimes for each directory in the trie, by |
| 58 | * taking the maximum (latest) of the directory's previously |
| 59 | * stored atime and the atimes of everything below it. |
| 60 | */ |
| 61 | void trie_fake_dir_atimes(void *t); |
| 62 | |
| 63 | /* ---------------------------------------------------------------------- |
| 64 | * Functions to query a trie given a pointer to the start of the |
| 65 | * memory-mapped file. |
| 66 | */ |
| 67 | |
| 68 | /* |
| 69 | * Return the path separator character in use in the trie. |
| 70 | */ |
| 71 | char trie_pathsep(const void *t); |
| 72 | |
| 73 | /* |
| 74 | * Return the length of the longest pathname stored in the trie, |
| 75 | * including its trailing NUL. |
| 76 | */ |
| 77 | size_t trie_maxpathlen(const void *t); |
| 78 | |
| 79 | /* |
| 80 | * Determine the total number of entries in the trie which sort |
| 81 | * strictly before the given pathname (irrespective of whether the |
| 82 | * pathname actually exists in the trie). |
| 83 | */ |
| 84 | unsigned long trie_before(const void *t, const char *pathname); |
| 85 | |
| 86 | /* |
| 87 | * Return the pathname for the nth entry in the trie, written into |
| 88 | * the supplied buffer (which must be large enough, i.e. at least |
| 89 | * trie_maxpathlen(t) bytes). |
| 90 | */ |
| 91 | void trie_getpath(const void *t, unsigned long n, char *buf); |
| 92 | |
| 93 | /* |
| 94 | * Return the trie_file * for the nth entry in the trie. |
| 95 | */ |
| 96 | const struct trie_file *trie_getfile(const void *t, unsigned long n); |
| 97 | |
| 98 | /* |
| 99 | * Return the total number of entries in the whole trie. |
| 100 | */ |
| 101 | unsigned long trie_count(const void *t); |
| 102 | |
| 103 | /* |
| 104 | * Enumerate all the trie_file records in the trie, in sequence, |
| 105 | * and return pointers to their trie_file structures. Returns NULL |
| 106 | * at end of list, naturally. |
| 107 | * |
| 108 | * Optionally returns the pathnames as well: if "buf" is non-NULL |
| 109 | * then it is expected to point to a buffer large enough to store |
| 110 | * all the pathnames in the trie (i.e. at least trie_maxpathlen(t) |
| 111 | * bytes). This buffer is also expected to remain unchanged |
| 112 | * between calls to triewalk_next(), or else the returned |
| 113 | * pathnames will be corrupted. |
| 114 | */ |
| 115 | typedef struct triewalk triewalk; |
| 116 | triewalk *triewalk_new(const void *t); |
| 117 | const struct trie_file *triewalk_next(triewalk *tw, char *buf); |
| 118 | void triewalk_rebase(triewalk *tw, const void *t); |
| 119 | void triewalk_free(triewalk *tw); |
| 120 | |
| 121 | /* ---------------------------------------------------------------------- |
| 122 | * The trie file also contains an index managed by index.h, so we |
| 123 | * must be able to ask about that too. |
| 124 | */ |
| 125 | void trie_set_index_offset(void *t, off_t ptr); |
| 126 | off_t trie_get_index_offset(const void *t); |
| 127 | |
| 128 | /* ---------------------------------------------------------------------- |
| 129 | * Utility functions not directly involved with the trie. |
| 130 | */ |
| 131 | |
| 132 | /* |
| 133 | * Given a pathname in a buffer, adjust the pathname in place so |
| 134 | * that it points at a string which, when passed to trie_before, |
| 135 | * will reliably return the index of the thing that comes after |
| 136 | * that pathname and all its descendants. Usually this is done by |
| 137 | * suffixing ^A (since foo^A is guaranteeably the first thing that |
| 138 | * sorts after foo and foo/stuff); however, if the pathname |
| 139 | * actually ends in a path separator (e.g. if it's just "/"), that |
| 140 | * must be stripped off first. |
| 141 | */ |
| 142 | void make_successor(char *pathbuf); |