Change the magic number used to introduce a trie file, so that instead
[sgt/agedu] / du.c
CommitLineData
70322ae3 1/*
2 * du.c: implementation of du.h.
3 */
4
50e82fdc 5#include "agedu.h"
70322ae3 6#include "du.h"
995db599 7#include "alloc.h"
70322ae3 8
b5af45d3 9#if !defined __linux__ || !defined O_NOATIME || defined HAVE_FDOPENDIR
704fafa3 10
50e82fdc 11#ifdef HAVE_DIRENT_H
12# include <dirent.h>
13#endif
14#ifdef HAVE_NDIR_H
15# include <ndir.h>
16#endif
17#ifdef HAVE_SYS_DIR_H
18# include <sys/dir.h>
19#endif
20#ifdef HAVE_SYS_NDIR_H
21# include <sys/ndir.h>
22#endif
23
/*
 * Thin wrappers around the POSIX opendir(), readdir() and closedir()
 * calls. Keeping them behind our own names lets the file substitute
 * a different implementation in special circumstances.
 */

/* In this variant a directory handle is simply a POSIX DIR pointer. */
typedef DIR *dirhandle;
704fafa3 31
32int open_dir(const char *path, dirhandle *dh)
33{
9c6e61f2 34#if defined O_NOATIME && defined HAVE_FDOPENDIR
35
36 /*
37 * On Linux, we have the O_NOATIME flag. This means we can
38 * read the contents of directories without affecting their
39 * atimes, which enables us to at least try to include them in
40 * the age display rather than exempting them.
41 *
42 * Unfortunately, opendir() doesn't let us open a directory
43 * with O_NOATIME. So instead, we have to open the directory
44 * with vanilla open(), and then use fdopendir() to translate
45 * the fd into a POSIX dir handle.
46 */
47 int fd;
48
49 fd = open(path, O_RDONLY | O_NONBLOCK | O_NOCTTY | O_LARGEFILE |
50 O_NOATIME | O_DIRECTORY);
51 if (fd < 0) {
704fafa3 52 /*
53 * Opening a file with O_NOATIME is not unconditionally
54 * permitted by the Linux kernel. As far as I can tell,
55 * it's permitted only for files on which the user would
56 * have been able to call utime(2): in other words, files
57 * for which the user could have deliberately set the
58 * atime back to its original value after finishing with
59 * it. Hence, O_NOATIME has no security implications; it's
60 * simply a cleaner, faster and more race-condition-free
61 * alternative to stat(), a normal open(), and a utimes()
62 * when finished.
63 *
64 * The upshot of all of which, for these purposes, is that
65 * we must be prepared to try again without O_NOATIME if
66 * we receive EPERM.
67 */
68 if (errno == EPERM)
9c6e61f2 69 fd = open(path, O_RDONLY | O_NONBLOCK | O_NOCTTY |
70 O_LARGEFILE | O_DIRECTORY);
71 if (fd < 0)
72 return -1;
73 }
74
75 *dh = fdopendir(fd);
76#else
77 *dh = opendir(path);
78#endif
79
80 if (!*dh)
81 return -1;
82 return 0;
83}
84
85const char *read_dir(dirhandle *dh)
86{
87 struct dirent *de = readdir(*dh);
88 return de ? de->d_name : NULL;
89}
90
91void close_dir(dirhandle *dh)
92{
93 closedir(*dh);
94}
95
96#else /* defined __linux__ && !defined HAVE_FDOPENDIR */
97
98/*
99 * Earlier versions of glibc do not have fdopendir(). Therefore,
100 * if we are on Linux and still wish to make use of O_NOATIME, we
101 * have no option but to talk directly to the kernel system call
102 * interface which underlies the POSIX opendir/readdir machinery.
103 */
104
105#define __KERNEL__
9c6e61f2 106#include <linux/types.h>
107#include <linux/dirent.h>
108#include <linux/unistd.h>
109
110_syscall3(int, getdents, uint, fd, struct dirent *, dirp, uint, count)
111
/*
 * Directory handle for the raw-getdents implementation: the open
 * descriptor plus a buffer of entries and a cursor into that buffer.
 */
typedef struct {
    int fd;                    /* descriptor returned by open() */
    struct dirent data[32];    /* buffer that getdents() fills */
    struct dirent *curr;       /* next entry to hand out from data */
    int pos;                   /* byte offset of curr within data */
    int endpos;                /* value returned by the last getdents() */
} dirhandle;
118
119int open_dir(const char *path, dirhandle *dh)
120{
121 /*
122 * As above, we try with O_NOATIME and then fall back to
123 * trying without it.
124 */
125 dh->fd = open(path, O_RDONLY | O_NONBLOCK | O_NOCTTY | O_LARGEFILE |
126 O_NOATIME | O_DIRECTORY);
127 if (dh->fd < 0) {
128 if (errno == EPERM)
129 dh->fd = open(path, O_RDONLY | O_NONBLOCK | O_NOCTTY |
130 O_LARGEFILE | O_DIRECTORY);
704fafa3 131 if (dh->fd < 0)
132 return -1;
133 }
134
135 dh->pos = dh->endpos = 0;
136
137 return 0;
138}
139
140const char *read_dir(dirhandle *dh)
141{
142 const char *ret;
143
144 if (dh->pos >= dh->endpos) {
145 dh->curr = dh->data;
146 dh->pos = 0;
147 dh->endpos = getdents(dh->fd, dh->data, sizeof(dh->data));
148 if (dh->endpos <= 0)
149 return NULL;
150 }
151
152 ret = dh->curr->d_name;
153
154 dh->pos += dh->curr->d_reclen;
155 dh->curr = (struct dirent *)((char *)dh->data + dh->pos);
156
157 return ret;
158}
159
160void close_dir(dirhandle *dh)
161{
162 close(dh->fd);
163}
164
9c6e61f2 165#endif /* !defined __linux__ || defined HAVE_FDOPENDIR */
70322ae3 166
/*
 * qsort() comparison callback for an array of C strings: each
 * argument points at a 'char *' element, and the strings they refer
 * to are ordered with strcmp().
 */
static int str_cmp(const void *av, const void *bv)
{
    const char *const *lhs = av;
    const char *const *rhs = bv;

    return strcmp(*lhs, *rhs);
}
171
172static void du_recurse(char **path, size_t pathlen, size_t *pathsize,
b02bd008 173 gotdata_fn_t gotdata, err_fn_t err, void *gotdata_ctx,
174 int toplevel)
70322ae3 175{
704fafa3 176 const char *name;
70322ae3 177 dirhandle d;
9c6e61f2 178 STRUCT_STAT st;
70322ae3 179 char **names;
180 size_t i, nnames, namesize;
b02bd008 181 int statret;
70322ae3 182
b02bd008 183 /*
184 * Special case: at the very top of the scan, we follow a
185 * symlink.
186 */
187 if (toplevel)
f4592adf 188 statret = STAT_FUNC(*path, &st);
b02bd008 189 else
f4592adf 190 statret = LSTAT_FUNC(*path, &st);
b02bd008 191 if (statret < 0) {
09fd7619 192 err(gotdata_ctx, "%s: lstat: %s\n", *path, strerror(errno));
70322ae3 193 return;
194 }
195
196 if (!gotdata(gotdata_ctx, *path, &st))
197 return;
198
199 if (!S_ISDIR(st.st_mode))
200 return;
201
202 names = NULL;
203 nnames = namesize = 0;
204
704fafa3 205 if (open_dir(*path, &d) < 0) {
09fd7619 206 err(gotdata_ctx, "%s: opendir: %s\n", *path, strerror(errno));
70322ae3 207 return;
208 }
704fafa3 209 while ((name = read_dir(&d)) != NULL) {
210 if (name[0] == '.' && (!name[1] || (name[1] == '.' && !name[2]))) {
211 /* do nothing - we skip "." and ".." */
212 } else {
213 if (nnames >= namesize) {
214 namesize = nnames * 3 / 2 + 64;
215 names = sresize(names, namesize, char *);
70322ae3 216 }
704fafa3 217 names[nnames++] = dupstr(name);
218 }
70322ae3 219 }
704fafa3 220 close_dir(&d);
70322ae3 221
222 if (nnames == 0)
223 return;
224
225 qsort(names, nnames, sizeof(*names), str_cmp);
226
227 for (i = 0; i < nnames; i++) {
228 size_t newpathlen = pathlen + 1 + strlen(names[i]);
229 if (*pathsize <= newpathlen) {
230 *pathsize = newpathlen * 3 / 2 + 256;
231 *path = sresize(*path, *pathsize, char);
232 }
256c29a2 233 /*
234 * Avoid duplicating a slash if we got a trailing one to
235 * begin with (i.e. if we're starting the scan in '/' itself).
236 */
237 if (pathlen > 0 && (*path)[pathlen-1] == '/') {
238 strcpy(*path + pathlen, names[i]);
239 newpathlen--;
240 } else {
241 sprintf(*path + pathlen, "/%s", names[i]);
242 }
70322ae3 243
b02bd008 244 du_recurse(path, newpathlen, pathsize, gotdata, err, gotdata_ctx, 0);
70322ae3 245
246 sfree(names[i]);
247 }
248 sfree(names);
249}
250
09fd7619 251void du(const char *inpath, gotdata_fn_t gotdata, err_fn_t err,
252 void *gotdata_ctx)
70322ae3 253{
254 char *path;
255 size_t pathlen, pathsize;
256
257 pathlen = strlen(inpath);
66f0cb51 258
259 /*
260 * Trim any trailing slashes from the input path, otherwise we'll
261 * store them in the index with confusing effects.
262 */
263 while (pathlen > 1 && inpath[pathlen-1] == '/')
264 pathlen--;
265
70322ae3 266 pathsize = pathlen + 256;
267 path = snewn(pathsize, char);
66f0cb51 268 memcpy(path, inpath, pathlen);
269 path[pathlen] = '\0';
70322ae3 270
b02bd008 271 du_recurse(&path, pathlen, &pathsize, gotdata, err, gotdata_ctx, 1);
70322ae3 272}