Nearly forgot: leave out individual files in the text display.
[sgt/agedu] / du.c
1 /*
2 * du.c: implementation of du.h.
3 */
4
5 #define _GNU_SOURCE
6 #include <features.h>
7
8 #include <stdio.h>
9 #include <string.h>
10 #include <stdlib.h>
11 #include <errno.h>
12
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <unistd.h>
16
17 #include "du.h"
18 #include "malloc.h"
19
20 #ifdef __linux__
21
22 /*
23 * On Linux, we have the O_NOATIME flag. This means we can read
24 * the contents of directories without affecting their atimes,
25 * which enables us to at least try to include them in the age
26 * display rather than exempting them.
27 *
28 * Unfortunately, opendir() doesn't let us open a directory with
29 * O_NOATIME. In later glibcs we can open one manually using
30 * open() and then use fdopendir() to translate the fd into a
31 * POSIX dir handle; in earlier glibcs fdopendir() is not
32 * available, so we have no option but to talk directly to the
33 * kernel system call interface which underlies the POSIX
34 * opendir/readdir machinery.
35 */
36
37 #define __KERNEL__
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <linux/types.h>
41 #include <linux/dirent.h>
42 #include <linux/unistd.h>
43
/*
 * Thin wrapper around the raw getdents system call: fills the buffer
 * at dirp (of size count bytes) with directory entries read from fd.
 *
 * Returns whatever syscall() returns, narrowed to int: the number of
 * bytes placed in the buffer, 0 at end of directory, or -1 with errno
 * set on failure.
 *
 * This used to be generated by the _syscall3() macro, but the
 * _syscallN macros are no longer shipped in the kernel headers
 * exported to user space, so we go through syscall(2) instead.
 * __NR_getdents is expected to come from <linux/unistd.h>, which is
 * already included above.
 */
int getdents(unsigned int fd, struct dirent *dirp, unsigned int count)
{
    return (int)syscall(__NR_getdents, fd, dirp, count);
}
45
/*
 * State for iterating over one directory via raw getdents calls.
 */
typedef struct {
    int fd;                     /* descriptor returned by open() */
    struct dirent data[32];     /* buffer that getdents fills in */
    struct dirent *curr;        /* next entry within data to hand out */
    int pos;                    /* byte offset of curr within data */
    int endpos;                 /* byte count from the last getdents */
} dirhandle;
52
53 int open_dir(const char *path, dirhandle *dh)
54 {
55 dh->fd = open(path, O_RDONLY | O_NOATIME | O_DIRECTORY);
56 if (dh->fd < 0) {
57 /*
58 * Opening a file with O_NOATIME is not unconditionally
59 * permitted by the Linux kernel. As far as I can tell,
60 * it's permitted only for files on which the user would
61 * have been able to call utime(2): in other words, files
62 * for which the user could have deliberately set the
63 * atime back to its original value after finishing with
64 * it. Hence, O_NOATIME has no security implications; it's
65 * simply a cleaner, faster and more race-condition-free
66 * alternative to stat(), a normal open(), and a utimes()
67 * when finished.
68 *
69 * The upshot of all of which, for these purposes, is that
70 * we must be prepared to try again without O_NOATIME if
71 * we receive EPERM.
72 */
73 if (errno == EPERM)
74 dh->fd = open(path, O_RDONLY | O_DIRECTORY);
75 if (dh->fd < 0)
76 return -1;
77 }
78
79 dh->pos = dh->endpos = 0;
80
81 return 0;
82 }
83
84 const char *read_dir(dirhandle *dh)
85 {
86 const char *ret;
87
88 if (dh->pos >= dh->endpos) {
89 dh->curr = dh->data;
90 dh->pos = 0;
91 dh->endpos = getdents(dh->fd, dh->data, sizeof(dh->data));
92 if (dh->endpos <= 0)
93 return NULL;
94 }
95
96 ret = dh->curr->d_name;
97
98 dh->pos += dh->curr->d_reclen;
99 dh->curr = (struct dirent *)((char *)dh->data + dh->pos);
100
101 return ret;
102 }
103
104 void close_dir(dirhandle *dh)
105 {
106 close(dh->fd);
107 }
108
109 #else /* __linux__ */
110
111 /*
112 * This branch of the ifdef is a simple exercise of ordinary POSIX
113 * opendir/readdir.
114 */
115
116 #include <dirent.h>
/* On this branch a directory handle is just a POSIX DIR pointer. */
typedef DIR *dirhandle;
118
119 int open_dir(const char *path, dirhandle *dh)
120 {
121 *dh = opendir(path);
122 if (!*dh)
123 return -1;
124 return 0;
125 }
126
127 const char *read_dir(dirhandle *dh)
128 {
129 struct dirent *de = readdir(*dh);
130 return de ? de->d_name : NULL;
131 }
132
133 void close_dir(dirhandle *dh)
134 {
135 closedir(*dh);
136 }
137
138 #endif
139
/*
 * qsort() comparison function for an array of C strings: each
 * argument points at a 'char *' element, and the strings themselves
 * are ordered by strcmp().
 */
static int str_cmp(const void *av, const void *bv)
{
    const char *const *lhs = av;
    const char *const *rhs = bv;

    return strcmp(*lhs, *rhs);
}
144
145 static void du_recurse(char **path, size_t pathlen, size_t *pathsize,
146 gotdata_fn_t gotdata, void *gotdata_ctx)
147 {
148 const char *name;
149 dirhandle d;
150 struct stat64 st;
151 char **names;
152 size_t i, nnames, namesize;
153
154 if (lstat64(*path, &st) < 0) {
155 fprintf(stderr, "%s: lstat: %s\n", *path, strerror(errno));
156 return;
157 }
158
159 if (!gotdata(gotdata_ctx, *path, &st))
160 return;
161
162 if (!S_ISDIR(st.st_mode))
163 return;
164
165 names = NULL;
166 nnames = namesize = 0;
167
168 if (open_dir(*path, &d) < 0) {
169 fprintf(stderr, "%s: opendir: %s\n", *path, strerror(errno));
170 return;
171 }
172 while ((name = read_dir(&d)) != NULL) {
173 if (name[0] == '.' && (!name[1] || (name[1] == '.' && !name[2]))) {
174 /* do nothing - we skip "." and ".." */
175 } else {
176 if (nnames >= namesize) {
177 namesize = nnames * 3 / 2 + 64;
178 names = sresize(names, namesize, char *);
179 }
180 names[nnames++] = dupstr(name);
181 }
182 }
183 close_dir(&d);
184
185 if (nnames == 0)
186 return;
187
188 qsort(names, nnames, sizeof(*names), str_cmp);
189
190 for (i = 0; i < nnames; i++) {
191 size_t newpathlen = pathlen + 1 + strlen(names[i]);
192 if (*pathsize <= newpathlen) {
193 *pathsize = newpathlen * 3 / 2 + 256;
194 *path = sresize(*path, *pathsize, char);
195 }
196 /*
197 * Avoid duplicating a slash if we got a trailing one to
198 * begin with (i.e. if we're starting the scan in '/' itself).
199 */
200 if (pathlen > 0 && (*path)[pathlen-1] == '/') {
201 strcpy(*path + pathlen, names[i]);
202 newpathlen--;
203 } else {
204 sprintf(*path + pathlen, "/%s", names[i]);
205 }
206
207 du_recurse(path, newpathlen, pathsize, gotdata, gotdata_ctx);
208
209 sfree(names[i]);
210 }
211 sfree(names);
212 }
213
214 void du(const char *inpath, gotdata_fn_t gotdata, void *gotdata_ctx)
215 {
216 char *path;
217 size_t pathlen, pathsize;
218
219 pathlen = strlen(inpath);
220 pathsize = pathlen + 256;
221 path = snewn(pathsize, char);
222 strcpy(path, inpath);
223
224 du_recurse(&path, pathlen, &pathsize, gotdata, gotdata_ctx);
225 }