Fix signedness compiler warnings.
[sgt/utils] / pid / pid.c
CommitLineData
1f7c0ae1 1/*
2 * pid - find the pid of a process given its command name
3 *
4 * Same basic idea as Debian's "pidof", in that you type 'pid command'
5 * and it finds a process running that command and gives you the pid;
6 * but souped up with various pragmatic features such as recognising
7 * well known interpreters (so you can search for, say, 'pid
8 * script.sh' as well as 'pid bash' and have it do what you meant).
9 *
10 * Currently tested only on Linux using /proc directly, but I've tried
11 * to set it up so that the logic of what processes to choose is
12 * separated from the mechanism used to iterate over processes and
13 * find their command lines.
14 */
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <assert.h>
20#include <ctype.h>
21
22#include <sys/types.h>
23#include <dirent.h>
24#include <unistd.h>
25
/* Number of elements in a true array (must not be applied to a pointer). */
#define lenof(x) (sizeof((x))/sizeof(*(x)))

/* Exclusive upper bound on the process ids this program can represent. */
#define PIDMAX 32768

/* ----------------------------------------------------------------------
 * General-purpose code for storing a set of process ids, testing
 * membership, and iterating over them. Since pids have a very limited
 * range, we just do this as a giant bitmask.
 */

/* Number of bits used in each element of procbits[]. */
#define WORDBITS 32

/*
 * A set of pids in the range [0, PIDMAX). Pid n is represented by bit
 * (n % WORDBITS) of procbits[n / WORDBITS]. 'next' is the iteration
 * cursor used by pidset_first() and pidset_next(): the lowest pid that
 * has not yet been examined.
 */
struct pidset {
    unsigned long procbits[PIDMAX/WORDBITS];
    int next;
};

/*
 * Make *p the empty set and rewind its iteration cursor.
 */
static void pidset_init(struct pidset *p)
{
    int i;
    for (i = 0; i < (int)lenof(p->procbits); i++)
        p->procbits[i] = 0L;
    p->next = 0;
}

/*
 * Add 'pid' to the set. 'pid' must lie in [0, PIDMAX).
 */
static void pidset_add(struct pidset *p, int pid)
{
    assert(pid >= 0 && pid < PIDMAX);
    /*
     * The mask must be built from an unsigned long. Shifting a plain
     * int 1 into bit 31 overflows, and where unsigned long is wider
     * than int the negative result would be sign-extended, setting
     * every higher bit of the word and corrupting pidset_size().
     */
    p->procbits[pid / WORDBITS] |= 1UL << (pid % WORDBITS);
}

/*
 * Return nonzero if 'pid' is a member of the set, zero otherwise.
 * 'pid' must lie in [0, PIDMAX).
 */
static int pidset_in(const struct pidset *p, int pid)
{
    assert(pid >= 0 && pid < PIDMAX);
    return (p->procbits[pid / WORDBITS] & (1UL << (pid % WORDBITS))) != 0;
}

/*
 * Return the number of pids in the set.
 */
static int pidset_size(const struct pidset *p)
{
    int word, count;

    count = 0;
    for (word = 0; word < (int)lenof(p->procbits); word++) {
        unsigned long mask = p->procbits[word];
        while (mask > 0) {
            count += (mask & 1);
            mask >>= 1;
        }
    }

    return count;
}

/*
 * Return the smallest member of the set that is >= p->next and move
 * the cursor just past it, or return -1 if there is no such member.
 */
static int pidset_step(struct pidset *p)
{
    int word = p->next / WORDBITS;
    int bit = p->next % WORDBITS;

    /* Skip whole words that have nothing set at or above 'bit'. */
    while (word < (int)lenof(p->procbits) && p->procbits[word] >> bit == 0) {
        word++;
        bit = 0;
        p->next = WORDBITS * word + bit;
    }

    if (word >= (int)lenof(p->procbits))
        return -1;

    /* This word has a set bit at or above 'bit'; walk up to it. */
    while (!((p->procbits[word] >> bit) & 1)) {
        bit++;
        p->next = WORDBITS * word + bit;
    }

    assert(bit < WORDBITS);
    return p->next++;
}

/*
 * Start iterating: return the smallest member of the set, or -1 if
 * the set is empty.
 */
static int pidset_first(struct pidset *p)
{
    p->next = 0;
    return pidset_step(p);
}

/*
 * Continue iterating: return the next member in increasing order, or
 * -1 when the members are exhausted.
 */
static int pidset_next(struct pidset *p)
{
    return pidset_step(p);
}
110
111/* ----------------------------------------------------------------------
112 * Code to scan the list of processes and retrieve all the information
113 * we'll want about each one. This may in future be conditional on the
114 * OS's local mechanism for finding that information (i.e. if we want
115 * to run on kernels that don't provide Linux-style /proc).
116 */
117
118struct procdata {
119 int pid, ppid, uid;
120 int argc;
121 const char *const *argv;
122 const char *exe;
123};
124static struct procdata *procs[PIDMAX];
125
/*
 * Read the whole of 'filename' into a freshly allocated buffer.
 *
 * Returns the buffer, which the caller must free(), or NULL if the
 * file cannot be opened or a read error occurs. The data is followed
 * by a terminating NUL that is not included in the length. If
 * 'returned_len' is non-NULL, the number of bytes read is stored
 * through it on success. Running out of memory while growing the
 * buffer is fatal: a message is printed and the program exits.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    int len = 0;
    char *buf = NULL;
    char *shrunk;
    int bufsize = 0;
    FILE *fp;

    fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    while (1) {
        size_t got;

        if (len >= bufsize) {
            char *bigger;

            bufsize = len * 5 / 4 + 4096;
            bigger = realloc(buf, bufsize);
            if (!bigger) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
            buf = bigger;
        }

        got = fread(buf + len, 1, bufsize - len, fp);
        if (got == 0)
            break;                     /* end of file, or an error */
        len += (int)got;
    }

    /*
     * fread reports both EOF and failure as a short count, so the
     * stream's error indicator is the only way to tell them apart.
     */
    if (ferror(fp)) {
        fclose(fp);
        free(buf);
        return NULL;                   /* I/O error */
    }
    fclose(fp);

    /*
     * The loop always leaves len < bufsize, so there is room for the
     * terminator in the existing buffer. Trimming the allocation is
     * only an optimisation: if it fails, keep the larger buffer.
     */
    buf[len] = '\0';
    shrunk = realloc(buf, len + 1);
    if (shrunk)
        buf = shrunk;

    if (returned_len)
        *returned_len = len;
    return buf;
}
166
/*
 * Read the target of the symbolic link 'filename'.
 *
 * Returns a freshly allocated NUL-terminated string which the caller
 * must free(), or NULL if readlink() fails. Running out of memory
 * while growing the buffer is fatal: a message is printed and the
 * program exits.
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    int bufsize = 0;
    ssize_t ret;

    while (1) {
        char *bigger;

        bufsize = bufsize * 5 / 4 + 1024;
        bigger = realloc(buf, bufsize);
        if (!bigger) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }
        buf = bigger;

        ret = readlink(filename, buf, (size_t)bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        } else if (ret < bufsize) {
            /*
             * Success! We've read the full link text, and because
             * ret < bufsize there is room for the terminator in the
             * buffer we already have. Trimming the allocation is only
             * an optimisation, so a failed realloc is not an error.
             */
            char *shrunk;

            buf[ret] = '\0';
            shrunk = realloc(buf, ret + 1);
            return shrunk ? shrunk : buf;
        } else {
            /*
             * The text filled the whole buffer, so it may have been
             * truncated. Go round again with a bigger one.
             */
        }
    }
}
200
201static struct pidset get_processes(void)
202{
203 struct dirent *de;
204 struct pidset ret;
205 DIR *d;
206
207 pidset_init(&ret);
208
209 d = opendir("/proc");
210 if (!d) {
211 perror("/proc: open\n");
212 exit(1);
213 }
214 while ((de = readdir(d)) != NULL) {
215 int pid;
216 char *cmdline, *status, *exe;
217 int cmdlinelen;
218 const char **argv;
219 char filename[256];
220 struct procdata *proc;
221
222 const char *name = de->d_name;
223 if (name[strspn(name, "0123456789")])
224 continue;
225
226 /*
227 * The filename is numeric, i.e. we've found a pid. Try to
228 * retrieve all the information we want about it.
229 *
230 * We expect this will fail every so often for random reasons,
231 * e.g. if the pid has disappeared between us fetching a list
232 * of them and trying to read their command lines. In that
233 * situation, we won't bother reporting errors: we'll just
234 * drop this pid and silently move on to the next one.
235 */
236 pid = atoi(name);
237 assert(pid >= 0 && pid < PIDMAX);
238
239 sprintf(filename, "/proc/%d/cmdline", pid);
240 if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL)
241 continue;
242
243 sprintf(filename, "/proc/%d/status", pid);
244 if ((status = get_contents(filename, NULL)) == NULL) {
245 free(cmdline);
246 continue;
247 }
248
249 sprintf(filename, "/proc/%d/exe", pid);
b5fccf05 250 exe = get_link_dest(filename);
251 /* This may fail, if the process isn't ours, but we continue
252 * anyway. */
1f7c0ae1 253
254 /*
255 * Now we've got all our raw data out of /proc. Process it
256 * into the internal representation we're going to use in the
257 * process-selection logic.
258 */
259 proc = (struct procdata *)malloc(sizeof(struct procdata));
260 if (!proc) {
261 fprintf(stderr, "pid: out of memory\n");
262 exit(1);
263 }
264 proc->pid = pid;
265 proc->exe = exe;
266
267 /*
268 * cmdline contains a list of NUL-terminated strings. Scan
269 * them to get the argv pointers.
270 */
271 {
272 const char *p;
273 int nargs;
274
275 /* Count the arguments. */
276 nargs = 0;
277 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
278 nargs++;
279
280 /* Allocate space for the pointers. */
281 argv = (const char **)malloc((nargs+1) * sizeof(char *));
282 proc->argv = argv;
283 if (!argv) {
284 fprintf(stderr, "pid: out of memory\n");
285 exit(1);
286 }
287
288 /* Store the pointers. */
289 proc->argc = 0;
290 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
291 argv[proc->argc++] = p;
292
293 /* Trailing NULL to match standard argv lists, just in case. */
294 assert(proc->argc == nargs);
295 argv[proc->argc] = NULL;
296 }
297
298 /*
299 * Scan status for the uid and the parent pid. This file
300 * contains a list of \n-terminated lines of text.
301 */
302 {
303 const char *p;
304 int got_ppid = 0, got_uid = 0;
305
306 p = status;
307 while (p && *p) {
308 if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1)
309 got_ppid = 1;
310 if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1)
311 got_uid = 1;
312
313 /*
314 * Advance to next line.
315 */
316 p = strchr(p, '\n');
317 if (p) p++;
318 }
319
320 if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */
321 free(cmdline);
322 free(exe);
323 free(status);
324 free(argv);
325 free(proc);
326 continue;
327 }
328 }
329
330 /*
331 * If we get here, we've got everything we need. Add the
332 * process to the list of things we can usefully work
333 * with.
334 */
335 procs[pid] = proc;
336 pidset_add(&ret, pid);
337 }
338 closedir(d);
339
340 return ret;
341}
342
343static const struct procdata *get_proc(int pid)
344{
345 assert(pid >= 0 && pid < PIDMAX);
346 assert(procs[pid]);
347 return procs[pid];
348}
349
350/* ----------------------------------------------------------------------
351 * Logic to pick out the set of processes we care about.
352 */
353
/*
 * Return 1 if 'basename' is exactly the name of one of the script
 * interpreters this program knows about, and 0 otherwise.
 */
static int is_an_interpreter(const char *basename)
{
    static const char *const known[] = {
        "perl", "python", "ruby", "rep", "bash",
        "sh", "dash", "lua", "java",
    };
    size_t idx;

    for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
        if (strcmp(basename, known[idx]) == 0)
            return 1;
    }
    return 0;
}
369
/*
 * Return a pointer, within 'path', to the part following the last
 * '/'. If 'path' contains no '/', the result is 'path' itself; if it
 * ends in '/', the result is the empty string at its end.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL)
        return path;
    return last_slash + 1;
}
384
/*
 * Decide whether a process with the given argument list is running
 * the command 'cmd'.
 *
 * Returns the index in pid_argv of the word that names the command:
 * 0 if the basename of argv[0] is 'cmd', or the index of the first
 * non-option argument if argv[0] is a known interpreter and that
 * argument's basename is 'cmd'. Returns -1 if there is no match.
 * pid_argv must have at least one entry.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *arg0 = pid_argv[0];
    const char *progname;
    int idx;

    /* A leading '-' on argv[0] indicates a login shell; it is not
     * part of the program name. */
    if (arg0[0] == '-')
        arg0++;
    progname = find_basename(arg0);

    /* The simple case: argv[0] is the command itself. */
    if (strcmp(progname, cmd) == 0)
        return 0;

    /* Otherwise only an interpreter running a script can match. */
    if (!is_an_interpreter(progname))
        return -1;

    /* Step over the interpreter's own options to reach the first
     * word that could be the program it is running. */
    for (idx = 1; idx < pid_argc; idx++) {
        if (pid_argv[idx][0] != '-')
            break;
    }

    if (idx < pid_argc && strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;

    return -1;                         /* no match */
}
414
/*
 * Like strcmp, but cope with NULL inputs by making them compare
 * identical to each other and before any non-null string.
 *
 * Returns zero if both are NULL or both are equal strings, a negative
 * value if 'a' sorts before 'b', and a positive value otherwise.
 */
static int strnullcmp(const char *a, const char *b)
{
    if (a && b)
        return strcmp(a, b);

    /*
     * At least one side is NULL. NULL must sort first, so the result
     * is negative exactly when 'a' is the NULL one: that is "a's
     * presence minus b's presence", not the other way round.
     */
    return (a != NULL) - (b != NULL);
}
426
/*
 * Compare two NULL-terminated lists of strings element by element.
 *
 * Returns zero if the lists are identical. Otherwise the result has
 * the sign of strcmp on the first pair of elements that differ; if
 * one list is a proper prefix of the other, the shorter list sorts
 * first (negative when 'a' is the shorter one).
 */
static int argcmp(const char *const *a, const char *const *b)
{
    while (*a && *b) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
        a++;
        b++;
    }

    /*
     * At least one list has ended. Whichever still has elements is
     * the longer, hence the greater: a's remaining presence minus b's.
     */
    return (*a != NULL) - (*b != NULL);
}
439
440static struct pidset filter_out_self(struct pidset in)
441{
442 /*
443 * Discard our own pid from a set. (Generally we won't want to
444 * return ourself from any search.)
445 */
446 struct pidset ret;
447 int pid;
448 int our_pid = getpid();
449
450 pidset_init(&ret);
451 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
452 if (pid != our_pid)
453 pidset_add(&ret, pid);
454 }
455 return ret;
456}
457
d1bc4fef 458static struct pidset filter_by_uid(struct pidset in, int uid)
459{
460 /*
461 * Return only those processes with a given uid.
462 */
463 struct pidset ret;
464 int pid;
465
466 pidset_init(&ret);
467 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
468 const struct procdata *proc = get_proc(pid);
469 if (proc->uid == uid)
470 pidset_add(&ret, pid);
471 }
472 return ret;
473}
474
1f7c0ae1 475static struct pidset filter_by_command(struct pidset in, const char **words)
476{
477 /*
478 * Look for processes matching the user-supplied command name and
479 * subsequent arguments.
480 */
481 struct pidset ret;
482 int pid;
483
484 pidset_init(&ret);
485 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
486 const struct procdata *proc = get_proc(pid);
487 int i, j;
488
489 if (!proc->argv || proc->argc < 1)
490 goto no_match;
491
492 /* Find the command, whether it's a binary or a script. */
493 i = find_command(proc->argc, proc->argv, words[0]);
494 if (i < 0)
495 goto no_match;
496
497 /* Now check that subsequent arguments match. */
498 for (j = 1; words[j]; j++)
499 if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j]))
500 goto no_match;
501
502 /* If we get here, we have a match! */
503 pidset_add(&ret, pid);
504
505 no_match:;
506 }
507 return ret;
508}
509
510static struct pidset filter_out_forks(struct pidset in)
511{
512 /*
513 * Discard any process whose parent is also in our remaining match
514 * set and looks sufficiently like it for us to decide this one's
515 * an uninteresting fork (e.g. of a shell script executing a
516 * complex pipeline).
517 */
518 struct pidset ret;
519 int pid;
520
521 pidset_init(&ret);
522 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
523 const struct procdata *proc = get_proc(pid);
524
525 if (pidset_in(&in, proc->ppid)) {
526 /* The parent is in our set too. Is it similar? */
527 const struct procdata *parent = get_proc(proc->ppid);
b5fccf05 528 if (!strnullcmp(parent->exe, proc->exe) &&
1f7c0ae1 529 !argcmp(parent->argv, proc->argv)) {
530 /* Yes; don't list it. */
531 continue;
532 }
533 }
534
535 pidset_add(&ret, pid);
536 }
537 return ret;
538}
539
540/* ----------------------------------------------------------------------
541 * Main program.
542 */
543
/* Help text printed by usage(). */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " -U report pids of any user, not just ours\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/* Write the help text to standard output. */
void usage(void)
{
    printf("%s", usagemsg);
}
556
/* Licence text printed by licence(). */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/* Write the licence text to standard output. */
void licence(void)
{
    printf("%s", licencemsg);
}
584
/*
 * Print the program's revision to standard output.
 *
 * The revision is taken from a version-control keyword string: the
 * text after the first ':' (with leading whitespace skipped) up to
 * the next '$'. If the string contains no ':', i.e. the keyword was
 * never expanded, "unknown version" is reported instead.
 */
void version(void) {
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *value, *end;

    /* Work on a writable copy so the value can be terminated in place. */
    strcpy(rev, SVN_REV);

    value = strchr(rev, ':');
    if (value == NULL) {
        printf("pid: unknown version\n");
        return;
    }

    value++;                           /* step past the ':' itself */
    while (*value && isspace((unsigned char)*value))
        value++;

    end = strchr(value, '$');
    if (end != NULL)
        *end = '\0';

    printf("pid revision %s\n", value);
}
603
604int main(int argc, char **argv)
605{
606 const char **searchwords;
607 int nsearchwords;
d1bc4fef 608 int all = 0, all_uids = 0;
1f7c0ae1 609 int doing_opts = 1;
610
611 /*
612 * Allocate enough space in 'searchwords' that we could shovel the
613 * whole of our argv into it if we had to. Then we won't have to
614 * worry about it later.
615 */
616 searchwords = (const char **)malloc((argc+1) * sizeof(const char *));
617 nsearchwords = 0;
618
619 /*
620 * Parse the command line.
621 */
622 while (--argc > 0) {
623 char *p = *++argv;
624 if (doing_opts && *p == '-') {
625 if (!strcmp(p, "-a") || !strcmp(p, "--all")) {
626 all = 1;
d1bc4fef 627 } else if (!strcmp(p, "-U") || !strcmp(p, "--all-uids")) {
628 all_uids = 1;
1f7c0ae1 629 } else if (!strcmp(p, "--version")) {
630 version();
631 return 0;
632 } else if (!strcmp(p, "--help")) {
633 usage();
634 return 0;
635 } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) {
636 licence();
637 return 0;
638 } else if (!strcmp(p, "--")) {
639 doing_opts = 0;
640 } else {
641 fprintf(stderr, "pid: unrecognised option '%s'\n", p);
642 return 1;
643 }
644 } else {
645 searchwords[nsearchwords++] = p;
646 doing_opts = 0; /* further optionlike args become search terms */
647 }
648 }
649
650 if (!nsearchwords) {
651 fprintf(stderr, "pid: expected a command to search for; "
652 "type 'pid --help' for help\n");
653 return 1;
654 }
655 searchwords[nsearchwords] = NULL; /* terminate list */
656
657 {
658 struct pidset procs;
d1bc4fef 659 int uid, pid, npids;
1f7c0ae1 660 /*
661 * Construct our list of processes.
662 */
663 procs = get_processes();
d1bc4fef 664 uid = getuid();
665 if (uid > 0 && !all_uids)
666 procs = filter_by_uid(procs, uid);
1f7c0ae1 667 procs = filter_out_self(procs);
668 procs = filter_by_command(procs, searchwords);
669 if (!all)
670 procs = filter_out_forks(procs);
671
672 /*
673 * Output.
674 */
675 npids = pidset_size(&procs);
676 if (npids == 0) {
677 printf("NONE\n");
678 } else if (all) {
679 const char *sep = "";
680 for (pid = pidset_first(&procs); pid >= 0;
681 pid = pidset_next(&procs)) {
682 printf("%s%d", sep, pid);
683 sep = " ";
684 }
685 putchar('\n');
686 } else {
687 if (npids == 1) {
688 printf("%d\n", pidset_first(&procs));
689 } else {
690 printf("MULTIPLE\n");
691 }
692 }
693 }
694
695 return 0;
696}