Adjust 'after' so that it tries more rigorously to parse the input
[sgt/utils] / pid / pid.c
CommitLineData
1f7c0ae1 1/*
2 * pid - find the pid of a process given its command name
3 *
4 * Same basic idea as Debian's "pidof", in that you type 'pid command'
5 * and it finds a process running that command and gives you the pid;
8f9d00c3 6 * but differs in details, for example it will search for scripts by
7 * default rather than requiring pidof's -x option, and it will also
8 * look for command-line arguments ('pid make test') and try to find
9 * the parent process of a bunch of forks from the same shell script
10 * invocation.
1f7c0ae1 11 *
12 * Currently tested only on Linux using /proc directly, but I've tried
13 * to set it up so that the logic of what processes to choose is
14 * separated from the mechanism used to iterate over processes and
15 * find their command lines.
16 */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <assert.h>
22#include <ctype.h>
23
24#include <sys/types.h>
25#include <dirent.h>
26#include <unistd.h>
27
/* Number of elements in a true array (must not be given a pointer). */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))

/* Exclusive upper bound on the process ids this program can handle. */
#define PIDMAX 32768
31
32/* ----------------------------------------------------------------------
33 * General-purpose code for storing a set of process ids, testing
34 * membership, and iterating over them. Since pids have a very limited
35 * range, we just do this as a giant bitmask.
36 */
37
38#define WORDBITS 32
39
40struct pidset {
41 unsigned long procbits[PIDMAX/WORDBITS];
42 int next;
43};
44
45static void pidset_init(struct pidset *p)
46{
47 int i;
0a1d8c87 48 for (i = 0; i < (int)lenof(p->procbits); i++)
1f7c0ae1 49 p->procbits[i] = 0L;
50}
51
52static void pidset_add(struct pidset *p, int pid)
53{
54 assert(pid >= 0 && pid < PIDMAX);
55 p->procbits[pid / WORDBITS] |= 1 << (pid % WORDBITS);
56}
57
58static int pidset_in(const struct pidset *p, int pid)
59{
60 assert(pid >= 0 && pid < PIDMAX);
61 return (p->procbits[pid / WORDBITS] & (1 << (pid % WORDBITS))) != 0;
62}
63
64static int pidset_size(const struct pidset *p)
65{
66 int word, count;
67
68 count = 0;
0a1d8c87 69 for (word = 0; word < (int)lenof(p->procbits); word++) {
1f7c0ae1 70 unsigned long mask = p->procbits[word];
71 while (mask > 0) {
72 count += (mask & 1);
73 mask >>= 1;
74 }
75 }
76
77 return count;
78}
79
80static int pidset_step(struct pidset *p)
81{
82 int word = p->next / WORDBITS;
83 int bit = p->next % WORDBITS;
0a1d8c87 84 while (word < (int)lenof(p->procbits) && p->procbits[word] >> bit == 0) {
1f7c0ae1 85 word++;
86 bit = 0;
87 p->next = WORDBITS * word + bit;
88 }
89
0a1d8c87 90 if (word >= (int)lenof(p->procbits))
1f7c0ae1 91 return -1;
92
93 while (!((p->procbits[word] >> bit) & 1)) {
94 bit++;
95 p->next = WORDBITS * word + bit;
96 }
97
98 assert(bit < WORDBITS);
99 return p->next++;
100}
101
102static int pidset_first(struct pidset *p)
103{
104 p->next = 0;
105 return pidset_step(p);
106}
107
/*
 * Continue an iteration: return the next pid in the set after the one
 * most recently returned, or -1 when the iteration is finished.
 */
static int pidset_next(struct pidset *p)
{
    int following = pidset_step(p);
    return following;
}
112
113/* ----------------------------------------------------------------------
114 * Code to scan the list of processes and retrieve all the information
115 * we'll want about each one. This may in future be conditional on the
116 * OS's local mechanism for finding that information (i.e. if we want
117 * to run on kernels that don't provide Linux-style /proc).
118 */
119
120struct procdata {
121 int pid, ppid, uid;
122 int argc;
123 const char *const *argv;
124 const char *exe;
125};
126static struct procdata *procs[PIDMAX];
127
/*
 * Read the whole of a file into a freshly allocated buffer.
 *
 * Returns the buffer, which the caller must free(), with a NUL byte
 * appended after the data; if 'returned_len' is non-NULL it receives
 * the number of data bytes (excluding that NUL). Returns NULL if the
 * file cannot be opened or a read error occurs. Exits the program if
 * memory for the buffer cannot be obtained.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    char *buf = NULL;
    char *shrunk;
    size_t len = 0, bufsize = 0;
    FILE *fp;

    fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    for (;;) {
        size_t got;

        /* Keep room for at least one more data byte plus the final NUL. */
        if (len + 1 >= bufsize) {
            char *bigger;

            bufsize = len * 5 / 4 + 4096;
            bigger = realloc(buf, bufsize);
            if (!bigger) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
            buf = bigger;
        }

        got = fread(buf + len, 1, bufsize - len - 1, fp);
        len += got;

        /*
         * fread reports failure by a short count, never by a negative
         * value, so the stream's error flag is the only way to tell a
         * read error apart from end of file.
         */
        if (ferror(fp)) {
            fclose(fp);
            free(buf);
            return NULL;               /* I/O error */
        }
        if (got == 0)
            break;                     /* end of file */
    }

    fclose(fp);
    buf[len] = '\0';
    if (returned_len)
        *returned_len = (int)len;

    /* Trim the allocation; if that fails the larger buffer is still valid. */
    shrunk = realloc(buf, len + 1);
    return shrunk ? shrunk : buf;
}
168
/*
 * Read the target of a symbolic link into a freshly allocated,
 * NUL-terminated string which the caller must free(). Returns NULL if
 * readlink fails. Exits the program if the working buffer cannot be
 * allocated.
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    size_t bufsize = 0;

    for (;;) {
        char *bigger;
        ssize_t ret;

        bufsize = bufsize * 5 / 4 + 1024;
        bigger = realloc(buf, bufsize);
        if (!bigger) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }
        buf = bigger;

        ret = readlink(filename, buf, bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        if ((size_t)ret < bufsize) {
            /*
             * Success: the link text fitted with room to spare, so it
             * cannot have been truncated. Terminate it first, then try
             * to trim the allocation; if trimming fails the original
             * buffer is still valid, so return that instead.
             */
            char *shrunk;

            buf[ret] = '\0';
            shrunk = realloc(buf, (size_t)ret + 1);
            return shrunk ? shrunk : buf;
        }

        /* The buffer was filled completely: go round again, bigger. */
    }
}
202
203static struct pidset get_processes(void)
204{
205 struct dirent *de;
206 struct pidset ret;
207 DIR *d;
208
209 pidset_init(&ret);
210
211 d = opendir("/proc");
212 if (!d) {
213 perror("/proc: open\n");
214 exit(1);
215 }
216 while ((de = readdir(d)) != NULL) {
217 int pid;
218 char *cmdline, *status, *exe;
219 int cmdlinelen;
220 const char **argv;
221 char filename[256];
222 struct procdata *proc;
223
224 const char *name = de->d_name;
225 if (name[strspn(name, "0123456789")])
226 continue;
227
228 /*
229 * The filename is numeric, i.e. we've found a pid. Try to
230 * retrieve all the information we want about it.
231 *
232 * We expect this will fail every so often for random reasons,
233 * e.g. if the pid has disappeared between us fetching a list
234 * of them and trying to read their command lines. In that
235 * situation, we won't bother reporting errors: we'll just
236 * drop this pid and silently move on to the next one.
237 */
238 pid = atoi(name);
239 assert(pid >= 0 && pid < PIDMAX);
240
241 sprintf(filename, "/proc/%d/cmdline", pid);
242 if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL)
243 continue;
244
245 sprintf(filename, "/proc/%d/status", pid);
246 if ((status = get_contents(filename, NULL)) == NULL) {
247 free(cmdline);
248 continue;
249 }
250
251 sprintf(filename, "/proc/%d/exe", pid);
b5fccf05 252 exe = get_link_dest(filename);
253 /* This may fail, if the process isn't ours, but we continue
254 * anyway. */
1f7c0ae1 255
256 /*
257 * Now we've got all our raw data out of /proc. Process it
258 * into the internal representation we're going to use in the
259 * process-selection logic.
260 */
261 proc = (struct procdata *)malloc(sizeof(struct procdata));
262 if (!proc) {
263 fprintf(stderr, "pid: out of memory\n");
264 exit(1);
265 }
266 proc->pid = pid;
267 proc->exe = exe;
268
269 /*
270 * cmdline contains a list of NUL-terminated strings. Scan
271 * them to get the argv pointers.
272 */
273 {
274 const char *p;
275 int nargs;
276
277 /* Count the arguments. */
278 nargs = 0;
279 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
280 nargs++;
281
282 /* Allocate space for the pointers. */
283 argv = (const char **)malloc((nargs+1) * sizeof(char *));
284 proc->argv = argv;
285 if (!argv) {
286 fprintf(stderr, "pid: out of memory\n");
287 exit(1);
288 }
289
290 /* Store the pointers. */
291 proc->argc = 0;
292 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
293 argv[proc->argc++] = p;
294
295 /* Trailing NULL to match standard argv lists, just in case. */
296 assert(proc->argc == nargs);
297 argv[proc->argc] = NULL;
298 }
299
300 /*
301 * Scan status for the uid and the parent pid. This file
302 * contains a list of \n-terminated lines of text.
303 */
304 {
305 const char *p;
306 int got_ppid = 0, got_uid = 0;
307
308 p = status;
309 while (p && *p) {
310 if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1)
311 got_ppid = 1;
312 if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1)
313 got_uid = 1;
314
315 /*
316 * Advance to next line.
317 */
318 p = strchr(p, '\n');
319 if (p) p++;
320 }
321
322 if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */
323 free(cmdline);
324 free(exe);
325 free(status);
326 free(argv);
327 free(proc);
328 continue;
329 }
330 }
331
332 /*
333 * If we get here, we've got everything we need. Add the
334 * process to the list of things we can usefully work
335 * with.
336 */
337 procs[pid] = proc;
338 pidset_add(&ret, pid);
339 }
340 closedir(d);
341
342 return ret;
343}
344
345static const struct procdata *get_proc(int pid)
346{
347 assert(pid >= 0 && pid < PIDMAX);
348 assert(procs[pid]);
349 return procs[pid];
350}
351
352/* ----------------------------------------------------------------------
353 * Logic to pick out the set of processes we care about.
354 */
355
/*
 * Decide whether 'basename' names a known script interpreter.
 *
 * Returns 1 if so, and sets *stop_opt to the option prefix after which
 * the interpreter's next argument is no longer a script name (NULL if
 * it has no such option). Returns 0, leaving *stop_opt untouched,
 * for any other name.
 */
static int is_an_interpreter(const char *basename, const char **stop_opt)
{
    static const struct {
        const char *name;
        const char *stop;
    } known[] = {
        { "perl",   "-e" },
        { "ruby",   "-e" },
        { "python", "-c" },
        { "bash",   "-c" },
        { "sh",     "-c" },
        { "dash",   "-c" },
        { "rep",    NULL },
        { "lua",    NULL },
        { "java",   NULL },
    };
    size_t k;

    for (k = 0; k < sizeof known / sizeof known[0]; k++) {
        if (strcmp(basename, known[k].name) == 0) {
            *stop_opt = known[k].stop;
            return 1;
        }
    }
    return 0;
}
378
/*
 * Return a pointer to the final component of 'path': the text after
 * its last '/', or the whole string if it contains no '/'. The result
 * points into 'path' itself.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    return last_slash ? last_slash + 1 : path;
}
393
/*
 * Work out where in a process's argument list the command 'cmd'
 * appears.
 *
 * Returns 0 if the basename of argv[0] (ignoring a leading '-', as
 * used to mark login shells) is 'cmd'. If argv[0] is instead a known
 * interpreter, returns the index of the first non-option argument when
 * that argument's basename is 'cmd'. Returns -1 if there is no match.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *stop_opt;
    const char *prog = pid_argv[0];
    size_t stop_len;
    int idx;

    if (prog[0] == '-')
        prog++;                        /* skip indicator of login shells */
    prog = find_basename(prog);

    /* Direct hit: argv[0] itself is the command we want. */
    if (strcmp(prog, cmd) == 0)
        return 0;

    /* Otherwise only an interpreter running a script can match. */
    if (!is_an_interpreter(prog, &stop_opt))
        return -1;

    /*
     * Skip the interpreter's options, giving up if one of them means
     * the next non-option argument is not a script name (e.g. sh -c,
     * perl -e).
     */
    stop_len = stop_opt ? strlen(stop_opt) : 0;
    for (idx = 1; idx < pid_argc && pid_argv[idx][0] == '-'; idx++) {
        if (stop_opt && strncmp(pid_argv[idx], stop_opt, stop_len) == 0)
            return -1;                 /* no match */
    }

    if (idx < pid_argc && strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;

    return -1;                         /* no match */
}
431
/*
 * Like strcmp, but cope with NULL inputs by making them compare
 * identical to each other and before any non-null string.
 *
 * Returns zero if both are NULL or the strings are equal, a negative
 * value if 'a' sorts before 'b', and a positive value otherwise.
 */
static int strnullcmp(const char *a, const char *b)
{
    if (a == NULL || b == NULL) {
        /* NULL ranks lowest, so a NULL 'a' must give a negative result. */
        return (a != NULL) - (b != NULL);
    }
    return strcmp(a, b);
}
443
/*
 * Compare two NULL-terminated lists of strings lexicographically.
 *
 * Returns zero if the lists are identical. Otherwise the sign follows
 * strcmp on the first pair of differing strings; if one list is a
 * strict prefix of the other, the shorter list sorts first (negative
 * result when 'a' is the shorter one).
 */
static int argcmp(const char *const *a, const char *const *b)
{
    for (; *a && *b; a++, b++) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
    }

    /* At least one list has ended; whichever ended first is the smaller. */
    return (*a != NULL) - (*b != NULL);
}
456
457static struct pidset filter_out_self(struct pidset in)
458{
459 /*
460 * Discard our own pid from a set. (Generally we won't want to
461 * return ourself from any search.)
462 */
463 struct pidset ret;
464 int pid;
465 int our_pid = getpid();
466
467 pidset_init(&ret);
468 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
469 if (pid != our_pid)
470 pidset_add(&ret, pid);
471 }
472 return ret;
473}
474
d1bc4fef 475static struct pidset filter_by_uid(struct pidset in, int uid)
476{
477 /*
478 * Return only those processes with a given uid.
479 */
480 struct pidset ret;
481 int pid;
482
483 pidset_init(&ret);
484 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
485 const struct procdata *proc = get_proc(pid);
486 if (proc->uid == uid)
487 pidset_add(&ret, pid);
488 }
489 return ret;
490}
491
1f7c0ae1 492static struct pidset filter_by_command(struct pidset in, const char **words)
493{
494 /*
495 * Look for processes matching the user-supplied command name and
496 * subsequent arguments.
497 */
498 struct pidset ret;
499 int pid;
500
501 pidset_init(&ret);
502 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
503 const struct procdata *proc = get_proc(pid);
504 int i, j;
505
506 if (!proc->argv || proc->argc < 1)
507 goto no_match;
508
509 /* Find the command, whether it's a binary or a script. */
510 i = find_command(proc->argc, proc->argv, words[0]);
511 if (i < 0)
512 goto no_match;
513
514 /* Now check that subsequent arguments match. */
515 for (j = 1; words[j]; j++)
516 if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j]))
517 goto no_match;
518
519 /* If we get here, we have a match! */
520 pidset_add(&ret, pid);
521
522 no_match:;
523 }
524 return ret;
525}
526
527static struct pidset filter_out_forks(struct pidset in)
528{
529 /*
530 * Discard any process whose parent is also in our remaining match
531 * set and looks sufficiently like it for us to decide this one's
532 * an uninteresting fork (e.g. of a shell script executing a
533 * complex pipeline).
534 */
535 struct pidset ret;
536 int pid;
537
538 pidset_init(&ret);
539 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
540 const struct procdata *proc = get_proc(pid);
541
542 if (pidset_in(&in, proc->ppid)) {
543 /* The parent is in our set too. Is it similar? */
544 const struct procdata *parent = get_proc(proc->ppid);
b5fccf05 545 if (!strnullcmp(parent->exe, proc->exe) &&
1f7c0ae1 546 !argcmp(parent->argv, proc->argv)) {
547 /* Yes; don't list it. */
548 continue;
549 }
550 }
551
552 pidset_add(&ret, pid);
553 }
554 return ret;
555}
556
557/* ----------------------------------------------------------------------
558 * Main program.
559 */
560
/* Help text printed in response to --help. */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " -U report pids of any user, not just ours\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/* Write the help text to standard output. */
void usage(void)
{
    printf("%s", usagemsg);
}
573
/* Licence text printed in response to --licence or --license. */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/* Write the licence text to standard output. */
void licence(void)
{
    printf("%s", licencemsg);
}
601
/*
 * Print the program's version to standard output.
 *
 * The revision is taken from the text between the ':' and the closing
 * '$' of the SVN_REV keyword string. If the string contains no ':',
 * the version is reported as unknown.
 */
void version(void) {
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *start, *end;

    strcpy(rev, SVN_REV);

    start = strchr(rev, ':');
    if (start == NULL) {
        printf("pid: unknown version\n");
        return;
    }

    /* Step past the colon and any whitespace following it. */
    start++;
    while (*start && isspace((unsigned char)*start))
        start++;

    /* Cut the string off at the closing '$', if there is one. */
    end = strchr(start, '$');
    if (end != NULL)
        *end = '\0';

    printf("pid revision %s\n", start);
}
620
621int main(int argc, char **argv)
622{
623 const char **searchwords;
624 int nsearchwords;
d1bc4fef 625 int all = 0, all_uids = 0;
1f7c0ae1 626 int doing_opts = 1;
627
628 /*
629 * Allocate enough space in 'searchwords' that we could shovel the
630 * whole of our argv into it if we had to. Then we won't have to
631 * worry about it later.
632 */
633 searchwords = (const char **)malloc((argc+1) * sizeof(const char *));
634 nsearchwords = 0;
635
636 /*
637 * Parse the command line.
638 */
639 while (--argc > 0) {
640 char *p = *++argv;
641 if (doing_opts && *p == '-') {
642 if (!strcmp(p, "-a") || !strcmp(p, "--all")) {
643 all = 1;
d1bc4fef 644 } else if (!strcmp(p, "-U") || !strcmp(p, "--all-uids")) {
645 all_uids = 1;
1f7c0ae1 646 } else if (!strcmp(p, "--version")) {
647 version();
648 return 0;
649 } else if (!strcmp(p, "--help")) {
650 usage();
651 return 0;
652 } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) {
653 licence();
654 return 0;
655 } else if (!strcmp(p, "--")) {
656 doing_opts = 0;
657 } else {
658 fprintf(stderr, "pid: unrecognised option '%s'\n", p);
659 return 1;
660 }
661 } else {
662 searchwords[nsearchwords++] = p;
663 doing_opts = 0; /* further optionlike args become search terms */
664 }
665 }
666
667 if (!nsearchwords) {
668 fprintf(stderr, "pid: expected a command to search for; "
669 "type 'pid --help' for help\n");
670 return 1;
671 }
672 searchwords[nsearchwords] = NULL; /* terminate list */
673
674 {
675 struct pidset procs;
d1bc4fef 676 int uid, pid, npids;
1f7c0ae1 677 /*
678 * Construct our list of processes.
679 */
680 procs = get_processes();
d1bc4fef 681 uid = getuid();
682 if (uid > 0 && !all_uids)
683 procs = filter_by_uid(procs, uid);
1f7c0ae1 684 procs = filter_out_self(procs);
685 procs = filter_by_command(procs, searchwords);
686 if (!all)
687 procs = filter_out_forks(procs);
688
689 /*
690 * Output.
691 */
692 npids = pidset_size(&procs);
693 if (npids == 0) {
694 printf("NONE\n");
695 } else if (all) {
696 const char *sep = "";
697 for (pid = pidset_first(&procs); pid >= 0;
698 pid = pidset_next(&procs)) {
699 printf("%s%d", sep, pid);
700 sep = " ";
701 }
702 putchar('\n');
703 } else {
704 if (npids == 1) {
705 printf("%d\n", pidset_first(&procs));
706 } else {
707 printf("MULTIPLE\n");
708 }
709 }
710 }
711
712 return 0;
713}