Cope with processes owned by someone other than us (for which
[sgt/utils] / pid / pid.c
CommitLineData
/*
 * pid - find the pid of a process given its command name
 *
 * Same basic idea as Debian's "pidof", in that you type 'pid command'
 * and it finds a process running that command and gives you the pid;
 * but souped up with various pragmatic features such as recognising
 * well known interpreters (so you can search for, say, 'pid
 * script.sh' as well as 'pid bash' and have it do what you meant).
 *
 * Currently tested only on Linux using /proc directly, but I've tried
 * to set it up so that the logic of what processes to choose is
 * separated from the mechanism used to iterate over processes and
 * find their command lines.
 */
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <assert.h>
20#include <ctype.h>
21
22#include <sys/types.h>
23#include <dirent.h>
24#include <unistd.h>
25
/* Number of elements in a true array (not a pointer). */
#define lenof(x) (sizeof((x))/sizeof(*(x)))

/* Exclusive upper bound on the pids this program can represent. */
#define PIDMAX 32768

/* ----------------------------------------------------------------------
 * General-purpose code for storing a set of process ids, testing
 * membership, and iterating over them. Since pids have a very limited
 * range, we just do this as a giant bitmask.
 */

/*
 * Number of bits used in each element of procbits[]. unsigned long is
 * guaranteed to be at least this wide; where it is wider, the extra
 * high bits are left clear.
 */
#define WORDBITS 32

struct pidset {
    /* Bit (pid % WORDBITS) of word (pid / WORDBITS) is set iff pid is
     * a member. */
    unsigned long procbits[PIDMAX/WORDBITS];
    /* Iteration cursor: the lowest pid not yet examined by
     * pidset_first() / pidset_next(). */
    int next;
};

/*
 * Make *p the empty set and reset its iteration cursor.
 */
static void pidset_init(struct pidset *p)
{
    size_t i;

    for (i = 0; i < lenof(p->procbits); i++)
        p->procbits[i] = 0UL;
    p->next = 0;
}

/*
 * Add pid to the set. pid must lie in [0, PIDMAX).
 */
static void pidset_add(struct pidset *p, int pid)
{
    assert(pid >= 0 && pid < PIDMAX);
    /*
     * The shifted constant must be unsigned long. With a plain int,
     * shifting into bit 31 is undefined, and in practice the negative
     * result sign-extends into the upper half of a 64-bit word, which
     * then makes pidset_size() overcount.
     */
    p->procbits[pid / WORDBITS] |= 1UL << (pid % WORDBITS);
}

/*
 * Return nonzero iff pid is in the set. pid must lie in [0, PIDMAX).
 */
static int pidset_in(const struct pidset *p, int pid)
{
    assert(pid >= 0 && pid < PIDMAX);
    return ((p->procbits[pid / WORDBITS] >> (pid % WORDBITS)) & 1UL) != 0;
}

/*
 * Return the number of pids in the set.
 */
static int pidset_size(const struct pidset *p)
{
    size_t word;
    int count = 0;

    for (word = 0; word < lenof(p->procbits); word++) {
        unsigned long mask = p->procbits[word];
        while (mask != 0) {
            count += (int)(mask & 1UL);
            mask >>= 1;
        }
    }

    return count;
}

/*
 * Shared iteration step: return the smallest member >= p->next and
 * advance the cursor just past it, or return -1 if there is none.
 */
static int pidset_step(struct pidset *p)
{
    size_t word = (size_t)(p->next / WORDBITS);
    int bit = p->next % WORDBITS;

    /* Skip words that have no set bit at or above the cursor. */
    while (word < lenof(p->procbits) && (p->procbits[word] >> bit) == 0) {
        word++;
        bit = 0;
        p->next = (int)(WORDBITS * word);
    }

    if (word >= lenof(p->procbits))
        return -1;

    /* This word is known to contain a set bit at position >= bit. */
    while (!((p->procbits[word] >> bit) & 1UL)) {
        bit++;
        p->next = (int)(WORDBITS * word) + bit;
    }

    assert(bit < WORDBITS);
    return p->next++;
}

/*
 * Start iterating: return the smallest pid in the set, or -1 if the
 * set is empty.
 */
static int pidset_first(struct pidset *p)
{
    p->next = 0;
    return pidset_step(p);
}

/*
 * Continue iterating: return the next pid in increasing order, or -1
 * when the set is exhausted.
 */
static int pidset_next(struct pidset *p)
{
    return pidset_step(p);
}
110
/* ----------------------------------------------------------------------
 * Code to scan the list of processes and retrieve all the information
 * we'll want about each one. This may in future be conditional on the
 * OS's local mechanism for finding that information (i.e. if we want
 * to run on kernels that don't provide Linux-style /proc).
 */
117
118struct procdata {
119 int pid, ppid, uid;
120 int argc;
121 const char *const *argv;
122 const char *exe;
123};
124static struct procdata *procs[PIDMAX];
125
/*
 * Read the whole of a file into a freshly allocated buffer.
 *
 *   filename      path of the file to read (opened in binary mode)
 *   returned_len  if non-NULL, receives the number of bytes read on
 *                 success; left untouched on failure
 *
 * Returns a malloc'ed buffer containing the file's bytes followed by
 * one extra '\0', or NULL if the file could not be opened or a read
 * error occurred. The caller owns the buffer and must free() it.
 * Running out of memory while growing the buffer is fatal: a message
 * is printed and the program exits with status 1.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    char *buf = NULL;
    char *shrunk;
    int bufsize = 0;
    int len = 0;
    int failed;

    FILE *fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    while (1) {
        size_t nread;

        if (len >= bufsize) {
            bufsize = len * 5 / 4 + 4096;
            buf = realloc(buf, (size_t)bufsize);
            if (!buf) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
        }

        nread = fread(buf + len, 1, (size_t)(bufsize - len), fp);
        if (nread == 0)
            break;                     /* end of file, or an error */
        len += (int)nread;
    }

    /*
     * fread() returns an unsigned count and reports both EOF and
     * failure as 0, so the stream's error flag is the only way to
     * tell a genuine I/O error from the end of the file.
     */
    failed = ferror(fp);
    fclose(fp);
    if (failed) {
        free(buf);
        return NULL;                   /* I/O error */
    }

    if (returned_len)
        *returned_len = len;

    /*
     * The last fread() was asked for at least one byte, so len is
     * strictly less than bufsize and there is already room for the
     * terminator. Trimming the buffer is therefore optional: if the
     * shrinking realloc fails we just keep the larger buffer.
     */
    buf[len] = '\0';
    shrunk = realloc(buf, (size_t)len + 1);
    return shrunk ? shrunk : buf;
}
166
/*
 * Read the target of a symbolic link.
 *
 *   filename  path of the symbolic link to read
 *
 * Returns a malloc'ed, NUL-terminated copy of the link text, or NULL
 * if readlink() fails. The caller owns the result and must free() it.
 * Running out of memory while growing the buffer is fatal: a message
 * is printed and the program exits with status 1.
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    int bufsize = 0;

    while (1) {
        ssize_t ret;
        char *shrunk;

        bufsize = bufsize * 5 / 4 + 1024;
        buf = realloc(buf, (size_t)bufsize);
        if (!buf) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }

        ret = readlink(filename, buf, (size_t)bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        if (ret < bufsize) {
            /*
             * Success! readlink() used less than the whole buffer, so
             * the text is complete and there is room for the
             * terminator. Trimming is optional: if the shrinking
             * realloc fails, return the larger buffer instead.
             */
            buf[ret] = '\0';
            shrunk = realloc(buf, (size_t)ret + 1);
            return shrunk ? shrunk : buf;
        }

        /* The buffer was filled completely, so the text may have been
         * truncated. Go round again with a bigger one. */
    }
}
200
201static struct pidset get_processes(void)
202{
203 struct dirent *de;
204 struct pidset ret;
205 DIR *d;
206
207 pidset_init(&ret);
208
209 d = opendir("/proc");
210 if (!d) {
211 perror("/proc: open\n");
212 exit(1);
213 }
214 while ((de = readdir(d)) != NULL) {
215 int pid;
216 char *cmdline, *status, *exe;
217 int cmdlinelen;
218 const char **argv;
219 char filename[256];
220 struct procdata *proc;
221
222 const char *name = de->d_name;
223 if (name[strspn(name, "0123456789")])
224 continue;
225
226 /*
227 * The filename is numeric, i.e. we've found a pid. Try to
228 * retrieve all the information we want about it.
229 *
230 * We expect this will fail every so often for random reasons,
231 * e.g. if the pid has disappeared between us fetching a list
232 * of them and trying to read their command lines. In that
233 * situation, we won't bother reporting errors: we'll just
234 * drop this pid and silently move on to the next one.
235 */
236 pid = atoi(name);
237 assert(pid >= 0 && pid < PIDMAX);
238
239 sprintf(filename, "/proc/%d/cmdline", pid);
240 if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL)
241 continue;
242
243 sprintf(filename, "/proc/%d/status", pid);
244 if ((status = get_contents(filename, NULL)) == NULL) {
245 free(cmdline);
246 continue;
247 }
248
249 sprintf(filename, "/proc/%d/exe", pid);
b5fccf05 250 exe = get_link_dest(filename);
251 /* This may fail, if the process isn't ours, but we continue
252 * anyway. */
1f7c0ae1 253
254 /*
255 * Now we've got all our raw data out of /proc. Process it
256 * into the internal representation we're going to use in the
257 * process-selection logic.
258 */
259 proc = (struct procdata *)malloc(sizeof(struct procdata));
260 if (!proc) {
261 fprintf(stderr, "pid: out of memory\n");
262 exit(1);
263 }
264 proc->pid = pid;
265 proc->exe = exe;
266
267 /*
268 * cmdline contains a list of NUL-terminated strings. Scan
269 * them to get the argv pointers.
270 */
271 {
272 const char *p;
273 int nargs;
274
275 /* Count the arguments. */
276 nargs = 0;
277 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
278 nargs++;
279
280 /* Allocate space for the pointers. */
281 argv = (const char **)malloc((nargs+1) * sizeof(char *));
282 proc->argv = argv;
283 if (!argv) {
284 fprintf(stderr, "pid: out of memory\n");
285 exit(1);
286 }
287
288 /* Store the pointers. */
289 proc->argc = 0;
290 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
291 argv[proc->argc++] = p;
292
293 /* Trailing NULL to match standard argv lists, just in case. */
294 assert(proc->argc == nargs);
295 argv[proc->argc] = NULL;
296 }
297
298 /*
299 * Scan status for the uid and the parent pid. This file
300 * contains a list of \n-terminated lines of text.
301 */
302 {
303 const char *p;
304 int got_ppid = 0, got_uid = 0;
305
306 p = status;
307 while (p && *p) {
308 if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1)
309 got_ppid = 1;
310 if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1)
311 got_uid = 1;
312
313 /*
314 * Advance to next line.
315 */
316 p = strchr(p, '\n');
317 if (p) p++;
318 }
319
320 if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */
321 free(cmdline);
322 free(exe);
323 free(status);
324 free(argv);
325 free(proc);
326 continue;
327 }
328 }
329
330 /*
331 * If we get here, we've got everything we need. Add the
332 * process to the list of things we can usefully work
333 * with.
334 */
335 procs[pid] = proc;
336 pidset_add(&ret, pid);
337 }
338 closedir(d);
339
340 return ret;
341}
342
343static const struct procdata *get_proc(int pid)
344{
345 assert(pid >= 0 && pid < PIDMAX);
346 assert(procs[pid]);
347 return procs[pid];
348}
349
/* ----------------------------------------------------------------------
 * Logic to pick out the set of processes we care about.
 */
353
/*
 * Return 1 if basename is exactly the name of one of the interpreters
 * we know about, and 0 otherwise.
 */
static int is_an_interpreter(const char *basename)
{
    static const char *const known[] = {
        "perl", "python", "ruby", "rep", "bash",
        "sh", "dash", "lua", "java",
    };
    size_t i;

    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strcmp(basename, known[i]) == 0)
            return 1;
    }
    return 0;
}
369
/*
 * Return a pointer to the part of path that follows its last '/', or
 * path itself if it contains no '/'. The result points into path; a
 * path ending in '/' yields the empty string.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL)
        return path;
    return last_slash + 1;
}
384
/*
 * Decide whether a process's command line is running 'cmd'.
 *
 *   pid_argc, pid_argv  the process's argument list; pid_argv[0] is
 *                       dereferenced, so there must be at least one
 *   cmd                 the command name being searched for
 *
 * Returns the index in pid_argv of the word whose basename equals cmd:
 * 0 if argv[0] itself matches, or the index of the first non-option
 * argument if argv[0] is a known interpreter and that argument
 * matches. Returns -1 if there is no match.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *name = pid_argv[0];
    int idx;

    /* A leading '-' marks a login shell; it is not part of the name. */
    if (name[0] == '-')
        name++;
    name = find_basename(name);

    /* argv[0] matches the supplied command name. */
    if (strcmp(name, cmd) == 0)
        return 0;

    /* Otherwise only an interpreter can be running the command for us. */
    if (!is_an_interpreter(name))
        return -1;

    /* Skip interpreter options to reach the program it's running. */
    for (idx = 1; idx < pid_argc && pid_argv[idx][0] == '-'; idx++)
        ;
    if (idx >= pid_argc)
        return -1;

    /* See if _that_ matches the command name. */
    if (strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;

    return -1;                         /* no match */
}
414
/*
 * Like strcmp, but cope with NULL inputs by making them compare
 * identical to each other and before any non-null string.
 *
 * Returns negative, zero or positive as a sorts before, equal to or
 * after b.
 */
static int strnullcmp(const char *a, const char *b)
{
    if (!a || !b) {
        /*
         * At least one side is NULL. "NULL sorts first" means a NULL
         * a against a non-null b must give a negative result, which
         * is (a != NULL) - (b != NULL), not the other way round.
         */
        return (a != NULL) - (b != NULL);
    }
    return strcmp(a, b);
}
426
/*
 * Compare two NULL-terminated argument lists word by word, in the
 * manner of strcmp: the first differing pair of words decides the
 * result, and if one list is a proper prefix of the other then the
 * shorter list sorts first.
 *
 * Returns negative, zero or positive as a sorts before, equal to or
 * after b.
 */
static int argcmp(const char *const *a, const char *const *b)
{
    while (*a && *b) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
        a++;
        b++;
    }

    /*
     * At least one list has ended. Whichever still has words left is
     * the greater, so the sign must be (a has more) - (b has more) to
     * agree with the strcmp ordering used for the individual words.
     */
    return (*a != NULL) - (*b != NULL);
}
439
440static struct pidset filter_out_self(struct pidset in)
441{
442 /*
443 * Discard our own pid from a set. (Generally we won't want to
444 * return ourself from any search.)
445 */
446 struct pidset ret;
447 int pid;
448 int our_pid = getpid();
449
450 pidset_init(&ret);
451 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
452 if (pid != our_pid)
453 pidset_add(&ret, pid);
454 }
455 return ret;
456}
457
458static struct pidset filter_by_command(struct pidset in, const char **words)
459{
460 /*
461 * Look for processes matching the user-supplied command name and
462 * subsequent arguments.
463 */
464 struct pidset ret;
465 int pid;
466
467 pidset_init(&ret);
468 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
469 const struct procdata *proc = get_proc(pid);
470 int i, j;
471
472 if (!proc->argv || proc->argc < 1)
473 goto no_match;
474
475 /* Find the command, whether it's a binary or a script. */
476 i = find_command(proc->argc, proc->argv, words[0]);
477 if (i < 0)
478 goto no_match;
479
480 /* Now check that subsequent arguments match. */
481 for (j = 1; words[j]; j++)
482 if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j]))
483 goto no_match;
484
485 /* If we get here, we have a match! */
486 pidset_add(&ret, pid);
487
488 no_match:;
489 }
490 return ret;
491}
492
493static struct pidset filter_out_forks(struct pidset in)
494{
495 /*
496 * Discard any process whose parent is also in our remaining match
497 * set and looks sufficiently like it for us to decide this one's
498 * an uninteresting fork (e.g. of a shell script executing a
499 * complex pipeline).
500 */
501 struct pidset ret;
502 int pid;
503
504 pidset_init(&ret);
505 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
506 const struct procdata *proc = get_proc(pid);
507
508 if (pidset_in(&in, proc->ppid)) {
509 /* The parent is in our set too. Is it similar? */
510 const struct procdata *parent = get_proc(proc->ppid);
b5fccf05 511 if (!strnullcmp(parent->exe, proc->exe) &&
1f7c0ae1 512 !argcmp(parent->argv, proc->argv)) {
513 /* Yes; don't list it. */
514 continue;
515 }
516 }
517
518 pidset_add(&ret, pid);
519 }
520 return ret;
521}
522
/* ----------------------------------------------------------------------
 * Main program.
 */
526
/* Help text printed in response to --help. */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/*
 * Write the help text to standard output.
 */
void usage(void)
{
    /* sizeof counts the terminating NUL, which must not be written. */
    fwrite(usagemsg, 1, sizeof(usagemsg) - 1, stdout);
}
538
/* Licence text printed in response to --licence / --license. */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/*
 * Write the licence text to standard output.
 */
void licence(void)
{
    /* sizeof counts the terminating NUL, which must not be written. */
    fwrite(licencemsg, 1, sizeof(licencemsg) - 1, stdout);
}
566
/*
 * Print the program's revision to standard output.
 *
 * The revision is taken from the SVN_REV keyword string: whatever lies
 * between the first ':' (plus any following whitespace) and the next
 * '$'. If the string contains no ':', as in the unexpanded form
 * "$Revision$", we report an unknown version instead.
 */
void version(void)
{
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)] = SVN_REV;
    char *value = strchr(rev, ':');

    if (value == NULL) {
        printf("pid: unknown version\n");
        return;
    }

    /* Step over the colon and any whitespace after it. */
    value++;
    while (*value && isspace((unsigned char)*value))
        value++;

    /* Cut the string at the closing '$', if there is one; otherwise
     * this just rewrites the existing terminator. */
    value[strcspn(value, "$")] = '\0';

    printf("pid revision %s\n", value);
}
585
586int main(int argc, char **argv)
587{
588 const char **searchwords;
589 int nsearchwords;
590 int all = 0;
591 int doing_opts = 1;
592
593 /*
594 * Allocate enough space in 'searchwords' that we could shovel the
595 * whole of our argv into it if we had to. Then we won't have to
596 * worry about it later.
597 */
598 searchwords = (const char **)malloc((argc+1) * sizeof(const char *));
599 nsearchwords = 0;
600
601 /*
602 * Parse the command line.
603 */
604 while (--argc > 0) {
605 char *p = *++argv;
606 if (doing_opts && *p == '-') {
607 if (!strcmp(p, "-a") || !strcmp(p, "--all")) {
608 all = 1;
609 } else if (!strcmp(p, "--version")) {
610 version();
611 return 0;
612 } else if (!strcmp(p, "--help")) {
613 usage();
614 return 0;
615 } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) {
616 licence();
617 return 0;
618 } else if (!strcmp(p, "--")) {
619 doing_opts = 0;
620 } else {
621 fprintf(stderr, "pid: unrecognised option '%s'\n", p);
622 return 1;
623 }
624 } else {
625 searchwords[nsearchwords++] = p;
626 doing_opts = 0; /* further optionlike args become search terms */
627 }
628 }
629
630 if (!nsearchwords) {
631 fprintf(stderr, "pid: expected a command to search for; "
632 "type 'pid --help' for help\n");
633 return 1;
634 }
635 searchwords[nsearchwords] = NULL; /* terminate list */
636
637 {
638 struct pidset procs;
639 int pid, npids;
640 /*
641 * Construct our list of processes.
642 */
643 procs = get_processes();
644 procs = filter_out_self(procs);
645 procs = filter_by_command(procs, searchwords);
646 if (!all)
647 procs = filter_out_forks(procs);
648
649 /*
650 * Output.
651 */
652 npids = pidset_size(&procs);
653 if (npids == 0) {
654 printf("NONE\n");
655 } else if (all) {
656 const char *sep = "";
657 for (pid = pidset_first(&procs); pid >= 0;
658 pid = pidset_next(&procs)) {
659 printf("%s%d", sep, pid);
660 sep = " ";
661 }
662 putchar('\n');
663 } else {
664 if (npids == 1) {
665 printf("%d\n", pidset_first(&procs));
666 } else {
667 printf("MULTIPLE\n");
668 }
669 }
670 }
671
672 return 0;
673}