New utility: 'pid', a thing more or less similar in concept to
[sgt/utils] / pid / pid.c
1 /*
2 * pid - find the pid of a process given its command name
3 *
4 * Same basic idea as Debian's "pidof", in that you type 'pid command'
5 * and it finds a process running that command and gives you the pid;
6 * but souped up with various pragmatic features such as recognising
7 * well known interpreters (so you can search for, say, 'pid
8 * script.sh' as well as 'pid bash' and have it do what you meant).
9 *
10 * Currently tested only on Linux using /proc directly, but I've tried
11 * to set it up so that the logic of what processes to choose is
12 * separated from the mechanism used to iterate over processes and
13 * find their command lines.
14 */
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <assert.h>
20 #include <ctype.h>
21
22 #include <sys/types.h>
23 #include <dirent.h>
24 #include <unistd.h>
25
/* Number of elements in a true array object (not valid for pointers). */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))

/* Exclusive upper bound on the process ids this program can track. */
#define PIDMAX 32768
29
30 /* ----------------------------------------------------------------------
31 * General-purpose code for storing a set of process ids, testing
32 * membership, and iterating over them. Since pids have a very limited
33 * range, we just do this as a giant bitmask.
34 */
35
36 #define WORDBITS 32
37
38 struct pidset {
39 unsigned long procbits[PIDMAX/WORDBITS];
40 int next;
41 };
42
43 static void pidset_init(struct pidset *p)
44 {
45 int i;
46 for (i = 0; i < lenof(p->procbits); i++)
47 p->procbits[i] = 0L;
48 }
49
50 static void pidset_add(struct pidset *p, int pid)
51 {
52 assert(pid >= 0 && pid < PIDMAX);
53 p->procbits[pid / WORDBITS] |= 1 << (pid % WORDBITS);
54 }
55
56 static int pidset_in(const struct pidset *p, int pid)
57 {
58 assert(pid >= 0 && pid < PIDMAX);
59 return (p->procbits[pid / WORDBITS] & (1 << (pid % WORDBITS))) != 0;
60 }
61
62 static int pidset_size(const struct pidset *p)
63 {
64 int word, count;
65
66 count = 0;
67 for (word = 0; word < lenof(p->procbits); word++) {
68 unsigned long mask = p->procbits[word];
69 while (mask > 0) {
70 count += (mask & 1);
71 mask >>= 1;
72 }
73 }
74
75 return count;
76 }
77
78 static int pidset_step(struct pidset *p)
79 {
80 int word = p->next / WORDBITS;
81 int bit = p->next % WORDBITS;
82 while (word < lenof(p->procbits) && p->procbits[word] >> bit == 0) {
83 word++;
84 bit = 0;
85 p->next = WORDBITS * word + bit;
86 }
87
88 if (word >= lenof(p->procbits))
89 return -1;
90
91 while (!((p->procbits[word] >> bit) & 1)) {
92 bit++;
93 p->next = WORDBITS * word + bit;
94 }
95
96 assert(bit < WORDBITS);
97 return p->next++;
98 }
99
100 static int pidset_first(struct pidset *p)
101 {
102 p->next = 0;
103 return pidset_step(p);
104 }
105
/*
 * Continue an iteration started by pidset_first(): return the next pid
 * in ascending order, or -1 once the set is exhausted.
 */
static int pidset_next(struct pidset *p)
{
    int following = pidset_step(p);

    return following;
}
110
111 /* ----------------------------------------------------------------------
112 * Code to scan the list of processes and retrieve all the information
113 * we'll want about each one. This may in future be conditional on the
114 * OS's local mechanism for finding that information (i.e. if we want
115 * to run on kernels that don't provide Linux-style /proc).
116 */
117
118 struct procdata {
119 int pid, ppid, uid;
120 int argc;
121 const char *const *argv;
122 const char *exe;
123 };
124 static struct procdata *procs[PIDMAX];
125
/*
 * Read the entire contents of 'filename' into a newly allocated buffer.
 *
 * Returns the buffer, which the caller must free(), or NULL if the
 * file could not be opened or a read error occurred. The data may
 * contain embedded NUL bytes; its true length is stored through
 * 'returned_len' when that pointer is non-NULL. One extra NUL byte is
 * always appended after the data so the result can also be treated as
 * a C string.
 *
 * Running out of memory while growing the buffer is fatal: a message
 * is printed to stderr and the program exits with status 1.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    char *buf = NULL;
    char *shrunk;
    int bufsize = 0;
    int len = 0;
    int failed;
    FILE *fp = fopen(filename, "rb");

    if (!fp)
        return NULL;

    while (1) {
        size_t readret;

        /* Always keep at least one spare byte beyond 'len'. */
        if (len >= bufsize) {
            bufsize = len * 5 / 4 + 4096;
            buf = realloc(buf, bufsize);
            if (!buf) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
        }

        /*
         * fread() returns an unsigned count and never a negative
         * value, so a zero return means either end of file or an
         * error; ferror() below tells the two apart.
         */
        readret = fread(buf + len, 1, bufsize - len, fp);
        if (readret == 0)
            break;
        len += (int)readret;
    }

    failed = ferror(fp);
    fclose(fp);
    if (failed) {
        free(buf);
        return NULL;                   /* I/O error */
    }

    if (returned_len)
        *returned_len = len;

    /*
     * Trim the buffer to fit. The loop above guarantees bufsize > len,
     * so if the shrinking realloc should fail, the existing buffer
     * still has room for the terminator and we simply keep it.
     */
    shrunk = realloc(buf, len + 1);
    if (shrunk)
        buf = shrunk;
    buf[len] = '\0';
    return buf;
}
166
/*
 * Read the target of the symbolic link 'filename'.
 *
 * Returns a newly allocated, NUL-terminated string which the caller
 * must free(), or NULL if readlink() fails. Running out of memory
 * while growing the buffer is fatal (message on stderr, exit status 1).
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    char *shrunk;
    int bufsize = 0;
    ssize_t ret;

    while (1) {
        bufsize = bufsize * 5 / 4 + 1024;
        buf = realloc(buf, bufsize);
        if (!buf) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }

        ret = readlink(filename, buf, (size_t)bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        /*
         * readlink() does not NUL-terminate and gives no indication of
         * truncation, so a result that exactly fills the buffer might
         * be incomplete: only a strictly shorter one is known to be
         * the full link text. Otherwise go round again, bigger.
         */
        if (ret < bufsize)
            break;
    }

    /*
     * Trim to fit. Since ret < bufsize, the current buffer already has
     * room for the terminator, so if the shrinking realloc should
     * fail we just keep the buffer we have.
     */
    shrunk = realloc(buf, ret + 1);
    if (shrunk)
        buf = shrunk;
    buf[ret] = '\0';
    return buf;
}
200
201 static struct pidset get_processes(void)
202 {
203 struct dirent *de;
204 struct pidset ret;
205 DIR *d;
206
207 pidset_init(&ret);
208
209 d = opendir("/proc");
210 if (!d) {
211 perror("/proc: open\n");
212 exit(1);
213 }
214 while ((de = readdir(d)) != NULL) {
215 int pid;
216 char *cmdline, *status, *exe;
217 int cmdlinelen;
218 const char **argv;
219 char filename[256];
220 struct procdata *proc;
221
222 const char *name = de->d_name;
223 if (name[strspn(name, "0123456789")])
224 continue;
225
226 /*
227 * The filename is numeric, i.e. we've found a pid. Try to
228 * retrieve all the information we want about it.
229 *
230 * We expect this will fail every so often for random reasons,
231 * e.g. if the pid has disappeared between us fetching a list
232 * of them and trying to read their command lines. In that
233 * situation, we won't bother reporting errors: we'll just
234 * drop this pid and silently move on to the next one.
235 */
236 pid = atoi(name);
237 assert(pid >= 0 && pid < PIDMAX);
238
239 sprintf(filename, "/proc/%d/cmdline", pid);
240 if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL)
241 continue;
242
243 sprintf(filename, "/proc/%d/status", pid);
244 if ((status = get_contents(filename, NULL)) == NULL) {
245 free(cmdline);
246 continue;
247 }
248
249 sprintf(filename, "/proc/%d/exe", pid);
250 if ((exe = get_link_dest(filename)) == NULL) {
251 free(cmdline);
252 free(status);
253 continue;
254 }
255
256 /*
257 * Now we've got all our raw data out of /proc. Process it
258 * into the internal representation we're going to use in the
259 * process-selection logic.
260 */
261 proc = (struct procdata *)malloc(sizeof(struct procdata));
262 if (!proc) {
263 fprintf(stderr, "pid: out of memory\n");
264 exit(1);
265 }
266 proc->pid = pid;
267 proc->exe = exe;
268
269 /*
270 * cmdline contains a list of NUL-terminated strings. Scan
271 * them to get the argv pointers.
272 */
273 {
274 const char *p;
275 int nargs;
276
277 /* Count the arguments. */
278 nargs = 0;
279 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
280 nargs++;
281
282 /* Allocate space for the pointers. */
283 argv = (const char **)malloc((nargs+1) * sizeof(char *));
284 proc->argv = argv;
285 if (!argv) {
286 fprintf(stderr, "pid: out of memory\n");
287 exit(1);
288 }
289
290 /* Store the pointers. */
291 proc->argc = 0;
292 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
293 argv[proc->argc++] = p;
294
295 /* Trailing NULL to match standard argv lists, just in case. */
296 assert(proc->argc == nargs);
297 argv[proc->argc] = NULL;
298 }
299
300 /*
301 * Scan status for the uid and the parent pid. This file
302 * contains a list of \n-terminated lines of text.
303 */
304 {
305 const char *p;
306 int got_ppid = 0, got_uid = 0;
307
308 p = status;
309 while (p && *p) {
310 if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1)
311 got_ppid = 1;
312 if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1)
313 got_uid = 1;
314
315 /*
316 * Advance to next line.
317 */
318 p = strchr(p, '\n');
319 if (p) p++;
320 }
321
322 if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */
323 free(cmdline);
324 free(exe);
325 free(status);
326 free(argv);
327 free(proc);
328 continue;
329 }
330 }
331
332 /*
333 * If we get here, we've got everything we need. Add the
334 * process to the list of things we can usefully work
335 * with.
336 */
337 procs[pid] = proc;
338 pidset_add(&ret, pid);
339 }
340 closedir(d);
341
342 return ret;
343 }
344
345 static const struct procdata *get_proc(int pid)
346 {
347 assert(pid >= 0 && pid < PIDMAX);
348 assert(procs[pid]);
349 return procs[pid];
350 }
351
352 /* ----------------------------------------------------------------------
353 * Logic to pick out the set of processes we care about.
354 */
355
/*
 * Return 1 if 'basename' is exactly the name of one of the script
 * interpreters we know about, and 0 otherwise.
 */
static int is_an_interpreter(const char *basename)
{
    static const char *const known[] = {
        "perl", "python", "ruby", "rep", "bash",
        "sh", "dash", "lua", "java",
    };
    size_t i;

    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strcmp(basename, known[i]) == 0)
            return 1;
    }
    return 0;
}
371
/*
 * Return a pointer to the final component of 'path': the text after
 * the last '/', or the whole string if it contains no '/'. The result
 * points into 'path' itself and is empty if the path ends in '/'.
 */
static const char *find_basename(const char *path)
{
    const char *slash = strrchr(path, '/');

    if (slash == NULL)
        return path;
    return slash + 1;
}
386
/*
 * Decide whether a process's argument list is running command 'cmd'.
 *
 * Returns the index in pid_argv of the word that matched: 0 if the
 * basename of argv[0] is 'cmd' (ignoring a leading '-', which marks a
 * login shell), or i > 0 if argv[0] is a known interpreter and the
 * first argument not starting with '-' has basename 'cmd'. Returns -1
 * if there is no match. The caller must supply at least one argument.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *prog = pid_argv[0];
    int idx;

    if (prog[0] == '-')
        prog++;                        /* skip indicator of login shells */
    prog = find_basename(prog);

    /* Direct hit: argv[0] itself is the command we want. */
    if (strcmp(prog, cmd) == 0)
        return 0;

    /* Anything other than an interpreter has no second chance. */
    if (!is_an_interpreter(prog))
        return -1;

    /*
     * argv[0] is an interpreter program of some kind. Step over its
     * options to reach the program it's running, and see if _that_
     * matches the command name.
     */
    for (idx = 1; idx < pid_argc; idx++) {
        if (pid_argv[idx][0] != '-')
            break;
    }
    if (idx >= pid_argc)
        return -1;                     /* nothing but options */

    return strcmp(find_basename(pid_argv[idx]), cmd) == 0 ? idx : -1;
}
416
/*
 * Compare two NULL-terminated argument lists lexicographically.
 *
 * Returns 0 if the lists are identical. Otherwise the sign follows
 * strcmp() on the first pair of differing strings; if one list is a
 * proper prefix of the other, the shorter list compares less, so the
 * length tie-break agrees in direction with the element comparison.
 */
static int argcmp(const char *const *a, const char *const *b)
{
    for (; *a && *b; a++, b++) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
    }

    /* At least one list has ended; the one with words left is greater. */
    return (*a != NULL) - (*b != NULL);
}
429
430 static struct pidset filter_out_self(struct pidset in)
431 {
432 /*
433 * Discard our own pid from a set. (Generally we won't want to
434 * return ourself from any search.)
435 */
436 struct pidset ret;
437 int pid;
438 int our_pid = getpid();
439
440 pidset_init(&ret);
441 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
442 if (pid != our_pid)
443 pidset_add(&ret, pid);
444 }
445 return ret;
446 }
447
448 static struct pidset filter_by_command(struct pidset in, const char **words)
449 {
450 /*
451 * Look for processes matching the user-supplied command name and
452 * subsequent arguments.
453 */
454 struct pidset ret;
455 int pid;
456
457 pidset_init(&ret);
458 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
459 const struct procdata *proc = get_proc(pid);
460 int i, j;
461
462 if (!proc->argv || proc->argc < 1)
463 goto no_match;
464
465 /* Find the command, whether it's a binary or a script. */
466 i = find_command(proc->argc, proc->argv, words[0]);
467 if (i < 0)
468 goto no_match;
469
470 /* Now check that subsequent arguments match. */
471 for (j = 1; words[j]; j++)
472 if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j]))
473 goto no_match;
474
475 /* If we get here, we have a match! */
476 pidset_add(&ret, pid);
477
478 no_match:;
479 }
480 return ret;
481 }
482
483 static struct pidset filter_out_forks(struct pidset in)
484 {
485 /*
486 * Discard any process whose parent is also in our remaining match
487 * set and looks sufficiently like it for us to decide this one's
488 * an uninteresting fork (e.g. of a shell script executing a
489 * complex pipeline).
490 */
491 struct pidset ret;
492 int pid;
493
494 pidset_init(&ret);
495 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
496 const struct procdata *proc = get_proc(pid);
497
498 if (pidset_in(&in, proc->ppid)) {
499 /* The parent is in our set too. Is it similar? */
500 const struct procdata *parent = get_proc(proc->ppid);
501 if (!strcmp(parent->exe, proc->exe) &&
502 !argcmp(parent->argv, proc->argv)) {
503 /* Yes; don't list it. */
504 continue;
505 }
506 }
507
508 pidset_add(&ret, pid);
509 }
510 return ret;
511 }
512
513 /* ----------------------------------------------------------------------
514 * Main program.
515 */
516
/* Help text shown for 'pid --help'. */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/* Write the help text to standard output. */
void usage(void)
{
    fputs(usagemsg, stdout);
}
528
/* Full licence text shown for 'pid --licence'. */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/* Write the licence text to standard output. */
void licence(void)
{
    fputs(licencemsg, stdout);
}
556
/*
 * Print the program's version to standard output.
 *
 * The version is derived from a "$Revision$" keyword string: if the
 * string contains a colon, the text after it (minus leading
 * whitespace, and cut off at the closing '$') is reported as the
 * revision. If there is no colon, the version is reported as unknown.
 */
void version(void)
{
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *start, *end;

    strcpy(rev, SVN_REV);

    start = strchr(rev, ':');
    if (start == NULL) {
        printf("pid: unknown version\n");
        return;
    }

    /* Step past the colon and any whitespace following it. */
    start++;
    while (*start && isspace((unsigned char)*start))
        start++;

    /* Chop the string at the '$' that closes the keyword, if present. */
    end = strchr(start, '$');
    if (end != NULL)
        *end = '\0';

    printf("pid revision %s\n", start);
}
575
576 int main(int argc, char **argv)
577 {
578 const char **searchwords;
579 int nsearchwords;
580 int all = 0;
581 int doing_opts = 1;
582
583 /*
584 * Allocate enough space in 'searchwords' that we could shovel the
585 * whole of our argv into it if we had to. Then we won't have to
586 * worry about it later.
587 */
588 searchwords = (const char **)malloc((argc+1) * sizeof(const char *));
589 nsearchwords = 0;
590
591 /*
592 * Parse the command line.
593 */
594 while (--argc > 0) {
595 char *p = *++argv;
596 if (doing_opts && *p == '-') {
597 if (!strcmp(p, "-a") || !strcmp(p, "--all")) {
598 all = 1;
599 } else if (!strcmp(p, "--version")) {
600 version();
601 return 0;
602 } else if (!strcmp(p, "--help")) {
603 usage();
604 return 0;
605 } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) {
606 licence();
607 return 0;
608 } else if (!strcmp(p, "--")) {
609 doing_opts = 0;
610 } else {
611 fprintf(stderr, "pid: unrecognised option '%s'\n", p);
612 return 1;
613 }
614 } else {
615 searchwords[nsearchwords++] = p;
616 doing_opts = 0; /* further optionlike args become search terms */
617 }
618 }
619
620 if (!nsearchwords) {
621 fprintf(stderr, "pid: expected a command to search for; "
622 "type 'pid --help' for help\n");
623 return 1;
624 }
625 searchwords[nsearchwords] = NULL; /* terminate list */
626
627 {
628 struct pidset procs;
629 int pid, npids;
630 /*
631 * Construct our list of processes.
632 */
633 procs = get_processes();
634 procs = filter_out_self(procs);
635 procs = filter_by_command(procs, searchwords);
636 if (!all)
637 procs = filter_out_forks(procs);
638
639 /*
640 * Output.
641 */
642 npids = pidset_size(&procs);
643 if (npids == 0) {
644 printf("NONE\n");
645 } else if (all) {
646 const char *sep = "";
647 for (pid = pidset_first(&procs); pid >= 0;
648 pid = pidset_next(&procs)) {
649 printf("%s%d", sep, pid);
650 sep = " ";
651 }
652 putchar('\n');
653 } else {
654 if (npids == 1) {
655 printf("%d\n", pidset_first(&procs));
656 } else {
657 printf("MULTIPLE\n");
658 }
659 }
660 }
661
662 return 0;
663 }