Avoid treating 'sh -c emacs' as an instance of emacs, by recognising
[sgt/utils] / pid / pid.c
CommitLineData
1f7c0ae1 1/*
2 * pid - find the pid of a process given its command name
3 *
4 * Same basic idea as Debian's "pidof", in that you type 'pid command'
5 * and it finds a process running that command and gives you the pid;
6 * but souped up with various pragmatic features such as recognising
7 * well known interpreters (so you can search for, say, 'pid
8 * script.sh' as well as 'pid bash' and have it do what you meant).
9 *
10 * Currently tested only on Linux using /proc directly, but I've tried
11 * to set it up so that the logic of what processes to choose is
12 * separated from the mechanism used to iterate over processes and
13 * find their command lines.
14 */
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <assert.h>
20#include <ctype.h>
21
22#include <sys/types.h>
23#include <dirent.h>
24#include <unistd.h>
25
26#define lenof(x) (sizeof((x))/sizeof(*(x)))
27
28#define PIDMAX 32768
29
30/* ----------------------------------------------------------------------
31 * General-purpose code for storing a set of process ids, testing
32 * membership, and iterating over them. Since pids have a very limited
33 * range, we just do this as a giant bitmask.
34 */
35
36#define WORDBITS 32
37
38struct pidset {
39 unsigned long procbits[PIDMAX/WORDBITS];
40 int next;
41};
42
43static void pidset_init(struct pidset *p)
44{
45 int i;
0a1d8c87 46 for (i = 0; i < (int)lenof(p->procbits); i++)
1f7c0ae1 47 p->procbits[i] = 0L;
48}
49
50static void pidset_add(struct pidset *p, int pid)
51{
52 assert(pid >= 0 && pid < PIDMAX);
53 p->procbits[pid / WORDBITS] |= 1 << (pid % WORDBITS);
54}
55
56static int pidset_in(const struct pidset *p, int pid)
57{
58 assert(pid >= 0 && pid < PIDMAX);
59 return (p->procbits[pid / WORDBITS] & (1 << (pid % WORDBITS))) != 0;
60}
61
62static int pidset_size(const struct pidset *p)
63{
64 int word, count;
65
66 count = 0;
0a1d8c87 67 for (word = 0; word < (int)lenof(p->procbits); word++) {
1f7c0ae1 68 unsigned long mask = p->procbits[word];
69 while (mask > 0) {
70 count += (mask & 1);
71 mask >>= 1;
72 }
73 }
74
75 return count;
76}
77
78static int pidset_step(struct pidset *p)
79{
80 int word = p->next / WORDBITS;
81 int bit = p->next % WORDBITS;
0a1d8c87 82 while (word < (int)lenof(p->procbits) && p->procbits[word] >> bit == 0) {
1f7c0ae1 83 word++;
84 bit = 0;
85 p->next = WORDBITS * word + bit;
86 }
87
0a1d8c87 88 if (word >= (int)lenof(p->procbits))
1f7c0ae1 89 return -1;
90
91 while (!((p->procbits[word] >> bit) & 1)) {
92 bit++;
93 p->next = WORDBITS * word + bit;
94 }
95
96 assert(bit < WORDBITS);
97 return p->next++;
98}
99
100static int pidset_first(struct pidset *p)
101{
102 p->next = 0;
103 return pidset_step(p);
104}
105
/*
 * Continue an iteration started by pidset_first: return the next pid
 * in increasing order, or -1 once the set is exhausted.
 */
static int pidset_next(struct pidset *p)
{
    int following = pidset_step(p);

    return following;
}
110
111/* ----------------------------------------------------------------------
112 * Code to scan the list of processes and retrieve all the information
113 * we'll want about each one. This may in future be conditional on the
114 * OS's local mechanism for finding that information (i.e. if we want
115 * to run on kernels that don't provide Linux-style /proc).
116 */
117
118struct procdata {
119 int pid, ppid, uid;
120 int argc;
121 const char *const *argv;
122 const char *exe;
123};
124static struct procdata *procs[PIDMAX];
125
/*
 * Read the whole of a file into a freshly allocated buffer.
 *
 * filename:     path of the file to read.
 * returned_len: if non-NULL, receives the number of bytes read (not
 *               counting the terminator added below).
 *
 * Returns a malloc'ed buffer holding the file contents followed by a
 * single '\0', which the caller must free; or NULL if the file could
 * not be opened or a read error occurred. Running out of memory while
 * growing the buffer is fatal: a message is printed and the program
 * exits.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    char *buf = NULL;
    char *tmp;
    size_t len = 0;
    size_t bufsize = 0;
    FILE *fp;

    fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    for (;;) {
        size_t got;

        if (len >= bufsize) {
            bufsize = len + len / 4 + 4096;
            tmp = realloc(buf, bufsize);
            if (!tmp) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
            buf = tmp;
        }

        got = fread(buf + len, 1, bufsize - len, fp);
        len += got;

        /*
         * fread reports failure through a short count, never through
         * a negative return value, so ask the stream whether the
         * shortfall was an error or just end of file.
         */
        if (ferror(fp)) {
            fclose(fp);
            free(buf);
            return NULL;               /* I/O error */
        }
        if (got == 0)
            break;                     /* end of file */
    }

    fclose(fp);

    if (returned_len)
        *returned_len = (int)len;

    /*
     * The loop only ever reads while len < bufsize, and stops after a
     * read of zero bytes, so there is always room for the terminator.
     */
    buf[len] = '\0';

    /* Trim the allocation; if that fails, the larger buffer is still valid. */
    tmp = realloc(buf, len + 1);
    return tmp ? tmp : buf;
}
166
/*
 * Read the target of a symbolic link.
 *
 * Returns a malloc'ed, NUL-terminated copy of the link text, which the
 * caller must free; or NULL if readlink fails. Running out of memory
 * while growing the buffer is fatal: a message is printed and the
 * program exits.
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    size_t bufsize = 0;

    for (;;) {
        char *tmp;
        ssize_t ret;

        bufsize = bufsize + bufsize / 4 + 1024;
        tmp = realloc(buf, bufsize);
        if (!tmp) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }
        buf = tmp;

        ret = readlink(filename, buf, bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        if ((size_t)ret < bufsize) {
            /*
             * Success! The link text did not fill the buffer, so we
             * have all of it and there is room for the terminator.
             */
            buf[ret] = '\0';

            /* Trim the allocation; keep the larger buffer if that fails. */
            tmp = realloc(buf, (size_t)ret + 1);
            return tmp ? tmp : buf;
        }

        /* The buffer was filled, so the text may be truncated. Go round again. */
    }
}
200
201static struct pidset get_processes(void)
202{
203 struct dirent *de;
204 struct pidset ret;
205 DIR *d;
206
207 pidset_init(&ret);
208
209 d = opendir("/proc");
210 if (!d) {
211 perror("/proc: open\n");
212 exit(1);
213 }
214 while ((de = readdir(d)) != NULL) {
215 int pid;
216 char *cmdline, *status, *exe;
217 int cmdlinelen;
218 const char **argv;
219 char filename[256];
220 struct procdata *proc;
221
222 const char *name = de->d_name;
223 if (name[strspn(name, "0123456789")])
224 continue;
225
226 /*
227 * The filename is numeric, i.e. we've found a pid. Try to
228 * retrieve all the information we want about it.
229 *
230 * We expect this will fail every so often for random reasons,
231 * e.g. if the pid has disappeared between us fetching a list
232 * of them and trying to read their command lines. In that
233 * situation, we won't bother reporting errors: we'll just
234 * drop this pid and silently move on to the next one.
235 */
236 pid = atoi(name);
237 assert(pid >= 0 && pid < PIDMAX);
238
239 sprintf(filename, "/proc/%d/cmdline", pid);
240 if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL)
241 continue;
242
243 sprintf(filename, "/proc/%d/status", pid);
244 if ((status = get_contents(filename, NULL)) == NULL) {
245 free(cmdline);
246 continue;
247 }
248
249 sprintf(filename, "/proc/%d/exe", pid);
b5fccf05 250 exe = get_link_dest(filename);
251 /* This may fail, if the process isn't ours, but we continue
252 * anyway. */
1f7c0ae1 253
254 /*
255 * Now we've got all our raw data out of /proc. Process it
256 * into the internal representation we're going to use in the
257 * process-selection logic.
258 */
259 proc = (struct procdata *)malloc(sizeof(struct procdata));
260 if (!proc) {
261 fprintf(stderr, "pid: out of memory\n");
262 exit(1);
263 }
264 proc->pid = pid;
265 proc->exe = exe;
266
267 /*
268 * cmdline contains a list of NUL-terminated strings. Scan
269 * them to get the argv pointers.
270 */
271 {
272 const char *p;
273 int nargs;
274
275 /* Count the arguments. */
276 nargs = 0;
277 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
278 nargs++;
279
280 /* Allocate space for the pointers. */
281 argv = (const char **)malloc((nargs+1) * sizeof(char *));
282 proc->argv = argv;
283 if (!argv) {
284 fprintf(stderr, "pid: out of memory\n");
285 exit(1);
286 }
287
288 /* Store the pointers. */
289 proc->argc = 0;
290 for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1)
291 argv[proc->argc++] = p;
292
293 /* Trailing NULL to match standard argv lists, just in case. */
294 assert(proc->argc == nargs);
295 argv[proc->argc] = NULL;
296 }
297
298 /*
299 * Scan status for the uid and the parent pid. This file
300 * contains a list of \n-terminated lines of text.
301 */
302 {
303 const char *p;
304 int got_ppid = 0, got_uid = 0;
305
306 p = status;
307 while (p && *p) {
308 if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1)
309 got_ppid = 1;
310 if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1)
311 got_uid = 1;
312
313 /*
314 * Advance to next line.
315 */
316 p = strchr(p, '\n');
317 if (p) p++;
318 }
319
320 if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */
321 free(cmdline);
322 free(exe);
323 free(status);
324 free(argv);
325 free(proc);
326 continue;
327 }
328 }
329
330 /*
331 * If we get here, we've got everything we need. Add the
332 * process to the list of things we can usefully work
333 * with.
334 */
335 procs[pid] = proc;
336 pidset_add(&ret, pid);
337 }
338 closedir(d);
339
340 return ret;
341}
342
343static const struct procdata *get_proc(int pid)
344{
345 assert(pid >= 0 && pid < PIDMAX);
346 assert(procs[pid]);
347 return procs[pid];
348}
349
350/* ----------------------------------------------------------------------
351 * Logic to pick out the set of processes we care about.
352 */
353
/*
 * Decide whether a program name is one of the interpreters we know
 * about.
 *
 * If it is, return 1 and set *stop_opt to the option prefix which
 * means "the program text is given on the command line, so there is
 * no script file name" (or to NULL if that interpreter has no such
 * option in our table). Otherwise return 0 and leave *stop_opt alone.
 */
static int is_an_interpreter(const char *basename, const char **stop_opt)
{
    static const struct {
        const char *name;
        const char *stop;
    } known[] = {
        { "perl",   "-e" },
        { "ruby",   "-e" },
        { "python", "-c" },
        { "bash",   "-c" },
        { "sh",     "-c" },
        { "dash",   "-c" },
        { "rep",    NULL },
        { "lua",    NULL },
        { "java",   NULL },
    };
    size_t k;

    for (k = 0; k < sizeof known / sizeof known[0]; k++) {
        if (strcmp(basename, known[k].name) == 0) {
            *stop_opt = known[k].stop;
            return 1;
        }
    }
    return 0;
}
376
/*
 * Return a pointer to the final component of a pathname: the text
 * after the last '/', or the whole string if it contains no '/'.
 * The result points into 'path'; nothing is allocated.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL)
        return path;
    return last_slash + 1;
}
391
/*
 * Work out whether a process's command line is running 'cmd', and if
 * so where in the command line the command name appears.
 *
 * Returns 0 if the base name of argv[0] (minus any leading '-' that
 * marks a login shell) is 'cmd'; returns the index i >= 1 of the
 * script argument if argv[0] is a known interpreter and the base name
 * of its first non-option argument is 'cmd'; returns -1 otherwise.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *stop_opt;
    const char *name = pid_argv[0];
    int idx;

    if (name[0] == '-')
        name++;                        /* skip indicator of login shells */
    name = find_basename(name);

    /* The simple case: argv[0] itself is the command we want. */
    if (strcmp(name, cmd) == 0)
        return 0;

    /* Otherwise we only carry on if argv[0] is an interpreter. */
    if (!is_an_interpreter(name, &stop_opt))
        return -1;                     /* no match */

    /*
     * Step over the interpreter's options to reach the script name.
     * If we meet an option that makes the next non-option argument
     * something other than a script name (e.g. sh -c, perl -e), then
     * there is no script to match against.
     */
    for (idx = 1; idx < pid_argc && pid_argv[idx][0] == '-'; idx++) {
        if (stop_opt != NULL &&
            strncmp(pid_argv[idx], stop_opt, strlen(stop_opt)) == 0)
            return -1;                 /* no match */
    }

    if (idx < pid_argc && strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;

    return -1;                         /* no match */
}
429
/*
 * Like strcmp, but cope with NULL inputs by making them compare
 * identical to each other and before any non-null string.
 *
 * Returns zero if both are NULL or both are equal strings; a negative
 * value if 'a' sorts before 'b' (including a == NULL, b != NULL); a
 * positive value otherwise.
 */
static int strnullcmp(const char *a, const char *b)
{
    if (!a || !b) {
        /* NULL counts as 0 and non-NULL as 1, so NULL sorts first. */
        return (a != NULL) - (b != NULL);
    }
    return strcmp(a, b);
}
441
/*
 * Compare two NULL-terminated argument lists word by word.
 *
 * Returns zero if the lists are identical. Otherwise the result has
 * the sign of strcmp on the first differing pair of words; and if one
 * list is a proper prefix of the other, the shorter list sorts first
 * (negative if 'a' is the shorter, positive if 'b' is).
 */
static int argcmp(const char *const *a, const char *const *b)
{
    for (; *a && *b; a++, b++) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
    }

    /* At least one list has ended; the one that ended sorts first. */
    return (*a != NULL) - (*b != NULL);
}
454
455static struct pidset filter_out_self(struct pidset in)
456{
457 /*
458 * Discard our own pid from a set. (Generally we won't want to
459 * return ourself from any search.)
460 */
461 struct pidset ret;
462 int pid;
463 int our_pid = getpid();
464
465 pidset_init(&ret);
466 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
467 if (pid != our_pid)
468 pidset_add(&ret, pid);
469 }
470 return ret;
471}
472
d1bc4fef 473static struct pidset filter_by_uid(struct pidset in, int uid)
474{
475 /*
476 * Return only those processes with a given uid.
477 */
478 struct pidset ret;
479 int pid;
480
481 pidset_init(&ret);
482 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
483 const struct procdata *proc = get_proc(pid);
484 if (proc->uid == uid)
485 pidset_add(&ret, pid);
486 }
487 return ret;
488}
489
1f7c0ae1 490static struct pidset filter_by_command(struct pidset in, const char **words)
491{
492 /*
493 * Look for processes matching the user-supplied command name and
494 * subsequent arguments.
495 */
496 struct pidset ret;
497 int pid;
498
499 pidset_init(&ret);
500 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
501 const struct procdata *proc = get_proc(pid);
502 int i, j;
503
504 if (!proc->argv || proc->argc < 1)
505 goto no_match;
506
507 /* Find the command, whether it's a binary or a script. */
508 i = find_command(proc->argc, proc->argv, words[0]);
509 if (i < 0)
510 goto no_match;
511
512 /* Now check that subsequent arguments match. */
513 for (j = 1; words[j]; j++)
514 if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j]))
515 goto no_match;
516
517 /* If we get here, we have a match! */
518 pidset_add(&ret, pid);
519
520 no_match:;
521 }
522 return ret;
523}
524
525static struct pidset filter_out_forks(struct pidset in)
526{
527 /*
528 * Discard any process whose parent is also in our remaining match
529 * set and looks sufficiently like it for us to decide this one's
530 * an uninteresting fork (e.g. of a shell script executing a
531 * complex pipeline).
532 */
533 struct pidset ret;
534 int pid;
535
536 pidset_init(&ret);
537 for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) {
538 const struct procdata *proc = get_proc(pid);
539
540 if (pidset_in(&in, proc->ppid)) {
541 /* The parent is in our set too. Is it similar? */
542 const struct procdata *parent = get_proc(proc->ppid);
b5fccf05 543 if (!strnullcmp(parent->exe, proc->exe) &&
1f7c0ae1 544 !argcmp(parent->argv, proc->argv)) {
545 /* Yes; don't list it. */
546 continue;
547 }
548 }
549
550 pidset_add(&ret, pid);
551 }
552 return ret;
553}
554
555/* ----------------------------------------------------------------------
556 * Main program.
557 */
558
/* Help text, as printed in response to 'pid --help'. */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " -U report pids of any user, not just ours\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/* Write the help text to standard output. */
void usage(void)
{
    printf("%s", usagemsg);
}
571
/* Licence text, as printed in response to 'pid --licence'. */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/* Write the licence text to standard output. */
void licence(void)
{
    printf("%s", licencemsg);
}
599
/*
 * Print the program's version to standard output.
 *
 * The version is taken from a "$Revision$"-style keyword string: if it
 * contains a ':', the text after the colon (minus leading whitespace,
 * and up to the next '$') is reported as the revision; otherwise the
 * version is reported as unknown.
 */
void version(void) {
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *start, *end;

    strcpy(rev, SVN_REV);

    start = strchr(rev, ':');
    if (start == NULL) {
        /* Keyword was never expanded, so we have nothing to report. */
        printf("pid: unknown version\n");
        return;
    }

    /* Step past the colon and any whitespace after it. */
    start++;
    while (*start && isspace((unsigned char)*start))
        start++;

    /* Cut the string off at the closing '$', if there is one. */
    end = strchr(start, '$');
    if (end != NULL)
        *end = '\0';

    printf("pid revision %s\n", start);
}
618
619int main(int argc, char **argv)
620{
621 const char **searchwords;
622 int nsearchwords;
d1bc4fef 623 int all = 0, all_uids = 0;
1f7c0ae1 624 int doing_opts = 1;
625
626 /*
627 * Allocate enough space in 'searchwords' that we could shovel the
628 * whole of our argv into it if we had to. Then we won't have to
629 * worry about it later.
630 */
631 searchwords = (const char **)malloc((argc+1) * sizeof(const char *));
632 nsearchwords = 0;
633
634 /*
635 * Parse the command line.
636 */
637 while (--argc > 0) {
638 char *p = *++argv;
639 if (doing_opts && *p == '-') {
640 if (!strcmp(p, "-a") || !strcmp(p, "--all")) {
641 all = 1;
d1bc4fef 642 } else if (!strcmp(p, "-U") || !strcmp(p, "--all-uids")) {
643 all_uids = 1;
1f7c0ae1 644 } else if (!strcmp(p, "--version")) {
645 version();
646 return 0;
647 } else if (!strcmp(p, "--help")) {
648 usage();
649 return 0;
650 } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) {
651 licence();
652 return 0;
653 } else if (!strcmp(p, "--")) {
654 doing_opts = 0;
655 } else {
656 fprintf(stderr, "pid: unrecognised option '%s'\n", p);
657 return 1;
658 }
659 } else {
660 searchwords[nsearchwords++] = p;
661 doing_opts = 0; /* further optionlike args become search terms */
662 }
663 }
664
665 if (!nsearchwords) {
666 fprintf(stderr, "pid: expected a command to search for; "
667 "type 'pid --help' for help\n");
668 return 1;
669 }
670 searchwords[nsearchwords] = NULL; /* terminate list */
671
672 {
673 struct pidset procs;
d1bc4fef 674 int uid, pid, npids;
1f7c0ae1 675 /*
676 * Construct our list of processes.
677 */
678 procs = get_processes();
d1bc4fef 679 uid = getuid();
680 if (uid > 0 && !all_uids)
681 procs = filter_by_uid(procs, uid);
1f7c0ae1 682 procs = filter_out_self(procs);
683 procs = filter_by_command(procs, searchwords);
684 if (!all)
685 procs = filter_out_forks(procs);
686
687 /*
688 * Output.
689 */
690 npids = pidset_size(&procs);
691 if (npids == 0) {
692 printf("NONE\n");
693 } else if (all) {
694 const char *sep = "";
695 for (pid = pidset_first(&procs); pid >= 0;
696 pid = pidset_next(&procs)) {
697 printf("%s%d", sep, pid);
698 sep = " ";
699 }
700 putchar('\n');
701 } else {
702 if (npids == 1) {
703 printf("%d\n", pidset_first(&procs));
704 } else {
705 printf("MULTIPLE\n");
706 }
707 }
708 }
709
710 return 0;
711}