1f7c0ae1 |
1 | /* |
2 | * pid - find the pid of a process given its command name |
3 | * |
4 | * Same basic idea as Debian's "pidof", in that you type 'pid command' |
5 | * and it finds a process running that command and gives you the pid; |
6 | * but souped up with various pragmatic features such as recognising |
7 | * well known interpreters (so you can search for, say, 'pid |
8 | * script.sh' as well as 'pid bash' and have it do what you meant). |
9 | * |
10 | * Currently tested only on Linux using /proc directly, but I've tried |
11 | * to set it up so that the logic of what processes to choose is |
12 | * separated from the mechanism used to iterate over processes and |
13 | * find their command lines. |
14 | */ |
15 | |
16 | #include <stdio.h> |
17 | #include <stdlib.h> |
18 | #include <string.h> |
19 | #include <assert.h> |
20 | #include <ctype.h> |
21 | |
22 | #include <sys/types.h> |
23 | #include <dirent.h> |
24 | #include <unistd.h> |
25 | |
/* Number of elements in a true array (not a pointer). */
#define lenof(x) (sizeof((x))/sizeof(*(x)))

/* Exclusive upper bound on the process ids we can represent. */
#define PIDMAX 32768

/* ----------------------------------------------------------------------
 * General-purpose code for storing a set of process ids, testing
 * membership, and iterating over them. Since pids have a very limited
 * range, we just do this as a giant bitmask.
 */

/*
 * Number of bits of each procbits[] element that we actually use.
 * unsigned long is guaranteed to have at least this many; on
 * platforms where it is wider, the upper bits simply stay zero.
 */
#define WORDBITS 32

struct pidset {
    unsigned long procbits[PIDMAX/WORDBITS]; /* bit (pid % WORDBITS) of
                                              * word (pid / WORDBITS) is
                                              * set iff pid is a member */
    int next;                  /* iteration cursor: lowest pid not yet
                                * examined by pidset_step() */
};

/*
 * Make *p the empty set and rewind its iteration cursor.
 */
static void pidset_init(struct pidset *p)
{
    size_t i;
    for (i = 0; i < lenof(p->procbits); i++)
        p->procbits[i] = 0UL;
    p->next = 0;
}

/*
 * Add pid to the set. pid must be in the range [0, PIDMAX).
 */
static void pidset_add(struct pidset *p, int pid)
{
    assert(pid >= 0 && pid < PIDMAX);
    /*
     * The shifted constant must be unsigned long. A plain int '1'
     * shifted by 31 overflows (undefined behaviour), and in practice
     * yields a negative int which sign-extends when converted to a
     * 64-bit unsigned long, setting 32 spurious high bits that
     * pidset_size() would then count.
     */
    p->procbits[pid / WORDBITS] |= 1UL << (pid % WORDBITS);
}

/*
 * Return nonzero iff pid is a member of the set. pid must be in the
 * range [0, PIDMAX).
 */
static int pidset_in(const struct pidset *p, int pid)
{
    assert(pid >= 0 && pid < PIDMAX);
    return (p->procbits[pid / WORDBITS] & (1UL << (pid % WORDBITS))) != 0;
}

/*
 * Return the number of pids in the set.
 */
static int pidset_size(const struct pidset *p)
{
    size_t word;
    int count;

    count = 0;
    for (word = 0; word < lenof(p->procbits); word++) {
        unsigned long mask = p->procbits[word];
        while (mask > 0) {
            count += (int)(mask & 1);
            mask >>= 1;
        }
    }

    return count;
}

/*
 * Return the smallest member of the set that is >= p->next, and
 * advance p->next past it; or return -1 if there is no such member.
 */
static int pidset_step(struct pidset *p)
{
    const int nwords = (int)lenof(p->procbits);
    int word = p->next / WORDBITS;
    int bit = p->next % WORDBITS;

    /* Skip words that have no set bit at or above our position. */
    while (word < nwords && p->procbits[word] >> bit == 0) {
        word++;
        bit = 0;
        p->next = WORDBITS * word + bit;
    }

    if (word >= nwords)
        return -1;

    /*
     * The loop above guarantees that this word has a set bit at or
     * above 'bit', so this scan terminates within the word.
     */
    while (!((p->procbits[word] >> bit) & 1)) {
        bit++;
        p->next = WORDBITS * word + bit;
    }

    assert(bit < WORDBITS);
    return p->next++;
}

/*
 * Begin an iteration over the set: return its smallest member, or -1
 * if the set is empty.
 */
static int pidset_first(struct pidset *p)
{
    p->next = 0;
    return pidset_step(p);
}

/*
 * Continue an iteration begun with pidset_first(): return the next
 * member in increasing order, or -1 when there are no more.
 */
static int pidset_next(struct pidset *p)
{
    return pidset_step(p);
}
110 | |
111 | /* ---------------------------------------------------------------------- |
112 | * Code to scan the list of processes and retrieve all the information |
113 | * we'll want about each one. This may in future be conditional on the |
114 | * OS's local mechanism for finding that information (i.e. if we want |
115 | * to run on kernels that don't provide Linux-style /proc). |
116 | */ |
117 | |
118 | struct procdata { |
119 | int pid, ppid, uid; |
120 | int argc; |
121 | const char *const *argv; |
122 | const char *exe; |
123 | }; |
124 | static struct procdata *procs[PIDMAX]; |
125 | |
/*
 * Read the entire contents of a file into a freshly allocated buffer.
 *
 *   filename      path of the file to read (opened in binary mode)
 *   returned_len  if non-NULL, receives the number of bytes read, not
 *                 counting the NUL terminator we append
 *
 * Returns a malloc'ed buffer holding the file's data followed by a
 * single NUL byte, which the caller must free(); or NULL if the file
 * could not be opened or a read error occurred. Running out of memory
 * while growing the buffer is fatal: we print a message and exit(1).
 */
static char *get_contents(const char *filename, int *returned_len)
{
    int len;
    char *buf = NULL;
    int bufsize = 0;

    FILE *fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    len = 0;
    while (1) {
        size_t readret;

        /* Keep at least one spare byte, so the final NUL always fits. */
        if (len >= bufsize) {
            bufsize = len * 5 / 4 + 4096;
            buf = realloc(buf, bufsize);
            if (!buf) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
        }

        readret = fread(buf + len, 1, bufsize - len, fp);
        if (ferror(fp)) {
            /*
             * fread returns an unsigned count and so can never signal
             * failure with a negative value; the stream's error flag
             * is the only way to tell an I/O error from end of file.
             */
            fclose(fp);
            free(buf);
            return NULL;               /* I/O error */
        } else if (readret == 0) {
            char *shrunk;

            fclose(fp);
            if (returned_len)
                *returned_len = len;
            /*
             * Trim the buffer to size. If the shrink fails we simply
             * keep the larger buffer, which is known to have room for
             * the terminator because len < bufsize here.
             */
            shrunk = realloc(buf, len + 1);
            if (shrunk)
                buf = shrunk;
            buf[len] = '\0';
            return buf;
        } else {
            len += (int)readret;
        }
    }
}
166 | |
/*
 * Read the target of a symbolic link into a freshly allocated string.
 *
 *   filename  path of the symbolic link to read
 *
 * Returns a malloc'ed, NUL-terminated copy of the link text, which
 * the caller must free(); or NULL if readlink() fails. Running out of
 * memory while growing the buffer is fatal: we print a message and
 * exit(1).
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    int bufsize = 0;

    while (1) {
        ssize_t ret;

        bufsize = bufsize * 5 / 4 + 1024;
        buf = realloc(buf, bufsize);
        if (!buf) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }

        ret = readlink(filename, buf, (size_t)bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        if (ret < bufsize) {
            /*
             * Success! We've read the full link text. readlink does
             * not NUL-terminate, so we do that ourselves. Trim the
             * buffer to fit; if the shrink fails, keep the larger
             * buffer, which has room for the terminator because
             * ret < bufsize.
             */
            char *shrunk = realloc(buf, ret + 1);
            if (shrunk)
                buf = shrunk;
            buf[ret] = '\0';
            return buf;
        }

        /*
         * The link text filled the whole buffer, so it may have been
         * truncated. Go round again with a bigger one.
         */
    }
}
200 | |
201 | static struct pidset get_processes(void) |
202 | { |
203 | struct dirent *de; |
204 | struct pidset ret; |
205 | DIR *d; |
206 | |
207 | pidset_init(&ret); |
208 | |
209 | d = opendir("/proc"); |
210 | if (!d) { |
211 | perror("/proc: open\n"); |
212 | exit(1); |
213 | } |
214 | while ((de = readdir(d)) != NULL) { |
215 | int pid; |
216 | char *cmdline, *status, *exe; |
217 | int cmdlinelen; |
218 | const char **argv; |
219 | char filename[256]; |
220 | struct procdata *proc; |
221 | |
222 | const char *name = de->d_name; |
223 | if (name[strspn(name, "0123456789")]) |
224 | continue; |
225 | |
226 | /* |
227 | * The filename is numeric, i.e. we've found a pid. Try to |
228 | * retrieve all the information we want about it. |
229 | * |
230 | * We expect this will fail every so often for random reasons, |
231 | * e.g. if the pid has disappeared between us fetching a list |
232 | * of them and trying to read their command lines. In that |
233 | * situation, we won't bother reporting errors: we'll just |
234 | * drop this pid and silently move on to the next one. |
235 | */ |
236 | pid = atoi(name); |
237 | assert(pid >= 0 && pid < PIDMAX); |
238 | |
239 | sprintf(filename, "/proc/%d/cmdline", pid); |
240 | if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL) |
241 | continue; |
242 | |
243 | sprintf(filename, "/proc/%d/status", pid); |
244 | if ((status = get_contents(filename, NULL)) == NULL) { |
245 | free(cmdline); |
246 | continue; |
247 | } |
248 | |
249 | sprintf(filename, "/proc/%d/exe", pid); |
b5fccf05 |
250 | exe = get_link_dest(filename); |
251 | /* This may fail, if the process isn't ours, but we continue |
252 | * anyway. */ |
1f7c0ae1 |
253 | |
254 | /* |
255 | * Now we've got all our raw data out of /proc. Process it |
256 | * into the internal representation we're going to use in the |
257 | * process-selection logic. |
258 | */ |
259 | proc = (struct procdata *)malloc(sizeof(struct procdata)); |
260 | if (!proc) { |
261 | fprintf(stderr, "pid: out of memory\n"); |
262 | exit(1); |
263 | } |
264 | proc->pid = pid; |
265 | proc->exe = exe; |
266 | |
267 | /* |
268 | * cmdline contains a list of NUL-terminated strings. Scan |
269 | * them to get the argv pointers. |
270 | */ |
271 | { |
272 | const char *p; |
273 | int nargs; |
274 | |
275 | /* Count the arguments. */ |
276 | nargs = 0; |
277 | for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1) |
278 | nargs++; |
279 | |
280 | /* Allocate space for the pointers. */ |
281 | argv = (const char **)malloc((nargs+1) * sizeof(char *)); |
282 | proc->argv = argv; |
283 | if (!argv) { |
284 | fprintf(stderr, "pid: out of memory\n"); |
285 | exit(1); |
286 | } |
287 | |
288 | /* Store the pointers. */ |
289 | proc->argc = 0; |
290 | for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1) |
291 | argv[proc->argc++] = p; |
292 | |
293 | /* Trailing NULL to match standard argv lists, just in case. */ |
294 | assert(proc->argc == nargs); |
295 | argv[proc->argc] = NULL; |
296 | } |
297 | |
298 | /* |
299 | * Scan status for the uid and the parent pid. This file |
300 | * contains a list of \n-terminated lines of text. |
301 | */ |
302 | { |
303 | const char *p; |
304 | int got_ppid = 0, got_uid = 0; |
305 | |
306 | p = status; |
307 | while (p && *p) { |
308 | if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1) |
309 | got_ppid = 1; |
310 | if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1) |
311 | got_uid = 1; |
312 | |
313 | /* |
314 | * Advance to next line. |
315 | */ |
316 | p = strchr(p, '\n'); |
317 | if (p) p++; |
318 | } |
319 | |
320 | if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */ |
321 | free(cmdline); |
322 | free(exe); |
323 | free(status); |
324 | free(argv); |
325 | free(proc); |
326 | continue; |
327 | } |
328 | } |
329 | |
330 | /* |
331 | * If we get here, we've got everything we need. Add the |
332 | * process to the list of things we can usefully work |
333 | * with. |
334 | */ |
335 | procs[pid] = proc; |
336 | pidset_add(&ret, pid); |
337 | } |
338 | closedir(d); |
339 | |
340 | return ret; |
341 | } |
342 | |
343 | static const struct procdata *get_proc(int pid) |
344 | { |
345 | assert(pid >= 0 && pid < PIDMAX); |
346 | assert(procs[pid]); |
347 | return procs[pid]; |
348 | } |
349 | |
350 | /* ---------------------------------------------------------------------- |
351 | * Logic to pick out the set of processes we care about. |
352 | */ |
353 | |
/*
 * Return 1 if basename is exactly the name of one of the script
 * interpreters we know about, and 0 otherwise.
 */
static int is_an_interpreter(const char *basename)
{
    static const char *const interpreters[] = {
        "perl", "python", "ruby", "rep", "bash",
        "sh", "dash", "lua", "java",
        NULL
    };
    const char *const *name;

    for (name = interpreters; *name; name++) {
        if (strcmp(basename, *name) == 0)
            return 1;
    }
    return 0;
}
369 | |
/*
 * Return a pointer to the final component of path: the text after
 * its last '/', or the whole of path if it contains no '/' at all.
 * The result points into path itself, and is an empty string if path
 * ends with a '/'.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash)
        return last_slash + 1;
    return path;
}
384 | |
/*
 * Work out whether a process's command line is running 'cmd'.
 *
 *   pid_argc, pid_argv  the process's command line; the caller must
 *                       supply at least one word
 *   cmd                 the command name being searched for
 *
 * Returns the index in pid_argv of the word that matched: 0 if the
 * program name itself matches, or the index of the script name if
 * the program is a known interpreter running a script called cmd.
 * Returns -1 if there is no match.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *prog = pid_argv[0];
    int idx;

    if (prog[0] == '-')
        prog++;                /* skip indicator of login shells */
    prog = find_basename(prog);

    /* The simple case: argv[0] is the command we're looking for. */
    if (strcmp(prog, cmd) == 0)
        return 0;

    if (!is_an_interpreter(prog))
        return -1;             /* no match */

    /*
     * argv[0] is an interpreter program of some kind. Step over any
     * option words to find the program it's running, and see if
     * _that_ matches the command name.
     */
    for (idx = 1; idx < pid_argc; idx++) {
        if (pid_argv[idx][0] != '-')
            break;
    }

    if (idx < pid_argc && strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;

    return -1;                 /* no match */
}
414 | |
b5fccf05 |
/*
 * Like strcmp, but cope with NULL inputs by making them compare
 * identical to each other and before any non-null string.
 *
 * Returns zero if a and b are both NULL or are equal strings, a
 * negative value if a sorts before b, and a positive value if a
 * sorts after b.
 */
static int strnullcmp(const char *a, const char *b)
{
    if (!a || !b) {
        /*
         * NULL sorts first, so a NULL 'a' against a real 'b' must
         * give a negative result, matching strcmp's sign convention.
         */
        return (a != NULL) - (b != NULL);
    }
    return strcmp(a, b);
}
426 | |
1f7c0ae1 |
/*
 * Compare two NULL-terminated lists of strings, word by word.
 *
 * Returns zero if the lists are identical. Otherwise the result has
 * the sign of strcmp applied to the first pair of words that differ;
 * if one list is a proper prefix of the other, the shorter list
 * sorts first (negative if a is the shorter, positive if b is).
 */
static int argcmp(const char *const *a, const char *const *b)
{
    while (*a && *b) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
        a++;
        b++;
    }

    /*
     * At least one list has run out. Whichever still has words left
     * is the longer one and sorts later, as with strcmp on strings.
     */
    return (*a != NULL) - (*b != NULL);
}
439 | |
440 | static struct pidset filter_out_self(struct pidset in) |
441 | { |
442 | /* |
443 | * Discard our own pid from a set. (Generally we won't want to |
444 | * return ourself from any search.) |
445 | */ |
446 | struct pidset ret; |
447 | int pid; |
448 | int our_pid = getpid(); |
449 | |
450 | pidset_init(&ret); |
451 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
452 | if (pid != our_pid) |
453 | pidset_add(&ret, pid); |
454 | } |
455 | return ret; |
456 | } |
457 | |
458 | static struct pidset filter_by_command(struct pidset in, const char **words) |
459 | { |
460 | /* |
461 | * Look for processes matching the user-supplied command name and |
462 | * subsequent arguments. |
463 | */ |
464 | struct pidset ret; |
465 | int pid; |
466 | |
467 | pidset_init(&ret); |
468 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
469 | const struct procdata *proc = get_proc(pid); |
470 | int i, j; |
471 | |
472 | if (!proc->argv || proc->argc < 1) |
473 | goto no_match; |
474 | |
475 | /* Find the command, whether it's a binary or a script. */ |
476 | i = find_command(proc->argc, proc->argv, words[0]); |
477 | if (i < 0) |
478 | goto no_match; |
479 | |
480 | /* Now check that subsequent arguments match. */ |
481 | for (j = 1; words[j]; j++) |
482 | if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j])) |
483 | goto no_match; |
484 | |
485 | /* If we get here, we have a match! */ |
486 | pidset_add(&ret, pid); |
487 | |
488 | no_match:; |
489 | } |
490 | return ret; |
491 | } |
492 | |
493 | static struct pidset filter_out_forks(struct pidset in) |
494 | { |
495 | /* |
496 | * Discard any process whose parent is also in our remaining match |
497 | * set and looks sufficiently like it for us to decide this one's |
498 | * an uninteresting fork (e.g. of a shell script executing a |
499 | * complex pipeline). |
500 | */ |
501 | struct pidset ret; |
502 | int pid; |
503 | |
504 | pidset_init(&ret); |
505 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
506 | const struct procdata *proc = get_proc(pid); |
507 | |
508 | if (pidset_in(&in, proc->ppid)) { |
509 | /* The parent is in our set too. Is it similar? */ |
510 | const struct procdata *parent = get_proc(proc->ppid); |
b5fccf05 |
511 | if (!strnullcmp(parent->exe, proc->exe) && |
1f7c0ae1 |
512 | !argcmp(parent->argv, proc->argv)) { |
513 | /* Yes; don't list it. */ |
514 | continue; |
515 | } |
516 | } |
517 | |
518 | pidset_add(&ret, pid); |
519 | } |
520 | return ret; |
521 | } |
522 | |
523 | /* ---------------------------------------------------------------------- |
524 | * Main program. |
525 | */ |
526 | |
/* Help text displayed by 'pid --help'. */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/*
 * Write the help text to standard output.
 */
void usage(void)
{
    fputs(usagemsg, stdout);
}
538 | |
/* Licence text displayed by 'pid --licence'. */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/*
 * Write the licence text to standard output.
 */
void licence(void)
{
    fputs(licencemsg, stdout);
}
566 | |
/*
 * Print the program's revision to standard output.
 *
 * The revision is taken from the text between the ':' and the
 * closing '$' of the SVN_REV keyword string, with leading whitespace
 * skipped. If the string contains no ':' at all (the keyword has not
 * been expanded), we report the version as unknown instead.
 */
void version(void)
{
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *start, *end;

    /* Take a writable copy, since we may truncate it below. */
    strcpy(rev, SVN_REV);

    start = strchr(rev, ':');
    if (!start) {
        printf("pid: unknown version\n");
        return;
    }

    start++;                   /* step over the ':' itself */
    while (*start && isspace((unsigned char)*start))
        start++;

    /* Cut the string off at the closing '$', if there is one. */
    end = strchr(start, '$');
    if (end)
        *end = '\0';

    printf("pid revision %s\n", start);
}
585 | |
586 | int main(int argc, char **argv) |
587 | { |
588 | const char **searchwords; |
589 | int nsearchwords; |
590 | int all = 0; |
591 | int doing_opts = 1; |
592 | |
593 | /* |
594 | * Allocate enough space in 'searchwords' that we could shovel the |
595 | * whole of our argv into it if we had to. Then we won't have to |
596 | * worry about it later. |
597 | */ |
598 | searchwords = (const char **)malloc((argc+1) * sizeof(const char *)); |
599 | nsearchwords = 0; |
600 | |
601 | /* |
602 | * Parse the command line. |
603 | */ |
604 | while (--argc > 0) { |
605 | char *p = *++argv; |
606 | if (doing_opts && *p == '-') { |
607 | if (!strcmp(p, "-a") || !strcmp(p, "--all")) { |
608 | all = 1; |
609 | } else if (!strcmp(p, "--version")) { |
610 | version(); |
611 | return 0; |
612 | } else if (!strcmp(p, "--help")) { |
613 | usage(); |
614 | return 0; |
615 | } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) { |
616 | licence(); |
617 | return 0; |
618 | } else if (!strcmp(p, "--")) { |
619 | doing_opts = 0; |
620 | } else { |
621 | fprintf(stderr, "pid: unrecognised option '%s'\n", p); |
622 | return 1; |
623 | } |
624 | } else { |
625 | searchwords[nsearchwords++] = p; |
626 | doing_opts = 0; /* further optionlike args become search terms */ |
627 | } |
628 | } |
629 | |
630 | if (!nsearchwords) { |
631 | fprintf(stderr, "pid: expected a command to search for; " |
632 | "type 'pid --help' for help\n"); |
633 | return 1; |
634 | } |
635 | searchwords[nsearchwords] = NULL; /* terminate list */ |
636 | |
637 | { |
638 | struct pidset procs; |
639 | int pid, npids; |
640 | /* |
641 | * Construct our list of processes. |
642 | */ |
643 | procs = get_processes(); |
644 | procs = filter_out_self(procs); |
645 | procs = filter_by_command(procs, searchwords); |
646 | if (!all) |
647 | procs = filter_out_forks(procs); |
648 | |
649 | /* |
650 | * Output. |
651 | */ |
652 | npids = pidset_size(&procs); |
653 | if (npids == 0) { |
654 | printf("NONE\n"); |
655 | } else if (all) { |
656 | const char *sep = ""; |
657 | for (pid = pidset_first(&procs); pid >= 0; |
658 | pid = pidset_next(&procs)) { |
659 | printf("%s%d", sep, pid); |
660 | sep = " "; |
661 | } |
662 | putchar('\n'); |
663 | } else { |
664 | if (npids == 1) { |
665 | printf("%d\n", pidset_first(&procs)); |
666 | } else { |
667 | printf("MULTIPLE\n"); |
668 | } |
669 | } |
670 | } |
671 | |
672 | return 0; |
673 | } |