1f7c0ae1 |
1 | /* |
2 | * pid - find the pid of a process given its command name |
3 | * |
4 | * Same basic idea as Debian's "pidof", in that you type 'pid command' |
5 | * and it finds a process running that command and gives you the pid; |
6 | * but souped up with various pragmatic features such as recognising |
7 | * well known interpreters (so you can search for, say, 'pid |
8 | * script.sh' as well as 'pid bash' and have it do what you meant). |
9 | * |
10 | * Currently tested only on Linux using /proc directly, but I've tried |
11 | * to set it up so that the logic of what processes to choose is |
12 | * separated from the mechanism used to iterate over processes and |
13 | * find their command lines. |
14 | */ |
15 | |
16 | #include <stdio.h> |
17 | #include <stdlib.h> |
18 | #include <string.h> |
19 | #include <assert.h> |
20 | #include <ctype.h> |
21 | |
22 | #include <sys/types.h> |
23 | #include <dirent.h> |
24 | #include <unistd.h> |
25 | |
/* Element count of a true array object (not meaningful for a pointer). */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))
27 | |
/*
 * Exclusive upper bound on the pids this program can represent: it
 * sizes both the pid bitmask and the per-pid table of process data.
 */
#define PIDMAX 32768
29 | |
30 | /* ---------------------------------------------------------------------- |
31 | * General-purpose code for storing a set of process ids, testing |
32 | * membership, and iterating over them. Since pids have a very limited |
33 | * range, we just do this as a giant bitmask. |
34 | */ |
35 | |
/*
 * Number of pids recorded in each element of the bitmask array. Only
 * this many low-order bits of every element are used, even if the
 * element type is wider.
 */
#define WORDBITS 32
37 | |
38 | struct pidset { |
39 | unsigned long procbits[PIDMAX/WORDBITS]; |
40 | int next; |
41 | }; |
42 | |
43 | static void pidset_init(struct pidset *p) |
44 | { |
45 | int i; |
46 | for (i = 0; i < lenof(p->procbits); i++) |
47 | p->procbits[i] = 0L; |
48 | } |
49 | |
50 | static void pidset_add(struct pidset *p, int pid) |
51 | { |
52 | assert(pid >= 0 && pid < PIDMAX); |
53 | p->procbits[pid / WORDBITS] |= 1 << (pid % WORDBITS); |
54 | } |
55 | |
56 | static int pidset_in(const struct pidset *p, int pid) |
57 | { |
58 | assert(pid >= 0 && pid < PIDMAX); |
59 | return (p->procbits[pid / WORDBITS] & (1 << (pid % WORDBITS))) != 0; |
60 | } |
61 | |
62 | static int pidset_size(const struct pidset *p) |
63 | { |
64 | int word, count; |
65 | |
66 | count = 0; |
67 | for (word = 0; word < lenof(p->procbits); word++) { |
68 | unsigned long mask = p->procbits[word]; |
69 | while (mask > 0) { |
70 | count += (mask & 1); |
71 | mask >>= 1; |
72 | } |
73 | } |
74 | |
75 | return count; |
76 | } |
77 | |
78 | static int pidset_step(struct pidset *p) |
79 | { |
80 | int word = p->next / WORDBITS; |
81 | int bit = p->next % WORDBITS; |
82 | while (word < lenof(p->procbits) && p->procbits[word] >> bit == 0) { |
83 | word++; |
84 | bit = 0; |
85 | p->next = WORDBITS * word + bit; |
86 | } |
87 | |
88 | if (word >= lenof(p->procbits)) |
89 | return -1; |
90 | |
91 | while (!((p->procbits[word] >> bit) & 1)) { |
92 | bit++; |
93 | p->next = WORDBITS * word + bit; |
94 | } |
95 | |
96 | assert(bit < WORDBITS); |
97 | return p->next++; |
98 | } |
99 | |
100 | static int pidset_first(struct pidset *p) |
101 | { |
102 | p->next = 0; |
103 | return pidset_step(p); |
104 | } |
105 | |
/*
 * Continue an iteration started with pidset_first(): return the next
 * member after the one most recently returned, or -1 when there are
 * no more.
 */
static int pidset_next(struct pidset *p)
{
    int following = pidset_step(p);
    return following;
}
110 | |
111 | /* ---------------------------------------------------------------------- |
112 | * Code to scan the list of processes and retrieve all the information |
113 | * we'll want about each one. This may in future be conditional on the |
114 | * OS's local mechanism for finding that information (i.e. if we want |
115 | * to run on kernels that don't provide Linux-style /proc). |
116 | */ |
117 | |
118 | struct procdata { |
119 | int pid, ppid, uid; |
120 | int argc; |
121 | const char *const *argv; |
122 | const char *exe; |
123 | }; |
124 | static struct procdata *procs[PIDMAX]; |
125 | |
/*
 * Read the whole of a file into a freshly allocated buffer.
 *
 * filename      path of the file to read (opened in binary mode).
 * returned_len  if non-NULL, receives the number of bytes read, not
 *               counting the terminator.
 *
 * Returns a malloc'ed buffer holding the file's bytes followed by one
 * extra '\0' (the data itself may contain NULs), which the caller must
 * free; or NULL if the file could not be opened or a read error
 * occurred. Running out of memory while growing the buffer is fatal.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    char *buf = NULL;
    char *shrunk;
    size_t len = 0;
    size_t bufsize = 0;
    FILE *fp;

    fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    for (;;) {
        size_t got;

        /* Keep at least one free byte beyond 'len', so there is always
         * room to read into and, at the end, room for the terminator. */
        if (len >= bufsize) {
            char *bigger;

            bufsize = len * 5 / 4 + 4096;
            bigger = realloc(buf, bufsize);
            if (!bigger) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
            buf = bigger;
        }

        got = fread(buf + len, 1, bufsize - len, fp);
        len += got;

        /* fread returns an unsigned count and so can never signal
         * failure with a negative value; ask the stream instead. */
        if (ferror(fp)) {
            fclose(fp);
            free(buf);
            return NULL;               /* I/O error */
        }
        if (got == 0)
            break;                     /* end of file */
    }

    fclose(fp);

    if (returned_len)
        *returned_len = (int)len;

    /* Trim the buffer to size. If that fails the original block is
     * still valid and still has room for the terminator. */
    shrunk = realloc(buf, len + 1);
    if (shrunk)
        buf = shrunk;
    buf[len] = '\0';
    return buf;
}
166 | |
/*
 * Read the target of a symbolic link.
 *
 * Returns the link text as a malloc'ed, NUL-terminated string which
 * the caller must free, or NULL if readlink() failed. Running out of
 * memory while growing the buffer is fatal.
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    size_t bufsize = 0;

    for (;;) {
        char *bigger;
        ssize_t ret;

        bufsize = bufsize * 5 / 4 + 1024;
        bigger = realloc(buf, bufsize);
        if (!bigger) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }
        buf = bigger;

        ret = readlink(filename, buf, bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        if ((size_t)ret < bufsize) {
            /*
             * The link text did not fill the buffer, so we have all
             * of it. Trim the allocation; if trimming fails the
             * existing block is still valid and has room for the
             * terminator, so just keep using it.
             */
            char *shrunk = realloc(buf, (size_t)ret + 1);
            if (shrunk)
                buf = shrunk;
            buf[ret] = '\0';
            return buf;
        }

        /* The buffer was filled completely, so the text may have been
         * truncated. Go round again with a bigger one. */
    }
}
200 | |
201 | static struct pidset get_processes(void) |
202 | { |
203 | struct dirent *de; |
204 | struct pidset ret; |
205 | DIR *d; |
206 | |
207 | pidset_init(&ret); |
208 | |
209 | d = opendir("/proc"); |
210 | if (!d) { |
211 | perror("/proc: open\n"); |
212 | exit(1); |
213 | } |
214 | while ((de = readdir(d)) != NULL) { |
215 | int pid; |
216 | char *cmdline, *status, *exe; |
217 | int cmdlinelen; |
218 | const char **argv; |
219 | char filename[256]; |
220 | struct procdata *proc; |
221 | |
222 | const char *name = de->d_name; |
223 | if (name[strspn(name, "0123456789")]) |
224 | continue; |
225 | |
226 | /* |
227 | * The filename is numeric, i.e. we've found a pid. Try to |
228 | * retrieve all the information we want about it. |
229 | * |
230 | * We expect this will fail every so often for random reasons, |
231 | * e.g. if the pid has disappeared between us fetching a list |
232 | * of them and trying to read their command lines. In that |
233 | * situation, we won't bother reporting errors: we'll just |
234 | * drop this pid and silently move on to the next one. |
235 | */ |
236 | pid = atoi(name); |
237 | assert(pid >= 0 && pid < PIDMAX); |
238 | |
239 | sprintf(filename, "/proc/%d/cmdline", pid); |
240 | if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL) |
241 | continue; |
242 | |
243 | sprintf(filename, "/proc/%d/status", pid); |
244 | if ((status = get_contents(filename, NULL)) == NULL) { |
245 | free(cmdline); |
246 | continue; |
247 | } |
248 | |
249 | sprintf(filename, "/proc/%d/exe", pid); |
250 | if ((exe = get_link_dest(filename)) == NULL) { |
251 | free(cmdline); |
252 | free(status); |
253 | continue; |
254 | } |
255 | |
256 | /* |
257 | * Now we've got all our raw data out of /proc. Process it |
258 | * into the internal representation we're going to use in the |
259 | * process-selection logic. |
260 | */ |
261 | proc = (struct procdata *)malloc(sizeof(struct procdata)); |
262 | if (!proc) { |
263 | fprintf(stderr, "pid: out of memory\n"); |
264 | exit(1); |
265 | } |
266 | proc->pid = pid; |
267 | proc->exe = exe; |
268 | |
269 | /* |
270 | * cmdline contains a list of NUL-terminated strings. Scan |
271 | * them to get the argv pointers. |
272 | */ |
273 | { |
274 | const char *p; |
275 | int nargs; |
276 | |
277 | /* Count the arguments. */ |
278 | nargs = 0; |
279 | for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1) |
280 | nargs++; |
281 | |
282 | /* Allocate space for the pointers. */ |
283 | argv = (const char **)malloc((nargs+1) * sizeof(char *)); |
284 | proc->argv = argv; |
285 | if (!argv) { |
286 | fprintf(stderr, "pid: out of memory\n"); |
287 | exit(1); |
288 | } |
289 | |
290 | /* Store the pointers. */ |
291 | proc->argc = 0; |
292 | for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1) |
293 | argv[proc->argc++] = p; |
294 | |
295 | /* Trailing NULL to match standard argv lists, just in case. */ |
296 | assert(proc->argc == nargs); |
297 | argv[proc->argc] = NULL; |
298 | } |
299 | |
300 | /* |
301 | * Scan status for the uid and the parent pid. This file |
302 | * contains a list of \n-terminated lines of text. |
303 | */ |
304 | { |
305 | const char *p; |
306 | int got_ppid = 0, got_uid = 0; |
307 | |
308 | p = status; |
309 | while (p && *p) { |
310 | if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1) |
311 | got_ppid = 1; |
312 | if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1) |
313 | got_uid = 1; |
314 | |
315 | /* |
316 | * Advance to next line. |
317 | */ |
318 | p = strchr(p, '\n'); |
319 | if (p) p++; |
320 | } |
321 | |
322 | if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */ |
323 | free(cmdline); |
324 | free(exe); |
325 | free(status); |
326 | free(argv); |
327 | free(proc); |
328 | continue; |
329 | } |
330 | } |
331 | |
332 | /* |
333 | * If we get here, we've got everything we need. Add the |
334 | * process to the list of things we can usefully work |
335 | * with. |
336 | */ |
337 | procs[pid] = proc; |
338 | pidset_add(&ret, pid); |
339 | } |
340 | closedir(d); |
341 | |
342 | return ret; |
343 | } |
344 | |
345 | static const struct procdata *get_proc(int pid) |
346 | { |
347 | assert(pid >= 0 && pid < PIDMAX); |
348 | assert(procs[pid]); |
349 | return procs[pid]; |
350 | } |
351 | |
352 | /* ---------------------------------------------------------------------- |
353 | * Logic to pick out the set of processes we care about. |
354 | */ |
355 | |
/*
 * Returns 1 if 'basename' exactly matches one of the interpreter
 * names we know about, and 0 otherwise.
 */
static int is_an_interpreter(const char *basename)
{
    static const char *const known[] = {
        "perl", "python", "ruby", "rep", "bash",
        "sh", "dash", "lua", "java"
    };
    size_t k;

    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (strcmp(basename, known[k]) == 0)
            return 1;
    }
    return 0;
}
371 | |
/*
 * Return a pointer to the final component of 'path': the part after
 * the last '/', or the whole string if it contains no '/'. The result
 * points into 'path' and is empty if the path ends with a slash.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL)
        return path;
    return last_slash + 1;
}
386 | |
/*
 * Decide whether a process's command line is running the command
 * 'cmd', and if so at which argv index the command sits.
 *
 * pid_argc, pid_argv  the process's argument list (at least one entry).
 * cmd                 the command name being searched for.
 *
 * Returns 0 if the basename of argv[0] (ignoring one leading '-', the
 * login-shell marker) equals cmd. Otherwise, if argv[0] is a known
 * interpreter, returns the index of its first argument not starting
 * with '-' when that argument's basename equals cmd. Returns -1 if
 * there is no match.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *prog = pid_argv[0];
    int idx;

    if (prog[0] == '-')
        prog++;                        /* skip indicator of login shells */
    prog = find_basename(prog);

    /* Direct hit: argv[0] is the command itself. */
    if (strcmp(prog, cmd) == 0)
        return 0;

    /* Only interpreters get a second chance via their script name. */
    if (!is_an_interpreter(prog))
        return -1;

    /* Step over the interpreter's own options. */
    for (idx = 1; idx < pid_argc; idx++) {
        if (pid_argv[idx][0] != '-')
            break;
    }
    if (idx >= pid_argc)
        return -1;                     /* nothing but options */

    if (strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;
    return -1;                         /* no match */
}
416 | |
/*
 * Compare two NULL-terminated argument lists element by element.
 *
 * Returns 0 if the lists are identical. Otherwise the sign follows
 * strcmp() conventions throughout: the result of strcmp() on the
 * first differing pair of strings, or, if one list is a proper prefix
 * of the other, negative when 'a' is the shorter list and positive
 * when 'b' is.
 */
static int argcmp(const char *const *a, const char *const *b)
{
    while (*a && *b) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
        a++;
        b++;
    }

    /* At least one list has ended; the one that ended first sorts
     * first, in keeping with the element comparison above. */
    return (*a != NULL) - (*b != NULL);
}
429 | |
430 | static struct pidset filter_out_self(struct pidset in) |
431 | { |
432 | /* |
433 | * Discard our own pid from a set. (Generally we won't want to |
434 | * return ourself from any search.) |
435 | */ |
436 | struct pidset ret; |
437 | int pid; |
438 | int our_pid = getpid(); |
439 | |
440 | pidset_init(&ret); |
441 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
442 | if (pid != our_pid) |
443 | pidset_add(&ret, pid); |
444 | } |
445 | return ret; |
446 | } |
447 | |
448 | static struct pidset filter_by_command(struct pidset in, const char **words) |
449 | { |
450 | /* |
451 | * Look for processes matching the user-supplied command name and |
452 | * subsequent arguments. |
453 | */ |
454 | struct pidset ret; |
455 | int pid; |
456 | |
457 | pidset_init(&ret); |
458 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
459 | const struct procdata *proc = get_proc(pid); |
460 | int i, j; |
461 | |
462 | if (!proc->argv || proc->argc < 1) |
463 | goto no_match; |
464 | |
465 | /* Find the command, whether it's a binary or a script. */ |
466 | i = find_command(proc->argc, proc->argv, words[0]); |
467 | if (i < 0) |
468 | goto no_match; |
469 | |
470 | /* Now check that subsequent arguments match. */ |
471 | for (j = 1; words[j]; j++) |
472 | if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j])) |
473 | goto no_match; |
474 | |
475 | /* If we get here, we have a match! */ |
476 | pidset_add(&ret, pid); |
477 | |
478 | no_match:; |
479 | } |
480 | return ret; |
481 | } |
482 | |
483 | static struct pidset filter_out_forks(struct pidset in) |
484 | { |
485 | /* |
486 | * Discard any process whose parent is also in our remaining match |
487 | * set and looks sufficiently like it for us to decide this one's |
488 | * an uninteresting fork (e.g. of a shell script executing a |
489 | * complex pipeline). |
490 | */ |
491 | struct pidset ret; |
492 | int pid; |
493 | |
494 | pidset_init(&ret); |
495 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
496 | const struct procdata *proc = get_proc(pid); |
497 | |
498 | if (pidset_in(&in, proc->ppid)) { |
499 | /* The parent is in our set too. Is it similar? */ |
500 | const struct procdata *parent = get_proc(proc->ppid); |
501 | if (!strcmp(parent->exe, proc->exe) && |
502 | !argcmp(parent->argv, proc->argv)) { |
503 | /* Yes; don't list it. */ |
504 | continue; |
505 | } |
506 | } |
507 | |
508 | pidset_add(&ret, pid); |
509 | } |
510 | return ret; |
511 | } |
512 | |
513 | /* ---------------------------------------------------------------------- |
514 | * Main program. |
515 | */ |
516 | |
/*
 * Help text printed by 'pid --help'. It lists every option that
 * main() accepts, including the long spelling of -a and the '--'
 * end-of-options marker.
 */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a, --all       report all matching pids, not just one\n"
    "       --              treat all further arguments as search terms\n"
    " also: pid --version   report version number\n"
    "       pid --help      display this help text\n"
    "       pid --licence   display the (MIT) licence text\n"
    ;

/* Print the help text to standard output. */
void usage(void) {
    fputs(usagemsg, stdout);
}
528 | |
/*
 * Full licence text printed by 'pid --licence'.
 */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/* Write the licence text, exactly as stored, to standard output. */
void licence(void) {
    /* sizeof counts the terminating NUL, which is not written. */
    fwrite(licencemsg, 1, sizeof(licencemsg) - 1, stdout);
}
556 | |
/*
 * Print the program's version to standard output.
 *
 * The version is taken from the revision keyword string SVN_REV. If
 * that string contains a ':', the text after it (minus leading
 * whitespace, and cut off at the next '$') is reported as the
 * revision; if it contains no ':', the version is reported as unknown.
 */
void version(void) {
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *colon, *start, *dollar;

    strcpy(rev, SVN_REV);

    colon = strchr(rev, ':');
    if (colon == NULL) {
        printf("pid: unknown version\n");
        return;
    }

    /* Step past the colon and any whitespace that follows it. */
    start = colon + 1;
    while (isspace((unsigned char)*start))
        start++;

    /* Cut the text off at the closing '$', if there is one. */
    dollar = strchr(start, '$');
    if (dollar != NULL)
        *dollar = '\0';

    printf("pid revision %s\n", start);
}
575 | |
576 | int main(int argc, char **argv) |
577 | { |
578 | const char **searchwords; |
579 | int nsearchwords; |
580 | int all = 0; |
581 | int doing_opts = 1; |
582 | |
583 | /* |
584 | * Allocate enough space in 'searchwords' that we could shovel the |
585 | * whole of our argv into it if we had to. Then we won't have to |
586 | * worry about it later. |
587 | */ |
588 | searchwords = (const char **)malloc((argc+1) * sizeof(const char *)); |
589 | nsearchwords = 0; |
590 | |
591 | /* |
592 | * Parse the command line. |
593 | */ |
594 | while (--argc > 0) { |
595 | char *p = *++argv; |
596 | if (doing_opts && *p == '-') { |
597 | if (!strcmp(p, "-a") || !strcmp(p, "--all")) { |
598 | all = 1; |
599 | } else if (!strcmp(p, "--version")) { |
600 | version(); |
601 | return 0; |
602 | } else if (!strcmp(p, "--help")) { |
603 | usage(); |
604 | return 0; |
605 | } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) { |
606 | licence(); |
607 | return 0; |
608 | } else if (!strcmp(p, "--")) { |
609 | doing_opts = 0; |
610 | } else { |
611 | fprintf(stderr, "pid: unrecognised option '%s'\n", p); |
612 | return 1; |
613 | } |
614 | } else { |
615 | searchwords[nsearchwords++] = p; |
616 | doing_opts = 0; /* further optionlike args become search terms */ |
617 | } |
618 | } |
619 | |
620 | if (!nsearchwords) { |
621 | fprintf(stderr, "pid: expected a command to search for; " |
622 | "type 'pid --help' for help\n"); |
623 | return 1; |
624 | } |
625 | searchwords[nsearchwords] = NULL; /* terminate list */ |
626 | |
627 | { |
628 | struct pidset procs; |
629 | int pid, npids; |
630 | /* |
631 | * Construct our list of processes. |
632 | */ |
633 | procs = get_processes(); |
634 | procs = filter_out_self(procs); |
635 | procs = filter_by_command(procs, searchwords); |
636 | if (!all) |
637 | procs = filter_out_forks(procs); |
638 | |
639 | /* |
640 | * Output. |
641 | */ |
642 | npids = pidset_size(&procs); |
643 | if (npids == 0) { |
644 | printf("NONE\n"); |
645 | } else if (all) { |
646 | const char *sep = ""; |
647 | for (pid = pidset_first(&procs); pid >= 0; |
648 | pid = pidset_next(&procs)) { |
649 | printf("%s%d", sep, pid); |
650 | sep = " "; |
651 | } |
652 | putchar('\n'); |
653 | } else { |
654 | if (npids == 1) { |
655 | printf("%d\n", pidset_first(&procs)); |
656 | } else { |
657 | printf("MULTIPLE\n"); |
658 | } |
659 | } |
660 | } |
661 | |
662 | return 0; |
663 | } |