1f7c0ae1 |
1 | /* |
2 | * pid - find the pid of a process given its command name |
3 | * |
4 | * Same basic idea as Debian's "pidof", in that you type 'pid command' |
5 | * and it finds a process running that command and gives you the pid; |
6 | * but souped up with various pragmatic features such as recognising |
7 | * well known interpreters (so you can search for, say, 'pid |
8 | * script.sh' as well as 'pid bash' and have it do what you meant). |
9 | * |
10 | * Currently tested only on Linux using /proc directly, but I've tried |
11 | * to set it up so that the logic of what processes to choose is |
12 | * separated from the mechanism used to iterate over processes and |
13 | * find their command lines. |
14 | */ |
15 | |
16 | #include <stdio.h> |
17 | #include <stdlib.h> |
18 | #include <string.h> |
19 | #include <assert.h> |
20 | #include <ctype.h> |
21 | |
22 | #include <sys/types.h> |
23 | #include <dirent.h> |
24 | #include <unistd.h> |
25 | |
/* Number of elements in an array object (not valid for pointers). */
#define lenof(arr) (sizeof(arr) / sizeof((arr)[0]))

/* Exclusive upper bound on the process ids this program can track. */
#define PIDMAX 32768
29 | |
30 | /* ---------------------------------------------------------------------- |
31 | * General-purpose code for storing a set of process ids, testing |
32 | * membership, and iterating over them. Since pids have a very limited |
33 | * range, we just do this as a giant bitmask. |
34 | */ |
35 | |
36 | #define WORDBITS 32 |
37 | |
38 | struct pidset { |
39 | unsigned long procbits[PIDMAX/WORDBITS]; |
40 | int next; |
41 | }; |
42 | |
43 | static void pidset_init(struct pidset *p) |
44 | { |
45 | int i; |
46 | for (i = 0; i < lenof(p->procbits); i++) |
47 | p->procbits[i] = 0L; |
48 | } |
49 | |
50 | static void pidset_add(struct pidset *p, int pid) |
51 | { |
52 | assert(pid >= 0 && pid < PIDMAX); |
53 | p->procbits[pid / WORDBITS] |= 1 << (pid % WORDBITS); |
54 | } |
55 | |
56 | static int pidset_in(const struct pidset *p, int pid) |
57 | { |
58 | assert(pid >= 0 && pid < PIDMAX); |
59 | return (p->procbits[pid / WORDBITS] & (1 << (pid % WORDBITS))) != 0; |
60 | } |
61 | |
62 | static int pidset_size(const struct pidset *p) |
63 | { |
64 | int word, count; |
65 | |
66 | count = 0; |
67 | for (word = 0; word < lenof(p->procbits); word++) { |
68 | unsigned long mask = p->procbits[word]; |
69 | while (mask > 0) { |
70 | count += (mask & 1); |
71 | mask >>= 1; |
72 | } |
73 | } |
74 | |
75 | return count; |
76 | } |
77 | |
78 | static int pidset_step(struct pidset *p) |
79 | { |
80 | int word = p->next / WORDBITS; |
81 | int bit = p->next % WORDBITS; |
82 | while (word < lenof(p->procbits) && p->procbits[word] >> bit == 0) { |
83 | word++; |
84 | bit = 0; |
85 | p->next = WORDBITS * word + bit; |
86 | } |
87 | |
88 | if (word >= lenof(p->procbits)) |
89 | return -1; |
90 | |
91 | while (!((p->procbits[word] >> bit) & 1)) { |
92 | bit++; |
93 | p->next = WORDBITS * word + bit; |
94 | } |
95 | |
96 | assert(bit < WORDBITS); |
97 | return p->next++; |
98 | } |
99 | |
100 | static int pidset_first(struct pidset *p) |
101 | { |
102 | p->next = 0; |
103 | return pidset_step(p); |
104 | } |
105 | |
/*
 * Continue an iteration from wherever the cursor currently is:
 * return the next member of the set, or -1 when there are no more.
 */
static int pidset_next(struct pidset *p)
{
    int pid = pidset_step(p);

    return pid;
}
110 | |
111 | /* ---------------------------------------------------------------------- |
112 | * Code to scan the list of processes and retrieve all the information |
113 | * we'll want about each one. This may in future be conditional on the |
114 | * OS's local mechanism for finding that information (i.e. if we want |
115 | * to run on kernels that don't provide Linux-style /proc). |
116 | */ |
117 | |
118 | struct procdata { |
119 | int pid, ppid, uid; |
120 | int argc; |
121 | const char *const *argv; |
122 | const char *exe; |
123 | }; |
124 | static struct procdata *procs[PIDMAX]; |
125 | |
/*
 * Read the whole of 'filename' into a freshly allocated buffer.
 *
 * Returns the buffer, which the caller must free(), or NULL if the
 * file could not be opened or a read error occurred. The data is
 * followed by one extra '\0' byte which is not counted in the
 * length. If 'returned_len' is non-NULL, the number of bytes read is
 * stored through it on success.
 *
 * Running out of memory while growing the buffer is fatal: a message
 * is printed and the program exits with status 1.
 */
static char *get_contents(const char *filename, int *returned_len)
{
    char *buf = NULL;
    int bufsize = 0;
    int len = 0;

    FILE *fp = fopen(filename, "rb");
    if (!fp)
        return NULL;

    while (1) {
        size_t nread;

        if (len >= bufsize) {
            char *bigger;

            bufsize = len * 5 / 4 + 4096;
            bigger = realloc(buf, (size_t)bufsize);
            if (!bigger) {
                fprintf(stderr, "pid: out of memory\n");
                exit(1);
            }
            buf = bigger;
        }

        nread = fread(buf + len, 1, (size_t)(bufsize - len), fp);

        /*
         * fread() returns an unsigned count and so can never signal
         * failure with a negative value; the stream's error flag is
         * the only way to tell a read error from end of file.
         */
        if (ferror(fp)) {
            fclose(fp);
            free(buf);
            return NULL;               /* I/O error */
        }

        if (nread == 0) {
            char *trimmed;

            fclose(fp);
            if (returned_len)
                *returned_len = len;

            /*
             * len < bufsize here, so the terminator already fits. If
             * the shrinking realloc fails, keep the larger buffer.
             */
            trimmed = realloc(buf, (size_t)len + 1);
            if (trimmed)
                buf = trimmed;
            buf[len] = '\0';
            return buf;
        }

        len += (int)nread;
    }
}
166 | |
/*
 * Read the target of the symbolic link 'filename'.
 *
 * Returns a freshly allocated, NUL-terminated string which the
 * caller must free(), or NULL if readlink() fails. Running out of
 * memory while growing the buffer is fatal: a message is printed and
 * the program exits with status 1.
 */
static char *get_link_dest(const char *filename)
{
    char *buf = NULL;
    int bufsize = 0;

    while (1) {
        char *bigger;
        ssize_t ret;

        bufsize = bufsize * 5 / 4 + 1024;
        bigger = realloc(buf, (size_t)bufsize);
        if (!bigger) {
            fprintf(stderr, "pid: out of memory\n");
            exit(1);
        }
        buf = bigger;

        ret = readlink(filename, buf, (size_t)bufsize);
        if (ret < 0) {
            free(buf);
            return NULL;               /* I/O error */
        }

        if (ret < bufsize) {
            /*
             * Success! The link text did not fill the buffer, so we
             * know it was not truncated and the terminator fits. If
             * the shrinking realloc fails, keep the larger buffer.
             */
            char *trimmed = realloc(buf, (size_t)ret + 1);
            if (trimmed)
                buf = trimmed;
            buf[ret] = '\0';
            return buf;
        }

        /* The buffer was filled completely: possibly truncated. Go
         * round again with a bigger one. */
    }
}
200 | |
201 | static struct pidset get_processes(void) |
202 | { |
203 | struct dirent *de; |
204 | struct pidset ret; |
205 | DIR *d; |
206 | |
207 | pidset_init(&ret); |
208 | |
209 | d = opendir("/proc"); |
210 | if (!d) { |
211 | perror("/proc: open\n"); |
212 | exit(1); |
213 | } |
214 | while ((de = readdir(d)) != NULL) { |
215 | int pid; |
216 | char *cmdline, *status, *exe; |
217 | int cmdlinelen; |
218 | const char **argv; |
219 | char filename[256]; |
220 | struct procdata *proc; |
221 | |
222 | const char *name = de->d_name; |
223 | if (name[strspn(name, "0123456789")]) |
224 | continue; |
225 | |
226 | /* |
227 | * The filename is numeric, i.e. we've found a pid. Try to |
228 | * retrieve all the information we want about it. |
229 | * |
230 | * We expect this will fail every so often for random reasons, |
231 | * e.g. if the pid has disappeared between us fetching a list |
232 | * of them and trying to read their command lines. In that |
233 | * situation, we won't bother reporting errors: we'll just |
234 | * drop this pid and silently move on to the next one. |
235 | */ |
236 | pid = atoi(name); |
237 | assert(pid >= 0 && pid < PIDMAX); |
238 | |
239 | sprintf(filename, "/proc/%d/cmdline", pid); |
240 | if ((cmdline = get_contents(filename, &cmdlinelen)) == NULL) |
241 | continue; |
242 | |
243 | sprintf(filename, "/proc/%d/status", pid); |
244 | if ((status = get_contents(filename, NULL)) == NULL) { |
245 | free(cmdline); |
246 | continue; |
247 | } |
248 | |
249 | sprintf(filename, "/proc/%d/exe", pid); |
b5fccf05 |
250 | exe = get_link_dest(filename); |
251 | /* This may fail, if the process isn't ours, but we continue |
252 | * anyway. */ |
1f7c0ae1 |
253 | |
254 | /* |
255 | * Now we've got all our raw data out of /proc. Process it |
256 | * into the internal representation we're going to use in the |
257 | * process-selection logic. |
258 | */ |
259 | proc = (struct procdata *)malloc(sizeof(struct procdata)); |
260 | if (!proc) { |
261 | fprintf(stderr, "pid: out of memory\n"); |
262 | exit(1); |
263 | } |
264 | proc->pid = pid; |
265 | proc->exe = exe; |
266 | |
267 | /* |
268 | * cmdline contains a list of NUL-terminated strings. Scan |
269 | * them to get the argv pointers. |
270 | */ |
271 | { |
272 | const char *p; |
273 | int nargs; |
274 | |
275 | /* Count the arguments. */ |
276 | nargs = 0; |
277 | for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1) |
278 | nargs++; |
279 | |
280 | /* Allocate space for the pointers. */ |
281 | argv = (const char **)malloc((nargs+1) * sizeof(char *)); |
282 | proc->argv = argv; |
283 | if (!argv) { |
284 | fprintf(stderr, "pid: out of memory\n"); |
285 | exit(1); |
286 | } |
287 | |
288 | /* Store the pointers. */ |
289 | proc->argc = 0; |
290 | for (p = cmdline; p < cmdline + cmdlinelen; p += strlen(p)+1) |
291 | argv[proc->argc++] = p; |
292 | |
293 | /* Trailing NULL to match standard argv lists, just in case. */ |
294 | assert(proc->argc == nargs); |
295 | argv[proc->argc] = NULL; |
296 | } |
297 | |
298 | /* |
299 | * Scan status for the uid and the parent pid. This file |
300 | * contains a list of \n-terminated lines of text. |
301 | */ |
302 | { |
303 | const char *p; |
304 | int got_ppid = 0, got_uid = 0; |
305 | |
306 | p = status; |
307 | while (p && *p) { |
308 | if (!got_ppid && sscanf(p, "PPid: %d", &proc->ppid) == 1) |
309 | got_ppid = 1; |
310 | if (!got_uid && sscanf(p, "Uid: %*d %d", &proc->uid) == 1) |
311 | got_uid = 1; |
312 | |
313 | /* |
314 | * Advance to next line. |
315 | */ |
316 | p = strchr(p, '\n'); |
317 | if (p) p++; |
318 | } |
319 | |
320 | if (!got_uid || !got_ppid) { /* arrgh, abort everything so far */ |
321 | free(cmdline); |
322 | free(exe); |
323 | free(status); |
324 | free(argv); |
325 | free(proc); |
326 | continue; |
327 | } |
328 | } |
329 | |
330 | /* |
331 | * If we get here, we've got everything we need. Add the |
332 | * process to the list of things we can usefully work |
333 | * with. |
334 | */ |
335 | procs[pid] = proc; |
336 | pidset_add(&ret, pid); |
337 | } |
338 | closedir(d); |
339 | |
340 | return ret; |
341 | } |
342 | |
343 | static const struct procdata *get_proc(int pid) |
344 | { |
345 | assert(pid >= 0 && pid < PIDMAX); |
346 | assert(procs[pid]); |
347 | return procs[pid]; |
348 | } |
349 | |
350 | /* ---------------------------------------------------------------------- |
351 | * Logic to pick out the set of processes we care about. |
352 | */ |
353 | |
/*
 * Return 1 if 'basename' exactly names one of the interpreters we
 * know about, 0 otherwise.
 */
static int is_an_interpreter(const char *basename)
{
    static const char *const interpreters[] = {
        "perl", "python", "ruby", "rep", "bash",
        "sh", "dash", "lua", "java",
    };
    size_t i;

    for (i = 0; i < sizeof interpreters / sizeof interpreters[0]; i++) {
        if (strcmp(basename, interpreters[i]) == 0)
            return 1;
    }
    return 0;
}
369 | |
/*
 * Return a pointer to the part of 'path' following its last '/', or
 * 'path' itself if it contains no '/'. The result points into the
 * caller's string; nothing is allocated.
 */
static const char *find_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL)
        return path;
    return last_slash + 1;
}
384 | |
/*
 * Decide whether a process's command line is running 'cmd'.
 *
 * Returns the index in pid_argv of the word that matched: 0 if the
 * basename of argv[0] (after dropping a leading '-') equals 'cmd', or
 * the index of the first non-option argument if argv[0] is a known
 * interpreter and that argument's basename equals 'cmd'. Returns -1
 * if nothing matches. pid_argv[0] is read unconditionally, so the
 * caller must supply at least one argument.
 */
static int find_command(int pid_argc, const char *const *pid_argv,
                        const char *cmd)
{
    const char *name = pid_argv[0];
    int idx;

    if (name[0] == '-')
        name++;                    /* skip indicator of login shells */
    name = find_basename(name);

    /* argv[0] itself is the command we were asked for. */
    if (strcmp(name, cmd) == 0)
        return 0;

    /* Otherwise only an interpreter running a script can match. */
    if (!is_an_interpreter(name))
        return -1;

    /*
     * Step past the interpreter's own options to reach the program it
     * is running, and compare _that_ against the command name.
     */
    for (idx = 1; idx < pid_argc; idx++) {
        if (pid_argv[idx][0] != '-')
            break;
    }
    if (idx < pid_argc && strcmp(find_basename(pid_argv[idx]), cmd) == 0)
        return idx;

    return -1;                     /* no match */
}
414 | |
b5fccf05 |
/*
 * Like strcmp, but cope with NULL inputs by making them compare
 * identical to each other and before any non-null string.
 *
 * Returns 0 for two NULLs, a negative value when only 'a' is NULL, a
 * positive value when only 'b' is NULL, and strcmp(a, b) otherwise.
 */
static int strnullcmp(const char *a, const char *b)
{
    if (!a || !b) {
        /* "a is present" minus "b is present": NULL sorts first. */
        return (a != NULL) - (b != NULL);
    }
    return strcmp(a, b);
}
426 | |
1f7c0ae1 |
/*
 * Compare two NULL-terminated argument lists word by word.
 *
 * Returns 0 if both lists hold the same strings in the same order.
 * Otherwise the sign follows strcmp conventions: the first differing
 * pair of words decides, and if one list is a proper prefix of the
 * other, the shorter list compares as smaller (negative result when
 * 'a' is the shorter one).
 */
static int argcmp(const char *const *a, const char *const *b)
{
    for (; *a && *b; a++, b++) {
        int ret = strcmp(*a, *b);
        if (ret)
            return ret;
    }

    /* At least one list has ended; the one with words left is larger. */
    return (*a != NULL) - (*b != NULL);
}
439 | |
440 | static struct pidset filter_out_self(struct pidset in) |
441 | { |
442 | /* |
443 | * Discard our own pid from a set. (Generally we won't want to |
444 | * return ourself from any search.) |
445 | */ |
446 | struct pidset ret; |
447 | int pid; |
448 | int our_pid = getpid(); |
449 | |
450 | pidset_init(&ret); |
451 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
452 | if (pid != our_pid) |
453 | pidset_add(&ret, pid); |
454 | } |
455 | return ret; |
456 | } |
457 | |
d1bc4fef |
458 | static struct pidset filter_by_uid(struct pidset in, int uid) |
459 | { |
460 | /* |
461 | * Return only those processes with a given uid. |
462 | */ |
463 | struct pidset ret; |
464 | int pid; |
465 | |
466 | pidset_init(&ret); |
467 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
468 | const struct procdata *proc = get_proc(pid); |
469 | if (proc->uid == uid) |
470 | pidset_add(&ret, pid); |
471 | } |
472 | return ret; |
473 | } |
474 | |
1f7c0ae1 |
475 | static struct pidset filter_by_command(struct pidset in, const char **words) |
476 | { |
477 | /* |
478 | * Look for processes matching the user-supplied command name and |
479 | * subsequent arguments. |
480 | */ |
481 | struct pidset ret; |
482 | int pid; |
483 | |
484 | pidset_init(&ret); |
485 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
486 | const struct procdata *proc = get_proc(pid); |
487 | int i, j; |
488 | |
489 | if (!proc->argv || proc->argc < 1) |
490 | goto no_match; |
491 | |
492 | /* Find the command, whether it's a binary or a script. */ |
493 | i = find_command(proc->argc, proc->argv, words[0]); |
494 | if (i < 0) |
495 | goto no_match; |
496 | |
497 | /* Now check that subsequent arguments match. */ |
498 | for (j = 1; words[j]; j++) |
499 | if (!proc->argv[i+j] || strcmp(proc->argv[i+j], words[j])) |
500 | goto no_match; |
501 | |
502 | /* If we get here, we have a match! */ |
503 | pidset_add(&ret, pid); |
504 | |
505 | no_match:; |
506 | } |
507 | return ret; |
508 | } |
509 | |
510 | static struct pidset filter_out_forks(struct pidset in) |
511 | { |
512 | /* |
513 | * Discard any process whose parent is also in our remaining match |
514 | * set and looks sufficiently like it for us to decide this one's |
515 | * an uninteresting fork (e.g. of a shell script executing a |
516 | * complex pipeline). |
517 | */ |
518 | struct pidset ret; |
519 | int pid; |
520 | |
521 | pidset_init(&ret); |
522 | for (pid = pidset_first(&in); pid >= 0; pid = pidset_next(&in)) { |
523 | const struct procdata *proc = get_proc(pid); |
524 | |
525 | if (pidset_in(&in, proc->ppid)) { |
526 | /* The parent is in our set too. Is it similar? */ |
527 | const struct procdata *parent = get_proc(proc->ppid); |
b5fccf05 |
528 | if (!strnullcmp(parent->exe, proc->exe) && |
1f7c0ae1 |
529 | !argcmp(parent->argv, proc->argv)) { |
530 | /* Yes; don't list it. */ |
531 | continue; |
532 | } |
533 | } |
534 | |
535 | pidset_add(&ret, pid); |
536 | } |
537 | return ret; |
538 | } |
539 | |
540 | /* ---------------------------------------------------------------------- |
541 | * Main program. |
542 | */ |
543 | |
/* Help text printed by 'pid --help'. */
const char usagemsg[] =
    "usage: pid [options] <search-cmd> [<search-arg>...]\n"
    "where: -a report all matching pids, not just one\n"
    " -U report pids of any user, not just ours\n"
    " also: pid --version report version number\n"
    " pid --help display this help text\n"
    " pid --licence display the (MIT) licence text\n"
    ;

/* Write the help text to standard output. */
void usage(void)
{
    fputs(usagemsg, stdout);
}
556 | |
/* Licence text printed by 'pid --licence'. */
const char licencemsg[] =
    "pid is copyright 2012 Simon Tatham.\n"
    "\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of this software and associated documentation files\n"
    "(the \"Software\"), to deal in the Software without restriction,\n"
    "including without limitation the rights to use, copy, modify, merge,\n"
    "publish, distribute, sublicense, and/or sell copies of the Software,\n"
    "and to permit persons to whom the Software is furnished to do so,\n"
    "subject to the following conditions:\n"
    "\n"
    "The above copyright notice and this permission notice shall be\n"
    "included in all copies or substantial portions of the Software.\n"
    "\n"
    "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
    "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n"
    "MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n"
    "BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n"
    "ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n"
    "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
    "SOFTWARE.\n"
    ;

/* Write the licence text to standard output. */
void licence(void)
{
    fputs(licencemsg, stdout);
}
584 | |
/*
 * Print the program's revision to standard output.
 *
 * The revision is whatever follows the first ':' in SVN_REV, with
 * leading whitespace skipped and everything from the next '$'
 * onwards dropped. If SVN_REV contains no ':', report an unknown
 * version instead.
 */
void version(void)
{
#define SVN_REV "$Revision$"
    char rev[sizeof(SVN_REV)];
    char *start, *end;

    strcpy(rev, SVN_REV);

    start = strchr(rev, ':');
    if (start == NULL) {
        printf("pid: unknown version\n");
        return;
    }

    start++;
    while (*start && isspace((unsigned char)*start))
        start++;

    end = strchr(start, '$');
    if (end != NULL)
        *end = '\0';

    printf("pid revision %s\n", start);
}
603 | |
604 | int main(int argc, char **argv) |
605 | { |
606 | const char **searchwords; |
607 | int nsearchwords; |
d1bc4fef |
608 | int all = 0, all_uids = 0; |
1f7c0ae1 |
609 | int doing_opts = 1; |
610 | |
611 | /* |
612 | * Allocate enough space in 'searchwords' that we could shovel the |
613 | * whole of our argv into it if we had to. Then we won't have to |
614 | * worry about it later. |
615 | */ |
616 | searchwords = (const char **)malloc((argc+1) * sizeof(const char *)); |
617 | nsearchwords = 0; |
618 | |
619 | /* |
620 | * Parse the command line. |
621 | */ |
622 | while (--argc > 0) { |
623 | char *p = *++argv; |
624 | if (doing_opts && *p == '-') { |
625 | if (!strcmp(p, "-a") || !strcmp(p, "--all")) { |
626 | all = 1; |
d1bc4fef |
627 | } else if (!strcmp(p, "-U") || !strcmp(p, "--all-uids")) { |
628 | all_uids = 1; |
1f7c0ae1 |
629 | } else if (!strcmp(p, "--version")) { |
630 | version(); |
631 | return 0; |
632 | } else if (!strcmp(p, "--help")) { |
633 | usage(); |
634 | return 0; |
635 | } else if (!strcmp(p, "--licence") || !strcmp(p, "--license")) { |
636 | licence(); |
637 | return 0; |
638 | } else if (!strcmp(p, "--")) { |
639 | doing_opts = 0; |
640 | } else { |
641 | fprintf(stderr, "pid: unrecognised option '%s'\n", p); |
642 | return 1; |
643 | } |
644 | } else { |
645 | searchwords[nsearchwords++] = p; |
646 | doing_opts = 0; /* further optionlike args become search terms */ |
647 | } |
648 | } |
649 | |
650 | if (!nsearchwords) { |
651 | fprintf(stderr, "pid: expected a command to search for; " |
652 | "type 'pid --help' for help\n"); |
653 | return 1; |
654 | } |
655 | searchwords[nsearchwords] = NULL; /* terminate list */ |
656 | |
657 | { |
658 | struct pidset procs; |
d1bc4fef |
659 | int uid, pid, npids; |
1f7c0ae1 |
660 | /* |
661 | * Construct our list of processes. |
662 | */ |
663 | procs = get_processes(); |
d1bc4fef |
664 | uid = getuid(); |
665 | if (uid > 0 && !all_uids) |
666 | procs = filter_by_uid(procs, uid); |
1f7c0ae1 |
667 | procs = filter_out_self(procs); |
668 | procs = filter_by_command(procs, searchwords); |
669 | if (!all) |
670 | procs = filter_out_forks(procs); |
671 | |
672 | /* |
673 | * Output. |
674 | */ |
675 | npids = pidset_size(&procs); |
676 | if (npids == 0) { |
677 | printf("NONE\n"); |
678 | } else if (all) { |
679 | const char *sep = ""; |
680 | for (pid = pidset_first(&procs); pid >= 0; |
681 | pid = pidset_next(&procs)) { |
682 | printf("%s%d", sep, pid); |
683 | sep = " "; |
684 | } |
685 | putchar('\n'); |
686 | } else { |
687 | if (npids == 1) { |
688 | printf("%d\n", pidset_first(&procs)); |
689 | } else { |
690 | printf("MULTIPLE\n"); |
691 | } |
692 | } |
693 | } |
694 | |
695 | return 0; |
696 | } |