如何使用PTRACE获得多个线程的一致视图?
作者:互联网
我在研究另一个问题(this question)时,想到了一个可能可行的使用ptrace的思路,但我无法正确理解ptrace如何与线程交互.
假设我有一个给定的多线程主进程,我想附加到其中的特定线程(可能来自一个分叉的子进程).
>我可以附加到特定的线程吗? (各手册页对这个问题的说法不一.)
>如果是这样,这是否意味着单步执行只会逐步完成一个线程的指令?它是否会停止所有进程的线程?
>如果是这样,在调用PTRACE_SYSCALL或PTRACE_SINGLESTEP时,是否所有其他线程都保持停止状态,还是继续执行所有线程?有没有办法只在一个线程中前进,但保证其他线程保持停止状态?
基本上,我想通过强制所有线程停止来同步原始程序,然后通过单步执行一个跟踪线程来执行一小组单线程指令.
到目前为止,我个人的尝试看起来有点像这样:
/* Sketch of the asker's attempt: fork a helper process that attaches to one
 * specific thread of its parent, while the parent blocks in waitpid(). */
pid_t target = syscall(SYS_gettid); // get the calling thread's ID
pid_t pid = fork();

if (pid > 0) {
    waitpid(pid, NULL, 0); // synchronise main process
    important_instruction();
} else if (pid == 0) {
    /* ptrace() takes the request first and the thread ID second:
     * ptrace(request, pid, addr, data). */
    ptrace(PTRACE_ATTACH, target, NULL, NULL); // does this work?
    // cancel parent's "waitpid" call, e.g. with a signal
    // single-step to execute "important_instruction()" above
    ptrace(PTRACE_DETACH, target, NULL, NULL); // parent's threads resume?
    _Exit(0);
}
但是,我不确定(也找不到合适的参考资料)这种做法在并发情况下是否正确,以及它是否能保证只在所有其他线程都停止时才执行important_instruction().我也明白,当父进程从别处收到信号时可能会出现竞争条件;我还听说应该改用PTRACE_SEIZE,但它似乎并不存在.
任何澄清或参考将不胜感激!
解决方法:
我写了第二个测试用例.我不得不添加一个单独的答案,因为它太长而不适合包含示例输出的第一个答案.
首先,这是tracer.c:
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <dirent.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <stdio.h>
#ifndef SINGLESTEPS
#define SINGLESTEPS 10
#endif
/* Collect the task (thread) IDs of process `pid` by listing /proc/PID/task/.
 *
 * Works like getline(): *listptr and *sizeptr describe a caller-owned,
 * realloc()-able array and its capacity in elements. If *sizeptr is zero,
 * both are reset and a fresh array is allocated; the array is grown as
 * needed and the caller frees it.
 *
 * On success, returns the number of TIDs found (at least 1), stores a
 * terminating (pid_t)0 after the last TID, and sets errno to 0.
 * On failure, returns 0 with errno set:
 *   EINVAL   listptr or sizeptr is NULL, or pid < 1
 *   ENOTSUP  the directory name does not fit the internal buffer
 *   ESRCH    the task directory cannot be opened, or holds no TIDs
 *   ENOMEM   the array could not be grown
 *   EIO      closedir() failed
 *   (other)  whatever readdir() reported
 */
size_t get_tids(pid_t **const listptr, size_t *const sizeptr, const pid_t pid)
{
    char    path[64];
    DIR    *taskdir;
    pid_t  *tids;
    pid_t  *grown;
    size_t  capacity;
    size_t  count = 0;
    int     scan_errno;
    int     close_failed;

    if (listptr == NULL || sizeptr == NULL || pid < (pid_t)1) {
        errno = EINVAL;
        return (size_t)0;
    }

    /* Reuse the caller's buffer only when it claims a nonzero capacity. */
    if (*sizeptr > 0) {
        tids = *listptr;
        capacity = *sizeptr;
    } else {
        *listptr = NULL;
        *sizeptr = 0;
        tids = NULL;
        capacity = 0;
    }

    if (snprintf(path, sizeof path, "/proc/%d/task/", (int)pid) >= (int)sizeof path) {
        errno = ENOTSUP;
        return (size_t)0;
    }

    taskdir = opendir(path);
    if (taskdir == NULL) {
        errno = ESRCH;
        return (size_t)0;
    }

    for (;;) {
        struct dirent *entry;
        int            id;
        char           trailing;

        /* readdir() returns NULL both at end-of-directory and on error;
         * clearing errno first lets us tell the two apart afterwards. */
        errno = 0;
        entry = readdir(taskdir);
        if (entry == NULL)
            break;

        /* Keep purely numeric, positive names only: "%d%c" matches exactly
         * one item when nothing follows the number. */
        if (sscanf(entry->d_name, "%d%c", &id, &trailing) != 1 || id < 1)
            continue;

        /* Grow the array in chunks when it is full. */
        if (count >= capacity) {
            capacity = (count | 127) + 128;
            grown = realloc(tids, capacity * sizeof tids[0]);
            if (grown == NULL) {
                closedir(taskdir);
                errno = ENOMEM;
                return (size_t)0;
            }
            tids = grown;
            *listptr = tids;
            *sizeptr = capacity;
        }

        tids[count++] = (pid_t)id;
    }

    /* Close the directory in every case, but let a readdir() error win. */
    scan_errno = errno;
    close_failed = closedir(taskdir);
    if (scan_errno) {
        errno = scan_errno;
        return (size_t)0;
    }
    if (close_failed) {
        errno = EIO;
        return (size_t)0;
    }

    if (count < 1) {
        errno = ESRCH;
        return (size_t)0;
    }

    /* One extra slot is needed for the terminating (pid_t)0. */
    if (count >= capacity) {
        capacity = count + 1;
        grown = realloc(tids, capacity * sizeof tids[0]);
        if (grown == NULL) {
            errno = ENOMEM;
            return (size_t)0;
        }
        tids = grown;
        *listptr = tids;
        *sizeptr = capacity;
    }

    tids[count] = (pid_t)0;
    errno = 0;
    return count;
}
/* Wait for a state change (exit, stop, or continue) of child `pid`.
 * waitpid() is restarted whenever it is interrupted by a signal (EINTR).
 * Returns 0 and sets errno to 0 on success, storing the wait status in
 * *statusptr when statusptr is non-NULL. Returns ESRCH (also stored in
 * errno) if waitpid() did not report `pid`. */
static int wait_process(const pid_t pid, int *const statusptr)
{
    int   wstatus;
    pid_t reported;

    for (;;) {
        wstatus = 0;
        reported = waitpid(pid, &wstatus, WUNTRACED | WCONTINUED);
        if (reported != (pid_t)-1 || errno != EINTR)
            break;
    }

    if (reported != pid) {
        errno = ESRCH;
        return ESRCH;
    }

    if (statusptr != NULL)
        *statusptr = wstatus;

    errno = 0;
    return 0;
}
/* Send SIGCONT to child `pid` and wait for its next state change, repeating
 * for as long as the child is still reported as stopped.
 * Returns 0 and sets errno to 0 on success, storing the final wait status in
 * *statusptr when statusptr is non-NULL. Returns ESRCH (also stored in errno)
 * if kill() fails or waitpid() does not report `pid`. */
static int continue_process(const pid_t pid, int *const statusptr)
{
    int   wstatus;
    pid_t reported;

    for (;;) {
        if (kill(pid, SIGCONT) == -1) {
            errno = ESRCH;
            return ESRCH;
        }

        /* Restart waitpid() when a signal interrupts it. */
        do {
            wstatus = 0;
            reported = waitpid(pid, &wstatus, WUNTRACED | WCONTINUED);
        } while (reported == (pid_t)-1 && errno == EINTR);

        if (reported != pid) {
            errno = ESRCH;
            return ESRCH;
        }

        /* Still stopped: send SIGCONT again. */
        if (!WIFSTOPPED(wstatus))
            break;
    }

    if (statusptr != NULL)
        *statusptr = wstatus;

    errno = 0;
    return 0;
}
/* Print the instruction pointer and stack pointer of traced task `tid`.
 *
 *   out   stream the line is written to
 *   tid   task ID to read the registers of (via PTRACE_GETREGS)
 *   note  optional text appended to the line; ignored if NULL or empty
 *
 * Nothing is printed if the registers cannot be read, or when built for an
 * architecture other than x86 / x86-64. */
void show_registers(FILE *const out, pid_t tid, const char *const note)
{
    struct user_regs_struct regs;
    long r;

    /* Retry on ESRCH: ptrace() also reports it while the task has not yet
     * stopped, e.g. while a previous single-step is still completing.
     * Note that this spins for as long as ESRCH keeps being returned. */
    do {
        r = ptrace(PTRACE_GETREGS, tid, &regs, &regs);
    } while (r == -1L && errno == ESRCH);
    if (r == -1L)
        return;

    /* The register fields are cast so that they match the %lx conversions. */
#if (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 64
    if (note && *note)
        fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx. %s\n", (int)tid,
                (unsigned long)regs.rip, (unsigned long)regs.rsp, note);
    else
        fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx.\n", (int)tid,
                (unsigned long)regs.rip, (unsigned long)regs.rsp);
#elif (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 32
    if (note && *note)
        fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx. %s\n", (int)tid,
                (unsigned long)regs.eip, (unsigned long)regs.esp, note);
    else
        fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx.\n", (int)tid,
                (unsigned long)regs.eip, (unsigned long)regs.esp);
#endif
}
/* Tracer entry point.
 * Forks and executes COMMAND (argv[1] and its arguments) in a child process,
 * then loops on child events. Each time the child is reported as stopped,
 * every task listed by get_tids() is attached with PTRACE_ATTACH, the
 * registers of all tasks are printed, the last task in the list is
 * single-stepped SINGLESTEPS times (registers are printed after each step),
 * all tasks are detached, and the child is resumed via continue_process().
 * Returns 1 on a usage error or if fork() fails; otherwise returns the last
 * wait status obtained for the child. */
int main(int argc, char *argv[])
{
pid_t *tid = 0; /* Task ID array, filled in and grown by get_tids() */
size_t tids = 0; /* Number of task IDs currently in tid[] */
size_t tids_max = 0; /* Allocated capacity of tid[], in elements */
size_t t, s; /* Task index and single-step counter */
long r; /* Result of the latest ptrace() call */
pid_t child;
int status;
if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s COMMAND [ ARGS ... ]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This program executes COMMAND in a child process,\n");
fprintf(stderr, "and waits for it to stop (via a SIGSTOP signal).\n");
fprintf(stderr, "When that occurs, the register state of each thread\n");
fprintf(stderr, "is dumped to standard output, then the child process\n");
fprintf(stderr, "is sent a SIGCONT signal.\n");
fprintf(stderr, "\n");
return 1;
}
child = fork();
if (child == (pid_t)-1) {
fprintf(stderr, "fork() failed: %s.\n", strerror(errno));
return 1;
}
if (!child) {
/* Child: mark the process dumpable and ask that the parent be allowed to
 * ptrace it (PR_SET_PTRACER), then replace the process image with COMMAND. */
prctl(PR_SET_DUMPABLE, (long)1);
prctl(PR_SET_PTRACER, (long)getppid());
fflush(stdout);
fflush(stderr);
execvp(argv[1], argv + 1);
/* Only reached if execvp() failed. */
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return 127;
}
fprintf(stderr, "Tracer: Waiting for child (pid %d) events.\n\n", (int)child);
fflush(stderr);
/* Every break out of this loop leaves errno nonzero for a failure, or zero
 * when the loop ended for another reason; the report after the loop relies
 * on that. */
while (1) {
/* Wait for a child event. */
if (wait_process(child, &status))
break;
/* Exited? */
if (WIFEXITED(status) || WIFSIGNALED(status)) {
errno = 0;
break;
}
/* At this point, only stopped events are interesting. */
if (!WIFSTOPPED(status))
continue;
/* Obtain task IDs. */
tids = get_tids(&tid, &tids_max, child);
if (!tids)
break;
printf("Process %d has %d tasks,", (int)child, (int)tids);
fflush(stdout);
/* Attach to all tasks. */
for (t = 0; t < tids; t++) {
/* Retry the attach while it fails with EBUSY, EFAULT or ESRCH. */
do {
r = ptrace(PTRACE_ATTACH, tid[t], (void *)0, (void *)0);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
if (r == -1L) {
/* Attach failed: detach from the tasks attached so far (t-1 down to 0),
 * and set tids to 0 so the check below sees the failure. */
const int saved_errno = errno;
while (t-->0)
do {
r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
tids = 0;
errno = saved_errno;
break;
}
}
if (!tids) {
/* Could not attach: resume the child, report the attach error, and keep
 * waiting for events only if the child was reported as continued. */
const int saved_errno = errno;
if (continue_process(child, &status))
break;
printf(" failed to attach (%s).\n", strerror(saved_errno));
fflush(stdout);
if (WIFCONTINUED(status))
continue;
errno = 0;
break;
}
printf(" attached to all.\n\n");
fflush(stdout);
/* Dump the registers of each task. */
for (t = 0; t < tids; t++)
show_registers(stdout, tid[t], "");
printf("\n");
fflush(stdout);
/* Single-step only the last task in the list, SINGLESTEPS times, printing
 * the registers of every task after each successful step. */
for (s = 0; s < SINGLESTEPS; s++) {
do {
r = ptrace(PTRACE_SINGLESTEP, tid[tids-1], (void *)0, (void *)0);
} while (r == -1L && errno == ESRCH);
if (!r) {
for (t = 0; t < tids - 1; t++)
show_registers(stdout, tid[t], "");
show_registers(stdout, tid[tids-1], "Advanced by one step.");
printf("\n");
fflush(stdout);
} else {
fprintf(stderr, "Single-step failed: %s.\n", strerror(errno));
fflush(stderr);
}
}
/* Detach from all tasks. */
for (t = 0; t < tids; t++)
do {
r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
} while (r == -1 && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
tids = 0;
/* Resume the child; go back to waiting only if it was reported as continued. */
if (continue_process(child, &status))
break;
if (WIFCONTINUED(status)) {
printf("Detached. Waiting for new stop events.\n\n");
fflush(stdout);
continue;
}
errno = 0;
break;
}
/* Report why the event loop ended, based on errno and the last wait status. */
if (errno)
fprintf(stderr, "Tracer: Child lost (%s)\n", strerror(errno));
else
if (WIFEXITED(status))
fprintf(stderr, "Tracer: Child exited (%d)\n", WEXITSTATUS(status));
else
if (WIFSIGNALED(status))
fprintf(stderr, "Tracer: Child died from signal %d\n", WTERMSIG(status));
else
fprintf(stderr, "Tracer: Child vanished\n");
fflush(stderr);
return status;
}
tracer.c执行指定的命令,等待命令接收SIGSTOP信号. (tracer.c本身不发送;您可以让tracee自行停止,或者从外部发送信号.)
当命令停止时,tracer.c将ptrace附加到每个线程,并以一个固定步数(SINGLESTEPS编译时常量)单步执行其中一个线程,显示每个线程的相关寄存器状态.
之后,它从命令中分离出来,并向它发送一个SIGCONT信号,让它继续正常运行.
这是一个简单的测试程序worker.c,我用于测试:
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#ifndef THREADS
#define THREADS 2
#endif
/* Number of the signal caught by catch_done(); stays zero until one arrives. */
volatile sig_atomic_t done = 0;

/* Signal handler: record the number of the delivered signal in `done`. */
void catch_done(int signum)
{
    done = (sig_atomic_t)signum;
}
/* Install catch_done() as the handler for signal `signum`, with an empty
 * signal mask and no flags.
 * Returns 0 on success, or the errno value left by sigaction() on failure. */
int install_done(const int signum)
{
    struct sigaction action;

    action.sa_handler = catch_done;
    action.sa_flags = 0;
    sigemptyset(&action.sa_mask);

    return sigaction(signum, &action, NULL) ? errno : 0;
}
/* Thread body: atomically increment the unsigned long counter that `data`
 * points to until `done` becomes nonzero.
 * Returns the counter value read at exit, converted to a pointer. */
void *worker(void *data)
{
    volatile unsigned long *const shared = data;
    unsigned long final_value;

    for (;;) {
        if (done)
            break;
        __sync_add_and_fetch(shared, 1UL);
    }

    /* OR-ing with zero leaves the counter unchanged; it is used here as an
     * atomic read of the current value. */
    final_value = __sync_or_and_fetch(shared, 0UL);
    return (void *)final_value;
}
/* Worker entry point.
 * Installs catch_done() for SIGHUP, SIGTERM and SIGUSR1, starts THREADS
 * threads running worker() on one shared counter, runs worker() in the
 * original thread as well, then joins all threads.
 * Returns 0 on success, or 1 if a signal handler could not be installed or a
 * thread could not be created. */
int main(void)
{
    unsigned long counter = 0UL;
    pthread_t thread[THREADS];
    pthread_attr_t attrs;
    size_t i;
    int err;

    if (install_done(SIGHUP) ||
        install_done(SIGTERM) ||
        install_done(SIGUSR1)) {
        fprintf(stderr, "Worker: Cannot install signal handlers: %s.\n", strerror(errno));
        return 1;
    }

    pthread_attr_init(&attrs);
    pthread_attr_setstacksize(&attrs, 65536);
    for (i = 0; i < THREADS; i++) {
        /* pthread_create() returns the error number and does not set errno,
         * so the message must be built from its return value. */
        err = pthread_create(&thread[i], &attrs, worker, &counter);
        if (err) {
            done = 1;
            fprintf(stderr, "Worker: Cannot create thread: %s.\n", strerror(err));
            return 1;
        }
    }
    pthread_attr_destroy(&attrs);

    /* Let the original thread also do the worker dance. */
    worker(&counter);

    for (i = 0; i < THREADS; i++)
        pthread_join(thread[i], NULL);

    return 0;
}
使用例如编译两者
gcc -W -Wall -O3 -fomit-frame-pointer worker.c -pthread -o worker
gcc -W -Wall -O3 -fomit-frame-pointer tracer.c -o tracer
并且使用例如在单独的终端或背景中运行.
./tracer ./worker &
跟踪器会显示worker的PID:
Tracer: Waiting for child (pid 24275) events.
此时,子进程正常运行.当您向子进程发送SIGSTOP时,操作开始.跟踪器检测到该停止事件,执行所需的跟踪,然后分离并让子进程继续正常运行:
kill -STOP 24275
Process 24275 has 3 tasks, attached to all.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.
Detached. Waiting for new stop events.
您可以根据需要重复上述次数.请注意,我选择了SIGSTOP信号作为触发器,因为这种方式tracer.c也可用作为每个请求生成复杂的多线程核心转储的基础(因为多线程进程可以通过发送自己的SIGSTOP来简单地触发它).
在上面的例子中,线程都在旋转的worker()函数的反汇编:
0x400a50: eb 0b jmp 0x400a5d
0x400a52: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0x400a58: f0 48 83 07 01 lock addq $0x1,(%rdi) = fourth step
0x400a5d: 8b 05 00 00 00 00 mov 0x0(%rip),%eax = first step
0x400a63: 85 c0 test %eax,%eax = second step
0x400a65: 74 f1 je 0x400a58 = third step
0x400a67: 48 8b 07 mov (%rdi),%rax
0x400a6a: 48 89 c2 mov %rax,%rdx
0x400a6d: f0 48 0f b1 07 lock cmpxchg %rax,(%rdi)
0x400a72: 75 f6 jne 0x400a6a
0x400a74: 48 89 d0 mov %rdx,%rax
0x400a77: c3 retq
现在,这个测试程序只显示如何停止进程,附加到其所有线程,单步执行其中一个线程所需数量的指令,然后让所有线程继续正常;它还没有证明同样适用于让特定线程继续正常(通过PTRACE_CONT).但是,我在下面描述的细节表明,对于PTRACE_CONT,相同的方法应该可以正常工作.
我在编写上述测试程序时遇到的主要问题或惊喜是必要性
/* Illustrative pattern only: PTRACE_cmd and "..." are placeholders for the
 * actual request and its remaining arguments. */
long r;
/* Reissue the request for as long as it fails with EBUSY, EFAULT or ESRCH. */
do {
r = ptrace(PTRACE_cmd, tid, ...);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
这样的循环,特别是针对ESRCH的情况(其他几种情况只是我根据ptrace手册页的描述补充的).
您会看到,大多数ptrace命令仅在任务停止时才允许执行.但是,当任务仍在完成某个操作(例如单步命令)时,它并未处于停止状态.因此,使用上述循环 – 可以再加上毫秒级的nanosleep或类似调用以避免浪费CPU – 可确保先前的ptrace命令已完成(因此任务已停止),然后我们才尝试发出新的命令.
Kerrek SB,我相信你在测试程序中遇到的麻烦,至少有一部分是由这个问题引起的?对我个人而言,意识到这个循环的必要性是一个“D'oh!”(恍然大悟)的时刻,因为ptrace跟踪本质上是异步的,而不是同步的.
(这种异步性也是我上面提到的SIGCONT-PTRACE_CONT交互的原因.我相信使用上面显示的循环进行适当的处理,这种交互不再是一个问题 – 而且实际上是可以理解的.)
添加到此答案的评论:
Linux内核在task_struct结构中使用一组任务状态标志(定义参见include/linux/sched.h)来跟踪每个任务的状态. ptrace()面向用户空间的一侧在kernel/ptrace.c中定义.
调用PTRACE_SINGLESTEP或PTRACE_CONT时,kernel/ptrace.c:ptrace_continue()处理大部分细节.它最后调用wake_up_state(child, __TASK_TRACED)(即kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0)).
当通过SIGSTOP信号停止进程时,所有任务都将停止,并以“已停止,未跟踪”状态结束.
附加到每个任务(通过PTRACE_ATTACH或PTRACE_SEIZE,参见kernel/ptrace.c:ptrace_attach())会修改任务状态.但是,ptrace状态位(见include/linux/ptrace.h中的PT_常量)与任务可运行状态位(见include/linux/sched.h中的TASK_常量)是分开的.
在附加到任务并向进程发送SIGCONT信号之后,停止状态不会立即被修改(我相信),因为任务同时也处于被跟踪状态.执行PTRACE_SINGLESTEP或PTRACE_CONT最终会进入kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0),更新任务状态,并将任务移至运行队列.
现在,我还没有弄清楚的复杂部分是:在任务下次被调度时,内核如何更新任务状态.我的测试表明,使用单步执行(它是另一个任务状态标志)时,只有任务状态得到更新,并且单步标志被清除.PTRACE_CONT似乎不那么可靠;我相信这是因为单步标志“强制”任务状态发生变化.也许在继续信号的传递与状态变化之间存在“竞争条件”?
(进一步编辑:内核开发人员肯定希望调用wait(),参见例如this thread.)
换句话说,在注意到进程已停止后(请注意,如果进程不是子进程,并且尚未附加到它,则可以使用/proc/PID/stat或/proc/PID/status来检测),我相信以下过程是最稳健的:
pid_t pid;      /* Process owning the tasks */
pid_t p;        /* Return value of waitpid() */
pid_t *tid;     /* Task ID array; task IDs are pid_t, there is no tid_t type */
size_t tids;    /* Number of tasks in tid[] */
long result;    /* Return value of the latest ptrace() call */
int status;     /* Wait status filled in by waitpid() */
size_t i;
/* Attach to every task, retrying while ptrace() reports ESRCH, EBUSY,
 * EFAULT or EIO. */
for (i = 0; i < tids; i++) {
    for (;;) {
        result = ptrace(PTRACE_ATTACH, tid[i], (void *)0, (void *)0);
        if (result != -1L)
            break;
        if (errno != ESRCH && errno != EBUSY && errno != EFAULT && errno != EIO)
            break;
        /* To avoid burning up CPU for nothing: */
        sched_yield(); /* or nanosleep(), or usleep() */
    }
    if (result == -1L) {
        /*
         * Fatal error. First detach from tid[0..i-1], then exit.
         */
    }
}
/* Send SIGCONT to the process. */
if (kill(pid, SIGCONT)) {
/*
* Fatal error, see errno. Exit.
*/
}
/* Since we are attached to the process,
* we can wait() on it. */
while (1) {
errno = 0;
status = 0;
p = waitpid(pid, &status, WCONTINUED);
if (p == (pid_t)-1) {
if (errno == EINTR)
continue;
else
break;
} else
if (p != pid) {
errno = ESRCH;
break;
} else
if (WIFCONTINUED(status)) {
errno = 0;
break;
}
}
if (errno) {
/*
* Fatal error. First detach from tid[0..tids-1], then exit.
*/
}
/* Single-step each task to update the task states. */
for (i = 0; i < tids; i++) {
    for (;;) {
        result = ptrace(PTRACE_SINGLESTEP, tid[i], (void *)0, (void *)0);
        if (result != -1L || errno != ESRCH)
            break;
        /* To avoid burning up CPU for nothing: */
        sched_yield(); /* or nanosleep(), or usleep() */
    }
    if (result == -1L) {
        /*
         * Fatal error. First detach from tid[0..i-1], then exit.
         */
    }
}
/* Obtain task register structures, to make sure the single-steps
 * have completed and their states have stabilized. */
for (i = 0; i < tids; i++) {
    struct user_regs_struct regs;

    while (1) {
        /* The register contents are discarded; only the success of the
         * request matters here. */
        result = ptrace(PTRACE_GETREGS, tid[i], &regs, &regs);
        if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) {
            /* To avoid burning up CPU for nothing: */
            sched_yield(); /* or nanosleep(), or usleep() */
            continue;
        }
        break;
    }
    if (result == -1L) {
        /*
         * Fatal error. First detach from tid[0..i-1], then exit.
         */
    }
}
在上述之后,所有任务都应该附加并处于预期状态,以便例如PTRACE_CONT无需进一步操作.
如果行为在未来的内核中发生变化 – 我确实认为STOP / CONT信号和ptracing之间的相互作用可能会发生变化;至少向LKML开发人员提出有关此行为的问题是有道理的! – ,上述程序仍然可以有效地运作. (谨慎一点,通过几次使用PTRACE_SINGLESTEP循环,也可能是一个好主意.)
与PTRACE_CONT的区别在于,如果将来行为发生变化,则初始的PTRACE_CONT可能会真正让进程继续运行,导致其后的ptrace()调用失败.而使用PTRACE_SINGLESTEP时,进程会再次停止,使后续的ptrace()调用得以成功.
有问题吗?
标签:c-3,multithreading,linux,pthreads,ptrace 来源: https://codeday.me/bug/20190926/1821064.html