亲子信号沟通陷入僵局

Jon*_*ler 6 c c++ unix signals

我已经实现了两个程序(parent.cpp,child.c),它们应该通过信号进行通信,以交替方式无限期地工作。然而,由于某种原因,程序在随机的时间后进入死锁,我无法弄清楚为什么(发送给父进程的信号似乎在没有执行信号处理程序的情况下被丢弃;因此,两者都等待来自其他进程的信号)。

程序采取的步骤:首先,初始化父进程,生成子进程并等待子进程的初始化(参见LLVMFuzzerInitializeparent.cpp)。然后,进入循环并在每次迭代中LLVMFuzzerTestOneInput调用。这里,(1) 父进程将数据写入子进程的 stdin,(2) 通过 SIGUSR2 唤醒子进程,(3) 子进程处理数据,以及 (4) 通过 SIGUSR1 唤醒父进程。

在 Linux 上,parent通常在几千次迭代后停止,而parent_main在大多数运行中无限期地运行(尽管有时它也会在几千次迭代后停止)。在 Mac 上,这两个程序都会在几秒钟后停止。

下面是一个由parent.cpp、child.c 和 Makefile 组成的最小工作示例:

// parent.cpp
#include <cstdlib>
#include <string>
#include <vector>
#include <cstring>
#include <iostream>

#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>

static volatile sig_atomic_t signal_received = 0;

static pid_t child_pid = -1;
static int fdin[2], fdout[2];

void handleSIGUSR1(int signo, siginfo_t *info, void *context)
{
    if (child_pid == info->si_pid)
        signal_received = 1;
}

void handleSIGCHLD(int signo, siginfo_t *info, void *context)
{
    exit(EXIT_FAILURE);
}

void initPipes()
{
    if (pipe(fdin) < 0)
        exit(EXIT_FAILURE);

    if (pipe(fdout) < 0)
        exit(EXIT_FAILURE);
}

void initSignalHandler()
{
    struct sigaction act = {0};

    act.sa_flags = SA_SIGINFO | SA_RESTART;

    {
        act.sa_sigaction = &handleSIGUSR1;
        sigaction(SIGUSR1, &act, NULL);
    }

    {
        act.sa_sigaction = &handleSIGCHLD;
        sigaction(SIGCHLD, &act, NULL);
    }
}

void waitForChild()
{
    sigset_t mask, oldmask;
    sigemptyset(&mask);
    sigaddset(&mask, SIGUSR1);

    if (sigprocmask(SIG_BLOCK, &mask, &oldmask) == -1)
        exit(EXIT_FAILURE);

    while (signal_received == 0)
    {
        sigsuspend(&oldmask);
    }
    signal_received = 0;
    if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
        exit(EXIT_FAILURE);
}

extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
{
    initPipes();
    initSignalHandler();

    child_pid = fork();
    if (child_pid < 0)
        exit(EXIT_FAILURE);

    if (child_pid == 0)
    {
        if (dup2(fdin[0], STDIN_FILENO) < 0)
            exit(EXIT_FAILURE);

        if (dup2(fdout[1], STDOUT_FILENO) < 0)
            exit(EXIT_FAILURE);

        char file[] = "./child";
        char *const argv[] = {"child", NULL};
        char *env[] = {NULL};

        execve(file, argv, env);
        exit(EXIT_FAILURE);
    }

    // Waiting until child is initialized
    waitForChild();

    return 0;
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
{
    static int iteration = 0;
    std::cout << "parent.iteration: " << iteration++ << std::endl;

    char buffer[1024];
    write(fdin[1], buffer, 1024);

    kill(child_pid, SIGUSR2);

    waitForChild();

    return 0;
}

#ifdef DEFINE_MAIN
int main(int argc, char **argv)
{
    LLVMFuzzerInitialize(&argc, &argv);
    for (;;)
    {
        uint8_t buffer[1];
        LLVMFuzzerTestOneInput(buffer, 1);
    }
}
#endif
Run Code Online (Sandbox Code Playgroud)
// child.c
#include <stdio.h>
#include <signal.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdlib.h>

static volatile sig_atomic_t signal_received = 0;

void handleSIGUSR2(int signo, siginfo_t *info, void *context)
{
    signal_received = 1;
}

void waitForParent()
{
    sigset_t mask, oldmask;
    sigemptyset(&mask);
    sigaddset(&mask, SIGUSR2);

    if (sigprocmask(SIG_BLOCK, &mask, &oldmask) == -1) {
        perror("sigprocmask-block (child)");
        exit(EXIT_FAILURE);
    }
    while (signal_received == 0)
    {
        sigsuspend(&oldmask);
    }
    signal_received = 0;
    if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) {
        perror("sigprocmask-unblock (child)");
        exit(EXIT_FAILURE);
    };
}

void readPackage()
{
    uint8_t buffer[1024];
    read(0, buffer, 1024);
}

int main(int argc, char **argv)
{
    struct sigaction act = {0};
    act.sa_flags = SA_SIGINFO | SA_RESTART;
    act.sa_sigaction = &handleSIGUSR2;
    sigaction(SIGUSR2, &act, NULL);

    // Notify parent about initialization
    kill(getppid(), SIGUSR1);

    while (1)
    {
        static int iteration = 0;

        waitForParent();

        fprintf(stderr, "child.iteration: %d\n", iteration++);
        readPackage();

        kill(getppid(), SIGUSR1);
    }
}
Run Code Online (Sandbox Code Playgroud)
all: child parent parent_main

child: child.c
    clang -fsanitize=address $< -o $@

parent: parent.cpp
    clang++ -fsanitize=fuzzer,address $< -o $@

parent_main: parent.cpp
    clang++ -DDEFINE_MAIN $< -o $@
Run Code Online (Sandbox Code Playgroud)

编辑:

在Linux(archlinux 5.19.11,clang版本:14.0.6)上,我还无法parent_main再次进入死锁状态,但我使用了strace -f ./parent(为简洁起见,我只显示了最后两次迭代):

[pid 167430] write(1, "parent.iteration: 1698\n", 23) = 23
[pid 167430] write(4, "0\0\0\0 `\0\0`\204\10\334\376\177\0\0\20\205\10\334\376\177\0\0\352\213\216\200\337U\0\0"..., 1024) = 1024
[pid 167430] kill(167431, SIGUSR2)      = 0
[pid 167431] <... rt_sigsuspend resumed>) = ? ERESTARTNOHAND (To be restarted if no handler)
[pid 167430] rt_sigprocmask(SIG_BLOCK, [USR1],  <unfinished ...>
[pid 167431] --- SIGUSR2 {si_signo=SIGUSR2, si_code=SI_USER, si_pid=167430, si_uid=0} ---
[pid 167430] <... rt_sigprocmask resumed>[], 8) = 0
[pid 167431] rt_sigreturn({mask=[USR2]} <unfinished ...>
[pid 167430] rt_sigsuspend([], 8 <unfinished ...>
[pid 167431] <... rt_sigreturn resumed>) = -1 EINTR (Interrupted system call)
[pid 167431] rt_sigprocmask(SIG_UNBLOCK, [USR2], NULL, 8) = 0
[pid 167431] write(2, "child.iteration: 1698\n", 22child.iteration: 1698
) = 22
[pid 167431] read(0, "0\0\0\0 `\0\0`\204\10\334\376\177\0\0\20\205\10\334\376\177\0\0\352\213\216\200\337U\0\0"..., 1024) = 1024
[pid 167431] getppid()                  = 167430
[pid 167431] kill(167430, SIGUSR1 <unfinished ...>
[pid 167430] <... rt_sigsuspend resumed>) = ? ERESTARTNOHAND (To be restarted if no handler)
[pid 167431] <... kill resumed>)        = 0
[pid 167430] --- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_USER, si_pid=167431, si_uid=0} ---
[pid 167431] rt_sigprocmask(SIG_BLOCK, [USR2],  <unfinished ...>
[pid 167430] rt_sigreturn({mask=[USR1]} <unfinished ...>
[pid 167431] <... rt_sigprocmask resumed>[], 8) = 0
[pid 167430] <... rt_sigreturn resumed>) = -1 EINTR (Interrupted system call)
[pid 167431] rt_sigsuspend([], 8 <unfinished ...>
[pid 167430] rt_sigprocmask(SIG_UNBLOCK, [USR1], NULL, 8) = 0
[pid 167430] clock_gettime(CLOCK_REALTIME, {tv_sec=1665498102, tv_nsec=952308734}) = 0
[pid 167430] clock_gettime(CLOCK_REALTIME, {tv_sec=1665498102, tv_nsec=952354900}) = 0


[pid 167430] write(1, "parent.iteration: 1699\n", 23) = 23
[pid 167430] write(4, "0\0\0\0 `\0\0`\204\10\334\376\177\0\0\20\205\10\334\376\177\0\0\352\213\216\200\337U\0\0"..., 1024) = 1024
[pid 167430] kill(167431, SIGUSR2)      = 0
[pid 167431] <... rt_sigsuspend resumed>) = ? ERESTARTNOHAND (To be restarted if no handler)
[pid 167430] rt_sigprocmask(SIG_BLOCK, [USR1],  <unfinished ...>
[pid 167431] --- SIGUSR2 {si_signo=SIGUSR2, si_code=SI_USER, si_pid=167430, si_uid=0} ---
[pid 167430] <... rt_sigprocmask resumed>[], 8) = 0
[pid 167431] rt_sigreturn({mask=[USR2]} <unfinished ...>
[pid 167430] rt_sigsuspend([], 8 <unfinished ...>
[pid 167431] <... rt_sigreturn resumed>) = -1 EINTR (Interrupted system call)
[pid 167431] rt_sigprocmask(SIG_UNBLOCK, [USR2], NULL, 8) = 0
[pid 167431] write(2, "child.iteration: 1699\n", 22child.iteration: 1699
) = 22
[pid 167431] read(0, "0\0\0\0 `\0\0`\204\10\334\376\177\0\0\20\205\10\334\376\177\0\0\352\213\216\200\337U\0\0"..., 1024) = 1024
[pid 167431] getppid()                  = 167430
[pid 167432] <... clock_nanosleep resumed>0x7ff1859f8de0) = 0
[pid 167431] kill(167430, SIGUSR1 <unfinished ...>
[pid 167430] <... rt_sigsuspend resumed>) = ? ERESTARTNOHAND (To be restarted if no handler)
[pid 167432] --- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_USER, si_pid=167431, si_uid=0} ---
[pid 167431] <... kill resumed>)        = 0
[pid 167430] rt_sigsuspend([], 8 <unfinished ...>
[pid 167431] rt_sigprocmask(SIG_BLOCK, [USR2],  <unfinished ...>
[pid 167432] rt_sigreturn({mask=[]} <unfinished ...>
[pid 167431] <... rt_sigprocmask resumed>[], 8) = 0
[pid 167432] <... rt_sigreturn resumed>) = 0
[pid 167431] rt_sigsuspend([], 8 <unfinished ...>
Run Code Online (Sandbox Code Playgroud)

看起来虽然孩子调用kill(1674310, SIGUSR1),信号到达 libFuzzer 进程(pid:167432)...我不知道从这里去哪里?