The child process does not start with syscall(SYS_clone3, ...) + CLONE_VM

Hello, everyone.

It is necessary that the calling process and the child process are in the same memory space.
Therefore, I use the CLONE_VM flag.
But the child process does not start.
It looks like something is wrong with the allocation of memory for the stack.
Could you please explain the reason?

#define _DEFAULT_SOURCE         /* syscall() */
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64    /* getrlimit() */

#include <sched.h>              /* CLONE_* constants */
#include <linux/sched.h>        /* struct clone_args */
#include <sys/syscall.h>        /* SYS_* constants */
#include <unistd.h>

#include <stdint.h>             /* uintptr_t */

#include <stdio.h>
#include <stdlib.h>
#include <err.h>                /* err() */
#include <string.h>
#include <unistd.h>

#include <sys/resource.h>
#include <sys/mman.h>
#include <signal.h>

void spawn(void)
{
    struct rlimit       rlim;
    struct clone_args   cl_args;
    __u64               stack_size;
    __u64               *stackBot;
    __u64               *stackTop;
    pid_t               pid;

    memset(&cl_args, 0, sizeof(cl_args));

    if (getrlimit(RLIMIT_STACK, &rlim) == -1) {
        err(EXIT_FAILURE, "getrlimit");
    }
    stack_size = rlim.rlim_cur;

    stackBot = (__u64 *)
        mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN |
             MAP_STACK, -1, 0);
    if (stackBot == MAP_FAILED) {
        err(EXIT_FAILURE, "mmap");
    }

    stackTop = stackBot + (stack_size / sizeof(__u64));

    cl_args.flags       = CLONE_FILES | CLONE_IO | CLONE_VM;
    cl_args.exit_signal = SIGCHLD;
    cl_args.stack       = (__u64) (uintptr_t) stackTop;
    cl_args.stack_size  = stack_size;

    pid = syscall(SYS_clone3, &cl_args, sizeof(cl_args));
    switch(pid) {
        case -1:
            munmap(stackBot, stack_size);
            err(EXIT_FAILURE, "syscall");
        case 0:     /* Child */
            printf("Hello from child!\n");
            munmap(stackBot, stack_size);
            break;
        default:    /* Parent */
            printf("Hello from parent!\n");
            break;
    }
}

int
main(int argc, char *argv[])
{
    spawn();

    printf("Before last while\n");
    while(1) {
    }

    return EXIT_SUCCESS;
}
1 Like

I rewrote it, it still doesn’t work.

#define _DEFAULT_SOURCE          /* syscall() */
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64     /* getrlimit() */

#include <sched.h>              /* CLONE_* constants */
#include <linux/sched.h>        /* struct clone_args */
#include <sys/syscall.h>        /* SYS_* constants */
#include <unistd.h>

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>             /* uintptr_t */
#include <err.h>                /* err() */

#include <sys/resource.h>
#include <sys/mman.h>
#include <signal.h>

void spawn(void)
{
    struct rlimit       rlim;
    struct clone_args   cl_args = {0};
    uint64_t            stack_size;
    void                *stackBot;
    void                *stackTop;
    pid_t               pid;

    if (getrlimit(RLIMIT_STACK, &rlim) == -1)
        err(EXIT_FAILURE, "getrlimit");
    stack_size = rlim.rlim_cur;

    stackBot = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN |
                    MAP_STACK, -1, 0);
    if (stackBot == MAP_FAILED)
        err(EXIT_FAILURE, "mmap");

    long count = stack_size / sizeof(uint64_t);
    stackTop = (uint64_t *)stackBot + count - 1;

    printf("stack_size:      %lu\n", stack_size);
    printf("stack_size/2:    %lu\n", stack_size / 2);
    printf("stack_size/4:    %lu\n", stack_size / 4);
    printf("stack_size/8:    %lu\n", stack_size / 8);
    printf("stack_size/16:   %lu\n", stack_size / 16);
    printf("stack_size/4096: %lu\n", stack_size / 4096);   /* PAGESIZE */

    printf("stackBotAddr: %p\n", (void *)stackBot);
    printf("stackTopAddr: %p\n", (void *)stackTop);

    printf("stackBotVal: %lu\n", *(uint64_t *)(uintptr_t)stackBot);
    printf("stackTopVal: %lu\n", *(uint64_t *)(uintptr_t)stackTop);

    printf("stackTopAddr-stackBotAddr: %lu\n", (void *)stackTop - stackBot);

    printf("stackBotCast: %lu\n", (uint64_t)(uintptr_t)stackBot);
    printf("stackTopCast: %lu\n", (uint64_t)(uintptr_t)stackTop);

    cl_args.flags       = CLONE_FILES | CLONE_IO | CLONE_VM;
    cl_args.exit_signal = SIGCHLD;
    cl_args.stack       = (uint64_t)(uintptr_t)stackBot;
    cl_args.stack_size  = (void *)stackTop - stackBot;

    pid = syscall(SYS_clone3, &cl_args, sizeof(cl_args));
    switch(pid) {
        case -1:
            munmap(stackBot, stack_size);
            err(EXIT_FAILURE, "syscall");
        case 0:     /* Child */
            printf("Hello from child. Child's pid: %d\n", getpid());
            munmap(stackBot, stack_size);
            break;
        default:    /* Parent */
            printf("Hello from parent. Parent's pid: %d\n", getpid());
            break;
    }
}

int
main(int argc, char *argv[])
{
    spawn();

    printf("Before last while\n");
    while(1) {
    }

    return EXIT_SUCCESS;
}