Add rseq checkpoint/restore (C/R) support, as required by glibc 2.35.
See https://github.com/checkpoint-restore/criu/issues/1696 and https://github.com/checkpoint-restore/criu/pull/1706
This commit is contained in:
parent
78ebe85a03
commit
0d1c6b4481
@ -0,0 +1,74 @@
|
||||
From ee46b1b5755eacf3be02a67934f0dc690293745b Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 13:28:51 +0800
|
||||
Subject: [PATCH 02/16] compel: add rseq syscall into compel std plugin syscall tables
|
||||
|
||||
Add rseq syscall numbers for: arm/aarch64, mips64, ppc64le, s390, x86_64/x86
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
compel/arch/arm/plugins/std/syscalls/syscall.def | 1 +
|
||||
compel/arch/mips/plugins/std/syscalls/syscall_64.tbl | 1 +
|
||||
.../compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 +
|
||||
.../compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl | 1 +
|
||||
compel/arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 +
|
||||
compel/arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 +
|
||||
6 files changed, 6 insertions(+)
|
||||
|
||||
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
|
||||
index 1b877d1..bb78cbb 100644
|
||||
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
|
||||
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
|
||||
@@ -119,3 +119,4 @@ clone3 435 435 (struct clone_args *uargs, size_t size)
|
||||
sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask)
|
||||
pidfd_open 434 434 (pid_t pid, unsigned int flags)
|
||||
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
|
||||
+rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
|
||||
index 7a6db19..95dc7d3 100644
|
||||
--- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
|
||||
+++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
|
||||
@@ -115,3 +115,4 @@ __NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr
|
||||
__NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_pidfd_open 5434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
|
||||
index dd79187..ad0d94f 100644
|
||||
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
|
||||
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
|
||||
@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
|
||||
index 282adaf..916b697 100644
|
||||
--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
|
||||
+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
|
||||
@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
|
||||
index 3fe3194..90f23d5 100644
|
||||
--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
|
||||
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
|
||||
@@ -103,3 +103,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_f
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
|
||||
index c1d119d..323fab1 100644
|
||||
--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
|
||||
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
|
||||
@@ -114,3 +114,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
--
|
||||
2.30.0
|
||||
|
||||
62
0003-kerndat-check-for-rseq-syscall-support.patch
Normal file
62
0003-kerndat-check-for-rseq-syscall-support.patch
Normal file
@ -0,0 +1,62 @@
|
||||
From ebd917f395b8bb3c4d6bbe51f9210d1aeca2e1fd Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 13:34:10 +0800
|
||||
Subject: [PATCH 03/16] kerndat: check for rseq syscall support
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
criu/include/kerndat.h | 1 +
|
||||
criu/kerndat.c | 18 ++++++++++++++++++
|
||||
2 files changed, 19 insertions(+)
|
||||
|
||||
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
|
||||
index 80bad7f..44a6976 100644
|
||||
--- a/criu/include/kerndat.h
|
||||
+++ b/criu/include/kerndat.h
|
||||
@@ -74,6 +74,7 @@ struct kerndat_s {
|
||||
bool has_pidfd_getfd;
|
||||
bool has_nspid;
|
||||
bool has_nftables_concat;
|
||||
+ bool has_rseq;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index 0e88ba4..f5a4490 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -816,6 +816,20 @@ static int kerndat_x86_has_ptrace_fpu_xsave_bug(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int kerndat_has_rseq(void)
|
||||
+{
|
||||
+ if (syscall(__NR_rseq, NULL, 0, 0, 0) != -1) {
|
||||
+ pr_err("rseq should fail\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ if (errno == ENOSYS)
|
||||
+ pr_info("rseq syscall isn't supported\n");
|
||||
+ else
|
||||
+ kdat.has_rseq = true;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
#define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat"
|
||||
#define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat"
|
||||
|
||||
@@ -1360,6 +1374,10 @@ int kerndat_init(void)
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
+ if (!ret && kerndat_has_rseq()) {
|
||||
+ pr_err("kerndat_has_rseq failed when initializing kerndat.\n");
|
||||
+ ret = -1;
|
||||
+ }
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
kerndat_files_stat();
|
||||
--
|
||||
2.30.0
|
||||
|
||||
161
0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch
Normal file
161
0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch
Normal file
@ -0,0 +1,161 @@
|
||||
From fe1f84eb98092b1aff60ae2be11e351b165f3f43 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 13:35:53 +0800
|
||||
Subject: [PATCH 04/16] util: move fork_and_ptrace_attach helper from cr-check
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
criu/cr-check.c | 55 -------------------------------
|
||||
criu/include/util.h | 1 +
|
||||
criu/util.c | 57 +++++++++++++++++++++++++++++++++
|
||||
3 files changed, 58 insertions(+), 55 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-check.c b/criu/cr-check.c
|
||||
index 3575fb3..d41ef8f 100644
|
||||
--- a/criu/cr-check.c
|
||||
+++ b/criu/cr-check.c
|
||||
@@ -537,61 +537,6 @@ static int check_sigqueuinfo(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static pid_t fork_and_ptrace_attach(int (*child_setup)(void))
|
||||
-{
|
||||
- pid_t pid;
|
||||
- int sk_pair[2], sk;
|
||||
- char c = 0;
|
||||
-
|
||||
- if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
|
||||
- pr_perror("socketpair");
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- pid = fork();
|
||||
- if (pid < 0) {
|
||||
- pr_perror("fork");
|
||||
- return -1;
|
||||
- } else if (pid == 0) {
|
||||
- sk = sk_pair[1];
|
||||
- close(sk_pair[0]);
|
||||
-
|
||||
- if (child_setup && child_setup() != 0)
|
||||
- exit(1);
|
||||
-
|
||||
- if (write(sk, &c, 1) != 1) {
|
||||
- pr_perror("write");
|
||||
- exit(1);
|
||||
- }
|
||||
-
|
||||
- while (1)
|
||||
- sleep(1000);
|
||||
- exit(1);
|
||||
- }
|
||||
-
|
||||
- sk = sk_pair[0];
|
||||
- close(sk_pair[1]);
|
||||
-
|
||||
- if (read(sk, &c, 1) != 1) {
|
||||
- close(sk);
|
||||
- kill(pid, SIGKILL);
|
||||
- pr_perror("read");
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- close(sk);
|
||||
-
|
||||
- if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
|
||||
- pr_perror("Unable to ptrace the child");
|
||||
- kill(pid, SIGKILL);
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- waitpid(pid, NULL, 0);
|
||||
-
|
||||
- return pid;
|
||||
-}
|
||||
-
|
||||
static int check_ptrace_peeksiginfo(void)
|
||||
{
|
||||
struct ptrace_peeksiginfo_args arg;
|
||||
diff --git a/criu/include/util.h b/criu/include/util.h
|
||||
index a2dac22..1c0b3c7 100644
|
||||
--- a/criu/include/util.h
|
||||
+++ b/criu/include/util.h
|
||||
@@ -166,6 +166,7 @@ extern int is_anon_link_type(char *link, char *type);
|
||||
|
||||
extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags);
|
||||
extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid);
|
||||
+extern pid_t fork_and_ptrace_attach(int (*child_setup)(void));
|
||||
extern int cr_daemon(int nochdir, int noclose, int close_fd);
|
||||
extern int status_ready(void);
|
||||
extern int is_root_user(void);
|
||||
diff --git a/criu/util.c b/criu/util.c
|
||||
index 06124c2..e682161 100644
|
||||
--- a/criu/util.c
|
||||
+++ b/criu/util.c
|
||||
@@ -654,6 +654,63 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+pid_t fork_and_ptrace_attach(int (*child_setup)(void))
|
||||
+{
|
||||
+ pid_t pid;
|
||||
+ int sk_pair[2], sk;
|
||||
+ char c = 0;
|
||||
+
|
||||
+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
|
||||
+ pr_perror("socketpair");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ pid = fork();
|
||||
+ if (pid < 0) {
|
||||
+ pr_perror("fork");
|
||||
+ return -1;
|
||||
+ } else if (pid == 0) {
|
||||
+ sk = sk_pair[1];
|
||||
+ close(sk_pair[0]);
|
||||
+
|
||||
+ if (child_setup && child_setup() != 0)
|
||||
+ exit(1);
|
||||
+
|
||||
+ if (write(sk, &c, 1) != 1) {
|
||||
+ pr_perror("write");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ while (1)
|
||||
+ sleep(1000);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+
|
||||
+ sk = sk_pair[0];
|
||||
+ close(sk_pair[1]);
|
||||
+
|
||||
+ if (read(sk, &c, 1) != 1) {
|
||||
+ close(sk);
|
||||
+ kill(pid, SIGKILL);
|
||||
+ waitpid(pid, NULL, 0);
|
||||
+ pr_perror("read");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ close(sk);
|
||||
+
|
||||
+ if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
|
||||
+ pr_perror("Unable to ptrace the child");
|
||||
+ kill(pid, SIGKILL);
|
||||
+ waitpid(pid, NULL, 0);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ waitpid(pid, NULL, 0);
|
||||
+
|
||||
+ return pid;
|
||||
+}
|
||||
+
|
||||
int status_ready(void)
|
||||
{
|
||||
char c = 0;
|
||||
--
|
||||
2.30.0
|
||||
|
||||
162
0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch
Normal file
162
0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch
Normal file
@ -0,0 +1,162 @@
|
||||
From 3c567693f2e6579109dbabcca0e90c059ce5af25 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:30:18 +0800
|
||||
Subject: [PATCH 05/16] cr-check: Add ptrace rseq conf dump feature
|
||||
|
||||
Add "get_rseq_conf" feature corresponding to the ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support.
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
compel/include/uapi/ptrace.h | 12 +++++++
|
||||
criu/cr-check.c | 11 +++++++
|
||||
criu/include/kerndat.h | 1 +
|
||||
criu/kerndat.c | 41 ++++++++++++++++++++++++
|
||||
4 files changed, 65 insertions(+)
|
||||
|
||||
diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h
|
||||
index c5291d2..bfe28c7 100644
|
||||
--- a/compel/include/uapi/ptrace.h
|
||||
+++ b/compel/include/uapi/ptrace.h
|
||||
@@ -65,6 +65,18 @@ typedef struct {
|
||||
uint64_t flags; /* Output: filter's flags */
|
||||
} seccomp_metadata_t;
|
||||
|
||||
+#ifndef PTRACE_GET_RSEQ_CONFIGURATION
|
||||
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
|
||||
+
|
||||
+struct ptrace_rseq_configuration {
|
||||
+ __u64 rseq_abi_pointer;
|
||||
+ __u32 rseq_abi_size;
|
||||
+ __u32 signature;
|
||||
+ __u32 flags;
|
||||
+ __u32 pad;
|
||||
+};
|
||||
+#endif
|
||||
+
|
||||
#ifdef PTRACE_EVENT_STOP
|
||||
#if PTRACE_EVENT_STOP == 7 /* Bad value from Linux 3.1-3.3, fixed in 3.4 */
|
||||
#undef PTRACE_EVENT_STOP
|
||||
diff --git a/criu/cr-check.c b/criu/cr-check.c
|
||||
index d41ef8f..ba87511 100644
|
||||
--- a/criu/cr-check.c
|
||||
+++ b/criu/cr-check.c
|
||||
@@ -794,6 +794,15 @@ static int check_ptrace_dump_seccomp_filters(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int check_ptrace_get_rseq_conf(void)
|
||||
+{
|
||||
+ if (!kdat.has_ptrace_get_rseq_conf) {
|
||||
+ pr_warn("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported. C/R of processes which are using rseq() won't work.\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int check_mem_dirty_track(void)
|
||||
{
|
||||
if (!kdat.has_dirty_track) {
|
||||
@@ -1435,6 +1444,7 @@ int cr_check(void)
|
||||
ret |= check_ns_pid();
|
||||
ret |= check_apparmor_stacking();
|
||||
ret |= check_network_lock_nftables();
|
||||
+ ret |= check_ptrace_get_rseq_conf();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1547,6 +1557,7 @@ static struct feature_list feature_list[] = {
|
||||
{ "ns_pid", check_ns_pid },
|
||||
{ "apparmor_stacking", check_apparmor_stacking },
|
||||
{ "network_lock_nftables", check_network_lock_nftables },
|
||||
+ { "get_rseq_conf", check_ptrace_get_rseq_conf },
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
|
||||
index 44a6976..05abeda 100644
|
||||
--- a/criu/include/kerndat.h
|
||||
+++ b/criu/include/kerndat.h
|
||||
@@ -75,6 +75,7 @@ struct kerndat_s {
|
||||
bool has_nspid;
|
||||
bool has_nftables_concat;
|
||||
bool has_rseq;
|
||||
+ bool has_ptrace_get_rseq_conf;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index f5a4490..4841387 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -4,6 +4,8 @@
|
||||
#include <sys/file.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
+#include <sys/ptrace.h>
|
||||
+#include <sys/wait.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#include <sys/syscall.h>
|
||||
@@ -36,6 +38,7 @@
|
||||
#include "sockets.h"
|
||||
#include "net.h"
|
||||
#include "tun.h"
|
||||
+#include <compel/ptrace.h>
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
#include "netfilter.h"
|
||||
#include "fsnotify.h"
|
||||
@@ -830,6 +833,40 @@ static int kerndat_has_rseq(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int kerndat_has_ptrace_get_rseq_conf(void)
|
||||
+{
|
||||
+ pid_t pid;
|
||||
+ int len;
|
||||
+ struct ptrace_rseq_configuration rseq;
|
||||
+
|
||||
+ pid = fork_and_ptrace_attach(NULL);
|
||||
+ if (pid < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ len = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, pid, sizeof(rseq), &rseq);
|
||||
+ if (len != sizeof(rseq)) {
|
||||
+ kdat.has_ptrace_get_rseq_conf = false;
|
||||
+ pr_info("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) is not supported\n");
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * flags is currently always zero on the kernel side; if that ever changes,
|
||||
+ * we need to notice it and, possibly, make changes on the CRIU side.
|
||||
+ */
|
||||
+ if (rseq.flags != 0) {
|
||||
+ kdat.has_ptrace_get_rseq_conf = false;
|
||||
+ pr_err("ptrace(PTRACE_GET_RSEQ_CONFIGURATION): rseq.flags != 0\n");
|
||||
+ } else {
|
||||
+ kdat.has_ptrace_get_rseq_conf = true;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ kill(pid, SIGKILL);
|
||||
+ waitpid(pid, NULL, 0);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
#define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat"
|
||||
#define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat"
|
||||
|
||||
@@ -1378,6 +1415,10 @@ int kerndat_init(void)
|
||||
pr_err("kerndat_has_rseq failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
+ if (!ret && kerndat_has_ptrace_get_rseq_conf()) {
|
||||
+ pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n");
|
||||
+ ret = -1;
|
||||
+ }
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
kerndat_files_stat();
|
||||
--
|
||||
2.30.0
|
||||
|
||||
702
0006-rseq-initial-support.patch
Normal file
702
0006-rseq-initial-support.patch
Normal file
@ -0,0 +1,702 @@
|
||||
From e444c089ebfb03fb2b6d69a40322d31ab33c0597 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 14:52:35 +0800
|
||||
Subject: [PATCH 06/16] rseq: initial support
|
||||
|
||||
TODO:
|
||||
1. Properly handle the case when the kernel has rseq() support but no ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support and user processes haven't used rseq().
|
||||
2. Properly handle "transient" states, when CRIU arrives while an rseq critical section is executing.
|
||||
   We need a test for this case with some "heavy" rseq usage, and we need to properly handle the RSEQ_CS_* flags.
|
||||
|
||||
Fixes: #1696
|
||||
|
||||
Reported-by: Radostin Stoyanov <radostin@redhat.com>
|
||||
Suggested-by: Florian Weimer <fweimer@redhat.com>
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
compel/include/uapi/ptrace.h | 16 +--
|
||||
criu/cr-dump.c | 99 ++++++++++++++++
|
||||
criu/cr-restore.c | 17 +++
|
||||
criu/include/linux/rseq.h | 144 +++++++++++++++++++++++
|
||||
criu/include/parasite.h | 7 ++
|
||||
criu/include/restorer.h | 7 ++
|
||||
criu/kerndat.c | 2 +-
|
||||
criu/parasite-syscall.c | 11 ++
|
||||
criu/pie/parasite.c | 99 ++++++++++++++++
|
||||
criu/pie/restorer.c | 24 ++++
|
||||
images/Makefile | 1 +
|
||||
images/core.proto | 2 +
|
||||
images/rseq.proto | 9 ++
|
||||
13 files changed, 429 insertions(+), 9 deletions(-)
|
||||
create mode 100644 criu/include/linux/rseq.h
|
||||
create mode 100644 images/rseq.proto
|
||||
|
||||
diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h
|
||||
index bfe28c7..d807a92 100644
|
||||
--- a/compel/include/uapi/ptrace.h
|
||||
+++ b/compel/include/uapi/ptrace.h
|
||||
@@ -66,14 +66,14 @@ typedef struct {
|
||||
} seccomp_metadata_t;
|
||||
|
||||
#ifndef PTRACE_GET_RSEQ_CONFIGURATION
|
||||
-#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
|
||||
-
|
||||
-struct ptrace_rseq_configuration {
|
||||
- __u64 rseq_abi_pointer;
|
||||
- __u32 rseq_abi_size;
|
||||
- __u32 signature;
|
||||
- __u32 flags;
|
||||
- __u32 pad;
|
||||
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
|
||||
+
|
||||
+struct __ptrace_rseq_configuration {
|
||||
+ uint64_t rseq_abi_pointer;
|
||||
+ uint32_t rseq_abi_size;
|
||||
+ uint32_t signature;
|
||||
+ uint32_t flags;
|
||||
+ uint32_t pad;
|
||||
};
|
||||
#endif
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index f07fe6e..91dd08a 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "proc_parse.h"
|
||||
#include "parasite.h"
|
||||
#include "parasite-syscall.h"
|
||||
+#include <compel/ptrace.h>
|
||||
#include "files.h"
|
||||
#include "files-reg.h"
|
||||
#include "shmem.h"
|
||||
@@ -200,6 +201,25 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int check_thread_rseq(pid_t tid, const struct parasite_check_rseq *ti_rseq, bool has_tc_rseq_entry)
|
||||
+{
|
||||
+ if (!kdat.has_rseq || kdat.has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ pr_debug("%d has rseq_inited = %d\n", tid, ti_rseq->rseq_inited);
|
||||
+
|
||||
+ /*
|
||||
+ * kdat.has_ptrace_get_rseq_conf is not available but the user
|
||||
+ * process has used rseq(), so we have to fail the dump.
|
||||
+ */
|
||||
+ if (ti_rseq->rseq_inited) {
|
||||
+ pr_err("%d has rseq but kernel lacks get_rseq_conf feature\n", tid);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
struct cr_imgset *glob_imgset;
|
||||
|
||||
static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds)
|
||||
@@ -730,6 +750,17 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
|
||||
if (!ret)
|
||||
ret = seccomp_dump_thread(pid, tc);
|
||||
|
||||
+ /*
|
||||
+ * We are dumping rseq() in the dump_thread_rseq() function,
|
||||
+ * *before* the process gets infected (because of the ptrace request
|
||||
+ * API restriction). At this point, if the kernel lacks
|
||||
+ * kdat.has_ptrace_get_rseq_conf support we have to ensure
|
||||
+ * that dumpable processes haven't initialized rseq() or
|
||||
+ * fail dump if rseq() was used.
|
||||
+ */
|
||||
+ if (!ret)
|
||||
+ ret = check_thread_rseq(pid, &ti->rseq, !!tc->rseq_entry);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1016,6 +1047,68 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
+{
|
||||
+ struct __ptrace_rseq_configuration rseq;
|
||||
+ RseqEntry *rseqe = NULL;
|
||||
+ int ret;
|
||||
+
|
||||
+ /*
|
||||
+ * If we are here it means that rseq() syscall is supported,
|
||||
+ * but ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported,
|
||||
+ * we could just fail the dump here, but that is a bad idea, IMHO.
|
||||
+ *
|
||||
+ * So, we will try to detect whether the victim process has used rseq().
|
||||
+ * See check_rseq() and check_thread_rseq() functions.
|
||||
+ */
|
||||
+ if (!kdat.has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq);
|
||||
+ if (ret != sizeof(rseq)) {
|
||||
+ pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (rseq.flags != 0) {
|
||||
+ pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid,
|
||||
+ rseq.flags);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature);
|
||||
+
|
||||
+ rseqe = xmalloc(sizeof(*rseqe));
|
||||
+ if (!rseqe)
|
||||
+ return -1;
|
||||
+
|
||||
+ rseq_entry__init(rseqe);
|
||||
+
|
||||
+ rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer;
|
||||
+ rseqe->rseq_abi_size = rseq.rseq_abi_size;
|
||||
+ rseqe->signature = rseq.signature;
|
||||
+
|
||||
+ *rseqep = rseqe;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int dump_task_rseq(pid_t pid, struct pstree_item *item)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ /* if rseq() syscall isn't supported then nothing to dump */
|
||||
+ if (!kdat.has_rseq)
|
||||
+ return 0;
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry))
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static struct proc_pid_stat pps_buf;
|
||||
|
||||
static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pstree_item *item)
|
||||
@@ -1304,6 +1397,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ ret = dump_task_rseq(pid, item);
|
||||
+ if (ret) {
|
||||
+ pr_err("Dump %d rseq failed %d\n", pid, ret);
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
parasite_ctl = parasite_infect_seized(pid, item, &vmas);
|
||||
if (!parasite_ctl) {
|
||||
pr_err("Can't infect (pid: %d) with parasite\n", pid);
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 5b645c1..b2bd044 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -2975,6 +2975,19 @@ static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int prep_rseq(struct rst_rseq_param *rseq, ThreadCoreEntry *tc)
|
||||
+{
|
||||
+ /* compatibility with older CRIU versions */
|
||||
+ if (!tc->rseq_entry)
|
||||
+ return 0;
|
||||
+
|
||||
+ rseq->rseq_abi_pointer = tc->rseq_entry->rseq_abi_pointer;
|
||||
+ rseq->rseq_abi_size = tc->rseq_entry->rseq_abi_size;
|
||||
+ rseq->signature = tc->rseq_entry->signature;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static rlim_t decode_rlim(rlim_t ival)
|
||||
{
|
||||
return ival == -1 ? RLIM_INFINITY : ival;
|
||||
@@ -3704,6 +3717,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
|
||||
core_get_tls(tcore, &thread_args[i].tls);
|
||||
|
||||
+ ret = prep_rseq(&thread_args[i].rseq, tcore->thread_core);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
+
|
||||
rst_reloc_creds(&thread_args[i], &creds_pos_next);
|
||||
|
||||
thread_args[i].futex_rla = tcore->thread_core->futex_rla;
|
||||
diff --git a/criu/include/linux/rseq.h b/criu/include/linux/rseq.h
|
||||
new file mode 100644
|
||||
index 0000000..5c1706a
|
||||
--- /dev/null
|
||||
+++ b/criu/include/linux/rseq.h
|
||||
@@ -0,0 +1,144 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
|
||||
+#ifndef _UAPI_LINUX_RSEQ_H
|
||||
+#define _UAPI_LINUX_RSEQ_H
|
||||
+
|
||||
+/*
|
||||
+ * linux/rseq.h
|
||||
+ *
|
||||
+ * Restartable sequences system call API
|
||||
+ *
|
||||
+ * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+#include <asm/byteorder.h>
|
||||
+
|
||||
+enum rseq_cpu_id_state {
|
||||
+ RSEQ_CPU_ID_UNINITIALIZED = -1,
|
||||
+ RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
|
||||
+};
|
||||
+
|
||||
+enum rseq_flags {
|
||||
+ RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
+};
|
||||
+
|
||||
+enum rseq_cs_flags_bit {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
|
||||
+};
|
||||
+
|
||||
+enum rseq_cs_flags {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
|
||||
+ * contained within a single cache-line. It is usually declared as
|
||||
+ * link-time constant data.
|
||||
+ */
|
||||
+struct rseq_cs {
|
||||
+ /* Version of this structure. */
|
||||
+ __u32 version;
|
||||
+ /* enum rseq_cs_flags */
|
||||
+ __u32 flags;
|
||||
+ __u64 start_ip;
|
||||
+ /* Offset from start_ip. */
|
||||
+ __u64 post_commit_offset;
|
||||
+ __u64 abort_ip;
|
||||
+} __attribute__((aligned(4 * sizeof(__u64))));
|
||||
+
|
||||
+/*
|
||||
+ * struct rseq is aligned on 4 * 8 bytes to ensure it is always
|
||||
+ * contained within a single cache-line.
|
||||
+ *
|
||||
+ * A single struct rseq per thread is allowed.
|
||||
+ */
|
||||
+struct rseq {
|
||||
+ /*
|
||||
+ * Restartable sequences cpu_id_start field. Updated by the
|
||||
+ * kernel. Read by user-space with single-copy atomicity
|
||||
+ * semantics. This field should only be read by the thread which
|
||||
+ * registered this data structure. Aligned on 32-bit. Always
|
||||
+ * contains a value in the range of possible CPUs, although the
|
||||
+ * value may not be the actual current CPU (e.g. if rseq is not
|
||||
+ * initialized). This CPU number value should always be compared
|
||||
+ * against the value of the cpu_id field before performing a rseq
|
||||
+ * commit or returning a value read from a data structure indexed
|
||||
+ * using the cpu_id_start value.
|
||||
+ */
|
||||
+ __u32 cpu_id_start;
|
||||
+ /*
|
||||
+ * Restartable sequences cpu_id field. Updated by the kernel.
|
||||
+ * Read by user-space with single-copy atomicity semantics. This
|
||||
+ * field should only be read by the thread which registered this
|
||||
+ * data structure. Aligned on 32-bit. Values
|
||||
+ * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
|
||||
+ * have a special semantic: the former means "rseq uninitialized",
|
||||
+ * and latter means "rseq initialization failed". This value is
|
||||
+ * meant to be read within rseq critical sections and compared
|
||||
+ * with the cpu_id_start value previously read, before performing
|
||||
+ * the commit instruction, or read and compared with the
|
||||
+ * cpu_id_start value before returning a value loaded from a data
|
||||
+ * structure indexed using the cpu_id_start value.
|
||||
+ */
|
||||
+ __u32 cpu_id;
|
||||
+ /*
|
||||
+ * Restartable sequences rseq_cs field.
|
||||
+ *
|
||||
+ * Contains NULL when no critical section is active for the current
|
||||
+ * thread, or holds a pointer to the currently active struct rseq_cs.
|
||||
+ *
|
||||
+ * Updated by user-space, which sets the address of the currently
|
||||
+ * active rseq_cs at the beginning of assembly instruction sequence
|
||||
+ * block, and set to NULL by the kernel when it restarts an assembly
|
||||
+ * instruction sequence block, as well as when the kernel detects that
|
||||
+ * it is preempting or delivering a signal outside of the range
|
||||
+ * targeted by the rseq_cs. Also needs to be set to NULL by user-space
|
||||
+ * before reclaiming memory that contains the targeted struct rseq_cs.
|
||||
+ *
|
||||
+ * Read and set by the kernel. Set by user-space with single-copy
|
||||
+ * atomicity semantics. This field should only be updated by the
|
||||
+ * thread which registered this data structure. Aligned on 64-bit.
|
||||
+ */
|
||||
+ union {
|
||||
+ __u64 ptr64;
|
||||
+#ifdef __LP64__
|
||||
+ __u64 ptr;
|
||||
+#else
|
||||
+ struct {
|
||||
+#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
|
||||
+ __u32 padding; /* Initialized to zero. */
|
||||
+ __u32 ptr32;
|
||||
+#else /* LITTLE */
|
||||
+ __u32 ptr32;
|
||||
+ __u32 padding; /* Initialized to zero. */
|
||||
+#endif /* ENDIAN */
|
||||
+ } ptr;
|
||||
+#endif
|
||||
+ } rseq_cs;
|
||||
+
|
||||
+ /*
|
||||
+ * Restartable sequences flags field.
|
||||
+ *
|
||||
+ * This field should only be updated by the thread which
|
||||
+ * registered this data structure. Read by the kernel.
|
||||
+ * Mainly used for single-stepping through rseq critical sections
|
||||
+ * with debuggers.
|
||||
+ *
|
||||
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
|
||||
+ * Inhibit instruction sequence block restart on preemption
|
||||
+ * for this thread.
|
||||
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
|
||||
+ * Inhibit instruction sequence block restart on signal
|
||||
+ * delivery for this thread.
|
||||
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
|
||||
+ * Inhibit instruction sequence block restart on migration for
|
||||
+ * this thread.
|
||||
+ */
|
||||
+ __u32 flags;
|
||||
+} __attribute__((aligned(4 * sizeof(__u64))));
|
||||
+
|
||||
+#endif /* _UAPI_LINUX_RSEQ_H */
|
||||
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
|
||||
index 8107aa4..5fde809 100644
|
||||
--- a/criu/include/parasite.h
|
||||
+++ b/criu/include/parasite.h
|
||||
@@ -164,10 +164,17 @@ struct parasite_dump_creds {
|
||||
unsigned int groups[0];
|
||||
};
|
||||
|
||||
+struct parasite_check_rseq {
|
||||
+ bool has_rseq;
|
||||
+ bool has_ptrace_get_rseq_conf; /* no need to check if supported */
|
||||
+ bool rseq_inited;
|
||||
+};
|
||||
+
|
||||
struct parasite_dump_thread {
|
||||
unsigned int *tid_addr;
|
||||
pid_t tid;
|
||||
tls_t tls;
|
||||
+ struct parasite_check_rseq rseq;
|
||||
stack_t sas;
|
||||
int pdeath_sig;
|
||||
char comm[TASK_COMM_LEN];
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index c2ef8f0..c29d869 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -45,6 +45,12 @@ struct rst_sched_param {
|
||||
int prio;
|
||||
};
|
||||
|
||||
+struct rst_rseq_param {
|
||||
+ u64 rseq_abi_pointer;
|
||||
+ u32 rseq_abi_size;
|
||||
+ u32 signature;
|
||||
+};
|
||||
+
|
||||
struct restore_posix_timer {
|
||||
struct str_posix_timer spt;
|
||||
struct itimerspec val;
|
||||
@@ -99,6 +105,7 @@ struct thread_restore_args {
|
||||
struct task_restore_args *ta;
|
||||
|
||||
tls_t tls;
|
||||
+ struct rst_rseq_param rseq;
|
||||
|
||||
siginfo_t *siginfo;
|
||||
unsigned int siginfo_n;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index 4841387..af7113a 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -837,7 +837,7 @@ static int kerndat_has_ptrace_get_rseq_conf(void)
|
||||
{
|
||||
pid_t pid;
|
||||
int len;
|
||||
- struct ptrace_rseq_configuration rseq;
|
||||
+ struct __ptrace_rseq_configuration rseq;
|
||||
|
||||
pid = fork_and_ptrace_attach(NULL);
|
||||
if (pid < 0)
|
||||
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
|
||||
index 7175ade..ee4fa86 100644
|
||||
--- a/criu/parasite-syscall.c
|
||||
+++ b/criu/parasite-syscall.c
|
||||
@@ -132,6 +132,13 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c
|
||||
return ce->groups ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
+static void init_parasite_rseq_arg(struct parasite_check_rseq *rseq)
|
||||
+{
|
||||
+ rseq->has_rseq = kdat.has_rseq;
|
||||
+ rseq->has_ptrace_get_rseq_conf = kdat.has_ptrace_get_rseq_conf;
|
||||
+ rseq->rseq_inited = false;
|
||||
+}
|
||||
+
|
||||
int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core)
|
||||
{
|
||||
ThreadCoreEntry *tc = core->thread_core;
|
||||
@@ -144,6 +151,8 @@ int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEn
|
||||
pc = args->creds;
|
||||
pc->cap_last_cap = kdat.last_cap;
|
||||
|
||||
+ init_parasite_rseq_arg(&args->rseq);
|
||||
+
|
||||
ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_THREAD, ctl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@@ -197,6 +206,8 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit
|
||||
|
||||
compel_arch_get_tls_thread(tctl, &args->tls);
|
||||
|
||||
+ init_parasite_rseq_arg(&args->rseq);
|
||||
+
|
||||
ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD);
|
||||
if (ret) {
|
||||
pr_err("Can't init thread in parasite %d\n", pid);
|
||||
diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c
|
||||
index bc0a33c..e49958b 100644
|
||||
--- a/criu/pie/parasite.c
|
||||
+++ b/criu/pie/parasite.c
|
||||
@@ -8,6 +8,8 @@
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
+#include "linux/rseq.h"
|
||||
+
|
||||
#include "common/config.h"
|
||||
#include "int.h"
|
||||
#include "types.h"
|
||||
@@ -167,6 +169,7 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args)
|
||||
}
|
||||
|
||||
static int dump_creds(struct parasite_dump_creds *args);
|
||||
+static int check_rseq(struct parasite_check_rseq *rseq);
|
||||
|
||||
static int dump_thread_common(struct parasite_dump_thread *ti)
|
||||
{
|
||||
@@ -197,6 +200,12 @@ static int dump_thread_common(struct parasite_dump_thread *ti)
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ ret = check_rseq(&ti->rseq);
|
||||
+ if (ret) {
|
||||
+ pr_err("Unable to check if rseq() is initialized: %d\n", ret);
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
ret = dump_creds(ti->creds);
|
||||
out:
|
||||
return ret;
|
||||
@@ -313,6 +322,96 @@ grps_err:
|
||||
return -1;
|
||||
}
|
||||
|
||||
+static int check_rseq(struct parasite_check_rseq *rseq)
|
||||
+{
|
||||
+ int ret;
|
||||
+ unsigned long rseq_abi_pointer;
|
||||
+ unsigned long rseq_abi_size;
|
||||
+ uint32_t rseq_signature;
|
||||
+ void *addr;
|
||||
+
|
||||
+ /* no need to do hacky check if we can get all info from ptrace() */
|
||||
+ if (!rseq->has_rseq || rseq->has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * We need to determine if victim process has rseq()
|
||||
+ * initialized, but we do not have *any* proper kernel interface
|
||||
+ * supported at this point.
|
||||
+ * Our plan:
|
||||
+ * 1. We know that if we call rseq() syscall and process already
|
||||
+ * has current->rseq filled, then we get:
|
||||
+ * -EINVAL if current->rseq != rseq || rseq_len != sizeof(*rseq),
|
||||
+ * -EPERM if current->rseq_sig != sig,
|
||||
+ * -EBUSY if current->rseq == rseq && rseq_len == sizeof(*rseq) &&
|
||||
+ * current->rseq_sig == sig
|
||||
+ * if current->rseq == NULL (rseq() wasn't used) then we go to:
|
||||
+ * IS_ALIGNED(rseq ...) check, if we fail it we get -EINVAL and it
|
||||
+ * will be hard to distinguish case when rseq() was initialized or not.
|
||||
+ * Let's construct arguments payload
|
||||
+ * with:
|
||||
+ * 1. correct rseq_abi_size
|
||||
+ * 2. aligned and correct rseq_abi_pointer
|
||||
+ * And see what rseq() returns to us.
|
||||
+ * If ret value is:
|
||||
+ * 0: it means that rseq *wasn't* used and we successfully registered it,
|
||||
+ * -EINVAL: it means that rseq is already initialized,
|
||||
+ * so we *have* to dump it. But as we have has_ptrace_get_rseq_conf = false,
|
||||
+ * we should just fail dump as it's unsafe to skip rseq() dump for processes
|
||||
+ * with rseq() initialized.
|
||||
+ * -EPERM or -EBUSY: should not happen as we take a fresh memory area for rseq
|
||||
+ */
|
||||
+ addr = (void *)sys_mmap(NULL, sizeof(struct rseq), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
+ if (addr == MAP_FAILED) {
|
||||
+ pr_err("mmap() failed for struct rseq ret = %lx\n", (unsigned long)addr);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ memset(addr, 0, sizeof(struct rseq));
|
||||
+
|
||||
+ /* sys_mmap returns page aligned addresses */
|
||||
+ rseq_abi_pointer = (unsigned long)addr;
|
||||
+ rseq_abi_size = (unsigned long)sizeof(struct rseq);
|
||||
+ /* it's not so important to have unique signature for us,
|
||||
+ * because rseq_abi_pointer is guaranteed to be unique
|
||||
+ */
|
||||
+ rseq_signature = 0x12345612;
|
||||
+
|
||||
+ pr_info("\ttrying sys_rseq(%lx, %lx, %x, %x)\n", rseq_abi_pointer, rseq_abi_size, 0, rseq_signature);
|
||||
+ ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, 0, rseq_signature);
|
||||
+ if (ret) {
|
||||
+ if (ret == -EINVAL) {
|
||||
+ pr_info("\trseq is initialized in the victim\n");
|
||||
+ rseq->rseq_inited = true;
|
||||
+
|
||||
+ ret = 0;
|
||||
+ } else {
|
||||
+ pr_err("\tunexpected failure of sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer,
|
||||
+ rseq_abi_size, 0, rseq_signature, ret);
|
||||
+
|
||||
+ ret = -1;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = sys_rseq((void *)rseq_abi_pointer, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, rseq_signature);
|
||||
+ if (ret) {
|
||||
+ pr_err("\tfailed to unregister sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer,
|
||||
+ rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature, ret);
|
||||
+
|
||||
+ ret = -1;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ pr_info("\tsys_rseq succeed, let's unregister it back... ok Error\n");
|
||||
+ pr_info("\trseq is non-initialized in the victim Error\n");
|
||||
+ rseq->rseq_inited = false;
|
||||
+ ret = 0;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ sys_munmap(addr, sizeof(struct rseq));
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int fill_fds_fown(int fd, struct fd_opts *p)
|
||||
{
|
||||
int flags, ret;
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index fbc89fe..368b5a0 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -459,6 +459,27 @@ static int restore_cpu_affinity(struct task_restore_args *args)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int restore_rseq(struct rst_rseq_param *rseq)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ if (!rseq->rseq_abi_pointer) {
|
||||
+ pr_debug("rseq: nothing to restore\n");
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ pr_debug("rseq: rseq_abi_pointer = %lx signature = %x\n", (unsigned long)rseq->rseq_abi_pointer, rseq->signature);
|
||||
+
|
||||
+ ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 0, rseq->signature);
|
||||
+ if (ret) {
|
||||
+ pr_err("failed sys_rseq(%lx, %lx, %x, %x) = %d\n", (unsigned long)rseq->rseq_abi_pointer,
|
||||
+ (unsigned long)rseq->rseq_abi_size, 0, rseq->signature, ret);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args)
|
||||
{
|
||||
unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0;
|
||||
@@ -583,6 +604,9 @@ static int restore_thread_common(struct thread_restore_args *args)
|
||||
|
||||
restore_tls(&args->tls);
|
||||
|
||||
+ if (restore_rseq(&args->rseq))
|
||||
+ return -1;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/images/Makefile b/images/Makefile
|
||||
index 2eaeb7c..004e22e 100644
|
||||
--- a/images/Makefile
|
||||
+++ b/images/Makefile
|
||||
@@ -71,6 +71,7 @@ proto-obj-y += img-streamer.o
|
||||
proto-obj-y += bpfmap-file.o
|
||||
proto-obj-y += bpfmap-data.o
|
||||
proto-obj-y += apparmor.o
|
||||
+proto-obj-y += rseq.o
|
||||
|
||||
CFLAGS += -iquote $(obj)/
|
||||
|
||||
diff --git a/images/core.proto b/images/core.proto
|
||||
index 39e7f32..b66230e 100644
|
||||
--- a/images/core.proto
|
||||
+++ b/images/core.proto
|
||||
@@ -14,6 +14,7 @@ import "timer.proto";
|
||||
import "creds.proto";
|
||||
import "sa.proto";
|
||||
import "siginfo.proto";
|
||||
+import "rseq.proto";
|
||||
|
||||
import "opts.proto";
|
||||
|
||||
@@ -106,6 +107,7 @@ message thread_core_entry {
|
||||
optional string comm = 13;
|
||||
optional uint64 blk_sigset_extended = 14;
|
||||
required thread_allowedcpus_entry allowed_cpus = 15;
|
||||
+ optional rseq_entry rseq_entry = 16;
|
||||
}
|
||||
|
||||
message task_rlimits_entry {
|
||||
diff --git a/images/rseq.proto b/images/rseq.proto
|
||||
new file mode 100644
|
||||
index 0000000..be28004
|
||||
--- /dev/null
|
||||
+++ b/images/rseq.proto
|
||||
@@ -0,0 +1,9 @@
|
||||
+// SPDX-License-Identifier: MIT
|
||||
+
|
||||
+syntax = "proto2";
|
||||
+
|
||||
+message rseq_entry {
|
||||
+ required uint64 rseq_abi_pointer = 1;
|
||||
+ required uint32 rseq_abi_size = 2;
|
||||
+ required uint32 signature = 3;
|
||||
+}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
217
0007-zdtm-add-simple-test-for-rseq-C-R.patch
Normal file
217
0007-zdtm-add-simple-test-for-rseq-C-R.patch
Normal file
@ -0,0 +1,217 @@
|
||||
From 5005c08e32dc29dbf0b3a2a582e75d249c190d96 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 14:54:28 +0800
|
||||
Subject: [PATCH 07/16] zdtm: add simple test for rseq C/R Signed-off-by:
|
||||
Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
test/zdtm/static/Makefile | 1 +
|
||||
test/zdtm/static/rseq00.c | 174 +++++++++++++++++++++++
|
||||
test/zdtm/static/rseq00.desc | 1 +
|
||||
3 files changed, 176 insertions(+)
|
||||
create mode 100644 test/zdtm/static/rseq00.c
|
||||
create mode 100644 test/zdtm/static/rseq00.desc
|
||||
|
||||
diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
|
||||
index 70123cf..563d947 100644
|
||||
--- a/test/zdtm/static/Makefile
|
||||
+++ b/test/zdtm/static/Makefile
|
||||
@@ -61,6 +61,7 @@ TST_NOFILE := \
|
||||
pthread02 \
|
||||
pthread_timers \
|
||||
pthread_timers_h \
|
||||
+ rseq00 \
|
||||
vdso00 \
|
||||
vdso01 \
|
||||
vdso02 \
|
||||
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
|
||||
new file mode 100644
|
||||
index 0000000..26f41a2
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/static/rseq00.c
|
||||
@@ -0,0 +1,174 @@
|
||||
+/*
|
||||
+ * test for rseq() syscall
|
||||
+ * See also https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/
|
||||
+ * https://github.com/torvalds/linux/commit/d7822b1e24f2df5df98c76f0e94a5416349ff759
|
||||
+ */
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
+#include <signal.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/wait.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <sys/mman.h>
|
||||
+#include <fcntl.h>
|
||||
+#include <pthread.h>
|
||||
+#include <syscall.h>
|
||||
+
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#if defined(__x86_64__)
|
||||
+
|
||||
+const char *test_doc = "Check that rseq() basic C/R works";
|
||||
+const char *test_author = "Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>";
|
||||
+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */
|
||||
+
|
||||
+/* some useful definitions from kernel uapi */
|
||||
+enum rseq_flags {
|
||||
+ RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
+};
|
||||
+
|
||||
+struct rseq {
|
||||
+ uint32_t cpu_id_start;
|
||||
+ uint32_t cpu_id;
|
||||
+ uint64_t rseq_cs;
|
||||
+ uint32_t flags;
|
||||
+} __attribute__((aligned(4 * sizeof(uint64_t))));
|
||||
+
|
||||
+#ifndef __NR_rseq
|
||||
+#define __NR_rseq 334
|
||||
+#endif
|
||||
+/* EOF */
|
||||
+
|
||||
+static __thread volatile struct rseq __rseq_abi;
|
||||
+
|
||||
+#define RSEQ_SIG 0x53053053
|
||||
+
|
||||
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
+{
|
||||
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
|
||||
+}
|
||||
+
|
||||
+static void register_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (rc) {
|
||||
+ fail("Failed to register rseq");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void unregister_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
|
||||
+ if (rc) {
|
||||
+ fail("Failed to unregister rseq");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void check_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (!(rc && errno == EBUSY)) {
|
||||
+ fail("Failed to check rseq %d", rc);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
|
||||
+
|
||||
+static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
+{
|
||||
+ /* clang-format off */
|
||||
+ __asm__ __volatile__ goto(
|
||||
+ ".pushsection __rseq_table, \"aw\"\n\t"
|
||||
+ ".balign 32\n\t"
|
||||
+ "cs_obj:\n\t"
|
||||
+ /* version, flags */
|
||||
+ ".long 0, 0\n\t"
|
||||
+ /* start_ip, post_commit_offset, abort_ip */
|
||||
+ ".quad 1f, (2f-1f), 4f\n\t"
|
||||
+ ".popsection\n\t"
|
||||
+ "1:\n\t"
|
||||
+ "leaq cs_obj(%%rip), %%rax\n\t"
|
||||
+ "movq %%rax, %[rseq_cs]\n\t"
|
||||
+ "cmpl %[cpu_id], %[current_cpu_id]\n\t"
|
||||
+ "jnz 4f\n\t"
|
||||
+ "addq %[count], %[v]\n\t" /* final store */
|
||||
+ "2:\n\t"
|
||||
+ ".pushsection __rseq_failure, \"ax\"\n\t"
|
||||
+ /* Disassembler-friendly signature: nopl <sig>(%rip). */
|
||||
+ ".byte 0x0f, 0x1f, 0x05\n\t"
|
||||
+ ".long 0x53053053\n\t" /* RSEQ_FLAGS */
|
||||
+ "4:\n\t"
|
||||
+ "jmp abort\n\t"
|
||||
+ ".popsection\n\t"
|
||||
+ : /* gcc asm goto does not allow outputs */
|
||||
+ : [cpu_id] "r" (cpu),
|
||||
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
|
||||
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
|
||||
+ /* final store input */
|
||||
+ [v] "m" (*v),
|
||||
+ [count] "er" (count)
|
||||
+ : "memory", "cc", "rax"
|
||||
+ : abort
|
||||
+ );
|
||||
+ /* clang-format on */
|
||||
+
|
||||
+ return 0;
|
||||
+abort:
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ int cpu, ret;
|
||||
+ intptr_t *cpu_data;
|
||||
+ long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+
|
||||
+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
|
||||
+ if (!cpu_data) {
|
||||
+ fail("calloc");
|
||||
+ exit(EXIT_FAILURE);
|
||||
+ }
|
||||
+
|
||||
+ register_thread();
|
||||
+
|
||||
+ test_daemon();
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ check_thread();
|
||||
+
|
||||
+ cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
|
||||
+ ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
+ if (ret)
|
||||
+ fail("Failed to increment per-cpu counter");
|
||||
+ else
|
||||
+ test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]);
|
||||
+
|
||||
+ if (cpu_data[cpu] == 2)
|
||||
+ pass();
|
||||
+ else
|
||||
+ fail();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ test_init(argc, argv);
|
||||
+ skip("Unsupported arch");
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
\ No newline at end of file
|
||||
diff --git a/test/zdtm/static/rseq00.desc b/test/zdtm/static/rseq00.desc
|
||||
new file mode 100644
|
||||
index 0000000..0324fa3
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/static/rseq00.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
123
0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch
Normal file
123
0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch
Normal file
@ -0,0 +1,123 @@
|
||||
From 56fad25776a652e143175a22676a1f909476c880 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 14:57:16 +0800
|
||||
Subject: [PATCH 08/16] ci: add Fedora Rawhide based test on Cirrus We have
|
||||
ability to use nested virtualization on Cirrus, and already have "Vagrant
|
||||
Fedora based test (no VDSO)" test, let's add an analogous one for Fedora Rawhide to
|
||||
get fresh kernel.
|
||||
|
||||
Suggested-by: Adrian Reber <areber@redhat.com>
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
.cirrus.yml | 21 +++++++++++++++++++++
|
||||
scripts/ci/Makefile | 7 +++++--
|
||||
scripts/ci/run-ci-tests.sh | 5 +++++
|
||||
scripts/ci/vagrant.sh | 21 +++++++++++++++++++++
|
||||
4 files changed, 52 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/.cirrus.yml b/.cirrus.yml
|
||||
index 671178d..9716e58 100644
|
||||
--- a/.cirrus.yml
|
||||
+++ b/.cirrus.yml
|
||||
@@ -19,6 +19,27 @@ task:
|
||||
build_script: |
|
||||
make -C scripts/ci vagrant-fedora-no-vdso
|
||||
|
||||
+task:
|
||||
+ name: Vagrant Fedora Rawhide based test
|
||||
+ environment:
|
||||
+ HOME: "/root"
|
||||
+ CIRRUS_WORKING_DIR: "/tmp/criu"
|
||||
+
|
||||
+ compute_engine_instance:
|
||||
+ image_project: cirrus-images
|
||||
+ image: family/docker-kvm
|
||||
+ platform: linux
|
||||
+ cpu: 4
|
||||
+ memory: 16G
|
||||
+ nested_virtualization: true
|
||||
+
|
||||
+ setup_script: |
|
||||
+ scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
+ sudo kvm-ok
|
||||
+ ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
+ build_script: |
|
||||
+ make -C scripts/ci vagrant-fedora-rawhide
|
||||
+
|
||||
task:
|
||||
name: CentOS 8 based test
|
||||
environment:
|
||||
diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile
|
||||
index 02b4d87..9c9264d 100644
|
||||
--- a/scripts/ci/Makefile
|
||||
+++ b/scripts/ci/Makefile
|
||||
@@ -41,7 +41,7 @@ export CONTAINER_TERMINAL
|
||||
ifeq ($(UNAME),x86_64)
|
||||
# On anything besides x86_64 Travis is running unprivileged LXD
|
||||
# containers which do not support running docker with '--privileged'.
|
||||
- CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged -v /lib/modules:/lib/modules --tmpfs /run
|
||||
+ CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged --userns=host --cgroupns=host -v /lib/modules:/lib/modules --tmpfs /run
|
||||
else
|
||||
CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run
|
||||
endif
|
||||
@@ -92,7 +92,10 @@ setup-vagrant:
|
||||
vagrant-fedora-no-vdso: setup-vagrant
|
||||
./vagrant.sh fedora-no-vdso
|
||||
|
||||
-.PHONY: setup-vagrant vagrant-fedora-no-vdso
|
||||
+vagrant-fedora-rawhide: setup-vagrant
|
||||
+ ./vagrant.sh fedora-rawhide
|
||||
+
|
||||
+.PHONY: setup-vagrant vagrant-fedora-no-vdso vagrant-fedora-rawhide
|
||||
|
||||
%:
|
||||
$(MAKE) -C ../build $@$(target-suffix)
|
||||
diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh
|
||||
index 7c66e68..95b4ec7 100755
|
||||
--- a/scripts/ci/run-ci-tests.sh
|
||||
+++ b/scripts/ci/run-ci-tests.sh
|
||||
@@ -194,6 +194,11 @@ if [ "${STREAM_TEST}" = "1" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
+# print some useful debug info
|
||||
+cat /proc/self/status
|
||||
+ls -la /proc/self/ns
|
||||
+cat /proc/self/cgroup
|
||||
+
|
||||
# shellcheck disable=SC2086
|
||||
./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS
|
||||
|
||||
diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh
|
||||
index 839b100..f961b8d 100755
|
||||
--- a/scripts/ci/vagrant.sh
|
||||
+++ b/scripts/ci/vagrant.sh
|
||||
@@ -58,4 +58,25 @@ fedora-no-vdso() {
|
||||
ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -t zdtm/transition/pidfd_store_sk --rpc --pre 2'
|
||||
}
|
||||
|
||||
+fedora-rawhide() {
|
||||
+ #ssh default sudo grubby --update-kernel ALL --args="selinux=0 systemd.unified_cgroup_hierarchy=0"
|
||||
+ ssh default sudo grubby --update-kernel ALL
|
||||
+ #
|
||||
+ # Work around the problem:
|
||||
+ # error running container: error from /usr/bin/crun creating container for [...]: sd-bus call: Transport endpoint is not connected
|
||||
+ # Let's just use runc instead of crun
|
||||
+ # see also https://github.com/kata-containers/tests/issues/4283
|
||||
+ #
|
||||
+ ssh default 'sudo dnf remove -y crun || true'
|
||||
+ ssh default sudo dnf install -y podman runc
|
||||
+ vagrant reload
|
||||
+ #ssh default sudo setenforce 0
|
||||
+ ssh default cat /proc/cmdline
|
||||
+ ssh default ls -la /proc/self/ns
|
||||
+ ssh default sudo cat /proc/self/status
|
||||
+ ssh default sudo cat /proc/self/cgroup
|
||||
+ #ssh default sudo capsh --print
|
||||
+ ssh default 'cd /vagrant; tar xf criu.tar; cd criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"'
|
||||
+}
|
||||
+
|
||||
$1
|
||||
--
|
||||
2.30.0
|
||||
|
||||
244
0009-include-add-thread_pointer.h-from-Glibc.patch
Normal file
244
0009-include-add-thread_pointer.h-from-Glibc.patch
Normal file
@ -0,0 +1,244 @@
|
||||
From 99da2f789ca92aa52eeca07b97aee2cbd3d60fca Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:00:07 +0800
|
||||
Subject: [PATCH 09/16] include: add thread_pointer.h from Glibc Implementation
|
||||
was taken from the Glibc.
|
||||
|
||||
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5
|
||||
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=cb976fba4c51ede7bf8cee5035888527c308dfbc
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
.../arch/aarch64/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
.../arch/arm/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
.../arch/mips/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
.../arch/ppc64/include/asm/thread_pointer.h | 33 +++++++++++++++++
|
||||
.../arch/s390/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
.../arch/x86/include/asm/thread_pointer.h | 37 +++++++++++++++++++
|
||||
6 files changed, 178 insertions(+)
|
||||
create mode 100644 criu/arch/aarch64/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/arm/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/mips/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/ppc64/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/s390/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/x86/include/asm/thread_pointer.h
|
||||
|
||||
diff --git a/criu/arch/aarch64/include/asm/thread_pointer.h b/criu/arch/aarch64/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/aarch64/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/arm/include/asm/thread_pointer.h b/criu/arch/arm/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/arm/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/mips/include/asm/thread_pointer.h b/criu/arch/mips/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/mips/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/ppc64/include/asm/thread_pointer.h b/criu/arch/ppc64/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..304516f
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/ppc64/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,33 @@
|
||||
+/* __thread_pointer definition. powerpc version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+#ifdef __powerpc64__
|
||||
+register void *__thread_register asm("r13");
|
||||
+#else
|
||||
+register void *__thread_register asm("r2");
|
||||
+#endif
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __thread_register;
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
\ No newline at end of file
|
||||
diff --git a/criu/arch/s390/include/asm/thread_pointer.h b/criu/arch/s390/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/s390/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/x86/include/asm/thread_pointer.h b/criu/arch/x86/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..08603ae
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/x86/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* __thread_pointer definition. x86 version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+#if __GNUC_PREREQ(11, 1)
|
||||
+ return __builtin_thread_pointer();
|
||||
+#else
|
||||
+ void *__result;
|
||||
+#ifdef __x86_64__
|
||||
+ __asm__("mov %%fs:0, %0" : "=r"(__result));
|
||||
+#else
|
||||
+ __asm__("mov %%gs:0, %0" : "=r"(__result));
|
||||
+#endif
|
||||
+ return __result;
|
||||
+#endif /* !GCC 11 */
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
\ No newline at end of file
|
||||
--
|
||||
2.30.0
|
||||
|
||||
102
0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch
Normal file
102
0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch
Normal file
@ -0,0 +1,102 @@
|
||||
From d43ad9913c19afa6d80cb8124015d47361152db8 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:00:43 +0800
|
||||
Subject: [PATCH 10/16] clone-noasan: unregister rseq at the thread start for
|
||||
new glibc Fresh glibc does rseq registration by default during
|
||||
start_thread(). [ see
|
||||
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=95e114a0919d844d8fe07839cb6538b7f5ee920e
|
||||
]
|
||||
|
||||
This causes process crashes during the memory restore procedure, because
|
||||
memory which corresponds to the struct rseq will be overwritten.
|
||||
|
||||
See also
|
||||
("nptl: Add public rseq symbols and <sys/rseq.h>")
|
||||
https://sourceware.org/git?p=glibc.git;a=commit;h=c901c3e764d7c7079f006b4e21e877d5036eb4f5
|
||||
("nptl: Add <thread_pointer.h> for defining __thread_pointer")
|
||||
https://sourceware.org/git?p=glibc.git;a=commit;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
criu/clone-noasan.c | 42 +++++++++++++++++++++++++++++++--
|
||||
1 file changed, 40 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c
|
||||
index d657ea2..5f8dd1b 100644
|
||||
--- a/criu/clone-noasan.c
|
||||
+++ b/criu/clone-noasan.c
|
||||
@@ -2,6 +2,13 @@
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
|
||||
+#ifdef __has_include
|
||||
+#if __has_include ("sys/rseq.h")
|
||||
+#include <sys/rseq.h>
|
||||
+#include "asm/thread_pointer.h"
|
||||
+#endif
|
||||
+#endif
|
||||
+
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
|
||||
#include "sched.h"
|
||||
@@ -34,16 +41,45 @@
|
||||
* ... wait for process to finish ...
|
||||
* unlock_last_pid
|
||||
*/
|
||||
+
|
||||
+#if defined(RSEQ_SIG)
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+ /* unregister rseq */
|
||||
+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
|
||||
+}
|
||||
+#else
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+struct call_fn_args {
|
||||
+ int (*fn)(void *);
|
||||
+ void *arg;
|
||||
+};
|
||||
+
|
||||
+int call_fn(void *arg)
|
||||
+{
|
||||
+ struct call_fn_args *cargs = arg;
|
||||
+ unregister_glibc_rseq();
|
||||
+ return cargs->fn(cargs->arg);
|
||||
+}
|
||||
+
|
||||
int clone_noasan(int (*fn)(void *), int flags, void *arg)
|
||||
{
|
||||
void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16);
|
||||
+ struct call_fn_args a = {
|
||||
+ .fn = fn,
|
||||
+ .arg = arg,
|
||||
+ };
|
||||
|
||||
BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK));
|
||||
/*
|
||||
* Reserve some bytes for clone() internal needs
|
||||
* and use as stack the address above this area.
|
||||
*/
|
||||
- return clone(fn, stack_ptr, flags, arg);
|
||||
+ return clone(call_fn, stack_ptr, flags, (void *)&a);
|
||||
}
|
||||
|
||||
int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_signal, pid_t pid)
|
||||
@@ -78,7 +114,9 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_sig
|
||||
c_args.set_tid = ptr_to_u64(&pid);
|
||||
c_args.set_tid_size = 1;
|
||||
pid = syscall(__NR_clone3, &c_args, sizeof(c_args));
|
||||
- if (pid == 0)
|
||||
+ if (pid == 0) {
|
||||
+ unregister_glibc_rseq();
|
||||
exit(fn(arg));
|
||||
+ }
|
||||
return pid;
|
||||
}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
158
0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch
Normal file
158
0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch
Normal file
@ -0,0 +1,158 @@
|
||||
From 4f4d5acc34046954aea9e8ea10b5f71ff5f0fbd5 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:01:34 +0800
|
||||
Subject: [PATCH 11/16] zdtm/static/rseq00: fix rseq test when linking with a
|
||||
fresh Glibc Fresh Glibc does rseq() register by default. We need to
|
||||
unregister rseq before registering our own.
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
test/zdtm/static/rseq00.c | 76 ++++++++++++++++++++-------
|
||||
1 file changed, 58 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
|
||||
index 26f41a2..87053b8 100644
|
||||
--- a/test/zdtm/static/rseq00.c
|
||||
+++ b/test/zdtm/static/rseq00.c
|
||||
@@ -19,13 +19,48 @@
|
||||
|
||||
#include "zdtmtst.h"
|
||||
|
||||
-#if defined(__x86_64__)
|
||||
+#ifdef __has_include
|
||||
+#if __has_include("sys/rseq.h")
|
||||
+#include <sys/rseq.h>
|
||||
+#endif
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__i386__) || defined(__x86_64__)
|
||||
+
|
||||
+#if defined(RSEQ_SIG)
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+#if __GNUC_PREREQ(11, 1)
|
||||
+ return __builtin_thread_pointer();
|
||||
+#else
|
||||
+ void *__result;
|
||||
+#ifdef __x86_64__
|
||||
+ __asm__("mov %%fs:0, %0" : "=r"(__result));
|
||||
+#else
|
||||
+ __asm__("mov %%gs:0, %0" : "=r"(__result));
|
||||
+#endif
|
||||
+ return __result;
|
||||
+#endif /* !GCC 11 */
|
||||
+}
|
||||
+
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+ /* unregister rseq */
|
||||
+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
|
||||
+}
|
||||
+#else
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
|
||||
const char *test_doc = "Check that rseq() basic C/R works";
|
||||
const char *test_author = "Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>";
|
||||
/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */
|
||||
|
||||
/* some useful definitions from kernel uapi */
|
||||
+#ifndef RSEQ_SIG
|
||||
+
|
||||
enum rseq_flags {
|
||||
RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
};
|
||||
@@ -37,14 +72,21 @@ struct rseq {
|
||||
uint32_t flags;
|
||||
} __attribute__((aligned(4 * sizeof(uint64_t))));
|
||||
|
||||
+#define RSEQ_SIG 0x53053053
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
#ifndef __NR_rseq
|
||||
#define __NR_rseq 334
|
||||
#endif
|
||||
/* EOF */
|
||||
|
||||
-static __thread volatile struct rseq __rseq_abi;
|
||||
+#define RSEQ_TLS_ALLOC 0
|
||||
|
||||
-#define RSEQ_SIG 0x53053053
|
||||
+static volatile struct rseq *rseq_ptr;
|
||||
+#if RSEQ_TLS_ALLOC
|
||||
+static __thread volatile struct rseq __rseq_abi;
|
||||
+#endif
|
||||
|
||||
static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
{
|
||||
@@ -54,27 +96,18 @@ static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags
|
||||
static void register_thread(void)
|
||||
{
|
||||
int rc;
|
||||
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ unregister_glibc_rseq();
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
if (rc) {
|
||||
fail("Failed to register rseq");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
-static void unregister_thread(void)
|
||||
-{
|
||||
- int rc;
|
||||
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
|
||||
- if (rc) {
|
||||
- fail("Failed to unregister rseq");
|
||||
- exit(1);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static void check_thread(void)
|
||||
{
|
||||
int rc;
|
||||
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
if (!(rc && errno == EBUSY)) {
|
||||
fail("Failed to check rseq %d", rc);
|
||||
exit(1);
|
||||
@@ -111,8 +144,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
".popsection\n\t"
|
||||
: /* gcc asm goto does not allow outputs */
|
||||
: [cpu_id] "r" (cpu),
|
||||
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
|
||||
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
|
||||
+ [current_cpu_id] "m" (rseq_ptr->cpu_id),
|
||||
+ [rseq_cs] "m" (rseq_ptr->rseq_cs),
|
||||
/* final store input */
|
||||
[v] "m" (*v),
|
||||
[count] "er" (count)
|
||||
@@ -132,6 +165,13 @@ int main(int argc, char *argv[])
|
||||
intptr_t *cpu_data;
|
||||
long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
+#if RSEQ_TLS_ALLOC
|
||||
+ rseq_ptr = &__rseq_abi;
|
||||
+#else
|
||||
+ //rseq_ptr = malloc(sizeof(struct rseq));
|
||||
+ rseq_ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, 0, 0);
|
||||
+#endif
|
||||
+
|
||||
test_init(argc, argv);
|
||||
|
||||
cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
|
||||
@@ -147,7 +187,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
check_thread();
|
||||
|
||||
- cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
|
||||
+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
|
||||
ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
if (ret)
|
||||
fail("Failed to increment per-cpu counter");
|
||||
--
|
||||
2.30.0
|
||||
|
||||
265
0012-compel-add-helpers-to-get-set-instruction-pointer.patch
Normal file
265
0012-compel-add-helpers-to-get-set-instruction-pointer.patch
Normal file
@ -0,0 +1,265 @@
|
||||
From 06cb51057ce1cc31b79c6321273dfa0b4cb7f980 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:02:08 +0800
|
||||
Subject: [PATCH 12/16] compel: add helpers to get/set instruction pointer
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 7 ++++---
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
compel/include/uapi/infect.h | 6 ++++++
|
||||
compel/src/lib/infect.c | 20 +++++++++++++++++++
|
||||
.../criu/arch/aarch64/include/asm/types.h | 2 ++
|
||||
criu/arch/arm/include/asm/types.h | 2 ++
|
||||
.../criu/arch/mips/include/asm/types.h | 2 ++
|
||||
.../criu/arch/ppc64/include/asm/types.h | 2 ++
|
||||
.../criu/arch/s390/include/asm/types.h | 2 ++
|
||||
criu/arch/x86/include/asm/types.h | 2 ++
|
||||
14 files changed, 67 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
|
||||
index f91e73d..9d4ce7e 100644
|
||||
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -23,10 +23,11 @@ typedef struct user_fpsimd_state user_fpregs_struct_t;
|
||||
#define compel_arch_get_tls_task(ctl, tls)
|
||||
#define compel_arch_get_tls_thread(tctl, tls)
|
||||
|
||||
-#define REG_RES(r) ((uint64_t)(r).regs[0])
|
||||
-#define REG_IP(r) ((uint64_t)(r).pc)
|
||||
-#define REG_SP(r) ((uint64_t)((r).sp))
|
||||
-#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8])
|
||||
+#define REG_RES(r) ((uint64_t)(r).regs[0])
|
||||
+#define REG_IP(r) ((uint64_t)(r).pc)
|
||||
+#define SET_REG_IP(r, val) ((r).pc = (val))
|
||||
+#define REG_SP(r) ((uint64_t)((r).sp))
|
||||
+#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8])
|
||||
|
||||
#define user_regs_native(pregs) true
|
||||
|
||||
diff --git a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
|
||||
index 159b6a9..8d32825 100644
|
||||
--- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -56,10 +56,11 @@ struct user_vfp_exc {
|
||||
unsigned long fpinst2;
|
||||
};
|
||||
|
||||
-#define REG_RES(regs) ((regs).ARM_r0)
|
||||
-#define REG_IP(regs) ((regs).ARM_pc)
|
||||
-#define REG_SP(regs) ((regs).ARM_sp)
|
||||
-#define REG_SYSCALL_NR(regs) ((regs).ARM_r7)
|
||||
+#define REG_RES(regs) ((regs).ARM_r0)
|
||||
+#define REG_IP(regs) ((regs).ARM_pc)
|
||||
+#define SET_REG_IP(regs, val) ((regs).ARM_pc = (val))
|
||||
+#define REG_SP(regs) ((regs).ARM_sp)
|
||||
+#define REG_SYSCALL_NR(regs) ((regs).ARM_r7)
|
||||
|
||||
#define user_regs_native(pregs) true
|
||||
|
||||
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
|
||||
index 70b3f85..481566a 100644
|
||||
--- a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -56,10 +56,11 @@ static inline bool user_regs_native(user_regs_struct_t *pregs)
|
||||
#define compel_arch_get_tls_task(ctl, tls)
|
||||
#define compel_arch_get_tls_thread(tctl, tls)
|
||||
|
||||
-#define REG_RES(regs) ((regs).MIPS_v0)
|
||||
-#define REG_IP(regs) ((regs).cp0_epc)
|
||||
-#define REG_SP(regs) ((regs).MIPS_sp)
|
||||
-#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0)
|
||||
+#define REG_RES(regs) ((regs).MIPS_v0)
|
||||
+#define REG_IP(regs) ((regs).cp0_epc)
|
||||
+#define SET_REG_IP(regs, val) ((regs).cp0_epc = (val))
|
||||
+#define REG_SP(regs) ((regs).MIPS_sp)
|
||||
+#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0)
|
||||
|
||||
//#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
|
||||
#define __NR(syscall, compat) __NR_##syscall
|
||||
diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
|
||||
index fe6192e..bf2cc95 100644
|
||||
--- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -72,10 +72,11 @@ typedef struct {
|
||||
} tm;
|
||||
} user_fpregs_struct_t;
|
||||
|
||||
-#define REG_RES(regs) ((uint64_t)(regs).gpr[3])
|
||||
-#define REG_IP(regs) ((uint64_t)(regs).nip)
|
||||
-#define REG_SP(regs) ((uint64_t)(regs).gpr[1])
|
||||
-#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0])
|
||||
+#define REG_RES(regs) ((uint64_t)(regs).gpr[3])
|
||||
+#define REG_IP(regs) ((uint64_t)(regs).nip)
|
||||
+#define SET_REG_IP(regs, val) ((regs).nip = (val))
|
||||
+#define REG_SP(regs) ((uint64_t)(regs).gpr[1])
|
||||
+#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0])
|
||||
|
||||
#define user_regs_native(pregs) true
|
||||
|
||||
diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
|
||||
index 896d70e..87283bc 100644
|
||||
--- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -62,9 +62,10 @@ typedef struct {
|
||||
uint32_t system_call;
|
||||
} user_regs_struct_t;
|
||||
|
||||
-#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2])
|
||||
-#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr)
|
||||
-#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15])
|
||||
+#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2])
|
||||
+#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr)
|
||||
+#define SET_REG_IP(r, val) ((r).prstatus.psw.addr = (val))
|
||||
+#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15])
|
||||
/*
|
||||
* We assume that REG_SYSCALL_NR() is only used for pie code where we
|
||||
* always use svc 0 with opcode in %r1.
|
||||
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
|
||||
index 34b3ad0..b35504f 100644
|
||||
--- a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -127,10 +127,11 @@ typedef struct {
|
||||
|
||||
typedef struct xsave_struct user_fpregs_struct_t;
|
||||
|
||||
-#define REG_RES(regs) get_user_reg(®s, ax)
|
||||
-#define REG_IP(regs) get_user_reg(®s, ip)
|
||||
-#define REG_SP(regs) get_user_reg(®s, sp)
|
||||
-#define REG_SYSCALL_NR(regs) get_user_reg(®s, orig_ax)
|
||||
+#define REG_RES(regs) get_user_reg(®s, ax)
|
||||
+#define REG_IP(regs) get_user_reg(®s, ip)
|
||||
+#define SET_REG_IP(regs, val) set_user_reg(®s, ip, val)
|
||||
+#define REG_SP(regs) get_user_reg(®s, sp)
|
||||
+#define REG_SYSCALL_NR(regs) get_user_reg(®s, orig_ax)
|
||||
|
||||
#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
|
||||
|
||||
diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h
|
||||
index c3d2ee6..389878e 100644
|
||||
--- a/compel/include/uapi/infect.h
|
||||
+++ b/compel/include/uapi/infect.h
|
||||
@@ -168,4 +168,10 @@ extern unsigned long compel_task_size(void);
|
||||
extern uint64_t compel_get_leader_sp(struct parasite_ctl *ctl);
|
||||
extern uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl);
|
||||
|
||||
+extern uint64_t compel_get_leader_ip(struct parasite_ctl *ctl);
|
||||
+extern uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl);
|
||||
+
|
||||
+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v);
|
||||
+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v);
|
||||
+
|
||||
#endif
|
||||
diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c
|
||||
index 0fb9e71..6a13cc1 100644
|
||||
--- a/compel/src/lib/infect.c
|
||||
+++ b/compel/src/lib/infect.c
|
||||
@@ -1686,3 +1686,23 @@ uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl)
|
||||
{
|
||||
return REG_SP(tctl->th.regs);
|
||||
}
|
||||
+
|
||||
+uint64_t compel_get_leader_ip(struct parasite_ctl *ctl)
|
||||
+{
|
||||
+ return REG_IP(ctl->orig.regs);
|
||||
+}
|
||||
+
|
||||
+uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl)
|
||||
+{
|
||||
+ return REG_IP(tctl->th.regs);
|
||||
+}
|
||||
+
|
||||
+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v)
|
||||
+{
|
||||
+ SET_REG_IP(ctl->orig.regs, v);
|
||||
+}
|
||||
+
|
||||
+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v)
|
||||
+{
|
||||
+ SET_REG_IP(tctl->th.regs, v);
|
||||
+}
|
||||
diff --git a/criu/arch/aarch64/include/asm/types.h b/criu/arch/aarch64/include/asm/types.h
|
||||
index c860af1..363c1ca 100644
|
||||
--- a/criu/arch/aarch64/include/asm/types.h
|
||||
+++ b/criu/arch/aarch64/include/asm/types.h
|
||||
@@ -22,6 +22,8 @@ typedef UserAarch64RegsEntry UserRegsEntry;
|
||||
|
||||
#define TI_SP(core) ((core)->ti_aarch64->gpregs->sp)
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_aarch64->gpregs->pc)
|
||||
+
|
||||
static inline void *decode_pointer(uint64_t v)
|
||||
{
|
||||
return (void *)v;
|
||||
diff --git a/criu/arch/arm/include/asm/types.h b/criu/arch/arm/include/asm/types.h
|
||||
index cfcb8a1..93d2dc2 100644
|
||||
--- a/criu/arch/arm/include/asm/types.h
|
||||
+++ b/criu/arch/arm/include/asm/types.h
|
||||
@@ -21,6 +21,8 @@ typedef UserArmRegsEntry UserRegsEntry;
|
||||
|
||||
#define TI_SP(core) ((core)->ti_arm->gpregs->sp)
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_arm->gpregs->ip)
|
||||
+
|
||||
static inline void *decode_pointer(u64 v)
|
||||
{
|
||||
return (void *)(u32)v;
|
||||
diff --git a/criu/arch/mips/include/asm/types.h b/criu/arch/mips/include/asm/types.h
|
||||
index 237471f..2c75b6a 100644
|
||||
--- a/criu/arch/mips/include/asm/types.h
|
||||
+++ b/criu/arch/mips/include/asm/types.h
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->ti_mips
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_mips->gpregs->cp0_epc)
|
||||
+
|
||||
typedef UserMipsRegsEntry UserRegsEntry;
|
||||
|
||||
static inline u64 encode_pointer(void *p)
|
||||
diff --git a/criu/arch/ppc64/include/asm/types.h b/criu/arch/ppc64/include/asm/types.h
|
||||
index fedeff2..d60aadd 100644
|
||||
--- a/criu/arch/ppc64/include/asm/types.h
|
||||
+++ b/criu/arch/ppc64/include/asm/types.h
|
||||
@@ -19,6 +19,8 @@ typedef UserPpc64RegsEntry UserRegsEntry;
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->ti_ppc64
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_ppc64->gpregs->nip)
|
||||
+
|
||||
static inline void *decode_pointer(uint64_t v)
|
||||
{
|
||||
return (void *)v;
|
||||
diff --git a/criu/arch/s390/include/asm/types.h b/criu/arch/s390/include/asm/types.h
|
||||
index 7522cf2..abf12de 100644
|
||||
--- a/criu/arch/s390/include/asm/types.h
|
||||
+++ b/criu/arch/s390/include/asm/types.h
|
||||
@@ -19,6 +19,8 @@ typedef UserS390RegsEntry UserRegsEntry;
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->ti_s390
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_s390->gpregs->psw_addr)
|
||||
+
|
||||
static inline u64 encode_pointer(void *p)
|
||||
{
|
||||
return (u64)p;
|
||||
diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h
|
||||
index a0a8ed9..8919d0a 100644
|
||||
--- a/criu/arch/x86/include/asm/types.h
|
||||
+++ b/criu/arch/x86/include/asm/types.h
|
||||
@@ -28,6 +28,8 @@ static inline int core_is_compat(CoreEntry *c)
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->thread_info
|
||||
|
||||
+#define TI_IP(core) ((core)->thread_info->gpregs->ip)
|
||||
+
|
||||
typedef UserX86RegsEntry UserRegsEntry;
|
||||
|
||||
static inline u64 encode_pointer(void *p)
|
||||
--
|
||||
2.30.0
|
||||
|
||||
248
0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch
Normal file
248
0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch
Normal file
@ -0,0 +1,248 @@
|
||||
From 33abfc12b973560b3d98afdbac7554b8c0542c3d Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:04:54 +0800
|
||||
Subject: [PATCH 13/16] cr-dump: fixup thread IP when inside rseq cs
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
criu/cr-dump.c | 155 +++++++++++++++++++++++++++-
|
||||
criu/include/parasite.h | 2 +
|
||||
criu/include/pstree.h | 1 +
|
||||
3 files changed, 154 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index 91dd08a..a3f8973 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -1047,11 +1047,58 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs)
|
||||
+{
|
||||
+ int ret;
|
||||
+ uint64_t addr;
|
||||
+
|
||||
+ /* rseq is not registered */
|
||||
+ if (!rseq->rseq_abi_pointer)
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * We need to cover the case when victim process was inside rseq critical section
|
||||
+ * at the moment when CRIU came and seized it. We need to determine the borders
|
||||
+ * of rseq critical section at first. To achieve that we need to access thread
|
||||
+ * memory and read pointer to struct rseq_cs.
|
||||
+ *
|
||||
+ * We have two ways to access thread memory: from the parasite and using ptrace().
|
||||
+ * But in this case we can't use parasite, because if victim process returns to the
|
||||
+ * execution, on the kernel side __rseq_handle_notify_resume hook will be called,
|
||||
+ * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq
|
||||
+ * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA).
|
||||
+ */
|
||||
+ ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)),
|
||||
+ sizeof(uint64_t));
|
||||
+ if (ret) {
|
||||
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr,
|
||||
+ (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* (struct rseq)->rseq_cs is NULL */
|
||||
+ if (!addr)
|
||||
+ return 0;
|
||||
+
|
||||
+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs));
|
||||
+ if (ret) {
|
||||
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
|
||||
+ (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
{
|
||||
struct __ptrace_rseq_configuration rseq;
|
||||
RseqEntry *rseqe = NULL;
|
||||
int ret;
|
||||
+ CoreEntry *core = item->core[i];
|
||||
+ RseqEntry **rseqep = &core->thread_core->rseq_entry;
|
||||
+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
+ pid_t tid = item->threads[i].real;
|
||||
|
||||
/*
|
||||
* If we are here it means that rseq() syscall is supported,
|
||||
@@ -1076,7 +1123,8 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
return -1;
|
||||
}
|
||||
|
||||
- pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature);
|
||||
+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer,
|
||||
+ rseq.signature);
|
||||
|
||||
rseqe = xmalloc(sizeof(*rseqe));
|
||||
if (!rseqe)
|
||||
@@ -1088,25 +1136,118 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
rseqe->rseq_abi_size = rseq.rseq_abi_size;
|
||||
rseqe->signature = rseq.signature;
|
||||
|
||||
+ if (read_rseq_cs(tid, &rseq, rseq_cs))
|
||||
+ goto err;
|
||||
+
|
||||
+ /* save rseq entry to the image */
|
||||
*rseqep = rseqe;
|
||||
|
||||
return 0;
|
||||
+
|
||||
+err:
|
||||
+ xfree(rseqe);
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
static int dump_task_rseq(pid_t pid, struct pstree_item *item)
|
||||
{
|
||||
int i;
|
||||
+ struct rseq_cs *thread_rseq_cs;
|
||||
|
||||
/* if rseq() syscall isn't supported then nothing to dump */
|
||||
if (!kdat.has_rseq)
|
||||
return 0;
|
||||
|
||||
+ thread_rseq_cs = xzalloc(sizeof(*thread_rseq_cs) * item->nr_threads);
|
||||
+ if (!thread_rseq_cs)
|
||||
+ return -1;
|
||||
+
|
||||
+ dmpi(item)->thread_rseq_cs = thread_rseq_cs;
|
||||
+
|
||||
for (i = 0; i < item->nr_threads; i++) {
|
||||
- if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry))
|
||||
- return -1;
|
||||
+ if (dump_thread_rseq(item, i))
|
||||
+ goto free_rseq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
+
|
||||
+free_rseq:
|
||||
+ xfree(thread_rseq_cs);
|
||||
+ dmpi(item)->thread_rseq_cs = NULL;
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+static bool task_in_rseq(struct rseq_cs *rseq_cs, uint64_t addr)
|
||||
+{
|
||||
+ return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset;
|
||||
+}
|
||||
+
|
||||
+static int fixup_thread_rseq(struct pstree_item *item, int i)
|
||||
+{
|
||||
+ CoreEntry *core = item->core[i];
|
||||
+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
+ pid_t tid = item->threads[i].real;
|
||||
+
|
||||
+ /* (struct rseq)->rseq_cs is NULL */
|
||||
+ if (!rseq_cs->start_ip)
|
||||
+ return 0;
|
||||
+
|
||||
+ pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
|
||||
+ tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
|
||||
+ rseq_cs->version, (unsigned long)TI_IP(core));
|
||||
+
|
||||
+ if (rseq_cs->version != 0) {
|
||||
+ pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (task_in_rseq(rseq_cs, TI_IP(core))) {
|
||||
+ struct pid *tid = &item->threads[i];
|
||||
+
|
||||
+ pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
|
||||
+ tid->real);
|
||||
+
|
||||
+ /*
|
||||
+ * We need to fixup task instruction pointer from
|
||||
+ * the original one (which lies inside rseq critical section)
|
||||
+ * to rseq abort handler address.
|
||||
+ *
|
||||
+ * It's worth mentioning that we need to fixup IP in CoreEntry
|
||||
+ * (used when full dump/restore is performed) and also in
|
||||
+ * the parasite regs storage (used if --leave-running option is used,
|
||||
+ * or if dump error occurred and process execution is resumed).
|
||||
+ */
|
||||
+ TI_IP(core) = rseq_cs->abort_ip;
|
||||
+
|
||||
+ if (item->pid->real == tid->real) {
|
||||
+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
|
||||
+ } else {
|
||||
+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int fixup_task_rseq(pid_t pid, struct pstree_item *item)
|
||||
+{
|
||||
+ int ret = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ if (!kdat.has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ if (fixup_thread_rseq(item, i)) {
|
||||
+ ret = -1;
|
||||
+ goto exit;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+exit:
|
||||
+ xfree(dmpi(item)->thread_rseq_cs);
|
||||
+ dmpi(item)->thread_rseq_cs = NULL;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static struct proc_pid_stat pps_buf;
|
||||
@@ -1409,6 +1550,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ ret = fixup_task_rseq(pid, item);
|
||||
+ if (ret) {
|
||||
+ pr_err("Fixup rseq for %d failed %d\n", pid, ret);
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
if (fault_injected(FI_DUMP_EARLY)) {
|
||||
pr_info("fault: CRIU sudden detach\n");
|
||||
kill(getpid(), SIGKILL);
|
||||
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
|
||||
index 5fde809..d2a0688 100644
|
||||
--- a/criu/include/parasite.h
|
||||
+++ b/criu/include/parasite.h
|
||||
@@ -10,6 +10,8 @@
|
||||
#include <time.h>
|
||||
#include <signal.h>
|
||||
|
||||
+#include "linux/rseq.h"
|
||||
+
|
||||
#include "image.h"
|
||||
#include "util-pie.h"
|
||||
#include "common/lock.h"
|
||||
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
|
||||
index c5b0fa7..458e5f9 100644
|
||||
--- a/criu/include/pstree.h
|
||||
+++ b/criu/include/pstree.h
|
||||
@@ -63,6 +63,7 @@ struct dmp_info {
|
||||
struct parasite_ctl *parasite_ctl;
|
||||
struct parasite_thread_ctl **thread_ctls;
|
||||
uint64_t *thread_sp;
|
||||
+ struct rseq_cs *thread_rseq_cs;
|
||||
|
||||
/*
|
||||
* Although we don't support dumping different struct creds in general,
|
||||
--
|
||||
2.30.0
|
||||
|
||||
250
0014-zdtm-add-rseq-transition-test-for-amd64.patch
Normal file
250
0014-zdtm-add-rseq-transition-test-for-amd64.patch
Normal file
@ -0,0 +1,250 @@
|
||||
From f76aa4ade354649e3291b5e7274c368740b05417 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:05:34 +0800
|
||||
Subject: [PATCH 14/16] zdtm: add rseq transition test for amd64 Signed-off-by:
|
||||
Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
test/zdtm/transition/Makefile | 1 +
|
||||
test/zdtm/transition/rseq01.c | 208 +++++++++++++++++++
|
||||
test/zdtm/transition/rseq01.desc | 1 +
|
||||
3 files changed, 210 insertions(+)
|
||||
create mode 100644 test/zdtm/transition/rseq01.c
|
||||
create mode 100644 test/zdtm/transition/rseq01.desc
|
||||
|
||||
diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
|
||||
index 9388157..fae4e27 100644
|
||||
--- a/test/zdtm/transition/Makefile
|
||||
+++ b/test/zdtm/transition/Makefile
|
||||
@@ -23,6 +23,7 @@ TST_NOFILE = \
|
||||
lazy-thp \
|
||||
pid_reuse \
|
||||
pidfd_store_sk \
|
||||
+ rseq01 \
|
||||
|
||||
|
||||
TST_FILE = \
|
||||
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
|
||||
new file mode 100644
|
||||
index 0000000..5fac5a6
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq01.c
|
||||
@@ -0,0 +1,208 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
+#include <signal.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/wait.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <sys/mman.h>
|
||||
+#include <fcntl.h>
|
||||
+#include <pthread.h>
|
||||
+#include <syscall.h>
|
||||
+
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#ifdef __has_include
|
||||
+# if __has_include ("sys/rseq.h")
|
||||
+# include <sys/rseq.h>
|
||||
+# endif
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__x86_64__)
|
||||
+
|
||||
+#if defined(__x86_64__) && defined(RSEQ_SIG)
|
||||
+static inline void *thread_pointer(void)
|
||||
+{
|
||||
+ void *result;
|
||||
+ asm("mov %%fs:0, %0" : "=r"(result));
|
||||
+ return result;
|
||||
+}
|
||||
+
|
||||
+static inline void unregister_old_rseq(void)
|
||||
+{
|
||||
+ /* unregister rseq */
|
||||
+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
|
||||
+}
|
||||
+#else
|
||||
+static inline void unregister_old_rseq(void)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+const char *test_doc = "rseq() transition test";
|
||||
+const char *test_author = "Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>";
|
||||
+
|
||||
+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */
|
||||
+
|
||||
+/* some useful definitions from kernel uapi */
|
||||
+#ifndef RSEQ_SIG
|
||||
+
|
||||
+enum rseq_flags {
|
||||
+ RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
+};
|
||||
+
|
||||
+struct rseq {
|
||||
+ uint32_t cpu_id_start;
|
||||
+ uint32_t cpu_id;
|
||||
+ uint64_t rseq_cs;
|
||||
+ uint32_t flags;
|
||||
+} __attribute__((aligned(4 * sizeof(uint64_t))));
|
||||
+
|
||||
+#define RSEQ_SIG 0x53053053
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#ifndef __NR_rseq
|
||||
+#define __NR_rseq 334
|
||||
+#endif
|
||||
+/* EOF */
|
||||
+
|
||||
+static volatile struct rseq *rseq_ptr;
|
||||
+static __thread volatile struct rseq __rseq_abi;
|
||||
+
|
||||
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
+{
|
||||
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
|
||||
+}
|
||||
+
|
||||
+static void register_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ unregister_old_rseq();
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (rc) {
|
||||
+ fail("Failed to register rseq");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void check_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (!(rc && errno == EBUSY)) {
|
||||
+ fail("Failed to check rseq %d", rc);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
|
||||
+
|
||||
+static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
+{
|
||||
+ double a = 10000000000000000.0;
|
||||
+ double b = -1;
|
||||
+ /*test_msg("enter %f %f\n", a, b);*/
|
||||
+
|
||||
+ /* clang-format off */
|
||||
+ __asm__ __volatile__ goto(
|
||||
+ ".pushsection __rseq_table, \"aw\"\n\t"
|
||||
+ ".balign 32\n\t"
|
||||
+ "cs_obj:\n\t"
|
||||
+ /* version, flags */
|
||||
+ ".long 0, 0\n\t"
|
||||
+ /* start_ip, post_commit_offset, abort_ip */
|
||||
+ ".quad 1f, (2f-1f), 4f\n\t"
|
||||
+ ".popsection\n\t"
|
||||
+ "1:\n\t"
|
||||
+ "leaq cs_obj(%%rip), %%rax\n\t"
|
||||
+ "movq %%rax, %[rseq_cs]\n\t"
|
||||
+ "cmpl %[cpu_id], %[current_cpu_id]\n\t"
|
||||
+ "jnz 4f\n\t"
|
||||
+ "addq %[count], %[v]\n\t" /* final store */
|
||||
+ "mov $10000000, %%rcx\n\t"
|
||||
+ "fldl %[x]\n\t" /* we have st clobbered */
|
||||
+ "5:\n\t"
|
||||
+ "fsqrt\n\t" /* heavy instruction */
|
||||
+ "dec %%rcx\n\t"
|
||||
+ "jnz 5b\n\t"
|
||||
+ "fstpl %[y]\n\t"
|
||||
+ "2:\n\t"
|
||||
+ ".pushsection __rseq_failure, \"ax\"\n\t"
|
||||
+ /* Disassembler-friendly signature: nopl <sig>(%rip). */
|
||||
+ ".byte 0x0f, 0xb9, 0x3d\n\t"
|
||||
+ ".long 0x53053053\n\t" /* RSEQ_SIG */
|
||||
+ "4:\n\t"
|
||||
+ /*"fstpl %[y]\n\t"*/
|
||||
+ "jmp %l[abort]\n\t"
|
||||
+ /*"jmp 1b\n\t"*/
|
||||
+ ".popsection\n\t"
|
||||
+ : /* gcc asm goto does not allow outputs */
|
||||
+ : [cpu_id] "r" (cpu),
|
||||
+ [current_cpu_id] "m" (rseq_ptr->cpu_id),
|
||||
+ [rseq_cs] "m" (rseq_ptr->rseq_cs),
|
||||
+ /* final store input */
|
||||
+ [v] "m" (*v),
|
||||
+ [count] "er" (count),
|
||||
+ [x] "m" (a),
|
||||
+ [y] "m" (b)
|
||||
+ : "memory", "cc", "rax", "rcx", "st"
|
||||
+ : abort
|
||||
+ );
|
||||
+ /* clang-format on */
|
||||
+ /*test_msg("exit %f %f\n", a, b);*/
|
||||
+ return 0;
|
||||
+abort:
|
||||
+ /*test_msg("abort %f %f\n", a, b);*/
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ int cpu = 0;
|
||||
+ int ret;
|
||||
+ intptr_t *cpu_data;
|
||||
+ long nr_cpus;
|
||||
+
|
||||
+ rseq_ptr = &__rseq_abi;
|
||||
+ memset((void *)rseq_ptr, 0, sizeof(struct rseq));
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
+
|
||||
+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
|
||||
+ if (!cpu_data) {
|
||||
+ fail("calloc");
|
||||
+ exit(EXIT_FAILURE);
|
||||
+ }
|
||||
+ register_thread();
|
||||
+
|
||||
+ test_daemon();
|
||||
+
|
||||
+ while (test_go()) {
|
||||
+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
|
||||
+ ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
+ if (ret)
|
||||
+ fail("Failed to increment per-cpu counter");
|
||||
+ }
|
||||
+
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ check_thread();
|
||||
+ pass();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ test_init(argc, argv);
|
||||
+ skip("Unsupported arch");
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
diff --git a/test/zdtm/transition/rseq01.desc b/test/zdtm/transition/rseq01.desc
|
||||
new file mode 100644
|
||||
index 0000000..0324fa3
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq01.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
330
0015-cr-dump-handle-rseq-flags-field.patch
Normal file
330
0015-cr-dump-handle-rseq-flags-field.patch
Normal file
@ -0,0 +1,330 @@
|
||||
From deac94521c373c13add63eaf88118187ea3c2cb2 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:09:44 +0800
|
||||
Subject: [PATCH 15/16] cr-dump: handle rseq flags field Userspace may
|
||||
configure rseq critical section by def
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
criu/cr-dump.c | 86 +++++++++++++++++++------------
|
||||
criu/cr-restore.c | 63 ++++++++++++++++++++++
|
||||
criu/include/pstree.h | 1 +
|
||||
images/rseq.proto | 1 +
|
||||
4 files changed, 119 insertions(+), 32 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index a3f8973..79387fb 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -1047,13 +1047,13 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs)
|
||||
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc,
|
||||
+ struct rseq_cs *rseq_cs, struct rseq *rseq)
|
||||
{
|
||||
int ret;
|
||||
- uint64_t addr;
|
||||
|
||||
/* rseq is not registered */
|
||||
- if (!rseq->rseq_abi_pointer)
|
||||
+ if (!rseqc->rseq_abi_pointer)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -1068,22 +1068,21 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str
|
||||
* then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq
|
||||
* will be cleared. So, let's use ptrace(PTRACE_PEEKDATA).
|
||||
*/
|
||||
- ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)),
|
||||
- sizeof(uint64_t));
|
||||
+ ret = ptrace_peek_area(tid, rseq, decode_pointer(rseqc->rseq_abi_pointer),
|
||||
+ sizeof(struct rseq));
|
||||
if (ret) {
|
||||
- pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr,
|
||||
- (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t));
|
||||
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq struct\n", tid, (unsigned long)rseq,
|
||||
+ (unsigned long)(rseqc->rseq_abi_pointer), sizeof(uint64_t));
|
||||
return -1;
|
||||
}
|
||||
|
||||
- /* (struct rseq)->rseq_cs is NULL */
|
||||
- if (!addr)
|
||||
+ if (!rseq->rseq_cs.ptr64)
|
||||
return 0;
|
||||
|
||||
- ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs));
|
||||
+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs.ptr64), sizeof(struct rseq_cs));
|
||||
if (ret) {
|
||||
pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
|
||||
- (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs));
|
||||
+ (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs.ptr64, sizeof(struct rseq_cs));
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1092,11 +1091,12 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str
|
||||
|
||||
static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
{
|
||||
- struct __ptrace_rseq_configuration rseq;
|
||||
+ struct __ptrace_rseq_configuration rseqc;
|
||||
RseqEntry *rseqe = NULL;
|
||||
int ret;
|
||||
CoreEntry *core = item->core[i];
|
||||
RseqEntry **rseqep = &core->thread_core->rseq_entry;
|
||||
+ struct rseq rseq;
|
||||
struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
pid_t tid = item->threads[i].real;
|
||||
|
||||
@@ -1111,20 +1111,20 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
if (!kdat.has_ptrace_get_rseq_conf)
|
||||
return 0;
|
||||
|
||||
- ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq);
|
||||
- if (ret != sizeof(rseq)) {
|
||||
+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseqc), &rseqc);
|
||||
+ if (ret != sizeof(rseqc)) {
|
||||
pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret);
|
||||
return -1;
|
||||
}
|
||||
|
||||
- if (rseq.flags != 0) {
|
||||
+ if (rseqc.flags != 0) {
|
||||
pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid,
|
||||
- rseq.flags);
|
||||
+ rseqc.flags);
|
||||
return -1;
|
||||
}
|
||||
|
||||
- pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer,
|
||||
- rseq.signature);
|
||||
+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseqc.rseq_abi_pointer,
|
||||
+ rseqc.signature);
|
||||
|
||||
rseqe = xmalloc(sizeof(*rseqe));
|
||||
if (!rseqe)
|
||||
@@ -1132,13 +1132,22 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
|
||||
rseq_entry__init(rseqe);
|
||||
|
||||
- rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer;
|
||||
- rseqe->rseq_abi_size = rseq.rseq_abi_size;
|
||||
- rseqe->signature = rseq.signature;
|
||||
+ rseqe->rseq_abi_pointer = rseqc.rseq_abi_pointer;
|
||||
+ rseqe->rseq_abi_size = rseqc.rseq_abi_size;
|
||||
+ rseqe->signature = rseqc.signature;
|
||||
|
||||
- if (read_rseq_cs(tid, &rseq, rseq_cs))
|
||||
+ if (read_rseq_cs(tid, &rseqc, rseq_cs, &rseq))
|
||||
goto err;
|
||||
|
||||
+ rseqe->has_rseq_cs_pointer = true;
|
||||
+ rseqe->rseq_cs_pointer = rseq.rseq_cs.ptr64;
|
||||
+ pr_err("cs pointer %lx\n", rseqe->rseq_cs_pointer);
|
||||
+ /* we won't save rseq_cs to the image (only pointer),
|
||||
+ * so let's combine flags from both struct rseq and struct rseq_cs
|
||||
+ * (kernel does the same when interpreting RSEQ_CS_FLAG_*)
|
||||
+ */
|
||||
+ rseq_cs->flags |= rseq.flags;
|
||||
+
|
||||
/* save rseq entry to the image */
|
||||
*rseqep = rseqe;
|
||||
|
||||
@@ -1188,11 +1197,11 @@ static int fixup_thread_rseq(struct pstree_item *item, int i)
|
||||
struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
pid_t tid = item->threads[i].real;
|
||||
|
||||
- /* (struct rseq)->rseq_cs is NULL */
|
||||
+ /* equivalent to (struct rseq)->rseq_cs is NULL */
|
||||
if (!rseq_cs->start_ip)
|
||||
return 0;
|
||||
|
||||
- pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
|
||||
+ pr_debug("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
|
||||
tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
|
||||
rseq_cs->version, (unsigned long)TI_IP(core));
|
||||
|
||||
@@ -1204,25 +1213,38 @@ static int fixup_thread_rseq(struct pstree_item *item, int i)
|
||||
if (task_in_rseq(rseq_cs, TI_IP(core))) {
|
||||
struct pid *tid = &item->threads[i];
|
||||
|
||||
- pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
|
||||
- tid->real);
|
||||
-
|
||||
/*
|
||||
* We need to fixup task instruction pointer from
|
||||
* the original one (which lays inside rseq critical section)
|
||||
- * to rseq abort handler address.
|
||||
+ * to rseq abort handler address. But we need to look at rseq_cs->flags
|
||||
+ * (please refer to struct rseq -> flags field description).
|
||||
+ * A naive approach to flags support would be to change the instruction pointer (IP)
|
||||
+ * to rseq_cs->abort_ip if !(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL).
|
||||
+ * But unfortunately, it doesn't work properly, because the kernel does
|
||||
+ * clean up of rseq_cs field in the struct rseq (modifies userspace memory).
|
||||
+ * So, we need to preserve original value of (struct rseq)->rseq_cs field in the
|
||||
+ * image and restore its value before releasing threads.
|
||||
*
|
||||
* It's worth to mention that we need to fixup IP in CoreEntry
|
||||
* (used when full dump/restore is performed) and also in
|
||||
* the parasite regs storage (used if --leave-running option is used,
|
||||
* or if dump error occured and process execution is resumed).
|
||||
*/
|
||||
- TI_IP(core) = rseq_cs->abort_ip;
|
||||
|
||||
- if (item->pid->real == tid->real) {
|
||||
- compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
|
||||
+ if (rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) {
|
||||
+ pr_err("The %d task is in rseq critical section.!!! IP will be set to rseq abort handler addr\n",
|
||||
+ tid->real);
|
||||
} else {
|
||||
- compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
|
||||
+ pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
|
||||
+ tid->real);
|
||||
+
|
||||
+ TI_IP(core) = rseq_cs->abort_ip;
|
||||
+
|
||||
+ if (item->pid->real == tid->real) {
|
||||
+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
|
||||
+ } else {
|
||||
+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index b2bd044..864140f 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "common/compiler.h"
|
||||
|
||||
#include "linux/mount.h"
|
||||
+#include "linux/rseq.h"
|
||||
|
||||
#include "clone-noasan.h"
|
||||
#include "cr_options.h"
|
||||
@@ -779,6 +780,7 @@ static int open_cores(int pid, CoreEntry *leader_core)
|
||||
{
|
||||
int i, tpid;
|
||||
CoreEntry **cores = NULL;
|
||||
+ //RseqEntry *rseqs;
|
||||
|
||||
cores = xmalloc(sizeof(*cores) * current->nr_threads);
|
||||
if (!cores)
|
||||
@@ -812,6 +814,19 @@ static int open_cores(int pid, CoreEntry *leader_core)
|
||||
}
|
||||
}
|
||||
|
||||
+
|
||||
+ pr_err("item %lx\n", (uint64_t)current);
|
||||
+
|
||||
+ for (i = 0; i < current->nr_threads; i++) {
|
||||
+ ThreadCoreEntry *tc = cores[i]->thread_core;
|
||||
+
|
||||
+ /* compatibility with older CRIU versions */
|
||||
+ if (!tc->rseq_entry)
|
||||
+ continue;
|
||||
+
|
||||
+ current->rseqe[i] = *tc->rseq_entry;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
err:
|
||||
xfree(cores);
|
||||
@@ -868,8 +883,15 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
|
||||
{
|
||||
unsigned args_len;
|
||||
struct task_restore_args *ta;
|
||||
+ RseqEntry *rseqs;
|
||||
pr_info("Restoring resources\n");
|
||||
|
||||
+ rseqs = shmalloc(sizeof(*rseqs) * current->nr_threads);
|
||||
+ if (!rseqs)
|
||||
+ return -1;
|
||||
+
|
||||
+ current->rseqe = rseqs;
|
||||
+
|
||||
rst_mem_switch_to_private();
|
||||
|
||||
args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) * current->nr_threads, page_size());
|
||||
@@ -1966,6 +1988,44 @@ static int attach_to_tasks(bool root_seized)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int restore_rseq_cs(void)
|
||||
+{
|
||||
+ struct pstree_item *item;
|
||||
+
|
||||
+ for_each_pstree_item(item) {
|
||||
+ int i;
|
||||
+
|
||||
+ if (!task_alive(item))
|
||||
+ continue;
|
||||
+
|
||||
+ if (item->nr_threads == 1) {
|
||||
+ item->threads[0].real = item->pid->real;
|
||||
+ } else {
|
||||
+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads))
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ pid_t pid = item->threads[i].real;
|
||||
+
|
||||
+ if (!item->rseqe[i].rseq_cs_pointer || !item->rseqe[i].rseq_abi_pointer) {
|
||||
+ pr_err("item %lx rseqe %lx\n", (uint64_t)item, (uint64_t)item->rseqe);
|
||||
+ pr_err("nothing to do with cs_pointer\n");
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("restoring cs ... %lx \n", item->rseqe[i].rseq_cs_pointer);
|
||||
+
|
||||
+ if (ptrace_poke_area(pid, &item->rseqe[i].rseq_cs_pointer, (void *)(item->rseqe[i].rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t))) {
|
||||
+ pr_err("Can't restore memfd args (pid: %d)\n", pid);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int catch_tasks(bool root_seized, enum trace_flags *flag)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
@@ -2400,6 +2460,9 @@ skip_ns_bouncing:
|
||||
if (restore_freezer_state())
|
||||
pr_err("Unable to restore freezer state\n");
|
||||
|
||||
+ /* just before releasing threads we have to restore rseq_cs */
|
||||
+ restore_rseq_cs();
|
||||
+
|
||||
/* Detaches from processes and they continue run through sigreturn. */
|
||||
if (finalize_restore_detach())
|
||||
goto out_kill_network_unlocked;
|
||||
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
|
||||
index 458e5f9..97bef11 100644
|
||||
--- a/criu/include/pstree.h
|
||||
+++ b/criu/include/pstree.h
|
||||
@@ -25,6 +25,7 @@ struct pstree_item {
|
||||
int nr_threads; /* number of threads */
|
||||
struct pid *threads; /* array of threads */
|
||||
CoreEntry **core;
|
||||
+ RseqEntry *rseqe;
|
||||
TaskKobjIdsEntry *ids;
|
||||
union {
|
||||
futex_t task_st;
|
||||
diff --git a/images/rseq.proto b/images/rseq.proto
|
||||
index be28004..45cb847 100644
|
||||
--- a/images/rseq.proto
|
||||
+++ b/images/rseq.proto
|
||||
@@ -6,4 +6,5 @@ message rseq_entry {
|
||||
required uint64 rseq_abi_pointer = 1;
|
||||
required uint32 rseq_abi_size = 2;
|
||||
required uint32 signature = 3;
|
||||
+ optional uint64 rseq_cs_pointer = 4;
|
||||
}
|
||||
--
|
||||
2.30.0
|
||||
|
||||
177
0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch
Normal file
177
0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch
Normal file
@ -0,0 +1,177 @@
|
||||
From bb8295ae4f1224db2236fdd3134912e093ed20d9 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:10:24 +0800
|
||||
Subject: [PATCH 16/16] zdtm: add rseq02 transition test with NO_RESTART CS
|
||||
flag Signed-off-by: Alexander Mikhalitsyn
|
||||
<alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
test/zdtm/transition/Makefile | 2 +
|
||||
test/zdtm/transition/rseq01.c | 61 +++++++++++++++++++-
|
||||
test/zdtm/transition/rseq02.c | 1 +
|
||||
test/zdtm/transition/rseq02.desc | 1 +
|
||||
4 files changed, 63 insertions(+), 2 deletions(-)
|
||||
create mode 120000 test/zdtm/transition/rseq02.c
|
||||
create mode 120000 test/zdtm/transition/rseq02.desc
|
||||
|
||||
diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
|
||||
index fae4e27..378a4fc 100644
|
||||
--- a/test/zdtm/transition/Makefile
|
||||
+++ b/test/zdtm/transition/Makefile
|
||||
@@ -24,6 +24,7 @@ TST_NOFILE = \
|
||||
pid_reuse \
|
||||
pidfd_store_sk \
|
||||
rseq01 \
|
||||
+ rseq02 \
|
||||
|
||||
|
||||
TST_FILE = \
|
||||
@@ -82,6 +83,7 @@ ptrace: LDFLAGS += -pthread
|
||||
fork2: CFLAGS += -D FORK2
|
||||
thread-bomb.o: CFLAGS += -pthread
|
||||
thread-bomb: LDFLAGS += -pthread
|
||||
+rseq02: CFLAGS += -D NOABORT
|
||||
|
||||
%: %.sh
|
||||
cp $< $@
|
||||
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
|
||||
index 5fac5a6..25e1d61 100644
|
||||
--- a/test/zdtm/transition/rseq01.c
|
||||
+++ b/test/zdtm/transition/rseq01.c
|
||||
@@ -53,6 +53,18 @@ enum rseq_flags {
|
||||
RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
};
|
||||
|
||||
+enum rseq_cs_flags_bit {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
|
||||
+};
|
||||
+
|
||||
+enum rseq_cs_flags {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
|
||||
+};
|
||||
+
|
||||
struct rseq {
|
||||
uint32_t cpu_id_start;
|
||||
uint32_t cpu_id;
|
||||
@@ -104,6 +116,7 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
{
|
||||
double a = 10000000000000000.0;
|
||||
double b = -1;
|
||||
+ uint64_t rseq_cs1, rseq_cs2;
|
||||
/*test_msg("enter %f %f\n", a, b);*/
|
||||
|
||||
/* clang-format off */
|
||||
@@ -129,6 +142,9 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
"dec %%rcx\n\t"
|
||||
"jnz 5b\n\t"
|
||||
"fstpl %[y]\n\t"
|
||||
+ "movq %%rax, %[rseq_cs_check2]\n\t"
|
||||
+ "movq %[rseq_cs], %%rax\n\t"
|
||||
+ "movq %%rax, %[rseq_cs_check1]\n\t"
|
||||
"2:\n\t"
|
||||
".pushsection __rseq_failure, \"ax\"\n\t"
|
||||
/* Disassembler-friendly signature: nopl <sig>(%rip). */
|
||||
@@ -143,6 +159,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
: [cpu_id] "r" (cpu),
|
||||
[current_cpu_id] "m" (rseq_ptr->cpu_id),
|
||||
[rseq_cs] "m" (rseq_ptr->rseq_cs),
|
||||
+ [rseq_cs_check1] "m" (rseq_cs1),
|
||||
+ [rseq_cs_check2] "m" (rseq_cs2),
|
||||
/* final store input */
|
||||
[v] "m" (*v),
|
||||
[count] "er" (count),
|
||||
@@ -153,8 +171,20 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
);
|
||||
/* clang-format on */
|
||||
/*test_msg("exit %f %f\n", a, b);*/
|
||||
+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2);
|
||||
+ if (rseq_cs1 != rseq_cs2) {
|
||||
+ /*
|
||||
+ * It means that we finished critical section
|
||||
+ * *normally* (haven't jumped to abort) but the kernel had cleaned up
|
||||
+ * rseq_ptr->rseq_cs before we left critical section
|
||||
+ * and CRIU didn't restore it correctly.
|
||||
+ * That indicates a bug.
|
||||
+ */
|
||||
+ return -1;
|
||||
+ }
|
||||
return 0;
|
||||
abort:
|
||||
+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2);
|
||||
/*test_msg("abort %f %f\n", a, b);*/
|
||||
return -1;
|
||||
}
|
||||
@@ -177,21 +207,48 @@ int main(int argc, char *argv[])
|
||||
fail("calloc");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
+
|
||||
register_thread();
|
||||
|
||||
+ /*
|
||||
+ * We want to test that RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
|
||||
+ * is handled properly by CRIU, but that flag can be used
|
||||
+ * only with all the other flags set.
|
||||
+ * Please, refer to
|
||||
+ * https://github.com/torvalds/linux/blob/master/kernel/rseq.c#L192
|
||||
+ */
|
||||
+#ifdef NOABORT
|
||||
+ rseq_ptr->flags = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT |
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL |
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE;
|
||||
+#endif
|
||||
+
|
||||
test_daemon();
|
||||
|
||||
while (test_go()) {
|
||||
cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
|
||||
ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
- if (ret)
|
||||
+#ifndef NOABORT
|
||||
+ /* just ignore abort */
|
||||
+ ret = 0;
|
||||
+#else
|
||||
+ if (ret) {
|
||||
fail("Failed to increment per-cpu counter");
|
||||
+ break;
|
||||
+ } else {
|
||||
+ //test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]);
|
||||
+ }
|
||||
+#endif
|
||||
}
|
||||
|
||||
test_waitsig();
|
||||
|
||||
check_thread();
|
||||
- pass();
|
||||
+
|
||||
+ if (ret)
|
||||
+ fail();
|
||||
+ else
|
||||
+ pass();
|
||||
|
||||
return 0;
|
||||
}
|
||||
diff --git a/test/zdtm/transition/rseq02.c b/test/zdtm/transition/rseq02.c
|
||||
new file mode 120000
|
||||
index 0000000..d564917
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq02.c
|
||||
@@ -0,0 +1 @@
|
||||
+rseq01.c
|
||||
\ No newline at end of file
|
||||
diff --git a/test/zdtm/transition/rseq02.desc b/test/zdtm/transition/rseq02.desc
|
||||
new file mode 120000
|
||||
index 0000000..b888f0d
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq02.desc
|
||||
@@ -0,0 +1 @@
|
||||
+rseq01.desc
|
||||
\ No newline at end of file
|
||||
--
|
||||
2.30.0
|
||||
|
||||
20
criu.spec
20
criu.spec
@ -1,6 +1,6 @@
|
||||
Name: criu
|
||||
Version: 3.16.1
|
||||
Release: 2
|
||||
Release: 3
|
||||
Provides: crtools = %{version}-%{release}
|
||||
Obsoletes: crtools <= 1.0-2
|
||||
Summary: A tool of Checkpoint/Restore in User-space
|
||||
@ -17,6 +17,21 @@ Obsoletes: %{name}-libs < %{version}-%{release}
|
||||
|
||||
Patch1: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch
|
||||
Patch2: 0002-mm-add-pin-memory-method-for-criu.patch
|
||||
Patch3: 0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch
|
||||
Patch4: 0003-kerndat-check-for-rseq-syscall-support.patch
|
||||
Patch5: 0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch
|
||||
Patch6: 0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch
|
||||
Patch7: 0006-rseq-initial-support.patch
|
||||
Patch8: 0007-zdtm-add-simple-test-for-rseq-C-R.patch
|
||||
Patch9: 0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch
|
||||
Patch10: 0009-include-add-thread_pointer.h-from-Glibc.patch
|
||||
Patch11: 0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch
|
||||
Patch12: 0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch
|
||||
Patch13: 0012-compel-add-helpers-to-get-set-instruction-pointer.patch
|
||||
Patch14: 0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch
|
||||
Patch15: 0014-zdtm-add-rseq-transition-test-for-amd64.patch
|
||||
Patch16: 0015-cr-dump-handle-rseq-flags-field.patch
|
||||
Patch17: 0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch
|
||||
|
||||
%description
|
||||
Checkpoint/Restore in Userspace(CRIU),is a software tool for the linux operating system.
|
||||
@ -99,6 +114,9 @@ chmod 0755 %{buildroot}/run/%{name}/
|
||||
%doc %{_mandir}/man1/{compel.1*,crit.1*,criu-ns.1*}
|
||||
|
||||
%changelog
|
||||
* Fri Mar 4 2022 ningyu <ningyu9@huawei.com> - 3.16.1-3
|
||||
- rseq c/r support
|
||||
|
||||
* Sat Feb 26 2022 luolongjun <luolongjuna@gmail.com> - 3.16.1-2
|
||||
- add support for pin memory
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user