Compare commits
No commits in common. "9199f9251753882d2cfb7244d3ee663c52790583" and "83c92df897bfb6d708d564c5a0de26090618babb" have entirely different histories.
9199f92517
...
83c92df897
@ -1 +1 @@
|
|||||||
79523452a255e123d3659b5b7447d9a060487015
|
d3c42c5e018eaf9bb30b5180356834037e12a91c
|
||||||
|
|||||||
@ -1,646 +0,0 @@
|
|||||||
From afe7c0b38f640c4c3b85aa7a63f225330a4951de Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongjiawei <zhongjiawei1@huawei.com>
|
|
||||||
Date: Thu, 1 Feb 2024 11:37:33 +0800
|
|
||||||
Subject: [PATCH] runc:fix CVE-2024-21626
|
|
||||||
|
|
||||||
---
|
|
||||||
libcontainer/cgroups/file.go | 35 ++--
|
|
||||||
libcontainer/container_linux.go | 9 +
|
|
||||||
libcontainer/init_linux.go | 31 ++++
|
|
||||||
libcontainer/integration/seccomp_test.go | 20 +--
|
|
||||||
libcontainer/setns_init_linux.go | 18 ++
|
|
||||||
libcontainer/standard_init_linux.go | 19 ++
|
|
||||||
libcontainer/utils/utils.go | 36 ----
|
|
||||||
libcontainer/utils/utils_unix.go | 216 +++++++++++++++++++++--
|
|
||||||
8 files changed, 311 insertions(+), 73 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libcontainer/cgroups/file.go b/libcontainer/cgroups/file.go
|
|
||||||
index bc7f0a39..b78817cd 100644
|
|
||||||
--- a/libcontainer/cgroups/file.go
|
|
||||||
+++ b/libcontainer/cgroups/file.go
|
|
||||||
@@ -10,6 +10,7 @@ import (
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
+ "github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
@@ -81,16 +82,16 @@ var (
|
|
||||||
// TestMode is set to true by unit tests that need "fake" cgroupfs.
|
|
||||||
TestMode bool
|
|
||||||
|
|
||||||
- cgroupFd int = -1
|
|
||||||
- prepOnce sync.Once
|
|
||||||
- prepErr error
|
|
||||||
- resolveFlags uint64
|
|
||||||
+ cgroupRootHandle *os.File
|
|
||||||
+ prepOnce sync.Once
|
|
||||||
+ prepErr error
|
|
||||||
+ resolveFlags uint64
|
|
||||||
)
|
|
||||||
|
|
||||||
func prepareOpenat2() error {
|
|
||||||
prepOnce.Do(func() {
|
|
||||||
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
|
||||||
- Flags: unix.O_DIRECTORY | unix.O_PATH,
|
|
||||||
+ Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
|
||||||
@@ -101,15 +102,16 @@ func prepareOpenat2() error {
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
+ file := os.NewFile(uintptr(fd), cgroupfsDir)
|
|
||||||
+
|
|
||||||
var st unix.Statfs_t
|
|
||||||
- if err = unix.Fstatfs(fd, &st); err != nil {
|
|
||||||
+ if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
|
|
||||||
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
|
||||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
- cgroupFd = fd
|
|
||||||
-
|
|
||||||
+ cgroupRootHandle = file
|
|
||||||
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
|
||||||
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
|
||||||
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
|
||||||
@@ -136,7 +138,7 @@ func openFile(dir, file string, flags int) (*os.File, error) {
|
|
||||||
return openFallback(path, flags, mode)
|
|
||||||
}
|
|
||||||
|
|
||||||
- fd, err := unix.Openat2(cgroupFd, relPath,
|
|
||||||
+ fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
|
|
||||||
&unix.OpenHow{
|
|
||||||
Resolve: resolveFlags,
|
|
||||||
Flags: uint64(flags) | unix.O_CLOEXEC,
|
|
||||||
@@ -144,20 +146,21 @@ func openFile(dir, file string, flags int) (*os.File, error) {
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
|
||||||
- // Check if cgroupFd is still opened to cgroupfsDir
|
|
||||||
+ // Check if cgroupRootHandle is still opened to cgroupfsDir
|
|
||||||
// (happens when this package is incorrectly used
|
|
||||||
// across the chroot/pivot_root/mntns boundary, or
|
|
||||||
// when /sys/fs/cgroup is remounted).
|
|
||||||
//
|
|
||||||
// TODO: if such usage will ever be common, amend this
|
|
||||||
- // to reopen cgroupFd and retry openat2.
|
|
||||||
- fdStr := strconv.Itoa(cgroupFd)
|
|
||||||
- fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
|
||||||
+ // to reopen cgroupRootHandle and retry openat2.
|
|
||||||
+ fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
|
|
||||||
+ defer closer()
|
|
||||||
+ fdDest, _ := os.Readlink(fdPath)
|
|
||||||
if fdDest != cgroupfsDir {
|
|
||||||
- // Wrap the error so it is clear that cgroupFd
|
|
||||||
+ // Wrap the error so it is clear that cgroupRootHandle
|
|
||||||
// is opened to an unexpected/wrong directory.
|
|
||||||
- err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
|
|
||||||
- fdStr, fdDest, cgroupfsDir, err)
|
|
||||||
+ err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
|
|
||||||
+ cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
|
|
||||||
}
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
|
|
||||||
index 1189e5af..5d2b4362 100644
|
|
||||||
--- a/libcontainer/container_linux.go
|
|
||||||
+++ b/libcontainer/container_linux.go
|
|
||||||
@@ -362,6 +362,15 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
+ // Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
|
|
||||||
+ // to make sure we don't leak any files into "runc init". Any files to be
|
|
||||||
+ // passed to "runc init" through ExtraFiles will get dup2'd by the Go
|
|
||||||
+ // runtime and thus their O_CLOEXEC flag will be cleared. This is some
|
|
||||||
+ // additional protection against attacks like CVE-2024-21626, by making
|
|
||||||
+ // sure we never leak files to "runc init" we didn't intend to.
|
|
||||||
+ if err := utils.CloseExecFrom(3); err != nil {
|
|
||||||
+ return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
|
|
||||||
+ }
|
|
||||||
if err := parent.start(); err != nil {
|
|
||||||
return fmt.Errorf("unable to start container process: %w", err)
|
|
||||||
}
|
|
||||||
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
|
|
||||||
index 2e4c5935..8318e5d2 100644
|
|
||||||
--- a/libcontainer/init_linux.go
|
|
||||||
+++ b/libcontainer/init_linux.go
|
|
||||||
@@ -8,6 +8,7 @@ import (
|
|
||||||
"io"
|
|
||||||
"net"
|
|
||||||
"os"
|
|
||||||
+ "path/filepath"
|
|
||||||
"strings"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
@@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
+// verifyCwd ensures that the current directory is actually inside the mount
|
|
||||||
+// namespace root of the current process.
|
|
||||||
+func verifyCwd() error {
|
|
||||||
+ // getcwd(2) on Linux detects if cwd is outside of the rootfs of the
|
|
||||||
+ // current mount namespace root, and in that case prefixes "(unreachable)"
|
|
||||||
+ // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
|
|
||||||
+ // when this happens and return ENOENT rather than returning a non-absolute
|
|
||||||
+ // path. In both cases we can therefore easily detect if we have an invalid
|
|
||||||
+ // cwd by checking the return value of getcwd(3). See getcwd(3) for more
|
|
||||||
+ // details, and CVE-2024-21626 for the security issue that motivated this
|
|
||||||
+ // check.
|
|
||||||
+ //
|
|
||||||
+ // We have to use unix.Getwd() here because os.Getwd() has a workaround for
|
|
||||||
+ // $PWD which involves doing stat(.), which can fail if the current
|
|
||||||
+ // directory is inaccessible to the container process.
|
|
||||||
+ if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
|
|
||||||
+ return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
|
|
||||||
+ } else if err != nil {
|
|
||||||
+ return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
|
|
||||||
+ } else if !filepath.IsAbs(wd) {
|
|
||||||
+ // We shouldn't ever hit this, but check just in case.
|
|
||||||
+ return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
|
|
||||||
+ }
|
|
||||||
+ return nil
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// finalizeNamespace drops the caps, sets the correct user
|
|
||||||
// and working dir, and closes any leaked file descriptors
|
|
||||||
// before executing the command inside the namespace
|
|
||||||
@@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error {
|
|
||||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+ // Make sure our final working directory is inside the container.
|
|
||||||
+ if err := verifyCwd(); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
if err := system.ClearKeepCaps(); err != nil {
|
|
||||||
return fmt.Errorf("unable to clear keep caps: %w", err)
|
|
||||||
}
|
|
||||||
diff --git a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go
|
|
||||||
index a7eeefb1..34ba5b27 100644
|
|
||||||
--- a/libcontainer/integration/seccomp_test.go
|
|
||||||
+++ b/libcontainer/integration/seccomp_test.go
|
|
||||||
@@ -13,7 +13,7 @@ import (
|
|
||||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
|
||||||
)
|
|
||||||
|
|
||||||
-func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
|
|
||||||
+func TestSeccompDenySyslogWithErrno(t *testing.T) {
|
|
||||||
if testing.Short() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
@@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
|
|
||||||
DefaultAction: configs.Allow,
|
|
||||||
Syscalls: []*configs.Syscall{
|
|
||||||
{
|
|
||||||
- Name: "getcwd",
|
|
||||||
+ Name: "syslog",
|
|
||||||
Action: configs.Errno,
|
|
||||||
ErrnoRet: &errnoRet,
|
|
||||||
},
|
|
||||||
@@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
|
|
||||||
buffers := newStdBuffers()
|
|
||||||
pwd := &libcontainer.Process{
|
|
||||||
Cwd: "/",
|
|
||||||
- Args: []string{"pwd"},
|
|
||||||
+ Args: []string{"dmesg"},
|
|
||||||
Env: standardEnvironment,
|
|
||||||
Stdin: buffers.Stdin,
|
|
||||||
Stdout: buffers.Stdout,
|
|
||||||
@@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
|
|
||||||
}
|
|
||||||
|
|
||||||
if exitCode == 0 {
|
|
||||||
- t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
|
|
||||||
+ t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
|
|
||||||
}
|
|
||||||
|
|
||||||
- expected := "pwd: getcwd: No such process"
|
|
||||||
+ expected := "dmesg: klogctl: No such process"
|
|
||||||
actual := strings.Trim(buffers.Stderr.String(), "\n")
|
|
||||||
if actual != expected {
|
|
||||||
t.Fatalf("Expected output %s but got %s\n", expected, actual)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
-func TestSeccompDenyGetcwd(t *testing.T) {
|
|
||||||
+func TestSeccompDenySyslog(t *testing.T) {
|
|
||||||
if testing.Short() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
@@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
|
|
||||||
DefaultAction: configs.Allow,
|
|
||||||
Syscalls: []*configs.Syscall{
|
|
||||||
{
|
|
||||||
- Name: "getcwd",
|
|
||||||
+ Name: "syslog",
|
|
||||||
Action: configs.Errno,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
@@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
|
|
||||||
buffers := newStdBuffers()
|
|
||||||
pwd := &libcontainer.Process{
|
|
||||||
Cwd: "/",
|
|
||||||
- Args: []string{"pwd"},
|
|
||||||
+ Args: []string{"dmesg"},
|
|
||||||
Env: standardEnvironment,
|
|
||||||
Stdin: buffers.Stdin,
|
|
||||||
Stdout: buffers.Stdout,
|
|
||||||
@@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) {
|
|
||||||
}
|
|
||||||
|
|
||||||
if exitCode == 0 {
|
|
||||||
- t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
|
|
||||||
+ t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
|
|
||||||
}
|
|
||||||
|
|
||||||
- expected := "pwd: getcwd: Operation not permitted"
|
|
||||||
+ expected := "dmesg: klogctl: Operation not permitted"
|
|
||||||
actual := strings.Trim(buffers.Stderr.String(), "\n")
|
|
||||||
if actual != expected {
|
|
||||||
t.Fatalf("Expected output %s but got %s\n", expected, actual)
|
|
||||||
diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go
|
|
||||||
index e9b8d62a..1eea8512 100644
|
|
||||||
--- a/libcontainer/setns_init_linux.go
|
|
||||||
+++ b/libcontainer/setns_init_linux.go
|
|
||||||
@@ -108,5 +108,23 @@ func (l *linuxSetnsInit) Init() error {
|
|
||||||
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
|
|
||||||
}
|
|
||||||
|
|
||||||
+ // Close all file descriptors we are not passing to the container. This is
|
|
||||||
+ // necessary because the execve target could use internal runc fds as the
|
|
||||||
+ // execve path, potentially giving access to binary files from the host
|
|
||||||
+ // (which can then be opened by container processes, leading to container
|
|
||||||
+ // escapes). Note that because this operation will close any open file
|
|
||||||
+ // descriptors that are referenced by (*os.File) handles from underneath
|
|
||||||
+ // the Go runtime, we must not do any file operations after this point
|
|
||||||
+ // (otherwise the (*os.File) finaliser could close the wrong file). See
|
|
||||||
+ // CVE-2024-21626 for more information as to why this protection is
|
|
||||||
+ // necessary.
|
|
||||||
+ //
|
|
||||||
+ // This is not needed for runc-dmz, because the extra execve(2) step means
|
|
||||||
+ // that all O_CLOEXEC file descriptors have already been closed and thus
|
|
||||||
+ // the second execve(2) from runc-dmz cannot access internal file
|
|
||||||
+ // descriptors from runc.
|
|
||||||
+ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
|
||||||
}
|
|
||||||
diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go
|
|
||||||
index c288c884..e0d8b4a7 100644
|
|
||||||
--- a/libcontainer/standard_init_linux.go
|
|
||||||
+++ b/libcontainer/standard_init_linux.go
|
|
||||||
@@ -20,6 +20,7 @@ import (
|
|
||||||
"github.com/opencontainers/runc/libcontainer/keys"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
|
||||||
+ "github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
)
|
|
||||||
|
|
||||||
type linuxStandardInit struct {
|
|
||||||
@@ -276,6 +277,24 @@ func (l *linuxStandardInit) Init() error {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
+ // Close all file descriptors we are not passing to the container. This is
|
|
||||||
+ // necessary because the execve target could use internal runc fds as the
|
|
||||||
+ // execve path, potentially giving access to binary files from the host
|
|
||||||
+ // (which can then be opened by container processes, leading to container
|
|
||||||
+ // escapes). Note that because this operation will close any open file
|
|
||||||
+ // descriptors that are referenced by (*os.File) handles from underneath
|
|
||||||
+ // the Go runtime, we must not do any file operations after this point
|
|
||||||
+ // (otherwise the (*os.File) finaliser could close the wrong file). See
|
|
||||||
+ // CVE-2024-21626 for more information as to why this protection is
|
|
||||||
+ // necessary.
|
|
||||||
+ //
|
|
||||||
+ // This is not needed for runc-dmz, because the extra execve(2) step means
|
|
||||||
+ // that all O_CLOEXEC file descriptors have already been closed and thus
|
|
||||||
+ // the second execve(2) from runc-dmz cannot access internal file
|
|
||||||
+ // descriptors from runc.
|
|
||||||
+ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
return system.Exec(name, l.config.Args[0:], os.Environ())
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
|
|
||||||
index 6b9fc343..dd96abe1 100644
|
|
||||||
--- a/libcontainer/utils/utils.go
|
|
||||||
+++ b/libcontainer/utils/utils.go
|
|
||||||
@@ -3,15 +3,12 @@ package utils
|
|
||||||
import (
|
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
|
||||||
- "fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
- "strconv"
|
|
||||||
"strings"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
- securejoin "github.com/cyphar/filepath-securejoin"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
@@ -99,39 +96,6 @@ func stripRoot(root, path string) string {
|
|
||||||
return CleanPath("/" + path)
|
|
||||||
}
|
|
||||||
|
|
||||||
-// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
|
||||||
-// corresponding to the unsafePath resolved within the root. Before passing the
|
|
||||||
-// fd, this path is verified to have been inside the root -- so operating on it
|
|
||||||
-// through the passed fdpath should be safe. Do not access this path through
|
|
||||||
-// the original path strings, and do not attempt to use the pathname outside of
|
|
||||||
-// the passed closure (the file handle will be freed once the closure returns).
|
|
||||||
-func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
|
||||||
- // Remove the root then forcefully resolve inside the root.
|
|
||||||
- unsafePath = stripRoot(root, unsafePath)
|
|
||||||
- path, err := securejoin.SecureJoin(root, unsafePath)
|
|
||||||
- if err != nil {
|
|
||||||
- return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- // Open the target path.
|
|
||||||
- fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
|
||||||
- if err != nil {
|
|
||||||
- return fmt.Errorf("open o_path procfd: %w", err)
|
|
||||||
- }
|
|
||||||
- defer fh.Close()
|
|
||||||
-
|
|
||||||
- // Double-check the path is the one we expected.
|
|
||||||
- procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
|
||||||
- if realpath, err := os.Readlink(procfd); err != nil {
|
|
||||||
- return fmt.Errorf("procfd verification failed: %w", err)
|
|
||||||
- } else if realpath != path {
|
|
||||||
- return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- // Run the closure.
|
|
||||||
- return fn(procfd)
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
// SearchLabels searches a list of key-value pairs for the provided key and
|
|
||||||
// returns the corresponding value. The pairs must be separated with '='.
|
|
||||||
func SearchLabels(labels []string, query string) string {
|
|
||||||
diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go
|
|
||||||
index 220d0b43..f57f0874 100644
|
|
||||||
--- a/libcontainer/utils/utils_unix.go
|
|
||||||
+++ b/libcontainer/utils/utils_unix.go
|
|
||||||
@@ -5,9 +5,16 @@ package utils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
+ "math"
|
|
||||||
"os"
|
|
||||||
+ "path/filepath"
|
|
||||||
+ "runtime"
|
|
||||||
"strconv"
|
|
||||||
+ "sync"
|
|
||||||
+ _ "unsafe" // for go:linkname
|
|
||||||
|
|
||||||
+ securejoin "github.com/cyphar/filepath-securejoin"
|
|
||||||
+ "github.com/sirupsen/logrus"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
@@ -23,10 +30,39 @@ func EnsureProcHandle(fh *os.File) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
-// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
|
|
||||||
-// the process (except for those below the given fd value).
|
|
||||||
-func CloseExecFrom(minFd int) error {
|
|
||||||
- fdDir, err := os.Open("/proc/self/fd")
|
|
||||||
+var (
|
|
||||||
+ haveCloseRangeCloexecBool bool
|
|
||||||
+ haveCloseRangeCloexecOnce sync.Once
|
|
||||||
+)
|
|
||||||
+
|
|
||||||
+func haveCloseRangeCloexec() bool {
|
|
||||||
+ haveCloseRangeCloexecOnce.Do(func() {
|
|
||||||
+ // Make sure we're not closing a random file descriptor.
|
|
||||||
+ tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return
|
|
||||||
+ }
|
|
||||||
+ defer unix.Close(tmpFd)
|
|
||||||
+
|
|
||||||
+ err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
|
|
||||||
+ // Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
|
|
||||||
+ // -ENOSYS and -EINVAL ultimately mean we don't have support, but any
|
|
||||||
+ // other potential error would imply that even the most basic close
|
|
||||||
+ // operation wouldn't work.
|
|
||||||
+ haveCloseRangeCloexecBool = err == nil
|
|
||||||
+ })
|
|
||||||
+ return haveCloseRangeCloexecBool
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+type fdFunc func(fd int)
|
|
||||||
+
|
|
||||||
+// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
|
|
||||||
+// the current process.
|
|
||||||
+func fdRangeFrom(minFd int, fn fdFunc) error {
|
|
||||||
+ procSelfFd, closer := ProcThreadSelf("fd")
|
|
||||||
+ defer closer()
|
|
||||||
+
|
|
||||||
+ fdDir, err := os.Open(procSelfFd)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
@@ -50,20 +86,178 @@ func CloseExecFrom(minFd int) error {
|
|
||||||
if fd < minFd {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
- // Intentionally ignore errors from unix.CloseOnExec -- the cases where
|
|
||||||
- // this might fail are basically file descriptors that have already
|
|
||||||
- // been closed (including and especially the one that was created when
|
|
||||||
- // os.ReadDir did the "opendir" syscall).
|
|
||||||
- unix.CloseOnExec(fd)
|
|
||||||
+ // Ignore the file descriptor we used for readdir, as it will be closed
|
|
||||||
+ // when we return.
|
|
||||||
+ if uintptr(fd) == fdDir.Fd() {
|
|
||||||
+ continue
|
|
||||||
+ }
|
|
||||||
+ // Run the closure.
|
|
||||||
+ fn(fd)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
-// NewSockPair returns a new unix socket pair
|
|
||||||
-func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
|
||||||
+// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
|
|
||||||
+// equal to minFd in the current process.
|
|
||||||
+func CloseExecFrom(minFd int) error {
|
|
||||||
+ // Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
|
|
||||||
+ if haveCloseRangeCloexec() {
|
|
||||||
+ err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
|
|
||||||
+ return os.NewSyscallError("close_range", err)
|
|
||||||
+ }
|
|
||||||
+ // Otherwise, fall back to the standard loop.
|
|
||||||
+ return fdRangeFrom(minFd, unix.CloseOnExec)
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
|
|
||||||
+
|
|
||||||
+// In order to make sure we do not close the internal epoll descriptors the Go
|
|
||||||
+// runtime uses, we need to ensure that we skip descriptors that match
|
|
||||||
+// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
|
|
||||||
+// unfortunately there's no other way to be sure we're only keeping the file
|
|
||||||
+// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
|
|
||||||
+func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
|
|
||||||
+
|
|
||||||
+// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
|
|
||||||
+// current process, except for those critical to Go's runtime (such as the
|
|
||||||
+// netpoll management descriptors).
|
|
||||||
+//
|
|
||||||
+// NOTE: That this function is incredibly dangerous to use in most Go code, as
|
|
||||||
+// closing file descriptors from underneath *os.File handles can lead to very
|
|
||||||
+// bad behaviour (the closed file descriptor can be re-used and then any
|
|
||||||
+// *os.File operations would apply to the wrong file). This function is only
|
|
||||||
+// intended to be called from the last stage of runc init.
|
|
||||||
+func UnsafeCloseFrom(minFd int) error {
|
|
||||||
+ // We cannot use close_range(2) even if it is available, because we must
|
|
||||||
+ // not close some file descriptors.
|
|
||||||
+ return fdRangeFrom(minFd, func(fd int) {
|
|
||||||
+ if runtime_IsPollDescriptor(uintptr(fd)) {
|
|
||||||
+ // These are the Go runtimes internal netpoll file descriptors.
|
|
||||||
+ // These file descriptors are operated on deep in the Go scheduler,
|
|
||||||
+ // and closing those files from underneath Go can result in panics.
|
|
||||||
+ // There is no issue with keeping them because they are not
|
|
||||||
+ // executable and are not useful to an attacker anyway. Also we
|
|
||||||
+ // don't have any choice.
|
|
||||||
+ return
|
|
||||||
+ }
|
|
||||||
+ // There's nothing we can do about errors from close(2), and the
|
|
||||||
+ // only likely error to be seen is EBADF which indicates the fd was
|
|
||||||
+ // already closed (in which case, we got what we wanted).
|
|
||||||
+ _ = unix.Close(fd)
|
|
||||||
+ })
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+// NewSockPair returns a new SOCK_STREAM unix socket pair.
|
|
||||||
+func NewSockPair(name string) (parent, child *os.File, err error) {
|
|
||||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
|
||||||
+// corresponding to the unsafePath resolved within the root. Before passing the
|
|
||||||
+// fd, this path is verified to have been inside the root -- so operating on it
|
|
||||||
+// through the passed fdpath should be safe. Do not access this path through
|
|
||||||
+// the original path strings, and do not attempt to use the pathname outside of
|
|
||||||
+// the passed closure (the file handle will be freed once the closure returns).
|
|
||||||
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
|
||||||
+ // Remove the root then forcefully resolve inside the root.
|
|
||||||
+ unsafePath = stripRoot(root, unsafePath)
|
|
||||||
+ path, err := securejoin.SecureJoin(root, unsafePath)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ procSelfFd, closer := ProcThreadSelf("fd/")
|
|
||||||
+ defer closer()
|
|
||||||
+
|
|
||||||
+ // Open the target path.
|
|
||||||
+ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return fmt.Errorf("open o_path procfd: %w", err)
|
|
||||||
+ }
|
|
||||||
+ defer fh.Close()
|
|
||||||
+
|
|
||||||
+ procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
|
|
||||||
+ // Double-check the path is the one we expected.
|
|
||||||
+ if realpath, err := os.Readlink(procfd); err != nil {
|
|
||||||
+ return fmt.Errorf("procfd verification failed: %w", err)
|
|
||||||
+ } else if realpath != path {
|
|
||||||
+ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return fn(procfd)
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+type ProcThreadSelfCloser func()
|
|
||||||
+
|
|
||||||
+var (
|
|
||||||
+ haveProcThreadSelf bool
|
|
||||||
+ haveProcThreadSelfOnce sync.Once
|
|
||||||
+)
|
|
||||||
+
|
|
||||||
+// ProcThreadSelf returns a string that is equivalent to
|
|
||||||
+// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
|
|
||||||
+// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
|
|
||||||
+// meaning that the passed string needs to be trusted. The caller _must_ call
|
|
||||||
+// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread)
|
|
||||||
+// *only once* after it has finished using the returned path string.
|
|
||||||
+func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
|
|
||||||
+ haveProcThreadSelfOnce.Do(func() {
|
|
||||||
+ if _, err := os.Stat("/proc/thread-self/"); err == nil {
|
|
||||||
+ haveProcThreadSelf = true
|
|
||||||
+ } else {
|
|
||||||
+ logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
|
|
||||||
+ }
|
|
||||||
+ })
|
|
||||||
+
|
|
||||||
+ // We need to lock our thread until the caller is done with the path string
|
|
||||||
+ // because any non-atomic operation on the path (such as opening a file,
|
|
||||||
+ // then reading it) could be interrupted by the Go runtime where the
|
|
||||||
+ // underlying thread is swapped out and the original thread is killed,
|
|
||||||
+ // resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
|
|
||||||
+ // addition, the pre-3.17 fallback makes everything non-atomic because the
|
|
||||||
+ // same thing could happen between unix.Gettid() and the path operations.
|
|
||||||
+ //
|
|
||||||
+ // In theory, we don't need to lock in the atomic user case when using
|
|
||||||
+ // /proc/thread-self/, but it's better to be safe than sorry (and there are
|
|
||||||
+ // only one or two truly atomic users of /proc/thread-self/).
|
|
||||||
+ runtime.LockOSThread()
|
|
||||||
+
|
|
||||||
+ threadSelf := "/proc/thread-self/"
|
|
||||||
+ if !haveProcThreadSelf {
|
|
||||||
+ // Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
|
|
||||||
+ threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
|
|
||||||
+ if _, err := os.Stat(threadSelf); err != nil {
|
|
||||||
+ // Unfortunately, this code is called from rootfs_linux.go where we
|
|
||||||
+ // are running inside the pid namespace of the container but /proc
|
|
||||||
+ // is the host's procfs. Unfortunately there is no real way to get
|
|
||||||
+ // the correct tid to use here (the kernel age means we cannot do
|
|
||||||
+ // things like set up a private fsopen("proc") -- even scanning
|
|
||||||
+ // NSpid in all of the tasks in /proc/self/task/*/status requires
|
|
||||||
+ // Linux 4.1).
|
|
||||||
+ //
|
|
||||||
+ // So, we just have to assume that /proc/self is acceptable in this
|
|
||||||
+ // one specific case.
|
|
||||||
+ if os.Getpid() == 1 {
|
|
||||||
+ logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
|
|
||||||
+ } else {
|
|
||||||
+ // This should never happen, but the fallback should work in most cases...
|
|
||||||
+ logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
|
|
||||||
+ }
|
|
||||||
+ threadSelf = "/proc/self/"
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return threadSelf + subpath, runtime.UnlockOSThread
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to
|
|
||||||
+// create a /proc/thread-self handle for given file descriptor.
|
|
||||||
+//
|
|
||||||
+// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
|
|
||||||
+// without using fmt.Sprintf to avoid unneeded overhead.
|
|
||||||
+func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
|
|
||||||
+ return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,36 +0,0 @@
|
|||||||
From 032f0a78d6f4ba49a48ca1ae3d53e6dc8726ff1f Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongjiawei <zhongjiawei1@huawei.com>
|
|
||||||
Date: Tue, 6 Feb 2024 11:20:48 +0800
|
|
||||||
Subject: [PATCH] runc:check cmd exist
|
|
||||||
|
|
||||||
---
|
|
||||||
libcontainer/setns_init_linux.go | 6 ++++++
|
|
||||||
1 file changed, 6 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go
|
|
||||||
index 1eea851..f1dcab6 100644
|
|
||||||
--- a/libcontainer/setns_init_linux.go
|
|
||||||
+++ b/libcontainer/setns_init_linux.go
|
|
||||||
@@ -4,6 +4,7 @@ import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
+ "os/exec"
|
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/opencontainers/selinux/go-selinux"
|
|
||||||
@@ -86,6 +87,11 @@ func (l *linuxSetnsInit) Init() error {
|
|
||||||
if err := finalizeNamespace(l.config); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
+ // Check for the arg early to make sure it exists.
|
|
||||||
+ _, err := exec.LookPath(l.config.Args[0])
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
From eefc6ae2544a6819da9f92c5aa8e65d356da4c96 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
|
||||||
Date: Sat, 9 Mar 2024 21:30:56 +0900
|
|
||||||
Subject: [PATCH] features: implement returning
|
|
||||||
potentiallyUnsafeConfigAnnotations list
|
|
||||||
|
|
||||||
See https://github.com/opencontainers/runtime-spec/blob/v1.2.0/features.md#unsafe-annotations-in-configjson
|
|
||||||
|
|
||||||
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
|
||||||
---
|
|
||||||
features.go | 5 +++++
|
|
||||||
types/features/features.go | 6 ++++++
|
|
||||||
2 files changed, 11 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/features.go b/features.go
|
|
||||||
index c9cd15c..7f76e7a 100644
|
|
||||||
--- a/features.go
|
|
||||||
+++ b/features.go
|
|
||||||
@@ -55,6 +55,11 @@ var featuresCommand = cli.Command{
|
|
||||||
Enabled: &tru,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
+ PotentiallyUnsafeConfigAnnotations: []string{
|
|
||||||
+ "bundle",
|
|
||||||
+ "org.systemd.property.", // prefix form
|
|
||||||
+ "org.criu.config",
|
|
||||||
+ },
|
|
||||||
}
|
|
||||||
|
|
||||||
if seccomp.Enabled {
|
|
||||||
diff --git a/types/features/features.go b/types/features/features.go
|
|
||||||
index c6269ca..8b467f7 100644
|
|
||||||
--- a/types/features/features.go
|
|
||||||
+++ b/types/features/features.go
|
|
||||||
@@ -25,6 +25,12 @@ type Features struct {
|
|
||||||
// Annotations contains implementation-specific annotation strings,
|
|
||||||
// such as the implementation version, and third-party extensions.
|
|
||||||
Annotations map[string]string `json:"annotations,omitempty"`
|
|
||||||
+
|
|
||||||
+ // PotentiallyUnsafeConfigAnnotations is the list of potentially unsafe annotations
|
|
||||||
+ // that may appear in `config.json`.
|
|
||||||
+ //
|
|
||||||
+ // A value that ends with "." is interpreted as a prefix of annotations.
|
|
||||||
+ PotentiallyUnsafeConfigAnnotations []string `json:"potentiallyUnsafeConfigAnnotations,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Linux is specific to Linux.
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
34
runc.spec
34
runc.spec
@ -1,9 +1,9 @@
|
|||||||
%global _bindir /usr/bin
|
%global _bindir /usr/bin
|
||||||
%global debug_package %{nil}
|
%global debug_package %{nil}
|
||||||
|
|
||||||
Name: runc
|
Name: docker-runc
|
||||||
Version: 1.1.3
|
Version: 1.1.3
|
||||||
Release: 27
|
Release: 22
|
||||||
Summary: runc is a CLI tool for spawning and running containers according to the OCI specification.
|
Summary: runc is a CLI tool for spawning and running containers according to the OCI specification.
|
||||||
|
|
||||||
License: ASL 2.0
|
License: ASL 2.0
|
||||||
@ -54,36 +54,6 @@ install -p -m 755 runc $RPM_BUILD_ROOT/%{_bindir}/runc
|
|||||||
%{_bindir}/runc
|
%{_bindir}/runc
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Wed Jun 19 2024 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-27
|
|
||||||
- Type:bugfix
|
|
||||||
- CVE:NA
|
|
||||||
- SUG:NA
|
|
||||||
- DESC:Set temporary single CPU affinity before cgroup cpuset transition
|
|
||||||
|
|
||||||
* Thu Jun 13 2024 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-26
|
|
||||||
- Type:bugfix
|
|
||||||
- CVE:NA
|
|
||||||
- SUG:NA
|
|
||||||
- DESC:modify runc rpm package name from docker-runc to runc
|
|
||||||
|
|
||||||
* Fri May 24 2024 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-25
|
|
||||||
- Type:CVE
|
|
||||||
- CVE:CVE-2024-3154
|
|
||||||
- SUG:NA
|
|
||||||
- DESC:fix CVE-2024-3154
|
|
||||||
|
|
||||||
* Tue Feb 06 2024 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-24
|
|
||||||
- Type:bugfix
|
|
||||||
- CVE:NA
|
|
||||||
- SUG:NA
|
|
||||||
- DESC:check cmd exist
|
|
||||||
|
|
||||||
* Thu Feb 01 2024 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-23
|
|
||||||
- Type:CVE
|
|
||||||
- CVE:CVE-2024-21626
|
|
||||||
- SUG:NA
|
|
||||||
- DESC:fix CVE-2024-21626
|
|
||||||
|
|
||||||
* Thu Dec 21 2023 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-22
|
* Thu Dec 21 2023 zhongjiawei<zhongjiawei1@huawei.com> - 1.1.3-22
|
||||||
- Type:bugfix
|
- Type:bugfix
|
||||||
- CVE:NA
|
- CVE:NA
|
||||||
|
|||||||
@ -48,7 +48,3 @@ patch/0047-runc-Fix-tmpfs-mode-opts-when-dir-already-exists.patch
|
|||||||
patch/0048-runc-Fix-undefined-behavior.patch
|
patch/0048-runc-Fix-undefined-behavior.patch
|
||||||
patch/0049-runc-nsexec-Check-for-errors-in-write_log.patch
|
patch/0049-runc-nsexec-Check-for-errors-in-write_log.patch
|
||||||
patch/0050-runc-increase-the-number-of-cgroup-deletion-retries.patch
|
patch/0050-runc-increase-the-number-of-cgroup-deletion-retries.patch
|
||||||
patch/0051-runc-fix-CVE-2024-21626.patch
|
|
||||||
patch/0052-runc-check-cmd-exist.patch
|
|
||||||
patch/0053-runc-fix-CVE-2024-3154.patch
|
|
||||||
patch/0054-runc-Set-temporary-single-CPU-affinity-before-cgroup-cpus.patch
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user