kata-containers/runtime/patches/0055-kata_runtime-support-host-cgroup-with-emulator-polic.patch
holyfei c709612f2a kata-containers: modify kata-containers version
Fix #I4KI81
reason: modify kata-containers version and update
it to 1.11.1

Signed-off-by: holyfei <yangfeiyu20092010@163.com>
2021-11-30 20:08:25 +08:00

429 lines
12 KiB
Diff

From ce7523dfe1bb60cf54254e16a103fd3fc9503618 Mon Sep 17 00:00:00 2001
From: yangfeiyu <yangfeiyu2@huawei.com>
Date: Thu, 17 Sep 2020 10:38:38 +0800
Subject: [PATCH 3/5] kata_runtime: support host cgroup with emulator policy
reason: support the host cgroup with the emulator policy when
sandbox_cgroup_with_emulator is set to true
Signed-off-by: yangfeiyu <yangfeiyu2@huawei.com>
---
cli/create.go | 38 ++++++++++++
virtcontainers/api.go | 10 ++-
virtcontainers/cgroups.go | 132 ++++++++++++++++++++++++++++++++++------
virtcontainers/persist/fs/fs.go | 8 +++
virtcontainers/pkg/oci/utils.go | 14 +++++
virtcontainers/sandbox.go | 70 ++++++++++++++++++++-
6 files changed, 250 insertions(+), 22 deletions(-)
diff --git a/cli/create.go b/cli/create.go
index 02cb2c5..b14434b 100644
--- a/cli/create.go
+++ b/cli/create.go
@@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"os"
+ "path/filepath"
"github.com/kata-containers/runtime/pkg/katautils"
vc "github.com/kata-containers/runtime/virtcontainers"
@@ -134,11 +135,48 @@ func create(ctx context.Context, containerID, bundlePath, console, pidFilePath s
var process vc.Process
switch containerType {
case vc.PodSandbox:
+ if runtimeConfig.SandboxCgroupWithEmulator {
+ // create the sandbox level cgroup
+ cgroupPath := ociSpec.Linux.CgroupsPath
+ if err = vci.CreateSandboxCgroup(ctx, cgroupPath); err != nil {
+ return err
+ }
+
+ defer func() {
+ if err != nil {
+ _ = vci.DestroySandboxCgroup(ctx, cgroupPath)
+ }
+ }()
+
+ // add the kata-runtime create process to the <path>/vcpu cgroup
+ vcpuCgroupPath := filepath.Join(cgroupPath, "vcpu")
+ if err = vci.AddPidToSandboxCgroup(ctx, os.Getpid(), vcpuCgroupPath); err != nil {
+ return err
+ }
+ }
+
_, process, err = katautils.CreateSandbox(ctx, vci, ociSpec, runtimeConfig, rootFs, containerID, bundlePath, console, disableOutput, systemdCgroup, false)
if err != nil {
return err
}
case vc.PodContainer:
+ if runtimeConfig.SandboxCgroupWithEmulator {
+ sandboxID, err := oci.GetSandboxIDFromAnnotations(&ociSpec)
+ if err != nil {
+ return fmt.Errorf("container annotation doesn't contain sandboxID")
+ }
+
+ sandboxCgroupPath, err := vci.GetSandboxCgroupPath(ctx, sandboxID)
+ if err != nil {
+ return err
+ }
+
+ // add the kata-runtime create process to the <path>/vcpu cgroup
+ vcpuCgroupPath := filepath.Join(sandboxCgroupPath, "vcpu")
+ if err = vci.AddPidToSandboxCgroup(ctx, os.Getpid(), vcpuCgroupPath); err != nil {
+ return err
+ }
+ }
process, err = katautils.CreateContainer(ctx, vci, nil, ociSpec, rootFs, containerID, bundlePath, console, disableOutput, false)
if err != nil {
return err
diff --git a/virtcontainers/api.go b/virtcontainers/api.go
index 08bcbb5..38c8235 100644
--- a/virtcontainers/api.go
+++ b/virtcontainers/api.go
@@ -103,9 +103,7 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
}()
// Move runtime to sandbox cgroup so all process are created there.
- if s.config.SandboxCgroupWithEmulator{
- // emulator
- } else if s.config.SandboxCgroupOnly {
+ if !s.config.SandboxCgroupWithEmulator && s.config.SandboxCgroupOnly {
if err := s.setupSandboxCgroup(); err != nil {
return nil, err
}
@@ -129,6 +127,12 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
return nil, err
}
+ if s.config.SandboxCgroupWithEmulator {
+ if err := s.setupHostCgroupsWithEmulator(); err != nil {
+ return nil, err
+ }
+ }
+
// Create Containers
if err = s.createContainers(); err != nil {
return nil, err
diff --git a/virtcontainers/cgroups.go b/virtcontainers/cgroups.go
index df0ec30..65d2001 100644
--- a/virtcontainers/cgroups.go
+++ b/virtcontainers/cgroups.go
@@ -9,19 +9,15 @@ package virtcontainers
import (
"bufio"
"context"
- "encoding/json"
"fmt"
- "io/ioutil"
"os"
"path/filepath"
+ "strconv"
"strings"
"github.com/containerd/cgroups"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
-
- "github.com/kata-containers/runtime/virtcontainers/store"
- "github.com/kata-containers/runtime/virtcontainers/types"
)
type cgroupPather interface {
@@ -32,7 +28,11 @@ type cgroupPather interface {
// unconstrained cgroups are placed here.
// for example /sys/fs/cgroup/memory/kata/$CGPATH
// where path is defined by the containers manager
-const cgroupKataPath = "/kata/"
+const (
+ cgroupKataPath = "/kata/"
+ vcpuCgroupName = "vcpu"
+ emulatorCgroupName = "emulator"
+)
var cgroupsLoadFunc = cgroups.Load
var cgroupsNewFunc = cgroups.New
@@ -105,24 +105,16 @@ func deleteCgroup(hierarchy cgroups.Hierarchy, cgroupPath string) error {
// GetSandboxCgroupPath return the cgroup path of specified sandbox
func GetSandboxCgroupPath(ctx context.Context, sandboxID string) (string, error) {
- stateFilePath := filepath.Join(store.RunStoragePath(), sandboxID, store.StateFile)
-
- fileData, err := ioutil.ReadFile(stateFilePath)
+ config, err := loadSandboxConfig(sandboxID)
if err != nil {
return "", err
}
- state := types.SandboxState{}
-
- if err := json.Unmarshal(fileData, &state); err != nil {
- return "", err
- }
-
- if state.CgroupPath == "" {
- return "", fmt.Errorf("get sandbox cgroup path error: cgroupPath is empty")
+ if config.Cgroups == nil {
+ return "", fmt.Errorf("the cgroups of sandbox %s is nil", sandboxID)
}
- return state.CgroupPath, nil
+ return config.Cgroups.Path, nil
}
// AddPidToSandboxCgroup add kata-runtime create process to cgroup
@@ -276,3 +268,107 @@ func validCPUResources(cpuSpec *specs.LinuxCPU) *specs.LinuxCPU {
return &cpu
}
+
+// getQemuTaskWithoutVcpu lists the thread IDs under /proc/{qemu pid}/task and returns those that are not VCPU threads.
+// The VCPU thread IDs are obtained from the hypervisor (the "query-cpus" qmp command); nil is returned on any error.
+func getQemuTaskWithoutVcpu(sandbox *Sandbox, vmPid int) []int {
+ procPath := fmt.Sprintf("/proc/%d/task", vmPid)
+
+ dirReader, err := os.Open(procPath)
+ if err != nil {
+ logrus.Warningf("cannot open %s: %s", procPath, err)
+ return nil
+ }
+
+ defer dirReader.Close()
+
+ dirs, err := dirReader.Readdirnames(0)
+ if err != nil {
+ logrus.Warningf("walking dirs in %s failed: %s", procPath, err)
+ return nil
+ }
+
+ vcpuThreadInfo, err := sandbox.hypervisor.getThreadIDs()
+ if err != nil {
+ logrus.Warnf("get hypervisor Thread ID failed: %v", err)
+ return nil
+ }
+
+ var vcpuThreadIDs []int
+ for _, value := range vcpuThreadInfo.vcpus {
+ vcpuThreadIDs = append(vcpuThreadIDs, value)
+ }
+
+ var allThreadIDs []int
+ for _, dir := range dirs {
+ p, err := strconv.Atoi(dir)
+ if err != nil {
+ logrus.Warnf("can not change string dir: %s to int type", dir)
+ return nil
+ }
+
+ allThreadIDs = append(allThreadIDs, p)
+ }
+
+ nonVCPUThreads := diffSlice(allThreadIDs, vcpuThreadIDs)
+
+ return nonVCPUThreads
+}
+
+func pulloutQemuThread(sandbox *Sandbox, vmPid int, path string) error {
+ control, err := cgroups.New(cgroups.SingleSubsystem(cgroups.V1, cgroups.Cpu),
+ cgroups.StaticPath(path),
+ &specs.LinuxResources{})
+ if err != nil {
+ return err
+ }
+ taskIds := getQemuTaskWithoutVcpu(sandbox, vmPid)
+ if len(taskIds) == 0 {
+ logrus.Warnf("no taskId id in qemu other than vcpu found of pid %d", vmPid)
+ return nil
+ }
+ for _, taskId := range taskIds {
+ if err := control.AddTask(cgroups.Process{
+ Pid: taskId,
+ }); err != nil {
+ logrus.Errorf("failed to add task %d to cgroup of %s", taskId, path)
+ return err
+ }
+ }
+
+ return nil
+}
+
+// checkCgroupExist reports whether path exists under every subsystem of hierarchy (only "not exist" errors count as missing)
+func checkCgroupExist(hierarchy cgroups.Hierarchy, path string) bool {
+ subSystems, _ := hierarchy()
+ for _, s := range cgroupPathers(subSystems) {
+ if _, err := os.Lstat(s.Path(path)); err != nil {
+ if os.IsNotExist(err) {
+ return false
+ }
+ }
+ }
+
+ return true
+}
+
+// diffSlice returns the elements of s1 that are not in s2, preserving their order in s1
+func diffSlice(s1, s2 []int) []int {
+ var diffSlice []int
+ for _, p := range s1 {
+ if !isInSlice(p, s2) {
+ diffSlice = append(diffSlice, p)
+ }
+ }
+ return diffSlice
+}
+
+func isInSlice(i int, s []int) bool {
+ for _, v := range s {
+ if i == v {
+ return true
+ }
+ }
+ return false
+}
diff --git a/virtcontainers/persist/fs/fs.go b/virtcontainers/persist/fs/fs.go
index 38efdba..641d64e 100644
--- a/virtcontainers/persist/fs/fs.go
+++ b/virtcontainers/persist/fs/fs.go
@@ -14,6 +14,8 @@ import (
"path/filepath"
"syscall"
+ "github.com/opencontainers/runc/libcontainer/configs"
+
persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
"github.com/sirupsen/logrus"
)
@@ -78,6 +80,12 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai
return fmt.Errorf("sandbox container id required")
}
+ if ss.Config.Cgroups == nil {
+ ss.Config.Cgroups = &configs.Cgroup{
+ Path: ss.CgroupPath,
+ }
+ }
+
fs.sandboxState = &ss
fs.containerState = cs
diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go
index 91067fb..e8ef41b 100644
--- a/virtcontainers/pkg/oci/utils.go
+++ b/virtcontainers/pkg/oci/utils.go
@@ -1136,3 +1136,17 @@ func validateSandboxDNS(value string) error {
return nil
}
+
+func GetSandboxIDFromAnnotations(s *specs.Spec) (string, error) {
+ if s == nil {
+ return "", fmt.Errorf("spec is nil")
+ }
+
+ for _, v := range CRISandboxNameKeyList {
+ if sandboxID, ok := s.Annotations[v]; ok {
+ return sandboxID, nil
+ }
+ }
+
+ return "", fmt.Errorf("failed to find the sandbox ID")
+}
diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go
index b479cf5..ca4e700 100644
--- a/virtcontainers/sandbox.go
+++ b/virtcontainers/sandbox.go
@@ -2162,7 +2162,9 @@ func (s *Sandbox) cgroupsDelete() error {
var cgroupSubsystems cgroups.Hierarchy
if s.config.SandboxCgroupWithEmulator {
- // emulator
+ if err := deleteCgroup(cgroups.V1, s.state.CgroupPath); err != nil {
+ return err
+ }
} else if s.config.SandboxCgroupOnly {
return s.cgroupMgr.Destroy()
}
@@ -2381,6 +2383,68 @@ func (s *Sandbox) setupSandboxCgroup() error {
return nil
}
+func (s *Sandbox) setupHostCgroupsWithEmulator() error {
+ if len(s.config.Containers) == 0 {
+ return nil
+ }
+
+ sandboxContainerSpec := s.GetPatchedOCISpec()
+ if sandboxContainerSpec == nil {
+ return fmt.Errorf("sandbox container should not be empty")
+ }
+
+ // Set sandbox's cgroup path
+ s.state.CgroupPath = sandboxContainerSpec.Linux.CgroupsPath
+
+ if !checkCgroupExist(cgroups.V1, s.state.CgroupPath) {
+ return fmt.Errorf("sandbox's cgroup %s doesn't exist", s.state.CgroupPath)
+ }
+
+ // move the qemu threads other than the vcpu threads into the "<path>/emulator" cgroup
+ if s.config.HypervisorType == QemuHypervisor {
+ emulatorCgroupPath := filepath.Join(s.state.CgroupPath, emulatorCgroupName)
+ hypervisorPids := s.hypervisor.getPids()
+ if len(hypervisorPids) == 0 || hypervisorPids[0] == 0 {
+ return fmt.Errorf("hypervisor pid: %v invalid", hypervisorPids)
+ }
+ if err := pulloutQemuThread(s, hypervisorPids[0], emulatorCgroupPath); err != nil {
+ return err
+ }
+ }
+
+ // apply the sandbox cpu limits to the "<path>/vcpu" cgroup
+ vcpuCgroupPath := filepath.Join(s.state.CgroupPath, vcpuCgroupName)
+ vcpuResources := specs.LinuxResources{
+ CPU: s.cpuResources(),
+ }
+ if err := applyResourceLimit(&vcpuResources, vcpuCgroupPath); err != nil {
+ return err
+ }
+
+ // TODO: limit blkio resource to "<path>" (not implemented yet)
+
+ // TODO: limit files resource (not implemented yet)
+
+ return nil
+}
+
+func applyResourceLimit(resources *specs.LinuxResources, cgroupPath string) error {
+ if resources == nil {
+ return nil
+ }
+
+ control, err := cgroupsLoadFunc(cgroups.V1, cgroups.StaticPath(cgroupPath))
+ if err != nil {
+ return fmt.Errorf("could not load cgroup %v: %v", cgroupPath, err)
+ }
+
+ if err = control.Update(resources); err != nil {
+ return fmt.Errorf("could not update cgroup %v: %v", cgroupPath, err)
+ }
+
+ return nil
+}
+
// GetPatchedOCISpec returns sandbox's OCI specification
// This OCI specification was patched when the sandbox was created
// by containerCapabilities(), SetEphemeralStorageType() and others
@@ -2452,6 +2516,10 @@ func (s *Sandbox) forceDeleteSandbox() {
c.forceDeleteContainer()
}
+ if err := deleteCgroup(cgroups.V1, s.state.CgroupPath); err != nil {
+ s.Logger().Warnf("sandbox forceDelete cgroups failed: %v", err)
+ }
+
globalSandboxList.removeSandbox(s.id)
if s.monitor != nil {
--
1.8.3.1