Fix #I4KI81 reason: modify kata-containers version and update it to 1.11.1 Signed-off-by: holyfei <yangfeiyu20092010@163.com>
429 lines
12 KiB
Diff
429 lines
12 KiB
Diff
From ce7523dfe1bb60cf54254e16a103fd3fc9503618 Mon Sep 17 00:00:00 2001
|
|
From: yangfeiyu <yangfeiyu2@huawei.com>
|
|
Date: Thu, 17 Sep 2020 10:38:38 +0800
|
|
Subject: [PATCH 3/5] kata_runtime: support host cgroup with emulator policy
|
|
|
|
reason: support host cgroup with emulator policy when
|
|
sandbox_cgroup_with_emulator is set to true
|
|
|
|
Signed-off-by: yangfeiyu <yangfeiyu2@huawei.com>
|
|
---
|
|
cli/create.go | 38 ++++++++++++
|
|
virtcontainers/api.go | 10 ++-
|
|
virtcontainers/cgroups.go | 132 ++++++++++++++++++++++++++++++++++------
|
|
virtcontainers/persist/fs/fs.go | 8 +++
|
|
virtcontainers/pkg/oci/utils.go | 14 +++++
|
|
virtcontainers/sandbox.go | 70 ++++++++++++++++++++-
|
|
6 files changed, 250 insertions(+), 22 deletions(-)
|
|
|
|
diff --git a/cli/create.go b/cli/create.go
|
|
index 02cb2c5..b14434b 100644
|
|
--- a/cli/create.go
|
|
+++ b/cli/create.go
|
|
@@ -11,6 +11,7 @@ import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
+ "path/filepath"
|
|
|
|
"github.com/kata-containers/runtime/pkg/katautils"
|
|
vc "github.com/kata-containers/runtime/virtcontainers"
|
|
@@ -134,11 +135,48 @@ func create(ctx context.Context, containerID, bundlePath, console, pidFilePath s
|
|
var process vc.Process
|
|
switch containerType {
|
|
case vc.PodSandbox:
|
|
+ if runtimeConfig.SandboxCgroupWithEmulator {
|
|
+ // create the sandbox level cgroup
|
|
+ cgroupPath := ociSpec.Linux.CgroupsPath
|
|
+ if err = vci.CreateSandboxCgroup(ctx, cgroupPath); err != nil {
|
|
+ return err
|
|
+ }
|
|
+
|
|
+ defer func() {
|
|
+ if err != nil {
|
|
+ _ = vci.DestroySandboxCgroup(ctx, cgroupPath)
|
|
+ }
|
|
+ }()
|
|
+
|
|
+ // add kata-runtime create process into <path>/vcpu cgroup
|
|
+ vcpuCgroupPath := filepath.Join(cgroupPath, "vcpu")
|
|
+ if err = vci.AddPidToSandboxCgroup(ctx, os.Getpid(), vcpuCgroupPath); err != nil {
|
|
+ return err
|
|
+ }
|
|
+ }
|
|
+
|
|
_, process, err = katautils.CreateSandbox(ctx, vci, ociSpec, runtimeConfig, rootFs, containerID, bundlePath, console, disableOutput, systemdCgroup, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case vc.PodContainer:
|
|
+ if runtimeConfig.SandboxCgroupWithEmulator {
|
|
+ sandboxID, err := oci.GetSandboxIDFromAnnotations(&ociSpec)
|
|
+ if err != nil {
|
|
+ return fmt.Errorf("container annotation doesn't contain sandboxID")
|
|
+ }
|
|
+
|
|
+ sandboxCgroupPath, err := vci.GetSandboxCgroupPath(ctx, sandboxID)
|
|
+ if err != nil {
|
|
+ return err
|
|
+ }
|
|
+
|
|
+ // add kata-runtime create process into <path>/vcpu cgroup
|
|
+ vcpuCgroupPath := filepath.Join(sandboxCgroupPath, "vcpu")
|
|
+ if err = vci.AddPidToSandboxCgroup(ctx, os.Getpid(), vcpuCgroupPath); err != nil {
|
|
+ return err
|
|
+ }
|
|
+ }
|
|
process, err = katautils.CreateContainer(ctx, vci, nil, ociSpec, rootFs, containerID, bundlePath, console, disableOutput, false)
|
|
if err != nil {
|
|
return err
|
|
diff --git a/virtcontainers/api.go b/virtcontainers/api.go
|
|
index 08bcbb5..38c8235 100644
|
|
--- a/virtcontainers/api.go
|
|
+++ b/virtcontainers/api.go
|
|
@@ -103,9 +103,7 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
|
|
}()
|
|
|
|
// Move runtime to sandbox cgroup so all process are created there.
|
|
- if s.config.SandboxCgroupWithEmulator{
|
|
- // emulator
|
|
- } else if s.config.SandboxCgroupOnly {
|
|
+ if !s.config.SandboxCgroupWithEmulator && s.config.SandboxCgroupOnly {
|
|
if err := s.setupSandboxCgroup(); err != nil {
|
|
return nil, err
|
|
}
|
|
@@ -129,6 +127,12 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
|
|
return nil, err
|
|
}
|
|
|
|
+ if s.config.SandboxCgroupWithEmulator {
|
|
+ if err := s.setupHostCgroupsWithEmulator(); err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+ }
|
|
+
|
|
// Create Containers
|
|
if err = s.createContainers(); err != nil {
|
|
return nil, err
|
|
diff --git a/virtcontainers/cgroups.go b/virtcontainers/cgroups.go
|
|
index df0ec30..65d2001 100644
|
|
--- a/virtcontainers/cgroups.go
|
|
+++ b/virtcontainers/cgroups.go
|
|
@@ -9,19 +9,15 @@ package virtcontainers
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
- "encoding/json"
|
|
"fmt"
|
|
- "io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
+ "strconv"
|
|
"strings"
|
|
|
|
"github.com/containerd/cgroups"
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/sirupsen/logrus"
|
|
-
|
|
- "github.com/kata-containers/runtime/virtcontainers/store"
|
|
- "github.com/kata-containers/runtime/virtcontainers/types"
|
|
)
|
|
|
|
type cgroupPather interface {
|
|
@@ -32,7 +28,11 @@ type cgroupPather interface {
|
|
// unconstrained cgroups are placed here.
|
|
// for example /sys/fs/cgroup/memory/kata/$CGPATH
|
|
// where path is defined by the containers manager
|
|
-const cgroupKataPath = "/kata/"
|
|
+const (
|
|
+ cgroupKataPath = "/kata/"
|
|
+ vcpuCgroupName = "vcpu"
|
|
+ emulatorCgroupName = "emulator"
|
|
+)
|
|
|
|
var cgroupsLoadFunc = cgroups.Load
|
|
var cgroupsNewFunc = cgroups.New
|
|
@@ -105,24 +105,16 @@ func deleteCgroup(hierarchy cgroups.Hierarchy, cgroupPath string) error {
|
|
|
|
// GetSandboxCgroupPath return the cgroup path of specified sandbox
|
|
func GetSandboxCgroupPath(ctx context.Context, sandboxID string) (string, error) {
|
|
- stateFilePath := filepath.Join(store.RunStoragePath(), sandboxID, store.StateFile)
|
|
-
|
|
- fileData, err := ioutil.ReadFile(stateFilePath)
|
|
+ config, err := loadSandboxConfig(sandboxID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
- state := types.SandboxState{}
|
|
-
|
|
- if err := json.Unmarshal(fileData, &state); err != nil {
|
|
- return "", err
|
|
- }
|
|
-
|
|
- if state.CgroupPath == "" {
|
|
- return "", fmt.Errorf("get sandbox cgroup path error: cgroupPath is empty")
|
|
+ if config.Cgroups == nil {
|
|
+ return "", fmt.Errorf("the cgroups of sandbox %s is nil", sandboxID)
|
|
}
|
|
|
|
- return state.CgroupPath, nil
|
|
+ return config.Cgroups.Path, nil
|
|
}
|
|
|
|
// AddPidToSandboxCgroup add kata-runtime create process to cgroup
|
|
@@ -276,3 +268,107 @@ func validCPUResources(cpuSpec *specs.LinuxCPU) *specs.LinuxCPU {
|
|
|
|
return &cpu
|
|
}
|
|
+
|
|
+// getQemuTaskWithoutVcpu lists the tasks under /proc/{qemu pid}/task and returns those that are not VCPU tasks.
|
|
+// VCPU tasks are identified by the "query-cpus" qmp command
|
|
+func getQemuTaskWithoutVcpu(sandbox *Sandbox, vmPid int) []int {
|
|
+ procPath := fmt.Sprintf("/proc/%d/task", vmPid)
|
|
+
|
|
+ dirReader, err := os.Open(procPath)
|
|
+ if err != nil {
|
|
+ logrus.Warningf("cannot open %s: %s", procPath, err)
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ defer dirReader.Close()
|
|
+
|
|
+ dirs, err := dirReader.Readdirnames(0)
|
|
+ if err != nil {
|
|
+ logrus.Warningf("walking dirs in %s failed: %s", procPath, err)
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ vcpuThreadInfo, err := sandbox.hypervisor.getThreadIDs()
|
|
+ if err != nil {
|
|
+ logrus.Warnf("get hypervisor Thread ID failed: %v", err)
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ var vcpuThreadIDs []int
|
|
+ for _, value := range vcpuThreadInfo.vcpus {
|
|
+ vcpuThreadIDs = append(vcpuThreadIDs, value)
|
|
+ }
|
|
+
|
|
+ var allThreadIDs []int
|
|
+ for _, dir := range dirs {
|
|
+ p, err := strconv.Atoi(dir)
|
|
+ if err != nil {
|
|
+ logrus.Warnf("can not change string dir: %s to int type", dir)
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ allThreadIDs = append(allThreadIDs, p)
|
|
+ }
|
|
+
|
|
+ nonVCPUThreads := diffSlice(allThreadIDs, vcpuThreadIDs)
|
|
+
|
|
+ return nonVCPUThreads
|
|
+}
|
|
+
|
|
+func pulloutQemuThread(sandbox *Sandbox, vmPid int, path string) error {
|
|
+ control, err := cgroups.New(cgroups.SingleSubsystem(cgroups.V1, cgroups.Cpu),
|
|
+ cgroups.StaticPath(path),
|
|
+ &specs.LinuxResources{})
|
|
+ if err != nil {
|
|
+ return err
|
|
+ }
|
|
+ taskIds := getQemuTaskWithoutVcpu(sandbox, vmPid)
|
|
+ if len(taskIds) == 0 {
|
|
+ logrus.Warnf("no taskId id in qemu other than vcpu found of pid %d", vmPid)
|
|
+ return nil
|
|
+ }
|
|
+ for _, taskId := range taskIds {
|
|
+ if err := control.AddTask(cgroups.Process{
|
|
+ Pid: taskId,
|
|
+ }); err != nil {
|
|
+ logrus.Errorf("failed to add task %d to cgroup of %s", taskId, path)
|
|
+ return err
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return nil
|
|
+}
|
|
+
|
|
+// checkCgroupExist returns false if path is missing under any subsystem of hierarchy, true otherwise
|
|
+func checkCgroupExist(hierarchy cgroups.Hierarchy, path string) bool {
|
|
+ subSystems, _ := hierarchy()
|
|
+ for _, s := range cgroupPathers(subSystems) {
|
|
+ if _, err := os.Lstat(s.Path(path)); err != nil {
|
|
+ if os.IsNotExist(err) {
|
|
+ return false
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return true
|
|
+}
|
|
+
|
|
+// diffSlice returns the elements of s1 that are not in s2
|
|
+func diffSlice(s1, s2 []int) []int {
|
|
+ var diffSlice []int
|
|
+ for _, p := range s1 {
|
|
+ if !isInSlice(p, s2) {
|
|
+ diffSlice = append(diffSlice, p)
|
|
+ }
|
|
+ }
|
|
+ return diffSlice
|
|
+}
|
|
+
|
|
+func isInSlice(i int, s []int) bool {
|
|
+ for _, v := range s {
|
|
+ if i == v {
|
|
+ return true
|
|
+ }
|
|
+ }
|
|
+ return false
|
|
+}
|
|
diff --git a/virtcontainers/persist/fs/fs.go b/virtcontainers/persist/fs/fs.go
|
|
index 38efdba..641d64e 100644
|
|
--- a/virtcontainers/persist/fs/fs.go
|
|
+++ b/virtcontainers/persist/fs/fs.go
|
|
@@ -14,6 +14,8 @@ import (
|
|
"path/filepath"
|
|
"syscall"
|
|
|
|
+ "github.com/opencontainers/runc/libcontainer/configs"
|
|
+
|
|
persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
@@ -78,6 +80,12 @@ func (fs *FS) ToDisk(ss persistapi.SandboxState, cs map[string]persistapi.Contai
|
|
return fmt.Errorf("sandbox container id required")
|
|
}
|
|
|
|
+ if ss.Config.Cgroups == nil {
|
|
+ ss.Config.Cgroups = &configs.Cgroup{
|
|
+ Path: ss.CgroupPath,
|
|
+ }
|
|
+ }
|
|
+
|
|
fs.sandboxState = &ss
|
|
fs.containerState = cs
|
|
|
|
diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go
|
|
index 91067fb..e8ef41b 100644
|
|
--- a/virtcontainers/pkg/oci/utils.go
|
|
+++ b/virtcontainers/pkg/oci/utils.go
|
|
@@ -1136,3 +1136,17 @@ func validateSandboxDNS(value string) error {
|
|
|
|
return nil
|
|
}
|
|
+
|
|
+func GetSandboxIDFromAnnotations(s *specs.Spec) (string, error) {
|
|
+ if s == nil {
|
|
+ return "", fmt.Errorf("spec is nil")
|
|
+ }
|
|
+
|
|
+ for _, v := range CRISandboxNameKeyList {
|
|
+ if sandboxID, ok := s.Annotations[v]; ok {
|
|
+ return sandboxID, nil
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return "", fmt.Errorf("failed to find the sandbox ID")
|
|
+}
|
|
diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go
|
|
index b479cf5..ca4e700 100644
|
|
--- a/virtcontainers/sandbox.go
|
|
+++ b/virtcontainers/sandbox.go
|
|
@@ -2162,7 +2162,9 @@ func (s *Sandbox) cgroupsDelete() error {
|
|
var cgroupSubsystems cgroups.Hierarchy
|
|
|
|
if s.config.SandboxCgroupWithEmulator {
|
|
- // emulator
|
|
+ if err := deleteCgroup(cgroups.V1, s.state.CgroupPath); err != nil {
|
|
+ return err
|
|
+ }
|
|
} else if s.config.SandboxCgroupOnly {
|
|
return s.cgroupMgr.Destroy()
|
|
}
|
|
@@ -2381,6 +2383,68 @@ func (s *Sandbox) setupSandboxCgroup() error {
|
|
return nil
|
|
}
|
|
|
|
+func (s *Sandbox) setupHostCgroupsWithEmulator() error {
|
|
+ if len(s.config.Containers) == 0 {
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ sandboxContainerSpec := s.GetPatchedOCISpec()
|
|
+ if sandboxContainerSpec == nil {
|
|
+ return fmt.Errorf("sandbox container should not be empty")
|
|
+ }
|
|
+
|
|
+ // Set sandbox's cgroup path
|
|
+ s.state.CgroupPath = sandboxContainerSpec.Linux.CgroupsPath
|
|
+
|
|
+ if !checkCgroupExist(cgroups.V1, s.state.CgroupPath) {
|
|
+ return fmt.Errorf("sandbox's cgroup %s doesn't exist", s.state.CgroupPath)
|
|
+ }
|
|
+
|
|
+ // pull out qemu threads other than vcpu to the cgroup of "<path>/emulator"
|
|
+ if s.config.HypervisorType == QemuHypervisor {
|
|
+ emulatorCgroupPath := filepath.Join(s.state.CgroupPath, emulatorCgroupName)
|
|
+ hypervisorPids := s.hypervisor.getPids()
|
|
+ if len(hypervisorPids) == 0 || hypervisorPids[0] == 0 {
|
|
+ return fmt.Errorf("hypervisor pid: %v invalid", hypervisorPids)
|
|
+ }
|
|
+ if err := pulloutQemuThread(s, hypervisorPids[0], emulatorCgroupPath); err != nil {
|
|
+ return err
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // limit cpu to "<path>/vcpu"
|
|
+ vcpuCgroupPath := filepath.Join(s.state.CgroupPath, vcpuCgroupName)
|
|
+ vcpuResources := specs.LinuxResources{
|
|
+ CPU: s.cpuResources(),
|
|
+ }
|
|
+ if err := applyResourceLimit(&vcpuResources, vcpuCgroupPath); err != nil {
|
|
+ return err
|
|
+ }
|
|
+
|
|
+ // limit blkio resource to "<path>"
|
|
+
|
|
+ // limit files resource
|
|
+
|
|
+ return nil
|
|
+}
|
|
+
|
|
+func applyResourceLimit(resources *specs.LinuxResources, cgroupPath string) error {
|
|
+ if resources == nil {
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ control, err := cgroupsLoadFunc(cgroups.V1, cgroups.StaticPath(cgroupPath))
|
|
+ if err != nil {
|
|
+ return fmt.Errorf("could not load cgroup %v: %v", cgroupPath, err)
|
|
+ }
|
|
+
|
|
+ if err = control.Update(resources); err != nil {
|
|
+ return fmt.Errorf("could not update cgroup %v: %v", cgroupPath, err)
|
|
+ }
|
|
+
|
|
+ return nil
|
|
+}
|
|
+
|
|
// GetPatchedOCISpec returns sandbox's OCI specification
|
|
// This OCI specification was patched when the sandbox was created
|
|
// by containerCapabilities(), SetEphemeralStorageType() and others
|
|
@@ -2452,6 +2516,10 @@ func (s *Sandbox) forceDeleteSandbox() {
|
|
c.forceDeleteContainer()
|
|
}
|
|
|
|
+ if err := deleteCgroup(cgroups.V1, s.state.CgroupPath); err != nil {
|
|
+ s.Logger().Warnf("sandbox forceDelete cgroups failed: %v", err)
|
|
+ }
|
|
+
|
|
globalSandboxList.removeSandbox(s.id)
|
|
|
|
if s.monitor != nil {
|
|
--
|
|
1.8.3.1
|
|
|