!54 kata-containers: adapt with iSulad and support for new hypervisor type "stratovirt"

From: @holyfei
Reviewed-by: @caihaomin
Signed-off-by: @caihaomin
This commit is contained in:
openeuler-ci-bot 2021-08-23 11:37:59 +00:00 committed by Gitee
commit 28367f671f
9 changed files with 5088 additions and 2 deletions

View File

@ -2,7 +2,7 @@
%global debug_package %{nil}
%define VERSION 2.1.0
%define RELEASE 1
%define RELEASE 2
Name: kata-containers
Version: %{VERSION}
@ -87,9 +87,9 @@ install -p -m 750 %{_builddir}/kata-containers/src/runtime/kata-netmon %{buildro
install -p -m 750 %{_builddir}/kata-containers/src/runtime/kata-monitor %{buildroot}/usr/bin/
install -p -m 750 %{_builddir}/kata-containers/src/runtime/containerd-shim-kata-v2 %{buildroot}/usr/bin/
install -p -m 640 -D %{_builddir}/kata-containers/src/runtime/cli/config/configuration-qemu.toml %{buildroot}/usr/share/defaults/kata-containers/configuration.toml
install -p -m 640 -D %{_builddir}/kata-containers/src/runtime/cli/config/configuration-stratovirt.toml %{buildroot}/usr/share/defaults/kata-containers/configuration-stratovirt.toml
install -p -m 640 ./build/kata-containers-initrd.img %{buildroot}/var/lib/kata/
mkdir -p -m 750 %{buildroot}/usr/share/defaults/kata-containers/
install -p -m 640 -D %{_builddir}/kata-containers/src/runtime/cli/config/configuration-qemu.toml %{buildroot}/usr/share/defaults/kata-containers/configuration.toml
strip %{buildroot}/usr/bin/kata*
strip %{buildroot}/usr/bin/containerd-shim-kata-v2
@ -107,6 +107,12 @@ strip %{buildroot}/usr/bin/containerd-shim-kata-v2
%doc
%changelog
* Fri Aug 20 2021 yangfeiyu <yangfeiyu2@huawei.com> - 2.1.0-2
- Type:enhancement
- ID:NA
- SUG:NA
- DESC:support with stratovirt and isulad
* Wed Aug 18 2021 yangfeiyu <yangfeiyu2@huawei.com> - 2.1.0-1
- Type:enhancement
- ID:NA

View File

@ -0,0 +1,837 @@
From f56d66f196bee808526e86df2c3c063a887c6fef Mon Sep 17 00:00:00 2001
From: Wei Gao <gaowei66@huawei.com>
Date: Sat, 7 Aug 2021 10:39:11 +0800
Subject: [PATCH 1/6] runtime: add support of new sandbox hypervisor type
StratoVirt.
Signed-off-by: Wei Gao <gaowei66@huawei.com>
---
src/runtime/pkg/katautils/config.go | 96 +++-
src/runtime/virtcontainers/hypervisor.go | 12 +
src/runtime/virtcontainers/stratovirt.go | 642 +++++++++++++++++++++++
3 files changed, 749 insertions(+), 1 deletion(-)
create mode 100644 src/runtime/virtcontainers/stratovirt.go
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go
index 6114aa39..f94ac4fd 100644
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -50,6 +50,7 @@ const (
clhHypervisorTableType = "clh"
qemuHypervisorTableType = "qemu"
acrnHypervisorTableType = "acrn"
+ stratovirtHypervisorTable = "stratovirt"
// the maximum amount of PCI bridges that can be cold plugged in a VM
maxPCIBridges uint32 = 5
@@ -870,6 +871,96 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
}, nil
}
+// newStratovirtHypervisorConfig converts the parsed hypervisor section of the
+// configuration file into a vc.HypervisorConfig for StratoVirt.
+//
+// An error is returned when the hypervisor or kernel path cannot be resolved,
+// when not exactly one of image/initrd is configured, when the block device
+// driver or shared filesystem setting is invalid, when virtio-fs is selected
+// without a daemon path, or when utils.SupportsVsocks reports no vsock support.
+func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
+	noConfig := vc.HypervisorConfig{}
+
+	hypervisorPath, err := h.path()
+	if err != nil {
+		return noConfig, err
+	}
+
+	kernelPath, err := h.kernel()
+	if err != nil {
+		return noConfig, err
+	}
+
+	initrdPath, imagePath, err := h.getInitrdAndImage()
+	if err != nil {
+		return noConfig, err
+	}
+
+	// Exactly one guest rootfs source must be configured.
+	switch {
+	case imagePath != "" && initrdPath != "":
+		return noConfig,
+			errors.New("having both an image and an initrd defined in the configuration file is not supported")
+	case imagePath == "" && initrdPath == "":
+		return noConfig,
+			errors.New("either image or initrd must be defined in the configuration file")
+	}
+
+	kernelParams := h.kernelParams()
+	machineType := h.machineType()
+
+	blockDriver, err := h.blockDeviceDriver()
+	if err != nil {
+		return noConfig, err
+	}
+
+	sharedFS, err := h.sharedFS()
+	if err != nil {
+		return noConfig, err
+	}
+
+	if sharedFS == config.VirtioFS && h.VirtioFSDaemon == "" {
+		return noConfig,
+			errors.New("cannot enable virtio-fs without daemon path in configuration file")
+	}
+
+	// The generated kernel command line forces agent.use_vsock=true, so the
+	// host has to provide vsock support.
+	vsockSupported, vsockErr := utils.SupportsVsocks()
+	if !vsockSupported {
+		return noConfig, vsockErr
+	}
+
+	return vc.HypervisorConfig{
+		HypervisorPath:          hypervisorPath,
+		HypervisorPathList:      h.HypervisorPathList,
+		KernelPath:              kernelPath,
+		InitrdPath:              initrdPath,
+		ImagePath:               imagePath,
+		KernelParams:            vc.DeserializeParams(strings.Fields(kernelParams)),
+		HypervisorMachineType:   machineType,
+		NumVCPUs:                h.defaultVCPUs(),
+		DefaultMaxVCPUs:         h.defaultMaxVCPUs(),
+		MemorySize:              h.defaultMemSz(),
+		MemSlots:                h.defaultMemSlots(),
+		MemOffset:               h.defaultMemOffset(),
+		EntropySource:           h.GetEntropySource(),
+		EntropySourceList:       h.EntropySourceList,
+		DefaultBridges:          h.defaultBridges(),
+		DisableBlockDeviceUse:   h.DisableBlockDeviceUse,
+		SharedFS:                sharedFS,
+		VirtioFSDaemon:          h.VirtioFSDaemon,
+		VirtioFSDaemonList:      h.VirtioFSDaemonList,
+		VirtioFSCacheSize:       h.VirtioFSCacheSize,
+		VirtioFSCache:           h.defaultVirtioFSCache(),
+		VirtioFSExtraArgs:       h.VirtioFSExtraArgs,
+		FileBackedMemRootDir:    h.FileBackedMemRootDir,
+		FileBackedMemRootList:   h.FileBackedMemRootList,
+		Mlock:                   !h.Swap,
+		Debug:                   h.Debug,
+		DisableNestingChecks:    h.DisableNestingChecks,
+		BlockDeviceDriver:       blockDriver,
+		BlockDeviceCacheSet:     h.BlockDeviceCacheSet,
+		BlockDeviceCacheDirect:  h.BlockDeviceCacheDirect,
+		BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
+		EnableIOThreads:         h.EnableIOThreads,
+		DisableVhostNet:         h.DisableVhostNet,
+		EnableVhostUserStore:    h.EnableVhostUserStore,
+		VhostUserStorePath:      h.vhostUserStorePath(),
+		VhostUserStorePathList:  h.VhostUserStorePathList,
+		GuestHookPath:           h.guestHookPath(),
+		EnableAnnotations:       h.EnableAnnotations,
+	}, nil
+}
+
func newFactoryConfig(f factory) (oci.FactoryConfig, error) {
if f.TemplatePath == "" {
f.TemplatePath = defaultTemplatePath
@@ -903,6 +994,9 @@ func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi
case clhHypervisorTableType:
config.HypervisorType = vc.ClhHypervisor
hConfig, err = newClhHypervisorConfig(hypervisor)
+ case stratovirtHypervisorTable:
+ config.HypervisorType = vc.StratovirtHypervisor
+ hConfig, err = newStratovirtHypervisorConfig(hypervisor)
}
if err != nil {
@@ -1287,7 +1381,7 @@ func checkHypervisorConfig(config vc.HypervisorConfig) error {
memSizeMB := int64(config.MemorySize)
if memSizeMB == 0 {
- return errors.New("VM memory cannot be zero")
+ return errors.New(fmt.Sprintf("The VM memory cannot be zero, %s", config.ImagePath))
}
mb := int64(1024 * 1024)
diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go
index 767215b6..615baa80 100644
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@@ -44,6 +44,9 @@ const (
// ClhHypervisor is the ICH hypervisor.
ClhHypervisor HypervisorType = "clh"
+ // StratovirtHypervisor is the StratoVirt hypervisor
+ StratovirtHypervisor HypervisorType = "stratovirt"
+
// MockHypervisor is a mock hypervisor for testing purposes
MockHypervisor HypervisorType = "mock"
)
@@ -159,6 +162,9 @@ func (hType *HypervisorType) Set(value string) error {
case "clh":
*hType = ClhHypervisor
return nil
+ case "stratovirt":
+ *hType = StratovirtHypervisor
+ return nil
case "mock":
*hType = MockHypervisor
return nil
@@ -178,6 +184,8 @@ func (hType *HypervisorType) String() string {
return string(AcrnHypervisor)
case ClhHypervisor:
return string(ClhHypervisor)
+ case StratovirtHypervisor:
+ return string(StratovirtHypervisor)
case MockHypervisor:
return string(MockHypervisor)
default:
@@ -207,6 +215,10 @@ func newHypervisor(hType HypervisorType) (hypervisor, error) {
return &cloudHypervisor{
store: store,
}, nil
+ case StratovirtHypervisor:
+ return &stratovirt{
+ store: store,
+ }, nil
case MockHypervisor:
return &mockHypervisor{}, nil
default:
diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go
new file mode 100644
index 00000000..4fec96d3
--- /dev/null
+++ b/src/runtime/virtcontainers/stratovirt.go
@@ -0,0 +1,642 @@
+package virtcontainers
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+
+ govmmQemu "github.com/kata-containers/govmm/qemu"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+
+ "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
+ persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api"
+ "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
+ "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
+ "go.opentelemetry.io/otel"
+ otelLabel "go.opentelemetry.io/otel/label"
+ otelTrace "go.opentelemetry.io/otel/trace"
+)
+
+const (
+	// defaultDummyMac is a MAC address prefix (five octets plus a trailing colon).
+	defaultDummyMac = "22:33:44:aa:bb:"
+	// mmioBlkCount is the number of block device slots tracked in mmioBlkSlots.
+	mmioBlkCount = 4
+	// mmioNetCount is the number of network device slots tracked in mmioNetSlots.
+	mmioNetCount = 2
+	// randomDevice is the host file backing the guest's virtio-rng device.
+	randomDevice = "/dev/urandom"
+)
+
+// stratovirtDev pairs a device description with its device type. Entries are
+// queued by addDevice and turned into command line arguments by startSandbox.
+type stratovirtDev struct {
+ dev interface{}
+ devType deviceType
+}
+
+// stratovirt is the hypervisor implementation for StratoVirt.
+//
+// id, ctx, config, socketPath, consolePath and qmpMonitorCh are initialised by
+// createSandbox; pid is recorded by startSandbox (or restored by load);
+// devices collects what addDevice queued for the command line; mmioBlkSlots
+// and mmioNetSlots record which device slots are in use (see getDevSlot).
+// NOTE(review): HotpluggedVCPUs is not referenced by the methods in this file.
+type stratovirt struct {
+ id string
+ ctx context.Context
+ sandbox *Sandbox
+ store persistapi.PersistDriver
+ config HypervisorConfig
+ pid int
+ consolePath string
+ socketPath string
+ qmpMonitorCh qmpChannel
+ devices []stratovirtDev
+ HotpluggedVCPUs []CPUDevice
+ mmioBlkSlots [mmioBlkCount]bool
+ mmioNetSlots [mmioNetCount]bool
+}
+
+// Logger returns the virtcontainers log entry tagged with subsystem "stratovirt".
+func (s *stratovirt) Logger() *logrus.Entry {
+ return virtLog.WithField("subsystem", "stratovirt")
+}
+
+// trace starts a span called name under parent using the "kata" tracer and
+// returns the span together with the derived context. The span is labelled
+// with the runtime source, package, subsystem, hypervisor type and sandbox id.
+// A nil parent is logged as a bug and replaced by context.Background().
+func (s *stratovirt) trace(parent context.Context, name string) (otelTrace.Span, context.Context) {
+ if parent == nil {
+ s.Logger().WithField("type", "bug").Error("trace called before context set")
+ parent = context.Background()
+ }
+
+ tracer := otel.Tracer("kata")
+ ctx, span := tracer.Start(parent, name, otelTrace.WithAttributes(otelLabel.String("source", "runtime"), otelLabel.String("package", "virtcontainers"), otelLabel.String("subsystem", "hypervisor"), otelLabel.String("type", "stratovirt"), otelLabel.String("sandbox_id", s.id)))
+
+ return span, ctx
+}
+
+// getKernelCmdLine builds the guest kernel command line as a single
+// space-separated string. When useImage is true the root device is set to
+// /dev/vda; the fixed StratoVirt defaults follow, and the parameters from the
+// hypervisor configuration are appended last.
+func (s *stratovirt) getKernelCmdLine(useImage bool) string {
+	var cmdline []string
+
+	if useImage {
+		cmdline = append(cmdline, "root=/dev/vda")
+	}
+
+	cmdline = append(cmdline,
+		"pci=off",
+		"reboot=k",
+		"panic=1",
+		"iommu=off",
+		"acpi=off",
+		"quiet",
+		"agent.use_vsock=true",
+		"random.trust_cpu=on",
+		"rw",
+	)
+	cmdline = append(cmdline, SerializeParams(s.config.KernelParams, "=")...)
+
+	return strings.Join(cmdline, " ")
+}
+
+// hypervisorConfig returns a copy of the configuration stored by createSandbox.
+func (s *stratovirt) hypervisorConfig() HypervisorConfig {
+ return s.config
+}
+
+func (s *stratovirt) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error {
+ s.ctx = ctx
+
+ var span otelTrace.Span
+ span, _ = s.trace(ctx, "createSandbox")
+ defer span.End()
+
+ s.id = id
+ s.config = *hypervisorConfig
+
+ s.socketPath = filepath.Join(s.store.RunVMStoragePath(), id, "qmp.sock")
+ s.consolePath = filepath.Join(s.store.RunVMStoragePath(), id, "console.sock")
+ s.qmpMonitorCh = qmpChannel{
+ ctx: s.ctx,
+ path: s.socketPath,
+ }
+
+ return nil
+}
+
+// waitSandBoxStarted retries the QMP connection every 50ms until it succeeds
+// or more than timeout whole seconds have elapsed, then negotiates QMP
+// capabilities on the established connection.
+func (s *stratovirt) waitSandBoxStarted(timeout int) error {
+	begin := time.Now()
+
+	for err := s.qmpSetup(); err != nil; err = s.qmpSetup() {
+		if int(time.Since(begin).Seconds()) > timeout {
+			return fmt.Errorf("Failed to connect to StratoVirt instance (timeout %ds): %v", timeout, err)
+		}
+
+		time.Sleep(50 * time.Millisecond)
+	}
+
+	capErr := s.qmpMonitorCh.qmp.ExecuteQMPCapabilities(s.qmpMonitorCh.ctx)
+	if capErr != nil {
+		s.Logger().WithError(capErr).Error(qmpCapErrMsg)
+		return capErr
+	}
+
+	return nil
+}
+
+// startSandbox assembles the StratoVirt command line from the hypervisor
+// configuration and the devices queued through addDevice, starts the
+// hypervisor binary and waits for its QMP socket to answer.
+//
+// timeout is the number of seconds to wait for the QMP connection. If a step
+// fails after the per-VM runtime directory has been created, that directory
+// is removed again before the error is returned.
+func (s *stratovirt) startSandbox(ctx context.Context, timeout int) error {
+	span, _ := s.trace(ctx, "startSandbox")
+	defer span.End()
+
+	vmDir := filepath.Join(s.store.RunVMStoragePath(), s.id)
+
+	var params []string
+	params = append(params, "-name", fmt.Sprintf("sandbox-%s", s.id))
+	params = append(params, "-qmp", fmt.Sprintf("unix:%s,server,nowait", s.socketPath))
+
+	// NOTE(review): a failing kernel lookup is ignored here and simply omits
+	// "-kernel" from the command line - confirm this is intended.
+	if kernelPath, err := s.config.KernelAssetPath(); err == nil {
+		params = append(params, "-kernel", kernelPath)
+	}
+
+	initrdPath, err := s.config.InitrdAssetPath()
+	if err != nil {
+		return err
+	}
+
+	useImage := initrdPath == ""
+	if useImage {
+		imagePath, err := s.config.ImageAssetPath()
+		if err != nil {
+			return err
+		}
+		// The rootfs image takes the first block slot.
+		s.mmioBlkSlots[0] = true
+		params = append(params, "-device", "virtio-blk-device,drive=rootfs")
+		params = append(params, "-drive", fmt.Sprintf("id=rootfs,file=%s,direct=off", imagePath))
+	} else {
+		params = append(params, "-initrd", initrdPath)
+	}
+
+	params = append(params, "-append", s.getKernelCmdLine(useImage))
+	params = append(params, "-smp", fmt.Sprintf("%d", s.config.NumVCPUs))
+	params = append(params, "-m", fmt.Sprintf("%d", uint64(s.config.MemorySize)))
+	params = append(params, "-device", "virtio-serial-device")
+	params = append(params, "-device", "virtconsole,chardev=charconsole0,id=virtioconsole0")
+	params = append(params, "-object", fmt.Sprintf("rng-random,id=objrng0,filename=%s", randomDevice))
+	params = append(params, "-device", "virtio-rng-device,rng=objrng0")
+	params = append(params, "-chardev", fmt.Sprintf("socket,id=charconsole0,path=%s,server,nowait", s.consolePath))
+	params = append(params, "-pidfile", filepath.Join(vmDir, "pid"))
+
+	// add devices to cmdline
+	for _, d := range s.devices {
+		switch v := d.dev.(type) {
+		case Endpoint:
+			name := v.Name()
+			mac := v.HardwareAddr()
+			tapName := v.NetworkPair().TapInterface.TAPIface.Name
+			params = append(params, "-device", fmt.Sprintf("virtio-net-device,netdev=%s,id=%s,mac=%s", name, name, mac))
+			params = append(params, "-netdev", fmt.Sprintf("tap,id=%s,ifname=%s", name, tapName))
+		case config.BlockDrive:
+			id := v.ID
+			path := v.File
+			// NOTE(review): unlike the rootfs device above, this argument has a
+			// space after the comma - confirm StratoVirt accepts it.
+			params = append(params, "-device", fmt.Sprintf("virtio-blk-device, drive=%s", id))
+			params = append(params, "-drive", fmt.Sprintf("id=%s,file=%s", id, path))
+		case types.VSock:
+			v.VhostFd.Close()
+			params = append(params, "-device", fmt.Sprintf("vhost-vsock-device,id=vsock-id,guest-cid=%d", v.ContextID))
+		default:
+			s.Logger().Error("Adding device type is unsupported")
+		}
+	}
+
+	// daemonize
+	params = append(params, "-daemonize")
+
+	// append logfile only on debug
+	if s.config.Debug {
+		params = append(params, "-D", filepath.Join(vmDir, "stratovirt.log"))
+	}
+
+	err = os.MkdirAll(vmDir, DirMode)
+	if err != nil {
+		return err
+	}
+	// Every failure below must be assigned to the function-level err so that
+	// this cleanup sees it; do not shadow err with ":=" after this point.
+	defer func() {
+		if err != nil {
+			if rmErr := os.RemoveAll(vmDir); rmErr != nil {
+				s.Logger().WithError(rmErr).Errorf("Fail to clean up vm dir %s", vmDir)
+			}
+		}
+	}()
+
+	binPath, err := s.config.HypervisorAssetPath()
+	if err != nil {
+		s.Logger().WithError(err).Error("Fail to get hypervisor bin path")
+		return err
+	}
+
+	cmd := exec.CommandContext(s.ctx, binPath, params...)
+	s.Logger().Info("StratoVirt start with params: ", cmd)
+
+	if err = cmd.Start(); err != nil {
+		s.Logger().WithError(err).Error("Error starting hypervisor, please check the params")
+		return err
+	}
+	// NOTE(review): with "-daemonize" this may be the pid of the launching
+	// process rather than of the daemon - confirm against the pidfile.
+	s.pid = cmd.Process.Pid
+
+	if err = s.waitSandBoxStarted(timeout); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// stopSandbox stops the StratoVirt instance and removes its runtime directory.
+//
+// With force set, the recorded process is sent SIGKILL; a failure to deliver
+// the signal is logged but not returned. Otherwise a QMP "quit" is issued and
+// any error from connecting or from the command is returned. In every case the
+// per-VM directory, and the target it resolves to when it is a symlink, is
+// removed on return.
+func (s *stratovirt) stopSandbox(ctx context.Context, force bool) error {
+	span, _ := s.trace(ctx, "stopSandbox")
+	defer span.End()
+
+	defer func() {
+		dir := filepath.Join(s.store.RunVMStoragePath(), s.id)
+		// Best effort: on error link is empty and only dir is removed.
+		link, _ := filepath.EvalSymlinks(dir)
+
+		if err := os.RemoveAll(dir); err != nil {
+			s.Logger().WithError(err).Warnf("Failed to clean up vm dir %s", dir)
+		}
+
+		if link != dir && link != "" {
+			if err := os.RemoveAll(link); err != nil {
+				s.Logger().WithError(err).WithField("link", link).Warnf("Failed to remove vm path link %s", link)
+			}
+		}
+	}()
+
+	if force {
+		if s.pid > 0 {
+			if err := syscall.Kill(s.pid, syscall.SIGKILL); err != nil {
+				s.Logger().WithError(err).Warnf("Failed to kill StratoVirt process %d", s.pid)
+			}
+		}
+		return nil
+	}
+
+	if err := s.qmpSetup(); err != nil {
+		return err
+	}
+
+	if err := s.qmpMonitorCh.qmp.ExecuteQuit(s.qmpMonitorCh.ctx); err != nil {
+		s.Logger().WithError(err).Error("Fail to execute qmp: QUIT")
+		return err
+	}
+
+	return nil
+}
+
+// pauseSandbox is a stub: it performs no action and always returns nil.
+func (s *stratovirt) pauseSandbox(ctx context.Context) error {
+ return nil
+}
+
+// saveSandbox is a stub: it performs no action and always returns nil.
+func (s *stratovirt) saveSandbox() error {
+ return nil
+}
+
+// resumeSandbox is a stub: it performs no action and always returns nil.
+func (s *stratovirt) resumeSandbox(ctx context.Context) error {
+ return nil
+}
+
+// addDevice queues a device for the next startSandbox call, which turns the
+// queued entries into command line arguments. It always returns nil.
+func (s *stratovirt) addDevice(ctx context.Context, devInfo interface{}, devType deviceType) error {
+	span, _ := s.trace(ctx, "addDevice")
+	defer span.End()
+
+	s.devices = append(s.devices, stratovirtDev{dev: devInfo, devType: devType})
+
+	return nil
+}
+
+// getDevSlot maps a device name to its slot index and updates the slot
+// bookkeeping.
+//
+// Name is lower-cased and reduced to its base name. "eth<N>" selects network
+// slot N; "vd<c>" selects the block slot given by the letter c ('a' is 0).
+// With isPut false the slot is claimed, and an error is returned if it is
+// already in use; with isPut true the slot is released. Names that do not
+// match either form, or whose index lies outside the slot tables, are
+// rejected with an error instead of indexing out of range.
+func (s *stratovirt) getDevSlot(Name string, isPut bool) (slot int, err error) {
+	Name = filepath.Base(strings.ToLower(Name))
+
+	switch {
+	case strings.HasPrefix(Name, "eth"):
+		idxStr := strings.TrimPrefix(Name, "eth")
+
+		idx, err := strconv.Atoi(idxStr)
+		if err != nil {
+			return 0, fmt.Errorf("Could not convert to int from Str %q", idxStr)
+		}
+		if idx < 0 || idx >= len(s.mmioNetSlots) {
+			return 0, fmt.Errorf("GetDevSlot failed, slot %d is out of range for %q (max %d)", idx, Name, len(s.mmioNetSlots)-1)
+		}
+
+		if !isPut && s.mmioNetSlots[idx] {
+			return 0, fmt.Errorf("GetDevSlot failed, slot is being used %q", idxStr)
+		}
+		s.mmioNetSlots[idx] = !isPut
+
+		return idx, nil
+	case strings.HasPrefix(Name, "vd"):
+		charStr := strings.TrimPrefix(Name, "vd")
+		if charStr == "" {
+			return 0, fmt.Errorf("Could not parse idx from Name %q", Name)
+		}
+
+		// Only the first character after "vd" selects the slot.
+		idx := int([]rune(charStr)[0] - 'a')
+		if idx < 0 || idx >= len(s.mmioBlkSlots) {
+			return 0, fmt.Errorf("GetDevSlot failed, slot %d is out of range for %q (max %d)", idx, Name, len(s.mmioBlkSlots)-1)
+		}
+
+		if !isPut && s.mmioBlkSlots[idx] {
+			return 0, fmt.Errorf("GetDevSlot failed, slot is being used %q", charStr)
+		}
+		s.mmioBlkSlots[idx] = !isPut
+
+		return idx, nil
+	}
+
+	return 0, fmt.Errorf("GetDevSlot failed, Name is invalid %q", Name)
+}
+
+// releaseDevSlot marks the slot belonging to name as free and logs a warning
+// when the name cannot be mapped to a slot.
+func (s *stratovirt) releaseDevSlot(name string) {
+	if _, err := s.getDevSlot(name, true); err != nil {
+		s.Logger().WithError(err).Warnf("Failed to release device slot for %q", name)
+	}
+}
+
+// hotplugNet adds or removes a network endpoint on the running VM over QMP.
+//
+// Only veth and tap endpoints are supported. On add, the tap file descriptors
+// are handed to the hypervisor, a network slot is claimed, and the netdev and
+// its device are created; the slot is released again if a later step fails.
+// On remove, the device and netdev are deleted and the slot is released.
+func (s *stratovirt) hotplugNet(endpoint Endpoint, op operation) (err error) {
+	err = s.qmpSetup()
+	if err != nil {
+		return err
+	}
+	var tap TapInterface
+
+	switch endpoint.Type() {
+	case VethEndpointType:
+		drive := endpoint.(*VethEndpoint)
+		tap = drive.NetPair.TapInterface
+	case TapEndpointType:
+		drive := endpoint.(*TapEndpoint)
+		tap = drive.TapInterface
+	default:
+		return fmt.Errorf("Endpoint is not supported")
+	}
+
+	switch op {
+	case addDevice:
+		var (
+			vmFdNames    []string
+			vhostFdNames []string
+		)
+		for i, VMFd := range tap.VMFds {
+			fdName := fmt.Sprintf("fd%d", i)
+			if err := s.qmpMonitorCh.qmp.ExecuteGetFD(s.qmpMonitorCh.ctx, fdName, VMFd); err != nil {
+				return err
+			}
+			vmFdNames = append(vmFdNames, fdName)
+		}
+		for i, VhostFd := range tap.VhostFds {
+			fdName := fmt.Sprintf("vhostfd%d", i)
+			if err := s.qmpMonitorCh.qmp.ExecuteGetFD(s.qmpMonitorCh.ctx, fdName, VhostFd); err != nil {
+				return err
+			}
+			VhostFd.Close()
+			vhostFdNames = append(vhostFdNames, fdName)
+		}
+
+		slot, err := s.getDevSlot(endpoint.Name(), false)
+		if err != nil {
+			return fmt.Errorf("Could not get unused slot for %q: %v", endpoint.Name(), err)
+		}
+
+		if len(vmFdNames) != 0 || len(vhostFdNames) != 0 {
+			if err := s.qmpMonitorCh.qmp.ExecuteNetdevAddByFds(s.qmpMonitorCh.ctx, "tap", tap.ID, vmFdNames, vhostFdNames); err != nil {
+				s.releaseDevSlot(endpoint.Name())
+				return err
+			}
+		} else {
+			if err := s.qmpMonitorCh.qmp.ExecuteNetdevAdd(s.qmpMonitorCh.ctx, "tap", tap.ID, tap.TAPIface.Name, "no", "no", 0); err != nil {
+				s.releaseDevSlot(endpoint.Name())
+				return err
+			}
+		}
+		if err := s.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(s.qmpMonitorCh.ctx, tap.Name, tap.ID, endpoint.HardwareAddr(), fmt.Sprintf("%d", slot), "", "", 0, false); err != nil {
+			s.releaseDevSlot(endpoint.Name())
+			return err
+		}
+	case removeDevice:
+		if err := s.qmpMonitorCh.qmp.ExecuteDeviceDel(s.qmpMonitorCh.ctx, tap.ID); err != nil {
+			return err
+		}
+		if err := s.qmpMonitorCh.qmp.ExecuteNetdevDel(s.qmpMonitorCh.ctx, tap.ID); err != nil {
+			return err
+		}
+
+		// Free the slot so the interface name can be hot-plugged again,
+		// mirroring what hotplugBlk does for block devices.
+		s.releaseDevSlot(endpoint.Name())
+	default:
+		return fmt.Errorf("Operation is not supported")
+	}
+
+	return nil
+}
+
+// hotplugBlk adds or removes a block drive on the running VM over QMP.
+//
+// On add, the block slot derived from drive.VirtPath is claimed, then the
+// block backend and its device are created; the slot is released again if
+// either step fails. On remove, the device and backend are deleted and the
+// slot is released.
+func (s *stratovirt) hotplugBlk(drive *config.BlockDrive, op operation) (err error) {
+	err = s.qmpSetup()
+	if err != nil {
+		return err
+	}
+
+	switch op {
+	case addDevice:
+		driver := "virtio-blk-pci"
+		slot, err := s.getDevSlot(drive.VirtPath, false)
+		if err != nil {
+			return fmt.Errorf("Could not get unused slot for %q: %v", drive.VirtPath, err)
+		}
+
+		if err := s.qmpMonitorCh.qmp.ExecuteBlockdevAdd(s.qmpMonitorCh.ctx, drive.File, drive.ID, false); err != nil {
+			s.releaseDevSlot(drive.VirtPath)
+			return err
+		}
+
+		if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, fmt.Sprintf("%d", slot), "", "", 0, true, false); err != nil {
+			s.releaseDevSlot(drive.VirtPath)
+			return err
+		}
+	case removeDevice:
+		if err := s.qmpMonitorCh.qmp.ExecuteDeviceDel(s.qmpMonitorCh.ctx, drive.ID); err != nil {
+			return err
+		}
+		if err := s.qmpMonitorCh.qmp.ExecuteBlockdevDel(s.qmpMonitorCh.ctx, drive.ID); err != nil {
+			return err
+		}
+
+		s.releaseDevSlot(drive.VirtPath)
+	default:
+		return fmt.Errorf("Operation is not supported")
+	}
+
+	return nil
+}
+
+// hotplugAddDevice hot-adds a device to the running VM.
+//
+// netDev expects devInfo to be an Endpoint and blockDev expects a
+// *config.BlockDrive. A devInfo of the wrong dynamic type, or any other
+// device type, yields an error rather than a panic. The first return value
+// is always nil.
+func (s *stratovirt) hotplugAddDevice(ctx context.Context, devInfo interface{}, devType deviceType) (interface{}, error) {
+	span, _ := s.trace(ctx, "hotplugAddDevice")
+	defer span.End()
+
+	switch devType {
+	case netDev:
+		endpoint, ok := devInfo.(Endpoint)
+		if !ok {
+			return nil, fmt.Errorf("Hotplug add device failed: unexpected device info %T for device type '%v'", devInfo, devType)
+		}
+		return nil, s.hotplugNet(endpoint, addDevice)
+	case blockDev:
+		drive, ok := devInfo.(*config.BlockDrive)
+		if !ok {
+			return nil, fmt.Errorf("Hotplug add device failed: unexpected device info %T for device type '%v'", devInfo, devType)
+		}
+		return nil, s.hotplugBlk(drive, addDevice)
+	default:
+		return nil, fmt.Errorf("Hotplug add device failed: unsupported device type '%v'", devType)
+	}
+}
+
+// hotplugRemoveDevice hot-removes a device from the running VM.
+//
+// netDev expects devInfo to be an Endpoint and blockDev expects a
+// *config.BlockDrive. A devInfo of the wrong dynamic type, or any other
+// device type, yields an error rather than a panic. The first return value
+// is always nil.
+func (s *stratovirt) hotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType deviceType) (interface{}, error) {
+	span, _ := s.trace(ctx, "hotplugRemoveDevice")
+	defer span.End()
+
+	switch devType {
+	case netDev:
+		endpoint, ok := devInfo.(Endpoint)
+		if !ok {
+			return nil, fmt.Errorf("Hotplug remove device: unexpected device info %T for device type '%v'", devInfo, devType)
+		}
+		return nil, s.hotplugNet(endpoint, removeDevice)
+	case blockDev:
+		drive, ok := devInfo.(*config.BlockDrive)
+		if !ok {
+			return nil, fmt.Errorf("Hotplug remove device: unexpected device info %T for device type '%v'", devInfo, devType)
+		}
+		return nil, s.hotplugBlk(drive, removeDevice)
+	default:
+		return nil, fmt.Errorf("Hotplug remove device: unsupported device type '%v'", devType)
+	}
+}
+
+// resizeMemory is a stub: it ignores its arguments and returns 0, an empty
+// memoryDevice and a nil error.
+func (s *stratovirt) resizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) {
+ return 0, memoryDevice{}, nil
+}
+
+// resizeVCPUs is a stub: it ignores reqVCPUs and returns 0, 0 and a nil error.
+func (s *stratovirt) resizeVCPUs(ctx context.Context, reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) {
+ return 0, 0, nil
+}
+
+func (s *stratovirt) getSandboxConsole(ctx context.Context, id string) (string, string, error) {
+ span, _ := s.trace(ctx, "getSandboxConsole")
+ defer span.End()
+
+ var consolePath string
+ if s.config.Debug {
+ consolePath = s.consolePath
+ } else {
+ consolePath = ""
+ }
+ consoleURL, err := utils.BuildSocketPath(consolePath)
+ if err != nil {
+ return consoleProtoUnix, "", err
+ }
+ return consoleProtoUnix, consoleURL, nil
+
+}
+
+// disconnect closes the QMP connection, if one is open (see qmpTeardown).
+func (s *stratovirt) disconnect(ctx context.Context) {
+ span, _ := s.trace(ctx, "disconnect")
+ defer span.End()
+
+ s.qmpTeardown()
+}
+
+// capabilities reports the hypervisor capabilities; block device hotplug
+// support is the only capability set here.
+func (s *stratovirt) capabilities(ctx context.Context) types.Capabilities {
+ span, _ := s.trace(ctx, "capabilities")
+ defer span.End()
+
+ var caps types.Capabilities
+ caps.SetBlockDeviceHotplugSupport()
+
+ return caps
+}
+
+// qmpTeardown shuts down the current QMP connection, waits for the disconnect
+// notification and clears the connection state. It does nothing when no
+// connection is open.
+func (s *stratovirt) qmpTeardown() {
+	if s.qmpMonitorCh.qmp == nil {
+		return
+	}
+
+	s.qmpMonitorCh.qmp.Shutdown()
+	// Block until the connection reports that it is gone.
+	<-s.qmpMonitorCh.disconn
+
+	s.qmpMonitorCh.qmp = nil
+	s.qmpMonitorCh.disconn = nil
+}
+
+// qmpSetup opens a fresh QMP connection to the socket in s.qmpMonitorCh.path,
+// tearing down any existing connection first. On success the connection and
+// its disconnect channel are stored in s.qmpMonitorCh.
+func (s *stratovirt) qmpSetup() error {
+	s.qmpTeardown()
+
+	// Auto-closed by QMPStart().
+	disconnected := make(chan struct{})
+	qmpCfg := govmmQemu.QMPConfig{Logger: newQMPLogger()}
+
+	conn, _, err := govmmQemu.QMPStart(s.qmpMonitorCh.ctx, s.qmpMonitorCh.path, qmpCfg, disconnected)
+	if err != nil {
+		s.Logger().WithError(err).Error("Failed to connect to StratoVirt instance")
+		return err
+	}
+
+	s.qmpMonitorCh.disconn = disconnected
+	s.qmpMonitorCh.qmp = conn
+
+	return nil
+}
+
+func (s *stratovirt) getThreadIDs(ctx context.Context) (vcpuThreadIDs, error) {
+ span, _ := s.trace(ctx, "getThreadIDs")
+ defer span.End()
+
+ tid := vcpuThreadIDs{}
+ if err := s.qmpSetup(); err != nil {
+ return tid, err
+ }
+
+ cpuInfos, err := s.qmpMonitorCh.qmp.ExecQueryCpus(s.qmpMonitorCh.ctx)
+ if err != nil {
+ s.Logger().WithError(err).Error("Failed to query cpu infos")
+ return tid, err
+ }
+
+ tid.vcpus = make(map[int]int, len(cpuInfos))
+ for _, i := range cpuInfos {
+ if i.ThreadID > 0 {
+ tid.vcpus[i.CPU] = i.ThreadID
+ }
+ }
+ return tid, nil
+}
+
+// cleanup closes the QMP connection, if one is open, and always returns nil.
+func (s *stratovirt) cleanup(ctx context.Context) error {
+ span, _ := s.trace(ctx, "cleanup")
+ defer span.End()
+
+ s.qmpTeardown()
+
+ return nil
+}
+
+// getPids returns a one-element slice holding the recorded hypervisor pid.
+func (s *stratovirt) getPids() []int {
+ return []int{s.pid}
+}
+
+// getVirtioFsPid always returns nil; no virtio-fs daemon pid is tracked here.
+func (s *stratovirt) getVirtioFsPid() *int {
+ return nil
+}
+
+// fromGrpc always fails: StratoVirt is not supported by VM cache.
+func (s *stratovirt) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error {
+ return errors.New("stratovirt is not supported by VM cache")
+}
+
+// toGrpc always fails: StratoVirt is not supported by VM cache.
+func (s *stratovirt) toGrpc(ctx context.Context) ([]byte, error) {
+ return nil, errors.New("stratovirt is not supported by VM cache")
+}
+
+// check sends signal 0 to the recorded pid and returns a wrapped error when
+// the signal cannot be delivered.
+func (s *stratovirt) check() error {
+ if err := syscall.Kill(s.pid, syscall.Signal(0)); err != nil {
+ return errors.Wrapf(err, "Failed to ping StratoVirt process")
+ }
+
+ return nil
+}
+
+// generateSocket delegates to generateVMSocket with the sandbox id and the
+// store's VM storage path.
+func (s *stratovirt) generateSocket(id string) (interface{}, error) {
+ return generateVMSocket(id, s.store.RunVMStoragePath())
+}
+
+// isRateLimiterBuiltin always reports true.
+// NOTE(review): presumably this tells callers not to set up an external rate
+// limiter - confirm that StratoVirt actually provides one.
+func (s *stratovirt) isRateLimiterBuiltin() bool {
+ return true
+}
+
+// save returns the persistable hypervisor state: the hypervisor type and the
+// first pid reported by getPids.
+func (s *stratovirt) save() (p persistapi.HypervisorState) {
+	p.Type = string(StratovirtHypervisor)
+	p.Pid = s.getPids()[0]
+	return p
+}
+
+// load restores the hypervisor pid from previously persisted state.
+func (s *stratovirt) load(p persistapi.HypervisorState) {
+	s.pid = p.Pid
+}
+
+// setSandbox stores the sandbox this hypervisor instance belongs to.
+func (s *stratovirt) setSandbox(sandbox *Sandbox) {
+	s.sandbox = sandbox
+}
--
2.21.1 (Apple Git-122.3)

View File

@ -0,0 +1,174 @@
From 1ffd95187a61582e858dd37c0ab434d3159a0f52 Mon Sep 17 00:00:00 2001
From: Wei Gao <gaowei66@huawei.com>
Date: Mon, 9 Aug 2021 14:26:35 +0800
Subject: [PATCH 2/6] agent: add support of new sandbox hypervisor kind
StratoVirt.
1. add new grpc interface `UpdateInterfaceHwAddrByName`.
2. comment out rescan_pci temporarily.
Signed-off-by: Wei Gao <gaowei66@huawei.com>
---
src/agent/protocols/protos/agent.proto | 5 +++
src/agent/src/netlink.rs | 31 ++++++++++++++++
src/agent/src/rpc.rs | 51 +++++++++++++++++++++++++-
3 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/src/agent/protocols/protos/agent.proto b/src/agent/protocols/protos/agent.proto
index 6cbf5a28..e00f5c63 100644
--- a/src/agent/protocols/protos/agent.proto
+++ b/src/agent/protocols/protos/agent.proto
@@ -46,6 +46,7 @@ service AgentService {
// networking
rpc UpdateInterface(UpdateInterfaceRequest) returns (types.Interface);
+ rpc UpdateInterfaceHwAddrByName(UpdateInterfaceHwAddrByNameRequest) returns (types.Interface);
rpc UpdateRoutes(UpdateRoutesRequest) returns (Routes);
rpc ListInterfaces(ListInterfacesRequest) returns(Interfaces);
rpc ListRoutes(ListRoutesRequest) returns (Routes);
@@ -308,6 +309,10 @@ message UpdateInterfaceRequest {
types.Interface interface = 1;
}
+// Request for UpdateInterfaceHwAddrByName: carries the interface whose name
+// selects the guest link and whose hardware address is applied to it.
+message UpdateInterfaceHwAddrByNameRequest {
+ types.Interface interface = 1;
+}
+
message UpdateRoutesRequest {
Routes routes = 1;
}
diff --git a/src/agent/src/netlink.rs b/src/agent/src/netlink.rs
index 3ab6dbaa..82632d1b 100644
--- a/src/agent/src/netlink.rs
+++ b/src/agent/src/netlink.rs
@@ -104,6 +104,29 @@ impl Handle {
Ok(())
}
+    /// Looks up the link named `iface.name`, removes every address assigned
+    /// to it, brings the link down when `iface` carries no IP addresses, and
+    /// then sets the link's hardware (MAC) address to `iface.hwAddr`.
+    pub async fn update_interface_hw_addr_by_name(&mut self, iface: &Interface) -> Result<()> {
+        let link = self.find_link(LinkFilter::Name(&iface.name)).await?;
+
+        // Drop all addresses currently bound to the link.
+        let stale_addresses = self
+            .list_addresses(AddressFilter::LinkIndex(link.index()))
+            .await?;
+        self.delete_addresses(stale_addresses).await?;
+
+        if iface.IPAddresses.len() == 0 {
+            self.enable_link(link.index(), false).await?;
+        }
+
+        // Apply the requested hardware address.
+        let hw_addr = parse_mac_address(iface.get_hwAddr())
+            .with_context(|| format!("Failed to parse MAC address: {}", iface.get_hwAddr()))?;
+        self.link_set_hw_addr(link.index(), hw_addr)
+            .await
+            .with_context(|| format!("Could not set {:?} to {}", hw_addr, link.name()))?;
+
+        Ok(())
+    }
+
pub async fn handle_localhost(&self) -> Result<()> {
let link = self.find_link(LinkFilter::Name("lo")).await?;
self.enable_link(link.index(), true).await?;
@@ -216,6 +239,14 @@ impl Handle {
Ok(())
}
+    /// Sets the hardware (MAC) address of the link with index `link_index`
+    /// by issuing a netlink link "set" request.
+    async fn link_set_hw_addr(&self, link_index: u32, hw_addr: [u8; 6]) -> Result<()> {
+        self.handle
+            .link()
+            .set(link_index)
+            .address(hw_addr.to_vec())
+            .execute()
+            .await?;
+
+        Ok(())
+    }
+
async fn query_routes(
&self,
ip_version: Option<IpVersion>,
diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs
index 92025af3..2cc1c983 100644
--- a/src/agent/src/rpc.rs
+++ b/src/agent/src/rpc.rs
@@ -40,7 +40,7 @@ use nix::sys::stat;
use nix::unistd::{self, Pid};
use rustjail::process::ProcessOperations;
-use crate::device::{add_devices, rescan_pci_bus, update_device_cgroup};
+use crate::device::{add_devices, update_device_cgroup};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::{add_storages, remove_mounts, BareMount, STORAGE_HANDLER_LIST};
@@ -123,7 +123,9 @@ impl AgentService {
// re-scan PCI bus
// looking for hidden devices
- rescan_pci_bus().context("Could not rescan PCI bus")?;
+ // FIXME: Comment out this code temporarily, because once the PCIBus is scanned,
+ // the device hot-plug event is lost
+ // rescan_pci_bus().context("Could not rescan PCI bus")?;
// Some devices need some extra processing (the ones invoked with
// --device for instance), and that's what this call is doing. It
@@ -797,6 +799,34 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
Ok(interface)
}
+    /// ttrpc handler for `UpdateInterfaceHwAddrByName`.
+    ///
+    /// Fails with `INVALID_ARGUMENT` when the request has no interface and
+    /// with `INTERNAL` when the netlink update fails; on success the
+    /// interface from the request is returned.
+    async fn update_interface_hw_addr_by_name(
+        &self,
+        _ctx: &TtrpcContext,
+        req: protocols::agent::UpdateInterfaceHwAddrByNameRequest,
+    ) -> ttrpc::Result<Interface> {
+        let iface = req.interface.into_option().ok_or_else(|| {
+            ttrpc_error(
+                ttrpc::Code::INVALID_ARGUMENT,
+                "empty update interface request".to_string(),
+            )
+        })?;
+
+        {
+            // Hold the sandbox lock only for the duration of the netlink update.
+            let mut sandbox = self.sandbox.lock().await;
+            sandbox
+                .rtnl
+                .update_interface_hw_addr_by_name(&iface)
+                .await
+                .map_err(|e| {
+                    ttrpc_error(
+                        ttrpc::Code::INTERNAL,
+                        format!("update interface hw addr: {:?}", e),
+                    )
+                })?;
+        }
+
+        Ok(iface)
+    }
+
async fn update_routes(
&self,
_ctx: &TtrpcContext,
@@ -1670,6 +1700,23 @@ mod tests {
assert!(result.is_err(), "expected update interface to fail");
}
+    /// A default (empty) request carries no interface, so the handler must
+    /// return an error.
+    #[tokio::test]
+    async fn test_update_interface_hw_addr_by_name() {
+        let logger = slog::Logger::root(slog::Discard, o!());
+        let service = Box::new(AgentService {
+            sandbox: Arc::new(Mutex::new(Sandbox::new(&logger).unwrap())),
+        });
+
+        let empty_req = protocols::agent::UpdateInterfaceHwAddrByNameRequest::default();
+        let ctx = mk_ttrpc_context();
+
+        let outcome = service
+            .update_interface_hw_addr_by_name(&ctx, empty_req)
+            .await;
+
+        assert!(outcome.is_err(), "expected update interface to fail");
+    }
+
#[tokio::test]
async fn test_update_routes() {
let logger = slog::Logger::root(slog::Discard, o!());
--
2.21.1 (Apple Git-122.3)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,542 @@
From 950c0db14a9a9baccefd83e87893d7f40c2bd13d Mon Sep 17 00:00:00 2001
From: Wei Gao <gaowei66@huawei.com>
Date: Mon, 9 Aug 2021 14:47:19 +0800
Subject: [PATCH 4/6] configuration: add configuration generator for hypervisor
type stratovirt.
Signed-off-by: Wei Gao <gaowei66@huawei.com>
---
src/runtime/.gitignore | 1 +
src/runtime/Makefile | 40 +-
src/runtime/arch/amd64-options.mk | 3 +
src/runtime/arch/arm64-options.mk | 3 +
.../config/configuration-stratovirt.toml.in | 356 ++++++++++++++++++
5 files changed, 402 insertions(+), 1 deletion(-)
create mode 100644 src/runtime/cli/config/configuration-stratovirt.toml.in
diff --git a/src/runtime/.gitignore b/src/runtime/.gitignore
index 52b9e4e5..0a630a07 100644
--- a/src/runtime/.gitignore
+++ b/src/runtime/.gitignore
@@ -10,6 +10,7 @@ coverage.html
/cli/config/configuration-fc.toml
/cli/config/configuration-qemu.toml
/cli/config/configuration-clh.toml
+/cli/config/configuration-stratovirt.toml
/cli/config-generated.go
/cli/containerd-shim-kata-v2/config-generated.go
/cli/coverage.html
diff --git a/src/runtime/Makefile b/src/runtime/Makefile
index 4a69c05c..ea2cd296 100644
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -74,6 +74,7 @@ QEMUBINDIR := $(PREFIXDEPS)/bin
CLHBINDIR := $(PREFIXDEPS)/bin
FCBINDIR := $(PREFIXDEPS)/bin
ACRNBINDIR := $(PREFIXDEPS)/bin
+STRATOVIRTBINDIR := $(PREFIXDEPS)/bin
SYSCONFDIR := /etc
LOCALSTATEDIR := /var
@@ -93,6 +94,7 @@ GENERATED_VARS = \
CONFIG_QEMU_IN \
CONFIG_CLH_IN \
CONFIG_FC_IN \
+ CONFIG_STRATOVIRT_IN \
$(USER_VARS)
SCRIPTS += $(COLLECT_SCRIPT)
SCRIPTS_DIR := $(BINDIR)
@@ -116,12 +118,13 @@ HYPERVISOR_ACRN = acrn
HYPERVISOR_FC = firecracker
HYPERVISOR_QEMU = qemu
HYPERVISOR_CLH = cloud-hypervisor
+HYPERVISOR_STRATOVIRT = stratovirt
# Determines which hypervisor is specified in $(CONFIG_FILE).
DEFAULT_HYPERVISOR ?= $(HYPERVISOR_QEMU)
# List of hypervisors this build system can generate configuration for.
-HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH)
+HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) $(HYPERVISOR_STRATOVIRT)
QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]
@@ -141,6 +144,9 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"]
ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD)
ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]
+STRATOVIRTPATH = $(STRATOVIRTBINDIR)/$(STRATOVIRTCMD)
+STRATOVIRTVALIDHYPERVISORPATHS := [\"$(STRATOVIRTPATH)\"]
+
NETMONCMD := $(BIN_PREFIX)-netmon
NETMONPATH := $(PKGLIBEXECDIR)/$(NETMONCMD)
@@ -267,6 +273,29 @@ ifneq (,$(CLHCMD))
KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH)
endif
+ifneq (,$(STRATOVIRTCMD))
+ KNOWN_HYPERVISORS += $(HYPERVISOR_STRATOVIRT)
+
+ CONFIG_FILE_STRATOVIRT = configuration-stratovirt.toml
+ CONFIG_STRATOVIRT = $(CLI_DIR)/config/$(CONFIG_FILE_STRATOVIRT)
+ CONFIG_STRATOVIRT_IN = $(CONFIG_STRATOVIRT).in
+
+ CONFIG_PATH_STRATOVIRT = $(abspath $(CONFDIR)/$(CONFIG_FILE_STRATOVIRT))
+ CONFIG_PATHS += $(CONFIG_PATH_STRATOVIRT)
+
+ SYSCONFIG_STRATOVIRT = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_STRATOVIRT))
+ SYSCONFIG_PATHS += $(SYSCONFIG_STRATOVIRT)
+
+ CONFIGS += $(CONFIG_STRATOVIRT)
+
+ # stratovirt-specific options (all should be suffixed by "_STRATOVIRT")
+ DEFBLOCKSTORAGEDRIVER_STRATOVIRT := virtio-mmio
+ DEFNETWORKMODEL_STRATOVIRT := none
+ KENRELTYPE_STRATOVIRT = uncompressed
+ KERNEL_NAME_STRATOVIRT = $(call MAKE_KERNEL_NAME,$(KENRELTYPE_STRATOVIRT))
+ KERNELPATH_STRATOVIRT = $(KERNELDIR)/$(KERNEL_NAME_STRATOVIRT)
+endif
+
ifneq (,$(FCCMD))
KNOWN_HYPERVISORS += $(HYPERVISOR_FC)
@@ -363,6 +392,7 @@ USER_VARS += BINDIR
USER_VARS += CONFIG_ACRN_IN
USER_VARS += CONFIG_CLH_IN
USER_VARS += CONFIG_FC_IN
+USER_VARS += CONFIG_STRATOVIRT_IN
USER_VARS += CONFIG_PATH
USER_VARS += CONFIG_QEMU_IN
USER_VARS += DESTDIR
@@ -382,6 +412,8 @@ USER_VARS += FCPATH
USER_VARS += FCVALIDHYPERVISORPATHS
USER_VARS += FCJAILERPATH
USER_VARS += FCVALIDJAILERPATHS
+USER_VARS += STRATOVIRTPATH
+USER_VARS += STRATOVIRTVALIDHYPERVISORPATHS
USER_VARS += SYSCONFIG
USER_VARS += IMAGENAME
USER_VARS += IMAGEPATH
@@ -395,6 +427,7 @@ USER_VARS += KERNELPATH_ACRN
USER_VARS += KERNELPATH
USER_VARS += KERNELPATH_CLH
USER_VARS += KERNELPATH_FC
+USER_VARS += KERNELPATH_STRATOVIRT
USER_VARS += KERNELVIRTIOFSPATH
USER_VARS += FIRMWAREPATH
USER_VARS += MACHINEACCELERATORS
@@ -434,12 +467,14 @@ USER_VARS += DEFNETWORKMODEL_ACRN
USER_VARS += DEFNETWORKMODEL_CLH
USER_VARS += DEFNETWORKMODEL_FC
USER_VARS += DEFNETWORKMODEL_QEMU
+USER_VARS += DEFNETWORKMODEL_STRATOVIRT
USER_VARS += DEFDISABLEGUESTSECCOMP
USER_VARS += DEFAULTEXPFEATURES
USER_VARS += DEFDISABLEBLOCK
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
USER_VARS += DEFBLOCKSTORAGEDRIVER_FC
USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
+USER_VARS += DEFBLOCKSTORAGEDRIVER_STRATOVIRT
USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
USER_VARS += DEFVIRTIOFSDAEMON
USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS
@@ -773,6 +808,9 @@ ifneq (,$(findstring $(HYPERVISOR_FC),$(KNOWN_HYPERVISORS)))
endif
ifneq (,$(findstring $(HYPERVISOR_ACRN),$(KNOWN_HYPERVISORS)))
@printf "\t$(HYPERVISOR_ACRN) hypervisor path (ACRNPATH) : %s\n" $(abspath $(ACRNPATH))
+endif
+ifneq (,$(findstring $(HYPERVISOR_STRATOVIRT),$(KNOWN_HYPERVISORS)))
+ @printf "\t$(HYPERVISOR_STRATOVIRT) hypervisor path (STRATOVIRTPATH) : %s\n" $(abspath $(STRATOVIRTPATH))
endif
@printf "\tassets path (PKGDATADIR) : %s\n" $(abspath $(PKGDATADIR))
@printf "\tshim path (PKGLIBEXECDIR) : %s\n" $(abspath $(PKGLIBEXECDIR))
diff --git a/src/runtime/arch/amd64-options.mk b/src/runtime/arch/amd64-options.mk
index 83af8cc0..ff2af9e6 100644
--- a/src/runtime/arch/amd64-options.mk
+++ b/src/runtime/arch/amd64-options.mk
@@ -23,3 +23,6 @@ ACRNCTLCMD := acrnctl
# cloud-hypervisor binary name
CLHCMD := cloud-hypervisor
+
+# stratovirt binary name
+STRATOVIRTCMD := stratovirt
\ No newline at end of file
diff --git a/src/runtime/arch/arm64-options.mk b/src/runtime/arch/arm64-options.mk
index ad5ef5d4..2ad3f657 100644
--- a/src/runtime/arch/arm64-options.mk
+++ b/src/runtime/arch/arm64-options.mk
@@ -19,3 +19,6 @@ FCJAILERCMD := jailer
# cloud-hypervisor binary name
CLHCMD := cloud-hypervisor
+
+# stratovirt binary name
+STRATOVIRTCMD := stratovirt
\ No newline at end of file
diff --git a/src/runtime/cli/config/configuration-stratovirt.toml.in b/src/runtime/cli/config/configuration-stratovirt.toml.in
new file mode 100644
index 00000000..5c83c3c9
--- /dev/null
+++ b/src/runtime/cli/config/configuration-stratovirt.toml.in
@@ -0,0 +1,356 @@
+# Copyright (c) 2017-2019 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# XXX: WARNING: this file is auto-generated.
+# XXX:
+# XXX: Source file: "@CONFIG_STRATOVIRT_IN@"
+# XXX: Project:
+# XXX: Name: @PROJECT_NAME@
+# XXX: Type: @PROJECT_TYPE@
+
+[hypervisor.stratovirt]
+path = "@STRATOVIRTPATH@"
+kernel = "@KERNELPATH_STRATOVIRT@"
+image = "@IMAGEPATH@"
+
+# List of valid annotation names for the hypervisor
+# Each member of the list is a regular expression, which is the base name
+# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
+enable_annotations = @DEFENABLEANNOTATIONS@
+
+# List of valid annotations values for the hypervisor
+# Each member of the list is a path pattern as described by glob(3).
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @STRATOVIRTVALIDHYPERVISORPATHS@
+valid_hypervisor_paths = @STRATOVIRTVALIDHYPERVISORPATHS@
+
+# Optional space-separated list of options to pass to the guest kernel.
+# For example, use `kernel_params = "vsyscall=emulate"` if you are having
+# trouble running pre-2.15 glibc.
+#
+# WARNING: - any parameter specified here will take priority over the default
+# parameter value of the same name used to start the virtual machine.
+# Do not set values here unless you understand the impact of doing so as you
+# may stop the virtual machine from booting.
+# To see the list of default parameters, enable hypervisor debug, create a
+# container and look for 'default-kernel-parameters' log entries.
+kernel_params = "@KERNELPARAMS@"
+
+# Default number of vCPUs per SB/VM:
+# unspecified or 0 --> will be set to @DEFVCPUS@
+# < 0 --> will be set to the actual number of physical cores
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores
+default_vcpus = 1
+
+# Default maximum number of vCPUs per SB/VM:
+# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
+# the actual number of physical cores is greater than it.
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
+# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
+# can be added to a SB/VM, but the memory footprint will be big. Another example, with
+# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
+# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
+# unless you know what you are doing.
+# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
+default_maxvcpus = @DEFMAXVCPUS@
+
+# Bridges can be used to hot plug devices.
+# Limitations:
+# * Currently only pci bridges are supported
+# * Up to 30 devices per bridge can be hot plugged.
+# * Up to 5 PCI bridges can be cold plugged per VM.
+# This limitation could be a bug in the kernel
+# Default number of bridges per SB/VM:
+# unspecified or 0 --> will be set to @DEFBRIDGES@
+# > 1 <= 5 --> will be set to the specified number
+# > 5 --> will be set to 5
+default_bridges = @DEFBRIDGES@
+
+# Default memory size in MiB for SB/VM.
+# If unspecified then it will be set to @DEFMEMSZ@ MiB.
+default_memory = @DEFMEMSZ@
+#
+# Default memory slots per SB/VM.
+# If unspecified then it will be set to @DEFMEMSLOTS@.
+# This determines how many times memory can be hot-added to the sandbox/VM.
+# memory_slots = @DEFMEMSLOTS@
+
+# The size in MiB will be added to the max memory of the hypervisor.
+# It is the memory address space for the NVDIMM device.
+# If set block storage driver (block_device_driver) to "nvdimm",
+# should set memory_offset to the size of block device.
+# Default 0
+# memory_offset = 0
+
+# Disable block device from being used for a container's rootfs.
+# In case of a storage driver like devicemapper where a container's
+# root file system is backed by a block device, the block device is passed
+# directly to the hypervisor for performance reasons.
+# This flag prevents the block device from being passed to the hypervisor,
+# 9pfs is used instead to pass the rootfs.
+disable_block_device_use = @DEFDISABLEBLOCK@
+
+# Block storage driver to be used for the hypervisor in case the container
+# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
+# or nvdimm.
+block_device_driver = "@DEFBLOCKSTORAGEDRIVER_STRATOVIRT@"
+
+# Specifies cache-related options will be set to block devices or not.
+# Default false
+#block_device_cache_set = true
+
+# Specifies cache-related options for block devices.
+# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
+# Default false
+# block_device_cache_direct = true
+
+# Specifies cache-related options for block devices.
+# Denotes whether flush requests for the device are ignored.
+# Default false
+# block_device_cache_noflush = true
+
+# Enable pre allocation of VM RAM, default false
+# Enabling this will result in lower container density
+# as all of the memory will be allocated and locked
+# This is useful when you want to reserve all the memory
+# upfront or in the cases where you want memory latencies
+# to be very predictable
+# Default false
+# enable_mem_prealloc = true
+
+# Enable huge pages for VM RAM, default false
+# Enabling this will result in the VM memory
+# being allocated using huge pages.
+# This is useful when you want to use vhost-user network
+# stacks within the container. This will automatically
+# result in memory pre allocation
+# enable_hugepages = true
+
+# Enable vIOMMU, default false
+# Enabling this will result in the VM having a vIOMMU device
+# This will also add the following options to the kernel's
+# command line: intel_iommu=on,iommu=pt
+# enable_iommu = true
+
+# Enable swap of vm memory. Default false.
+# The behaviour is undefined if mem_prealloc is also set to true
+# enable_swap = true
+
+# This option changes the default hypervisor and kernel parameters
+# to enable debug output where available.
+#
+# Default false
+# enable_debug = true
+
+# Disable the customizations done in the runtime when it detects
+# that it is running on top of a VMM. This will result in the runtime
+# behaving as it would when running on bare metal.
+#
+# disable_nesting_checks = true
+
+# This is the msize used for 9p shares. It is the number of bytes
+# used for 9p packet payload.
+# msize_9p =
+
+# VFIO devices are hotplugged on a bridge by default.
+# Enable hotplugging on root bus. This may be required for devices with
+# a large PCI bar, as this is a current limitation with hotplugging on
+# a bridge.
+# Default false
+# hotplug_vfio_on_root_bus = true
+
+#
+# Default entropy source.
+# The path to a host source of entropy (including a real hardware RNG)
+# /dev/urandom and /dev/random are two main options.
+# Be aware that /dev/random is a blocking source of entropy. If the host
+# runs out of entropy, the VM's boot time will increase, leading to startup
+# timeouts.
+# The source of entropy /dev/urandom is non-blocking and provides a
+# generally acceptable source of entropy. It should work well for pretty much
+# all practical purposes.
+# entropy_source= ""
+
+# List of valid annotations values for entropy_source
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
+valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
+
+# Path to OCI hook binaries in the *guest rootfs*.
+# This does not affect host-side hooks which must instead be added to
+# the OCI spec passed to the runtime.
+#
+# You can create a rootfs with hooks by customizing the osbuilder scripts:
+# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
+#
+# Hooks must be stored in a subdirectory of guest_hook_path according to their
+# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
+# The agent will scan these directories for executable files and add them, in
+# lexicographical order, to the lifecycle of the guest container.
+# Hooks are executed in the runtime namespace of the guest. See the official documentation:
+# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
+# Warnings will be logged if any error is encountered while scanning for hooks,
+# but it will not abort container execution.
+# guest_hook_path = "/usr/share/oci/hooks"
+
+[factory]
+# VM templating support. Once enabled, new VMs are created from template
+# using vm cloning. They will share the same initial kernel, initramfs and
+# agent memory by mapping it readonly. It helps speeding up new container
+# creation and saves a lot of memory if there are many kata containers running
+# on the same host.
+#
+# When disabled, new VMs are created from scratch.
+#
+# Note: Requires "initrd=" to be set ("image=" is not supported).
+#
+# Default false
+#enable_template = true
+
+[agent.@PROJECT_TYPE@]
+# If enabled, make the agent display debug-level messages.
+# (default: disabled)
+#enable_debug = true
+
+# Enable agent tracing.
+#
+# If enabled, the default trace mode is "dynamic" and the
+# default trace type is "isolated". The trace mode and type are set
+# explicitly with the `trace_type=` and `trace_mode=` options.
+#
+# Notes:
+#
+# - Tracing is ONLY enabled when `enable_tracing` is set: explicitly
+# setting `trace_mode=` and/or `trace_type=` without setting `enable_tracing`
+# will NOT activate agent tracing.
+#
+# - See https://github.com/kata-containers/agent/blob/master/TRACING.md for
+# full details.
+#
+# (default: disabled)
+#enable_tracing = true
+#
+#trace_mode = "dynamic"
+#trace_type = "isolated"
+
+# Comma separated list of kernel modules and their parameters.
+# These modules will be loaded in the guest kernel using modprobe(8).
+# The following example can be used to load two kernel modules with parameters
+# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
+# The first word is considered as the module name and the rest as its parameters.
+# Container will not be started when:
+# * A kernel module is specified and the modprobe command is not installed in the guest
+# or it fails loading the module.
+# * The module is not available in the guest or it doesn't meet the guest kernel
+# requirements, like architecture and version.
+#
+kernel_modules=[]
+
+# Enable debug console.
+
+# If enabled, user can connect guest OS running inside hypervisor
+# through "kata-runtime exec <sandbox-id>" command
+
+#debug_console_enabled = true
+
+# Agent connection dialing timeout value in seconds
+# (default: 30)
+#dial_timeout = 30
+
+[netmon]
+# If enabled, the network monitoring process gets started when the
+# sandbox is created. This allows for the detection of some additional
+# network being added to the existing network namespace, after the
+# sandbox has been created.
+# (default: disabled)
+#enable_netmon = true
+
+# Specify the path to the netmon binary.
+path = "@NETMONPATH@"
+
+# If enabled, netmon messages will be sent to the system log
+# (default: disabled)
+#enable_debug = true
+
+[runtime]
+# If enabled, the runtime will log additional debug messages to the
+# system log
+# (default: disabled)
+#enable_debug = true
+#
+# Internetworking model
+# Determines how the VM should be connected to the
+# container network interface
+# Options:
+#
+# - macvtap
+# Used when the Container network interface can be bridged using
+# macvtap.
+#
+# - none
+# Used when the network is customized. Only creates a tap device. No veth pair.
+#
+# - tcfilter
+# Uses tc filter rules to redirect traffic from the network interface
+# provided by plugin to a tap interface connected to the VM.
+#
+internetworking_model="@DEFNETWORKMODEL_STRATOVIRT@"
+
+# disable guest seccomp
+# Determines whether container seccomp profiles are passed to the virtual
+# machine and applied by the kata agent. If set to true, seccomp is not applied
+# within the guest
+# (default: true)
+disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+
+# If enabled, the runtime will create opentracing.io traces and spans.
+# (See https://www.jaegertracing.io/docs/getting-started).
+# (default: disabled)
+#enable_tracing = true
+
+# Set the full url to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+#jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+#jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+#jaeger_password = ""
+
+# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
+# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
+# `disable_new_netns` conflicts with `enable_netmon`
+# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
+# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
+# (like OVS) directly.
+# If you are using docker, `disable_new_netns` only works with `docker run --net=none`
+# (default: false)
+#disable_new_netns = true
+
+# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
+# The container cgroups in the host are not created, just one single cgroup per sandbox.
+# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
+# The sandbox cgroup is constrained if there is no container type annotation.
+# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+
+# Enabled experimental feature list, format: ["a", "b"].
+# Experimental features are features not stable enough for production,
+# they may break compatibility, and are prepared for a big version bump.
+# Supported experimental features:
+# (default: [])
+experimental=@DEFAULTEXPFEATURES@
+
+# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
+# (default: false)
+# enable_pprof = true
--
2.21.1 (Apple Git-122.3)

View File

@ -0,0 +1,655 @@
From 45c8e108497eb93d69afd38e6281b837e65cf3ec Mon Sep 17 00:00:00 2001
From: Wei Gao <gaowei66@huawei.com>
Date: Mon, 9 Aug 2021 14:55:41 +0800
Subject: [PATCH 5/6] runtime: add the secure component "ozone" support for
hypervisor type stratovirt.
Signed-off-by: Wei Gao <gaowei66@huawei.com>
---
src/runtime/Makefile | 4 +
src/runtime/arch/amd64-options.mk | 4 +-
src/runtime/arch/arm64-options.mk | 4 +-
.../config/configuration-stratovirt.toml.in | 10 +
.../pkg/katautils/config-settings.go.in | 1 +
src/runtime/pkg/katautils/config.go | 18 +
src/runtime/virtcontainers/hypervisor.go | 3 +
src/runtime/virtcontainers/persist.go | 1 +
.../virtcontainers/persist/api/config.go | 3 +
src/runtime/virtcontainers/stratovirt.go | 309 ++++++++++++++----
10 files changed, 292 insertions(+), 65 deletions(-)
diff --git a/src/runtime/Makefile b/src/runtime/Makefile
index ea2cd296..745bcc10 100644
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@@ -146,6 +146,8 @@ ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"]
STRATOVIRTPATH = $(STRATOVIRTBINDIR)/$(STRATOVIRTCMD)
STRATOVIRTVALIDHYPERVISORPATHS := [\"$(STRATOVIRTPATH)\"]
+STRATOVIRTOZONEPATH = $(STRATOVIRTBINDIR)/$(STRATOVIRTOZONECMD)
+STRATOVIRTVALIDOZONEPATHS = [\"$(STRATOVIRTOZONEPATH)\"]
NETMONCMD := $(BIN_PREFIX)-netmon
NETMONPATH := $(PKGLIBEXECDIR)/$(NETMONCMD)
@@ -414,6 +416,8 @@ USER_VARS += FCJAILERPATH
USER_VARS += FCVALIDJAILERPATHS
USER_VARS += STRATOVIRTPATH
USER_VARS += STRATOVIRTVALIDHYPERVISORPATHS
+USER_VARS += STRATOVIRTOZONEPATH
+USER_VARS += STRATOVIRTVALIDOZONEPATHS
USER_VARS += SYSCONFIG
USER_VARS += IMAGENAME
USER_VARS += IMAGEPATH
diff --git a/src/runtime/arch/amd64-options.mk b/src/runtime/arch/amd64-options.mk
index ff2af9e6..4c6c329a 100644
--- a/src/runtime/arch/amd64-options.mk
+++ b/src/runtime/arch/amd64-options.mk
@@ -25,4 +25,6 @@ ACRNCTLCMD := acrnctl
CLHCMD := cloud-hypervisor
# stratovirt binary name
-STRATOVIRTCMD := stratovirt
\ No newline at end of file
+STRATOVIRTCMD := stratovirt
+# stratovirt's ozone binary name
+STRATOVIRTOZONECMD := ozone
\ No newline at end of file
diff --git a/src/runtime/arch/arm64-options.mk b/src/runtime/arch/arm64-options.mk
index 2ad3f657..5dfa2c80 100644
--- a/src/runtime/arch/arm64-options.mk
+++ b/src/runtime/arch/arm64-options.mk
@@ -21,4 +21,6 @@ FCJAILERCMD := jailer
CLHCMD := cloud-hypervisor
# stratovirt binary name
-STRATOVIRTCMD := stratovirt
\ No newline at end of file
+STRATOVIRTCMD := stratovirt
+# stratovirt's ozone binary name
+STRATOVIRTOZONECMD := ozone
\ No newline at end of file
diff --git a/src/runtime/cli/config/configuration-stratovirt.toml.in b/src/runtime/cli/config/configuration-stratovirt.toml.in
index 5c83c3c9..b557b71f 100644
--- a/src/runtime/cli/config/configuration-stratovirt.toml.in
+++ b/src/runtime/cli/config/configuration-stratovirt.toml.in
@@ -26,6 +26,16 @@ enable_annotations = @DEFENABLEANNOTATIONS@
# Your distribution recommends: @STRATOVIRTVALIDHYPERVISORPATHS@
valid_hypervisor_paths = @STRATOVIRTVALIDHYPERVISORPATHS@
+# Path for the ozone specific to stratovirt
+# If the ozone path is set, stratovirt will be launched in
+# ozone secure environment. It is disabled by default.
+# ozone_path = "@STRATOVIRTOZONEPATH@"
+
+# List of valid ozone path values for the hypervisor
+# Each member of the list can be a regular expression
+# The default if not set is empty (all annotations rejected.)
+# valid_jailer_paths = @STRATOVIRTVALIDOZONEPATHS@
+
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
# trouble running pre-2.15 glibc.
diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in
index 7cd9138b..c168c608 100644
--- a/src/runtime/pkg/katautils/config-settings.go.in
+++ b/src/runtime/pkg/katautils/config-settings.go.in
@@ -17,6 +17,7 @@ var defaultInitrdPath = "/usr/share/kata-containers/kata-containers-initrd.img"
var defaultFirmwarePath = ""
var defaultMachineAccelerators = ""
var defaultCPUFeatures = ""
+var defaultOzonePath = "/usr/bin/ozone"
var systemdUnitName = "kata-containers.target"
const defaultKernelParams = ""
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go
index f94ac4fd..828c2a43 100644
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -92,6 +92,7 @@ type hypervisor struct {
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
+ OzonePath string `toml:"ozone_path"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
@@ -452,6 +453,16 @@ func (h hypervisor) getInitrdAndImage() (initrd string, image string, err error)
return
}
+func (h hypervisor) ozonePath() (string, error) {
+ p := h.OzonePath
+
+ if h.OzonePath == "" {
+ return "", nil
+ }
+
+ return ResolvePath(p)
+}
+
func (h hypervisor) getRxRateLimiterCfg() uint64 {
return h.RxRateLimiterMaxRate
}
@@ -877,6 +888,11 @@ func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
return vc.HypervisorConfig{}, err
}
+ ozone, err := h.ozonePath()
+ if err != nil {
+ return vc.HypervisorConfig{}, err
+ }
+
kernel, err := h.kernel()
if err != nil {
return vc.HypervisorConfig{}, err
@@ -925,6 +941,7 @@ func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
+ OzonePath: ozone,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
HypervisorMachineType: machineType,
NumVCPUs: h.defaultVCPUs(),
@@ -1155,6 +1172,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate,
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
+ OzonePath: defaultOzonePath,
}
}
diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go
index 615baa80..04e14b4e 100644
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@@ -302,6 +302,9 @@ type HypervisorConfig struct {
// JailerPathList is the list of jailer paths names allowed in annotations
JailerPathList []string
+ // OzonePath is the ozone executable host path.
+ OzonePath string
+
// BlockDeviceDriver specifies the driver to be used for block device
// either VirtioSCSI or VirtioBlock with the default driver being defaultBlockDriver
BlockDeviceDriver string
diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go
index 203495e8..ae499c97 100644
--- a/src/runtime/virtcontainers/persist.go
+++ b/src/runtime/virtcontainers/persist.go
@@ -219,6 +219,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
HypervisorCtlPathList: sconfig.HypervisorConfig.HypervisorCtlPathList,
JailerPath: sconfig.HypervisorConfig.JailerPath,
JailerPathList: sconfig.HypervisorConfig.JailerPathList,
+ OzonePath: sconfig.HypervisorConfig.OzonePath,
BlockDeviceDriver: sconfig.HypervisorConfig.BlockDeviceDriver,
HypervisorMachineType: sconfig.HypervisorConfig.HypervisorMachineType,
MemoryPath: sconfig.HypervisorConfig.MemoryPath,
diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go
index 3bd5567d..88903723 100644
--- a/src/runtime/virtcontainers/persist/api/config.go
+++ b/src/runtime/virtcontainers/persist/api/config.go
@@ -76,6 +76,9 @@ type HypervisorConfig struct {
// JailerPathList is the list of jailer paths names allowed in annotations
JailerPathList []string
+ // OzonePath is the ozone executable host path.
+ OzonePath string
+
// BlockDeviceDriver specifies the driver to be used for block device
// either VirtioSCSI or VirtioBlock with the default driver being defaultBlockDriver
BlockDeviceDriver string
diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go
index 0f473e31..47daa817 100644
--- a/src/runtime/virtcontainers/stratovirt.go
+++ b/src/runtime/virtcontainers/stratovirt.go
@@ -3,6 +3,7 @@ package virtcontainers
import (
"context"
"fmt"
+ "io/ioutil"
"os"
"os/exec"
"path/filepath"
@@ -24,10 +25,15 @@ import (
otelTrace "go.opentelemetry.io/otel/trace"
)
-const defaultDummyMac = "22:33:44:aa:bb:"
-const mmioBlkCount = 4
-const mmioNetCount = 2
-const randomDevice = "/dev/urandom"
+const (
+ apiSocket = "qmp.socket"
+ debugSocket = "console.socket"
+ ozoneBaseDir = "/srv/ozone/stratovirt"
+ defaultDummyMac = "22:33:44:aa:bb:"
+ mmioBlkCount = 4
+ mmioNetCount = 2
+ randomDevice = "/dev/urandom"
+)
type stratovirtDev struct {
dev interface{}
@@ -40,10 +46,19 @@ type stratovirt struct {
sandbox *Sandbox
store persistapi.PersistDriver
config HypervisorConfig
+ rootfsPath string
+ kernelPath string
pid int
consolePath string
socketPath string
+ netNSPath string
qmpMonitorCh qmpChannel
+ ozoneRoot string
+ ozoneRes []string
+ useOzone bool
+ useImage bool
+ pidfile string
+ logfile string
devices []stratovirtDev
HotpluggedVCPUs []CPUDevice
mmioBlkSlots [mmioBlkCount]bool
@@ -66,10 +81,10 @@ func (s *stratovirt) trace(parent context.Context, name string) (otelTrace.Span,
return span, ctx
}
-func (s *stratovirt) getKernelCmdLine(useImage bool) string {
+func (s *stratovirt) getKernelCmdLine() string {
var params []string
- if useImage {
+ if s.useImage {
params = append(params, "root=/dev/vda")
}
@@ -100,14 +115,49 @@ func (s *stratovirt) createSandbox(ctx context.Context, id string, networkNS Net
s.id = id
s.config = *hypervisorConfig
-
- s.socketPath = filepath.Join(s.store.RunVMStoragePath(), id, "qmp.sock")
- s.consolePath = filepath.Join(s.store.RunVMStoragePath(), id, "console.sock")
+ if s.config.OzonePath == "" {
+ s.useOzone = false
+ s.pidfile = filepath.Join(s.store.RunVMStoragePath(), s.id, "pid")
+ s.logfile = filepath.Join(s.store.RunVMStoragePath(), s.id, "/stratovirt.log")
+ s.socketPath = filepath.Join(s.store.RunVMStoragePath(), id, apiSocket)
+ s.consolePath = filepath.Join(s.store.RunVMStoragePath(), id, debugSocket)
+ } else {
+ s.useOzone = true
+ s.ozoneRoot = filepath.Join(ozoneBaseDir, s.id)
+ s.pidfile = filepath.Join(s.ozoneRoot, "pid")
+ s.logfile = filepath.Join(s.ozoneRoot, "stratovirt.log")
+ s.socketPath = filepath.Join(s.ozoneRoot, apiSocket)
+ s.consolePath = filepath.Join(s.ozoneRoot, debugSocket)
+ }
+ s.netNSPath = networkNS.NetNsPath
s.qmpMonitorCh = qmpChannel{
ctx: s.ctx,
path: s.socketPath,
}
+ if kernelPath, err := s.config.KernelAssetPath(); err == nil {
+ s.kernelPath = kernelPath
+ s.ozoneRes = append(s.ozoneRes, s.kernelPath)
+ }
+
+ initrdPath, err := s.config.InitrdAssetPath()
+ if err != nil {
+ return err
+ }
+
+ if initrdPath == "" {
+ imagePath, err := s.config.ImageAssetPath()
+ if err != nil {
+ return err
+ }
+ s.useImage = true
+ s.rootfsPath = imagePath
+ } else {
+ s.useImage = false
+ s.rootfsPath = initrdPath
+ }
+ s.ozoneRes = append(s.ozoneRes, s.rootfsPath)
+
return nil
}
@@ -134,48 +184,43 @@ func (s *stratovirt) waitSandBoxStarted(timeout int) error {
return nil
}
-func (s *stratovirt) startSandbox(ctx context.Context, timeout int) error {
- span, _ := s.trace(ctx, "startSandbox")
- defer span.End()
-
+func (s *stratovirt) createbaseParams() []string {
var params []string
- var use_image bool
+
params = append(params, "-name", fmt.Sprintf("sandbox-%s", s.id))
- params = append(params, "-qmp", fmt.Sprintf("unix:%s,server,nowait", s.socketPath))
+ params = append(params, "-append", s.getKernelCmdLine())
+ params = append(params, "-smp", fmt.Sprintf("%d", s.config.NumVCPUs))
+ params = append(params, "-m", fmt.Sprintf("%d", uint64(s.config.MemorySize)))
+ params = append(params, "-device", "virtio-serial-device")
+ params = append(params, "-device", "virtconsole,chardev=charconsole0,id=virtioconsole0")
+ params = append(params, "-object", fmt.Sprintf("rng-random,id=objrng0,filename=%s", randomDevice))
+ params = append(params, "-device", "virtio-rng-device,rng=objrng0")
- if kernelPath, err := s.config.KernelAssetPath(); err == nil {
- params = append(params, "-kernel", kernelPath)
- }
+ // daemonize
+ params = append(params, "-daemonize")
- initrdPath, err := s.config.InitrdAssetPath()
- if err != nil {
- return err
+ return params
+}
+
+func (s *stratovirt) createOzoneParams(params []string) ([]string, error) {
+ params = append(params, "-qmp", fmt.Sprintf("unix:%s,server,nowait", apiSocket))
+ params = append(params, "-chardev", fmt.Sprintf("socket,id=charconsole0,path=%s,server,nowait", debugSocket))
+ params = append(params, "-kernel", filepath.Base(s.kernelPath))
+ params = append(params, "-pidfile", filepath.Base(s.pidfile))
+
+ // append logfile only on debug
+ if s.config.Debug {
+ params = append(params, "-D", filepath.Base(s.logfile))
}
- if initrdPath == "" {
- imagePath, err := s.config.ImageAssetPath()
- if err != nil {
- return err
- }
- use_image = true
+ if s.useImage {
s.mmioBlkSlots[0] = true
params = append(params, "-device", "virtio-blk-device,drive=rootfs")
- params = append(params, "-drive", fmt.Sprintf("id=rootfs,file=%s,direct=off", imagePath))
+ params = append(params, "-drive", fmt.Sprintf("id=rootfs,file=%s,direct=off", filepath.Base(s.rootfsPath)))
} else {
- use_image = false
- params = append(params, "-initrd", initrdPath)
+ params = append(params, "-initrd", filepath.Base(s.rootfsPath))
}
- params = append(params, "-append", s.getKernelCmdLine(use_image))
- params = append(params, "-smp", fmt.Sprintf("%d", s.config.NumVCPUs))
- params = append(params, "-m", fmt.Sprintf("%d", uint64(s.config.MemorySize)))
- params = append(params, "-device", "virtio-serial-device")
- params = append(params, "-device", "virtconsole,chardev=charconsole0,id=virtioconsole0")
- params = append(params, "-object", fmt.Sprintf("rng-random,id=objrng0,filename=%s", randomDevice))
- params = append(params, "-device", "virtio-rng-device,rng=objrng0")
- params = append(params, "-chardev", fmt.Sprintf("socket,id=charconsole0,path=%s,server,nowait", s.consolePath))
- params = append(params, "-pidfile", filepath.Join(s.store.RunVMStoragePath(), s.id, "pid"))
-
// add devices to cmdline
for _, d := range s.devices {
switch v := d.dev.(type) {
@@ -188,8 +233,9 @@ func (s *stratovirt) startSandbox(ctx context.Context, timeout int) error {
case config.BlockDrive:
id := v.ID
path := v.File
- params = append(params, "-device", fmt.Sprintf("virtio-blk-device, drive=%s", id))
- params = append(params, "-drive", fmt.Sprintf("id=%s,file=%s", id, path))
+ s.ozoneRes = append(s.ozoneRes, path)
+ params = append(params, "-device", fmt.Sprintf("virtio-blk-device,drive=%s", id))
+ params = append(params, "-drive", fmt.Sprintf("id=%s,file=%s", id, filepath.Base(path)))
case types.VSock:
v.VhostFd.Close()
params = append(params, "-device", fmt.Sprintf("vhost-vsock-device,id=vsock-id,guest-cid=%d", v.ContextID))
@@ -198,42 +244,125 @@ func (s *stratovirt) startSandbox(ctx context.Context, timeout int) error {
}
}
- // daemonize
- params = append(params, "-daemonize")
+ return params, nil
+}
+
+func (s *stratovirt) createParams(params []string) ([]string, error) {
+ params = append(params, "-qmp", fmt.Sprintf("unix:%s,server,nowait", s.socketPath))
+ params = append(params, "-chardev", fmt.Sprintf("socket,id=charconsole0,path=%s,server,nowait", s.consolePath))
+ params = append(params, "-kernel", s.kernelPath)
+ params = append(params, "-pidfile", s.pidfile)
// append logfile only on debug
if s.config.Debug {
- dir := filepath.Join(s.store.RunVMStoragePath(), s.id)
- params = append(params, "-D", fmt.Sprintf("%s/stratovirt.log", dir))
+ params = append(params, "-D", s.logfile)
+ }
+
+ if s.useImage {
+ s.mmioBlkSlots[0] = true
+ params = append(params, "-device", "virtio-blk-device,drive=rootfs")
+ params = append(params, "-drive", fmt.Sprintf("id=rootfs,file=%s,direct=off", s.rootfsPath))
+ } else {
+ params = append(params, "-initrd", s.rootfsPath)
}
- dir := filepath.Join(s.store.RunVMStoragePath(), s.id)
- err = os.MkdirAll(dir, DirMode)
+ // add devices to cmdline
+ for _, d := range s.devices {
+ switch v := d.dev.(type) {
+ case Endpoint:
+ name := v.Name()
+ mac := v.HardwareAddr()
+ tapName := v.NetworkPair().TapInterface.TAPIface.Name
+ params = append(params, "-device", fmt.Sprintf("virtio-net-device,netdev=%s,id=%s,mac=%s", name, name, mac))
+ params = append(params, "-netdev", fmt.Sprintf("tap,id=%s,ifname=%s", name, tapName))
+ case config.BlockDrive:
+ id := v.ID
+ path := v.File
+ params = append(params, "-device", fmt.Sprintf("virtio-blk-device,drive=%s", id))
+ params = append(params, "-drive", fmt.Sprintf("id=%s,file=%s", id, path))
+ case types.VSock:
+ v.VhostFd.Close()
+ params = append(params, "-device", fmt.Sprintf("vhost-vsock-device,id=vsock-id,guest-cid=%d", v.ContextID))
+ default:
+ s.Logger().Error("Adding device type is unsupported")
+ }
+ }
+
+ return params, nil
+}
+
+func (s *stratovirt) startSandbox(ctx context.Context, timeout int) error {
+ span, _ := s.trace(ctx, "startSandbox")
+ defer span.End()
+
+ var err error
+ var cmd *exec.Cmd
+
+ params := s.createbaseParams()
+
+ stratovirtBinPath, err := s.config.HypervisorAssetPath()
if err != nil {
return err
}
- defer func() {
+
+ if s.useOzone {
+ var ozoneParams []string
+ extend_params, err := s.createOzoneParams(params)
if err != nil {
- if err := os.RemoveAll(dir); err != nil {
- s.Logger().WithError(err).Error("Fail to clean up vm dir %s", dir)
+ return err
+ }
+ ozoneParams = append(ozoneParams, "-exec-file", stratovirtBinPath)
+ ozoneParams = append(ozoneParams, "-name", s.id)
+ ozoneParams = append(ozoneParams, "-gid", "0")
+ ozoneParams = append(ozoneParams, "-uid", "0")
+ if s.netNSPath != "" {
+ ozoneParams = append(ozoneParams, "-netns", s.netNSPath)
+ }
+
+ ozoneParams = append(ozoneParams, "-source")
+ ozoneParams = append(ozoneParams, s.ozoneRes...)
+
+ defer func() {
+ if err != nil {
+ ozoneParams = append(ozoneParams, "-clean-resource")
+ cmd = exec.CommandContext(s.ctx, s.config.OzonePath, ozoneParams...)
+ if err := cmd.Run(); err != nil {
+ s.Logger().WithError(err).Error("Failed to clean up ozone dir %s", s.ozoneRoot)
+ }
}
+ }()
+
+ ozoneParams = append(ozoneParams, "--")
+ ozoneParams = append(ozoneParams, extend_params...)
+ cmd = exec.CommandContext(s.ctx, s.config.OzonePath, ozoneParams...)
+ s.Logger().Info("StratoVirt/Ozone start with params: ", cmd)
+ } else {
+ params, err = s.createParams(params)
+ if err != nil {
+ return err
}
- }()
- binPath, err := s.config.HypervisorAssetPath()
- if err != nil {
- s.Logger().WithField("Fail to get hypervisor bin path", err).Error()
- return err
- }
+ dir := filepath.Join(s.store.RunVMStoragePath(), s.id)
+ err = os.MkdirAll(dir, DirMode)
+ if err != nil {
+ return err
+ }
+ defer func() {
+ if err != nil {
+ if err := os.RemoveAll(dir); err != nil {
+ s.Logger().WithError(err).Error("Fail to clean up vm dir %s", dir)
+ }
+ }
+ }()
- cmd := exec.CommandContext(s.ctx, binPath, params...)
- s.Logger().Info("StratoVirt start with params: ", cmd)
+ cmd = exec.CommandContext(s.ctx, stratovirtBinPath, params...)
+ s.Logger().Info("StratoVirt start with params: ", cmd)
+ }
if err := cmd.Start(); err != nil {
s.Logger().WithField("Error starting hypervisor, please check the params", err).Error()
return err
}
- s.pid = cmd.Process.Pid
if err = s.waitSandBoxStarted(timeout); err != nil {
return err
@@ -420,6 +549,7 @@ func (s *stratovirt) hotplugNet(ctx context.Context, endpoint Endpoint, op opera
}
func (s *stratovirt) hotplugBlk(drive *config.BlockDrive, op operation) (err error) {
+ var filePath string
err = s.qmpSetup()
if err != nil {
return err
@@ -427,13 +557,18 @@ func (s *stratovirt) hotplugBlk(drive *config.BlockDrive, op operation) (err err
switch op {
case addDevice:
- driver := "virtio-blk-pci"
+ driver := "virtio-blk-mmio"
+ if s.useOzone {
+ filePath, err = s.updateOzoneRes(drive.File, true)
+ } else {
+ filePath = drive.File
+ }
slot, err := s.getDevSlot(drive.VirtPath, false)
if err != nil {
return fmt.Errorf("Could not get unused slot for %q", drive.VirtPath)
}
- if err := s.qmpMonitorCh.qmp.ExecuteBlockdevAdd(s.qmpMonitorCh.ctx, drive.File, drive.ID, false); err != nil {
+ if err := s.qmpMonitorCh.qmp.ExecuteBlockdevAdd(s.qmpMonitorCh.ctx, filePath, drive.ID, false); err != nil {
s.getDevSlot(drive.VirtPath, true)
return err
}
@@ -443,6 +578,9 @@ func (s *stratovirt) hotplugBlk(drive *config.BlockDrive, op operation) (err err
return err
}
case removeDevice:
+ if s.useOzone {
+ s.updateOzoneRes(drive.File, false)
+ }
if err := s.qmpMonitorCh.qmp.ExecuteDeviceDel(s.qmpMonitorCh.ctx, drive.ID); err != nil {
return err
}
@@ -582,17 +720,62 @@ func (s *stratovirt) getThreadIDs(ctx context.Context) (vcpuThreadIDs, error) {
return tid, nil
}
+func (s *stratovirt) updateOzoneRes(src string, add bool) (string, error) {
+ dst := filepath.Join(s.ozoneRoot, filepath.Base(src))
+ if add {
+ if err := bindMount(context.Background(), src, dst, false, "slave"); err != nil {
+ s.Logger().WithField("bindMount failed", err).Error()
+ return "", err
+ }
+ } else {
+ syscall.Unmount(dst, syscall.MNT_DETACH)
+ }
+ return filepath.Base(src), nil
+}
+
+func (s *stratovirt) cleanOzoneRes() {
+ s.updateOzoneRes(s.rootfsPath, false)
+ s.updateOzoneRes(s.kernelPath, false)
+
+ if err := os.RemoveAll(s.ozoneRoot); err != nil {
+ s.Logger().WithField("cleanupOzone failed", err).Error()
+ }
+}
+
func (s *stratovirt) cleanup(ctx context.Context) error {
span, _ := s.trace(ctx, "cleanup")
defer span.End()
s.qmpTeardown()
+ if s.useOzone {
+ s.cleanOzoneRes()
+ }
return nil
}
func (s *stratovirt) getPids() []int {
- return []int{s.pid}
+ var pids []int
+ if s.pid != 0 {
+ pids = append(pids, s.pid)
+ } else {
+ pid, err := ioutil.ReadFile(s.pidfile)
+ if err != nil {
+ s.Logger().WithError(err).Error("Read pid file failed.")
+ return []int{0}
+ }
+
+ p, err := strconv.Atoi(strings.Trim(string(pid), "\n\t "))
+ if err != nil {
+ s.Logger().WithError(err).Error("Get pid from pid file failed.")
+ return []int{0}
+ }
+
+ pids = append(pids, p)
+ s.pid = p
+ }
+
+ return pids
}
func (s *stratovirt) getVirtioFsPid() *int {
--
2.21.1 (Apple Git-122.3)

View File

@ -0,0 +1,295 @@
From 77ed6fefe70edde63b01d797b76f389bc82bb1a0 Mon Sep 17 00:00:00 2001
From: Wei Gao <gaowei66@huawei.com>
Date: Mon, 9 Aug 2021 14:57:06 +0800
Subject: [PATCH 6/6] factory: add the template factory support for hypervisor
type stratovirt.
Signed-off-by: Wei Gao <gaowei66@huawei.com>
---
src/runtime/pkg/katautils/config.go | 2 +-
.../factory/template/template.go | 21 +++--
src/runtime/virtcontainers/kata_agent.go | 7 +-
src/runtime/virtcontainers/stratovirt.go | 89 +++++++++++++++++--
src/runtime/virtcontainers/vm.go | 28 ++++--
5 files changed, 125 insertions(+), 22 deletions(-)
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go
index 828c2a43..718677b4 100644
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -1363,7 +1363,7 @@ func checkNetNsConfig(config oci.RuntimeConfig) error {
// checkFactoryConfig ensures the VM factory configuration is valid.
func checkFactoryConfig(config oci.RuntimeConfig) error {
if config.FactoryConfig.Template {
- if config.HypervisorConfig.InitrdPath == "" {
+ if config.HypervisorConfig.InitrdPath == "" && (config.HypervisorType != vc.StratovirtHypervisor) {
return errors.New("Factory option enable_template requires an initrd image")
}
}
diff --git a/src/runtime/virtcontainers/factory/template/template.go b/src/runtime/virtcontainers/factory/template/template.go
index 66070126..02497097 100644
--- a/src/runtime/virtcontainers/factory/template/template.go
+++ b/src/runtime/virtcontainers/factory/template/template.go
@@ -96,11 +96,15 @@ func (t *template) prepareTemplateFiles() error {
if err != nil {
return err
}
- flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV)
- opts := fmt.Sprintf("size=%dM", t.config.HypervisorConfig.MemorySize+templateDeviceStateSize)
- if err = syscall.Mount("tmpfs", t.statePath, "tmpfs", flags, opts); err != nil {
- t.close()
- return err
+
+ // With the stratovirt hypervisor, there is no need to back the template path with a tmpfs ramdisk.
+ if t.config.HypervisorType != vc.StratovirtHypervisor {
+ flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV)
+ opts := fmt.Sprintf("size=%dM", t.config.HypervisorConfig.MemorySize+templateDeviceStateSize)
+ if err = syscall.Mount("tmpfs", t.statePath, "tmpfs", flags, opts); err != nil {
+ t.close()
+ return err
+ }
}
f, err := os.Create(t.statePath + "/memory")
if err != nil {
@@ -126,8 +130,11 @@ func (t *template) createTemplateVM(ctx context.Context) error {
}
defer vm.Stop(ctx)
- if err = vm.Disconnect(ctx); err != nil {
- return err
+ // When creating a template on the stratovirt hypervisor, there is no agent connection to disconnect.
+ if config.HypervisorType != vc.StratovirtHypervisor {
+ if err = vm.Disconnect(ctx); err != nil {
+ return err
+ }
}
// Sleep a bit to let the agent grpc server clean up
diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go
index 13d31658..bc882c70 100644
--- a/src/runtime/virtcontainers/kata_agent.go
+++ b/src/runtime/virtcontainers/kata_agent.go
@@ -1306,8 +1306,11 @@ func (k *kataAgent) buildContainerRootfs(ctx context.Context, sandbox *Sandbox,
// TODO: remove dependency on shared fs path. shared fs is just one kind of storage source.
// we should not always use shared fs path for all kinds of storage. Instead, all storage
// should be bind mounted to a tmpfs path for containers to use.
- if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil {
- return nil, err
+ // When booting from a template on stratovirt, there is no need to create the mount path.
+ if !((sandbox.config.HypervisorType == StratovirtHypervisor) && sandbox.config.HypervisorConfig.BootFromTemplate) {
+ if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil {
+ return nil, err
+ }
}
return rootfs, nil
}
diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go
index 47daa817..e9b2ba85 100644
--- a/src/runtime/virtcontainers/stratovirt.go
+++ b/src/runtime/virtcontainers/stratovirt.go
@@ -48,6 +48,7 @@ type stratovirt struct {
config HypervisorConfig
rootfsPath string
kernelPath string
+ templatePath string
pid int
consolePath string
socketPath string
@@ -115,7 +116,7 @@ func (s *stratovirt) createSandbox(ctx context.Context, id string, networkNS Net
s.id = id
s.config = *hypervisorConfig
- if s.config.OzonePath == "" {
+ if (s.config.OzonePath == "") || s.config.BootToBeTemplate {
s.useOzone = false
s.pidfile = filepath.Join(s.store.RunVMStoragePath(), s.id, "pid")
s.logfile = filepath.Join(s.store.RunVMStoragePath(), s.id, "/stratovirt.log")
@@ -129,6 +130,20 @@ func (s *stratovirt) createSandbox(ctx context.Context, id string, networkNS Net
s.socketPath = filepath.Join(s.ozoneRoot, apiSocket)
s.consolePath = filepath.Join(s.ozoneRoot, debugSocket)
}
+
+ if s.config.VMid != "" && s.useOzone {
+ // Make sure the symlinks do not exist
+ os.RemoveAll(s.ozoneRoot)
+ ozoneVmRoot := filepath.Join(ozoneBaseDir, s.config.VMid)
+ if err := os.Symlink(ozoneVmRoot, s.ozoneRoot); err != nil {
+ return err
+ }
+ }
+
+ if s.config.BootFromTemplate || s.config.BootToBeTemplate {
+ s.templatePath = strings.Replace(s.config.DevicesStatePath, "/state", "", -1)
+ }
+
s.netNSPath = networkNS.NetNsPath
s.qmpMonitorCh = qmpChannel{
ctx: s.ctx,
@@ -221,6 +236,12 @@ func (s *stratovirt) createOzoneParams(params []string) ([]string, error) {
params = append(params, "-initrd", filepath.Base(s.rootfsPath))
}
+ // handle boot from template
+ if s.config.BootFromTemplate {
+ s.ozoneRes = append(s.ozoneRes, s.templatePath)
+ params = append(params, "-incoming", fmt.Sprintf("file:%s", filepath.Base(s.templatePath)))
+ }
+
// add devices to cmdline
for _, d := range s.devices {
switch v := d.dev.(type) {
@@ -266,6 +287,11 @@ func (s *stratovirt) createParams(params []string) ([]string, error) {
params = append(params, "-initrd", s.rootfsPath)
}
+ // handle boot from template
+ if s.config.BootFromTemplate {
+ params = append(params, "-incoming", fmt.Sprintf("file:%s", s.templatePath))
+ }
+
// add devices to cmdline
for _, d := range s.devices {
switch v := d.dev.(type) {
@@ -410,14 +436,55 @@ func (s *stratovirt) stopSandbox(ctx context.Context, force bool) error {
}
func (s *stratovirt) pauseSandbox(ctx context.Context) error {
- return nil
+ span, _ := s.trace(ctx, "pauseSandbox")
+ defer span.End()
+
+ return s.togglePauseSandbox(ctx, true)
}
func (s *stratovirt) saveSandbox() error {
+ s.Logger().Info("save sandbox")
+
+ err := s.qmpSetup()
+ if err != nil {
+ return err
+ }
+
+ // BootToBeTemplate sets the VM to be a template that other VMs can clone from.
+ // We would want to bypass shared memory when saving VM to local file through migrate.
+ if s.config.BootToBeTemplate {
+ err = s.qmpMonitorCh.qmp.ExecSetMigrateArguments(s.qmpMonitorCh.ctx, fmt.Sprintf("file:%s", s.templatePath))
+ if err != nil {
+ s.Logger().WithError(err).Error("exec migration")
+ return err
+ }
+ }
+
return nil
}
func (s *stratovirt) resumeSandbox(ctx context.Context) error {
+ span, _ := s.trace(ctx, "resumeSandbox")
+ defer span.End()
+
+ return s.togglePauseSandbox(ctx, false)
+}
+
+func (s *stratovirt) togglePauseSandbox(ctx context.Context, pause bool) error {
+ span, _ := s.trace(ctx, "togglePauseSandbox")
+ defer span.End()
+
+ err := s.qmpSetup()
+ if err != nil {
+ return err
+ }
+
+ if pause {
+ s.qmpMonitorCh.qmp.ExecuteStop(s.qmpMonitorCh.ctx)
+ } else {
+ s.qmpMonitorCh.qmp.ExecuteCont(s.qmpMonitorCh.ctx)
+ }
+
return nil
}
@@ -734,11 +801,23 @@ func (s *stratovirt) updateOzoneRes(src string, add bool) (string, error) {
}
func (s *stratovirt) cleanOzoneRes() {
- s.updateOzoneRes(s.rootfsPath, false)
- s.updateOzoneRes(s.kernelPath, false)
+ // Unmount all resources in ozoneRoot
+ if dir, err := ioutil.ReadDir(s.ozoneRoot); err == nil {
+ for _, file := range dir {
+ syscall.Unmount(filepath.Join(s.ozoneRoot, file.Name()), syscall.MNT_DETACH)
+ }
+ }
if err := os.RemoveAll(s.ozoneRoot); err != nil {
- s.Logger().WithField("cleanupOzone failed", err).Error()
+ s.Logger().WithField("cleanup Ozone failed", err).Error()
+ }
+
+ // If VMid is set, the VM was booted from a template, so ozoneVmRoot needs to be cleaned up as well.
+ if s.config.VMid != "" {
+ ozoneVmRoot := filepath.Join(ozoneBaseDir, s.config.VMid)
+ if err := os.RemoveAll(ozoneVmRoot); err != nil {
+ s.Logger().WithField("cleanup Ozone failed", err).Error()
+ }
}
}
diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go
index e6f02b6e..c4f9df73 100644
--- a/src/runtime/virtcontainers/vm.go
+++ b/src/runtime/virtcontainers/vm.go
@@ -142,13 +142,19 @@ func NewVM(ctx context.Context, config VMConfig) (*VM, error) {
}()
// 4. check agent aliveness
- // VMs booted from template are paused, do not check
- if !config.HypervisorConfig.BootFromTemplate {
+ // On hypervisor StratoVirt, VMs booted from template are running, check agent
+ // On other hypervisors, VMs booted from template are paused, do not check
+ if config.HypervisorType == StratovirtHypervisor {
+ if !config.HypervisorConfig.BootToBeTemplate {
+ virtLog.WithField("vm", id).Info("check agent status")
+ err = agent.check(ctx)
+ }
+ } else if !config.HypervisorConfig.BootFromTemplate {
virtLog.WithField("vm", id).Info("check agent status")
err = agent.check(ctx)
- if err != nil {
- return nil, err
- }
+ }
+ if err != nil {
+ return nil, err
}
return &VM{
@@ -329,9 +335,16 @@ func (v *VM) assignSandbox(s *Sandbox) error {
// - link 9pfs share path from sandbox dir (/run/kata-containers/shared/sandboxes/sbid/) to vm dir (/run/vc/vm/vmid/shared/)
vmSharePath := buildVMSharePath(v.id, v.store.RunVMStoragePath())
- vmSockDir := filepath.Join(v.store.RunVMStoragePath(), v.id)
sbSharePath := getMountPath(s.id)
- sbSockDir := filepath.Join(v.store.RunVMStoragePath(), s.id)
+ var vmSockDir string
+ var sbSockDir string
+ if v.hypervisor.hypervisorConfig().OzonePath != "" {
+ vmSockDir = filepath.Join(ozoneBaseDir, v.id)
+ sbSockDir = filepath.Join(ozoneBaseDir, s.id)
+ } else {
+ vmSockDir = filepath.Join(v.store.RunVMStoragePath(), v.id)
+ sbSockDir = filepath.Join(v.store.RunVMStoragePath(), s.id)
+ }
v.logger().WithFields(logrus.Fields{
"vmSharePath": vmSharePath,
@@ -359,6 +372,7 @@ func (v *VM) assignSandbox(s *Sandbox) error {
s.hypervisor = v.hypervisor
s.config.HypervisorConfig.VMid = v.id
+ s.config.HypervisorConfig.BootFromTemplate = true
return nil
}
--
2.21.1 (Apple Git-122.3)

View File

@ -0,0 +1,224 @@
From d4605dafaa9c326a5cf24c28d0c1efe6c9997f49 Mon Sep 17 00:00:00 2001
From: holyfei <yangfeiyu20092010@163.com>
Date: Sat, 21 Aug 2021 17:08:46 +0800
Subject: [PATCH] kata-containers: support with iSulad
reason: support with iSulad
Signed-off-by: holyfei <yangfeiyu20092010@163.com>
---
src/agent/rustjail/src/cgroups/fs/mod.rs | 2 +-
src/runtime/containerd-shim-v2/container.go | 9 +++
src/runtime/containerd-shim-v2/service.go | 55 +++++++++++++++++++
src/runtime/containerd-shim-v2/start.go | 10 ++++
.../containerd/runtime/v2/shim/shim.go | 8 ++-
5 files changed, 81 insertions(+), 3 deletions(-)
diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs
index 7f41cb4..6c3bb32 100644
--- a/src/agent/rustjail/src/cgroups/fs/mod.rs
+++ b/src/agent/rustjail/src/cgroups/fs/mod.rs
@@ -369,7 +369,7 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool
if let Some(swappiness) = memory.swappiness {
if (0..=100).contains(&swappiness) {
mem_controller.set_swappiness(swappiness as u64)?;
- } else {
+ } else if swappiness != -1 {
return Err(anyhow!(
"invalid value:{}. valid memory swappiness range is 0-100",
swappiness
diff --git a/src/runtime/containerd-shim-v2/container.go b/src/runtime/containerd-shim-v2/container.go
index faea0e2..d563888 100644
--- a/src/runtime/containerd-shim-v2/container.go
+++ b/src/runtime/containerd-shim-v2/container.go
@@ -7,10 +7,13 @@ package containerdshim
import (
"io"
+ "os"
+ "path"
"time"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
+ cdshim "github.com/containerd/containerd/runtime/v2/shim"
taskAPI "github.com/containerd/containerd/runtime/v2/task"
"github.com/opencontainers/runtime-spec/specs-go"
@@ -37,6 +40,8 @@ type container struct {
status task.Status
terminal bool
mounted bool
+ exitFifo string
+ exitFd *os.File
}
func newContainer(s *service, r *taskAPI.CreateTaskRequest, containerType vc.ContainerType, spec *specs.Spec, mounted bool) (*container, error) {
@@ -49,6 +54,9 @@ func newContainer(s *service, r *taskAPI.CreateTaskRequest, containerType vc.Con
spec = &specs.Spec{}
}
+ dir := os.Getenv(cdshim.ExitFifoDir)
+ exitFifo := path.Join(dir, r.ID, exitFifoName)
+
c := &container{
s: s,
spec: spec,
@@ -65,6 +73,7 @@ func newContainer(s *service, r *taskAPI.CreateTaskRequest, containerType vc.Con
exitCh: make(chan uint32, 1),
stdinCloser: make(chan struct{}),
mounted: mounted,
+ exitFifo: exitFifo,
}
return c, nil
}
diff --git a/src/runtime/containerd-shim-v2/service.go b/src/runtime/containerd-shim-v2/service.go
index 1003f8e..e13283c 100644
--- a/src/runtime/containerd-shim-v2/service.go
+++ b/src/runtime/containerd-shim-v2/service.go
@@ -6,13 +6,16 @@
package containerdshim
import (
+ "bytes"
"context"
+ "encoding/binary"
"io/ioutil"
"os"
sysexec "os/exec"
"sync"
"syscall"
"time"
+ "unsafe"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
@@ -51,6 +54,8 @@ const (
// A time span used to wait for publish a containerd event,
// once it costs a longer time than timeOut, it will be canceld.
timeOut = 5 * time.Second
+
+ exitFifoName = "exit_fifo"
)
var (
@@ -1019,6 +1024,10 @@ func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (_ *taskAPI.
func (s *service) processExits() {
for e := range s.ec {
s.checkProcesses(e)
+
+ if os.Getenv(cdshim.ExitFifoDir) != "" {
+ s.closeExitFifo(e)
+ }
}
}
@@ -1070,3 +1079,49 @@ func (s *service) getContainerStatus(containerID string) (task.Status, error) {
return status, nil
}
+
+func isBigEndian() (ret bool) {
+ i := int(0x1)
+ bs := (*[int(unsafe.Sizeof(i))]byte)(unsafe.Pointer(&i))
+ return bs[0] == 0
+}
+
+func (s *service) closeExitFifo(e exit) {
+ if e.execid != "" {
+ // not a container, no need to close exit fifo
+ return
+ }
+
+ var ret uint32
+ var nativeEndian binary.ByteOrder
+
+ s.mu.Lock()
+ c, err := s.getContainer(e.id)
+ s.mu.Unlock()
+
+ if err != nil {
+ logrus.WithError(err).Errorf("Process container:%v exit fifo failed", e.id)
+ return
+ }
+
+ ret = <-c.exitCh
+ // refill the exitCh with the container process's exit code in case
+ // there were other waits on this process.
+ c.exitCh <- ret
+
+ if isBigEndian() {
+ nativeEndian = binary.BigEndian
+ } else {
+ nativeEndian = binary.LittleEndian
+ }
+
+ bytesBuffer := bytes.NewBuffer([]byte{})
+ binary.Write(bytesBuffer, nativeEndian, &ret)
+
+ _, err = c.exitFd.Write(bytesBuffer.Bytes())
+ if err != nil {
+ logrus.WithError(err).Error("write exit fifo failed")
+ }
+
+ c.exitFd.Close()
+}
diff --git a/src/runtime/containerd-shim-v2/start.go b/src/runtime/containerd-shim-v2/start.go
index 72420e4..e89dc48 100644
--- a/src/runtime/containerd-shim-v2/start.go
+++ b/src/runtime/containerd-shim-v2/start.go
@@ -8,8 +8,11 @@ package containerdshim
import (
"context"
"fmt"
+ "golang.org/x/sys/unix"
+ "os"
"github.com/containerd/containerd/api/types/task"
+ cdshim "github.com/containerd/containerd/runtime/v2/shim"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
)
@@ -59,6 +62,13 @@ func startContainer(ctx context.Context, s *service, c *container) error {
c.status = task.StatusRunning
+ if os.Getenv(cdshim.ExitFifoDir) != "" {
+ c.exitFd, err = os.OpenFile(c.exitFifo, unix.O_WRONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
+ if err != nil {
+ return err
+ }
+ }
+
stdin, stdout, stderr, err := s.sandbox.IOStream(c.id, c.id)
if err != nil {
return err
diff --git a/src/runtime/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go b/src/runtime/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go
index d60d496..946c386 100644
--- a/src/runtime/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go
+++ b/src/runtime/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go
@@ -84,6 +84,8 @@ var (
action string
)
+var ExitFifoDir = "EXIT_FIFO_DIR"
+
func parseFlags() {
flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs")
flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim")
@@ -198,8 +200,10 @@ func run(id string, initFunc Init, config Config) error {
}
return nil
default:
- if err := setLogger(ctx, idFlag); err != nil {
- return err
+ if os.Getenv("EXIT_FIFO_DIR") == "" {
+ if err := setLogger(ctx, idFlag); err != nil {
+ return err
+ }
}
client := NewShimClient(ctx, service, signals)
return client.Serve()
--
2.23.0

View File

@ -0,0 +1,7 @@
0001-runtime-add-support-of-new-sandbox-hypervisor-type-S.patch
0002-agent-add-support-of-new-sandbox-hypervisor-kind-Str.patch
0003-runtime-implement-updateInterfaceHwAddrByName-interf.patch
0004-configuration-add-configuration-generator-for-hyperv.patch
0005-runtime-add-the-secure-component-ozone-support-for-h.patch
0006-factory-add-the-template-factory-support-for-hypervi.patch
0007-kata-containers-support-with-iSulad.patch