kata-containers/agent/patches/0001-agent-add-agent.netlink_recv_buf_size-flag-to-set-ne.patch
holyfei c709612f2a kata-containers: modify kata-containers version
Fix #I4KI81
reason: modify kata-containers version and update
it to 1.11.1

Signed-off-by: holyfei <yangfeiyu20092010@163.com>
2021-11-30 20:08:25 +08:00

235 lines
8.5 KiB
Diff

From ac1d7806f8de2f8ca393df08a9c62d1045c4afdc Mon Sep 17 00:00:00 2001
From: jiangpengfei <jiangpengfei9@huawei.com>
Date: Tue, 11 Dec 2018 18:27:02 -0500
Subject: [PATCH 01/16] agent: add agent.netlink_recv_buf_size flag to set
netlink recv buf size
fixes: #813
reason: When a large amount of memory (for example 128GB) is hotplugged into
the guest, the kernel produces a lot of memory add uevents and sends them to
the netlink socket. However, the default netlink socket receive buffer size is
4KB, which is too small to receive all of the memory add uevents.
Since hotplugging that much memory is not a common case, we add an agent flag,
agent.netlink_recv_buf_size, to set the netlink socket receive buffer size.
Signed-off-by: jiangpengfei <jiangpengfei9@huawei.com>
---
README.md | 13 +++++++++++++
agent.go | 10 +++++++++-
config.go | 15 ++++++++++++++
config_test.go | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
pkg/uevent/uevent.go | 15 +++++++++++---
5 files changed, 104 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index cec65a4..16f96a4 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,19 @@ The pipe's capacity for stdout/stderr can be modified by specifying the `agent.c
to the guest kernel command line. For example, `agent.container_pipe_size=2097152` will set the stdout and stderr
pipes to 2097152 bytes.
+## Uevent Netlink Socket Receive Buffer Size
+
+When hotplugging a large amount of memory into the Kata VM, the kernel in the VM produces a lot of memory object add
+uevents and sends all of them to the Kata agent through a netlink socket. However, the default netlink socket receive
+buffer size is 4KB, which is too small and can only hold 256 memory add uevents. If the number of memory add uevents
+exceeds 256, the remaining uevents cannot be received and processed by the Kata agent.
+
+The uevent netlink socket receive buffer size can be modified by specifying the `agent.netlink_recv_buf_size` flag
+to the guest kernel command line. For example, `agent.netlink_recv_buf_size=2MB` will set the uevent netlink socket
+receive buffer size to 2MB. The valid value range for `agent.netlink_recv_buf_size` is `[4KB ~ 4MB]`, and the value
+can be given either in human-readable memory format or as a plain number (the default memory unit is bytes).
+
+
[1]: https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md
[2]: https://golang.org/pkg/time/#ParseDuration
[3]: http://man7.org/linux/man-pages/man7/pipe.7.html
diff --git a/agent.go b/agent.go
index 2d2c293..c1cac08 100644
--- a/agent.go
+++ b/agent.go
@@ -190,6 +190,14 @@ var unifiedCgroupHierarchy = false
// Size in bytes of the stdout/stderr pipes created for each container.
var containerPipeSize = uint32(0)
+const (
+ minNetlinkSockRecvBufSize = 4 * 1024
+ maxNetlinkSockRecvBufSize = 4 * 1024 * 1024
+)
+
+// Size in bytes of the netlink socket receive buffer; 0 leaves the default size unchanged.
+var netlinkSockRecvBufSize = uint32(0)
+
// commType is used to denote the communication channel type used.
type commType int
@@ -708,7 +716,7 @@ func (s *sandbox) waitForStopServer() {
func (s *sandbox) listenToUdevEvents() {
fieldLogger := agentLog.WithField("subsystem", "udevlistener")
- uEvHandler, err := uevent.NewHandler()
+ uEvHandler, err := uevent.NewHandler(netlinkSockRecvBufSize)
if err != nil {
fieldLogger.Warnf("Error starting uevent listening loop %s", err)
return
diff --git a/config.go b/config.go
index 4530096..6c7d473 100644
--- a/config.go
+++ b/config.go
@@ -7,11 +7,13 @@
package main
import (
+ "fmt"
"io/ioutil"
"strconv"
"strings"
"time"
+ "github.com/docker/go-units"
"github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
grpcStatus "google.golang.org/grpc/status"
@@ -29,6 +31,7 @@ const (
hotplugTimeoutFlag = optionPrefix + "hotplug_timeout"
unifiedCgroupHierarchyFlag = optionPrefix + "unified_cgroup_hierarchy"
containerPipeSizeFlag = optionPrefix + "container_pipe_size"
+ netlinkSockRecvBufSizeFlag = optionPrefix + "netlink_recv_buf_size"
traceModeStatic = "static"
traceModeDynamic = "dynamic"
traceTypeIsolated = "isolated"
@@ -155,6 +158,18 @@ func parseCmdlineOption(option string) error {
return err
}
unifiedCgroupHierarchy = flag
+ case netlinkSockRecvBufSizeFlag:
+ bufSizeInBytes, err := units.RAMInBytes(split[valuePosition])
+ if err != nil {
+ return err
+ }
+
+ if bufSizeInBytes < minNetlinkSockRecvBufSize || bufSizeInBytes > maxNetlinkSockRecvBufSize {
+ return fmt.Errorf("invalid netlink socket recv buf size: %d (valid size range %s-%s bytes)", bufSizeInBytes,
+ units.BytesSize(minNetlinkSockRecvBufSize), units.BytesSize(maxNetlinkSockRecvBufSize))
+ }
+
+ netlinkSockRecvBufSize = uint32(bufSizeInBytes)
default:
if strings.HasPrefix(split[optionPosition], optionPrefix) {
return grpcStatus.Errorf(codes.NotFound, "Unknown option %s", split[optionPosition])
diff --git a/config_test.go b/config_test.go
index 2a23133..f40f17a 100644
--- a/config_test.go
+++ b/config_test.go
@@ -486,3 +486,58 @@ func TestParseCmdlineOptionContainerPipeSize(t *testing.T) {
assert.Equal(d.expectedContainerPipeSize, containerPipeSize, "test %d (%+v)", i, d)
}
}
+
+func TestParseCmdlineOptionNetlinkSockRecvBufSize(t *testing.T) {
+ assert := assert.New(t)
+
+ type testData struct {
+ option string
+ shouldErr bool
+ expectedNetlinkSockRecvBufSize uint32
+ }
+
+ data := []testData{
+ {"", false, 0},
+ {"netlink_recv_buf_siz", false, 0},
+ {"netlink_recv_buf_size", false, 0},
+ {"netlink_recv_buf_size=", false, 0},
+ {"netlink_recv_buf_size=4096", false, 0},
+ {"netlink_recv_buf_size=4KB", false, 0},
+ {"agent.netlink_recv_buf_size=", true, 0},
+ {"agent.netlink_recv_buf_size=foobar", true, 0},
+ {"agent.netlink_recv_buf_size=-1", true, 0},
+ {"agent.netlink_recv_buf_size=0", true, 0},
+ {"agent.netlink_recv_buf_size=100", true, 0},
+ {"agent.netlink_recv_buf_size=3KB", true, 0},
+ {"agent.netlink_recv_buf_size=3.6KB", true, 0},
+ {"agent.netlink_recv_buf_size=4095", true, 0},
+ {"agent.netlink_recv_buf_size=4096xB", true, 0},
+ {"agent.netlink_recv_buf_size=4096", false, 4096},
+ {"agent.netlink_recv_buf_size=4097", false, 4097},
+ {"agent.netlink_recv_buf_size=4096.0", false, 4096},
+ {"agent.netlink_recv_buf_size=1024KB", false, 1048576},
+ {"agent.netlink_recv_buf_size=1MB", false, 1048576},
+ {"agent.netlink_recv_buf_size=4194303", false, 4194303},
+ {"agent.netlink_recv_buf_size=3.999MB", false, 4193255},
+ {"agent.netlink_recv_buf_size=4194304", false, 4194304},
+ {"agent.netlink_recv_buf_size=4MB", false, 4194304},
+ {"agent.netlink_recv_buf_size=4.001MB", true, 0},
+ {"agent.netlink_recv_buf_size=4194305", true, 0},
+ {"agent.netlink_recv_buf_size=100MB", true, 0},
+ {"agent.netlink_recv_buf_size=1GB", true, 0},
+ }
+
+ for i, d := range data {
+ // reset the netlink socket recv buffer size
+ netlinkSockRecvBufSize = 0
+
+ err := parseCmdlineOption(d.option)
+ if d.shouldErr {
+ assert.Error(err)
+ } else {
+ assert.NoError(err)
+ }
+
+ assert.Equal(d.expectedNetlinkSockRecvBufSize, netlinkSockRecvBufSize, "test %d (%+v)", i, d)
+ }
+}
diff --git a/pkg/uevent/uevent.go b/pkg/uevent/uevent.go
index fc2c127..fa84086 100644
--- a/pkg/uevent/uevent.go
+++ b/pkg/uevent/uevent.go
@@ -10,6 +10,7 @@ import (
"bufio"
"io"
"strings"
+ "syscall"
"golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
@@ -33,7 +34,7 @@ type ReaderCloser struct {
}
// NewReaderCloser returns an io.ReadCloser handle for uevent.
-func NewReaderCloser() (io.ReadCloser, error) {
+func NewReaderCloser(netlinkRecvBufSize uint32) (io.ReadCloser, error) {
nl := unix.SockaddrNetlink{
Family: unix.AF_NETLINK,
// Passing Pid as 0 here allows the kernel to take care of assigning
@@ -47,6 +48,14 @@ func NewReaderCloser() (io.ReadCloser, error) {
return nil, err
}
+ // If netlinkRecvBufSize > 0, set netlink socket recv buffer size to netlinkRecvBufSize
+ if netlinkRecvBufSize > 0 {
+ err = unix.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, int(netlinkRecvBufSize))
+ if err != nil {
+ return nil, err
+ }
+ }
+
if err := unix.Bind(fd, &nl); err != nil {
return nil, err
}
@@ -85,8 +94,8 @@ type Handler struct {
}
// NewHandler returns a uevent handler.
-func NewHandler() (*Handler, error) {
- rdCloser, err := NewReaderCloser()
+func NewHandler(netlinkRecvBufSize uint32) (*Handler, error) {
+ rdCloser, err := NewReaderCloser(netlinkRecvBufSize)
if err != nil {
return nil, err
}
--
2.14.3 (Apple Git-98)