Fix #I4KI81 reason: modify kata-containers version and update it to 1.11.1 Signed-off-by: holyfei <yangfeiyu20092010@163.com>
235 lines
8.5 KiB
Diff
235 lines
8.5 KiB
Diff
From ac1d7806f8de2f8ca393df08a9c62d1045c4afdc Mon Sep 17 00:00:00 2001
|
|
From: jiangpengfei <jiangpengfei9@huawei.com>
|
|
Date: Tue, 11 Dec 2018 18:27:02 -0500
|
|
Subject: [PATCH 01/16] agent: add agent.netlink_recv_buf_size flag to set
|
|
netlink recv buf size
|
|
|
|
fixes: #813
|
|
|
|
reason: When a huge amount of memory (for example 128GB) is hotplugged into the guest,
|
|
the kernel produces a lot of memory add uevents and sends them to the netlink socket.
|
|
However, the netlink socket's default receive buffer size is 4KB, which is too small
|
|
to receive all of the memory add uevents.
|
|
Since hotplugging a huge amount of memory is not a common case, we add an agent
|
|
flag, agent.netlink_recv_buf_size, to set the netlink socket receive buffer size.
|
|
|
|
Signed-off-by: jiangpengfei <jiangpengfei9@huawei.com>
|
|
---
|
|
README.md | 13 +++++++++++++
|
|
agent.go | 10 +++++++++-
|
|
config.go | 15 ++++++++++++++
|
|
config_test.go | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
pkg/uevent/uevent.go | 15 +++++++++++---
|
|
5 files changed, 104 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/README.md b/README.md
|
|
index cec65a4..16f96a4 100644
|
|
--- a/README.md
|
|
+++ b/README.md
|
|
@@ -98,6 +98,19 @@ The pipe's capacity for stdout/stderr can be modified by specifying the `agent.c
|
|
to the guest kernel command line. For example, `agent.container_pipe_size=2097152` will set the stdout and stderr
|
|
pipes to 2097152 bytes.
|
|
|
|
+## Uevent Netlink Socket Receive Buffer Size
|
|
+
|
|
+When hotplugging a large amount of memory into the Kata VM, the kernel in the VM produces many memory object add
|
|
+uevents and sends all of them to the Kata agent through a netlink socket. However, the default netlink socket receive buffer
|
|
+size is 4KB, which is too small and can only hold 256 memory add uevents. If the number of memory add uevents is larger
|
|
+than 256, the remaining uevents cannot be received and processed by the Kata agent.
|
|
+
|
|
+The uevent netlink socket receive buffer size can be modified by specifying the `agent.netlink_recv_buf_size` flag
|
|
+to the guest kernel command line. For example, `agent.netlink_recv_buf_size=2MB` sets the uevent netlink socket
|
|
+receive buffer size to 2MB. The valid range for `agent.netlink_recv_buf_size` is `[4KB, 4MB]`, and the value can be
|
|
+given in human-readable memory format or as a plain number (the default unit is bytes).
|
|
+
|
|
+
|
|
[1]: https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md
|
|
[2]: https://golang.org/pkg/time/#ParseDuration
|
|
[3]: http://man7.org/linux/man-pages/man7/pipe.7.html
|
|
diff --git a/agent.go b/agent.go
|
|
index 2d2c293..c1cac08 100644
|
|
--- a/agent.go
|
|
+++ b/agent.go
|
|
@@ -190,6 +190,14 @@ var unifiedCgroupHierarchy = false
|
|
// Size in bytes of the stdout/stderr pipes created for each container.
|
|
var containerPipeSize = uint32(0)
|
|
|
|
+const (
|
|
+ minNetlinkSockRecvBufSize = 4 * 1024
|
|
+ maxNetlinkSockRecvBufSize = 4 * 1024 * 1024
|
|
+)
|
|
+
|
|
+// Size in bytes of the uevent netlink socket receive buffer; 0 means the buffer size is not changed.
|
|
+var netlinkSockRecvBufSize = uint32(0)
|
|
+
|
|
// commType is used to denote the communication channel type used.
|
|
type commType int
|
|
|
|
@@ -708,7 +716,7 @@ func (s *sandbox) waitForStopServer() {
|
|
func (s *sandbox) listenToUdevEvents() {
|
|
fieldLogger := agentLog.WithField("subsystem", "udevlistener")
|
|
|
|
- uEvHandler, err := uevent.NewHandler()
|
|
+ uEvHandler, err := uevent.NewHandler(netlinkSockRecvBufSize)
|
|
if err != nil {
|
|
fieldLogger.Warnf("Error starting uevent listening loop %s", err)
|
|
return
|
|
diff --git a/config.go b/config.go
|
|
index 4530096..6c7d473 100644
|
|
--- a/config.go
|
|
+++ b/config.go
|
|
@@ -7,11 +7,13 @@
|
|
package main
|
|
|
|
import (
|
|
+ "fmt"
|
|
"io/ioutil"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
+ "github.com/docker/go-units"
|
|
"github.com/sirupsen/logrus"
|
|
"google.golang.org/grpc/codes"
|
|
grpcStatus "google.golang.org/grpc/status"
|
|
@@ -29,6 +31,7 @@ const (
|
|
hotplugTimeoutFlag = optionPrefix + "hotplug_timeout"
|
|
unifiedCgroupHierarchyFlag = optionPrefix + "unified_cgroup_hierarchy"
|
|
containerPipeSizeFlag = optionPrefix + "container_pipe_size"
|
|
+ netlinkSockRecvBufSizeFlag = optionPrefix + "netlink_recv_buf_size"
|
|
traceModeStatic = "static"
|
|
traceModeDynamic = "dynamic"
|
|
traceTypeIsolated = "isolated"
|
|
@@ -155,6 +158,18 @@ func parseCmdlineOption(option string) error {
|
|
return err
|
|
}
|
|
unifiedCgroupHierarchy = flag
|
|
+ case netlinkSockRecvBufSizeFlag:
|
|
+ bufSizeInBytes, err := units.RAMInBytes(split[valuePosition])
|
|
+ if err != nil {
|
|
+ return err
|
|
+ }
|
|
+
|
|
+ if bufSizeInBytes < minNetlinkSockRecvBufSize || bufSizeInBytes > maxNetlinkSockRecvBufSize {
|
|
+ return fmt.Errorf("invalid netlink socket recv buf size: %d (valid size range %s-%s bytes)", bufSizeInBytes,
|
|
+ units.BytesSize(minNetlinkSockRecvBufSize), units.BytesSize(maxNetlinkSockRecvBufSize))
|
|
+ }
|
|
+
|
|
+ netlinkSockRecvBufSize = uint32(bufSizeInBytes)
|
|
default:
|
|
if strings.HasPrefix(split[optionPosition], optionPrefix) {
|
|
return grpcStatus.Errorf(codes.NotFound, "Unknown option %s", split[optionPosition])
|
|
diff --git a/config_test.go b/config_test.go
|
|
index 2a23133..f40f17a 100644
|
|
--- a/config_test.go
|
|
+++ b/config_test.go
|
|
@@ -486,3 +486,58 @@ func TestParseCmdlineOptionContainerPipeSize(t *testing.T) {
|
|
assert.Equal(d.expectedContainerPipeSize, containerPipeSize, "test %d (%+v)", i, d)
|
|
}
|
|
}
|
|
+
|
|
+func TestParseCmdlineOptionNetlinkSockRecvBufSize(t *testing.T) {
|
|
+ assert := assert.New(t)
|
|
+
|
|
+ type testData struct {
|
|
+ option string
|
|
+ shouldErr bool
|
|
+ expectedNetlinkSockRecvBufSize uint32
|
|
+ }
|
|
+
|
|
+ data := []testData{
|
|
+ {"", false, 0},
|
|
+ {"netlink_recv_buf_siz", false, 0},
|
|
+ {"netlink_recv_buf_size", false, 0},
|
|
+ {"netlink_recv_buf_size=", false, 0},
|
|
+ {"netlink_recv_buf_size=4096", false, 0},
|
|
+ {"netlink_recv_buf_size=4KB", false, 0},
|
|
+ {"agent.netlink_recv_buf_size=", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=foobar", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=-1", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=0", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=100", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=3KB", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=3.6KB", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=4095", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=4096xB", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=4096", false, 4096},
|
|
+ {"agent.netlink_recv_buf_size=4097", false, 4097},
|
|
+ {"agent.netlink_recv_buf_size=4096.0", false, 4096},
|
|
+ {"agent.netlink_recv_buf_size=1024KB", false, 1048576},
|
|
+ {"agent.netlink_recv_buf_size=1MB", false, 1048576},
|
|
+ {"agent.netlink_recv_buf_size=4194303", false, 4194303},
|
|
+ {"agent.netlink_recv_buf_size=3.999MB", false, 4193255},
|
|
+ {"agent.netlink_recv_buf_size=4194304", false, 4194304},
|
|
+ {"agent.netlink_recv_buf_size=4MB", false, 4194304},
|
|
+ {"agent.netlink_recv_buf_size=4.001MB", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=4194305", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=100MB", true, 0},
|
|
+ {"agent.netlink_recv_buf_size=1GB", true, 0},
|
|
+ }
|
|
+
|
|
+ for i, d := range data {
|
|
+ // reset the netlink socket recv buffer size
|
|
+ netlinkSockRecvBufSize = 0
|
|
+
|
|
+ err := parseCmdlineOption(d.option)
|
|
+ if d.shouldErr {
|
|
+ assert.Error(err)
|
|
+ } else {
|
|
+ assert.NoError(err)
|
|
+ }
|
|
+
|
|
+ assert.Equal(d.expectedNetlinkSockRecvBufSize, netlinkSockRecvBufSize, "test %d (%+v)", i, d)
|
|
+ }
|
|
+}
|
|
diff --git a/pkg/uevent/uevent.go b/pkg/uevent/uevent.go
|
|
index fc2c127..fa84086 100644
|
|
--- a/pkg/uevent/uevent.go
|
|
+++ b/pkg/uevent/uevent.go
|
|
@@ -10,6 +10,7 @@ import (
|
|
"bufio"
|
|
"io"
|
|
"strings"
|
|
+ "syscall"
|
|
|
|
"golang.org/x/sys/unix"
|
|
"google.golang.org/grpc/codes"
|
|
@@ -33,7 +34,7 @@ type ReaderCloser struct {
|
|
}
|
|
|
|
// NewReaderCloser returns an io.ReadCloser handle for uevent.
|
|
-func NewReaderCloser() (io.ReadCloser, error) {
|
|
+func NewReaderCloser(netlinkRecvBufSize uint32) (io.ReadCloser, error) {
|
|
nl := unix.SockaddrNetlink{
|
|
Family: unix.AF_NETLINK,
|
|
// Passing Pid as 0 here allows the kernel to take care of assigning
|
|
@@ -47,6 +48,14 @@ func NewReaderCloser() (io.ReadCloser, error) {
|
|
return nil, err
|
|
}
|
|
|
|
+ // If netlinkRecvBufSize > 0, set netlink socket recv buffer size to netlinkRecvBufSize
|
|
+ if netlinkRecvBufSize > 0 {
|
|
+ err = unix.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, int(netlinkRecvBufSize))
|
|
+ if err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+ }
|
|
+
|
|
if err := unix.Bind(fd, &nl); err != nil {
|
|
return nil, err
|
|
}
|
|
@@ -85,8 +94,8 @@ type Handler struct {
|
|
}
|
|
|
|
// NewHandler returns a uevent handler.
|
|
-func NewHandler() (*Handler, error) {
|
|
- rdCloser, err := NewReaderCloser()
|
|
+func NewHandler(netlinkRecvBufSize uint32) (*Handler, error) {
|
|
+ rdCloser, err := NewReaderCloser(netlinkRecvBufSize)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
--
|
|
2.14.3 (Apple Git-98)
|
|
|