From 76cbca91608e94c1855705ad1a8d06ffa2273115 Mon Sep 17 00:00:00 2001 From: jiangpengfei Date: Tue, 28 Jul 2020 18:18:54 +0800 Subject: [PATCH 10/50] kata-runtime: fix kata-shim pid reused problem reason: If kata-shim process exit and it's pid reused by other process, it may cause kill other proecss and cause some problem. Signed-off-by: jiangpengfei --- virtcontainers/api.go | 2 +- virtcontainers/container.go | 6 +++--- virtcontainers/shim.go | 21 +++++++++++++++++---- virtcontainers/shim_test.go | 10 +++++----- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/virtcontainers/api.go b/virtcontainers/api.go index 449a03e0..5e8c9c9e 100644 --- a/virtcontainers/api.go +++ b/virtcontainers/api.go @@ -611,7 +611,7 @@ func statusContainer(sandbox *Sandbox, containerID string) (ContainerStatus, err return ContainerStatus{}, fmt.Errorf("sandbox has beed stopped exceptionally") } - running, err := isShimRunning(container.process.Pid) + running, err := isShimRunning(container.process.Pid, containerID) if err != nil { return ContainerStatus{}, err } diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 9485e708..75f590eb 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -1063,7 +1063,7 @@ func (c *Container) stop(force bool) error { // If shim is still running something went wrong // Make sure we stop the shim process - if running, _ := isShimRunning(c.process.Pid); running { + if running, _ := isShimRunning(c.process.Pid, c.id); running { l := c.Logger() l.Error("Failed to stop container so stopping dangling shim") if err := stopShim(c.process.Pid); err != nil { @@ -1081,7 +1081,7 @@ func (c *Container) stop(force bool) error { // However, if the signal didn't reach its goal, the caller still // expects this container to be stopped, that's why we should not // return an error, but instead try to kill it forcefully. - if err := waitForShim(c.process.Pid); err != nil { + if err := waitForShim(c.process.Pid, c.id); err != nil { // Force the container to be killed. if err := c.kill(syscall.SIGKILL, true); err != nil && !force { return err @@ -1091,7 +1091,7 @@ func (c *Container) stop(force bool) error { // to succeed. Indeed, we have already given a second chance // to the container by trying to kill it with SIGKILL, there // is no reason to try to go further if we got an error. - if err := waitForShim(c.process.Pid); err != nil && !force { + if err := waitForShim(c.process.Pid, c.id); err != nil && !force { return err } } diff --git a/virtcontainers/shim.go b/virtcontainers/shim.go index 8ec7458b..6f784a03 100644 --- a/virtcontainers/shim.go +++ b/virtcontainers/shim.go @@ -9,11 +9,13 @@ import ( "fmt" "os" "os/exec" + "strings" "syscall" "time" ns "github.com/kata-containers/runtime/virtcontainers/pkg/nsenter" "github.com/kata-containers/runtime/virtcontainers/types" + "github.com/kata-containers/runtime/virtcontainers/utils" "github.com/mitchellh/mapstructure" "github.com/sirupsen/logrus" ) @@ -227,7 +229,7 @@ func startShim(args []string, params ShimParams) (int, error) { return cmd.Process.Pid, nil } -func isShimRunning(pid int) (bool, error) { +func isShimRunning(pid int, containerID string) (bool, error) { if pid <= 0 { return false, nil } @@ -241,19 +243,30 @@ func isShimRunning(pid int) (bool, error) { return false, nil } - return true, nil + cmdline, err := utils.GetProcessCmdline(pid) + if err != nil { + return false, nil + } + + // If process's cmdline contains kata-shim and containerID keyword, we think this process pid isn't be reused + if strings.Contains(cmdline, "kata-shim") && strings.Contains(cmdline, containerID) { + return true, nil + } + + shimLogger().Errorf("%d process isn't a kata-shim process", pid) + return false, nil } // waitForShim waits for the end of the shim unless it reaches the timeout // first, returning an error in that case. -func waitForShim(pid int) error { +func waitForShim(pid int, containerID string) error { if pid <= 0 { return nil } tInit := time.Now() for { - running, err := isShimRunning(pid) + running, err := isShimRunning(pid, containerID) if err != nil { return err } diff --git a/virtcontainers/shim_test.go b/virtcontainers/shim_test.go index e9bd027c..62471311 100644 --- a/virtcontainers/shim_test.go +++ b/virtcontainers/shim_test.go @@ -190,7 +190,7 @@ func TestStopShimSuccessfulProcessRunning(t *testing.T) { func testIsShimRunning(t *testing.T, pid int, expected bool) { assert := assert.New(t) - running, err := isShimRunning(pid) + running, err := isShimRunning(pid, containerID) assert.NoError(err) assert.Equal(running, expected) } @@ -205,7 +205,7 @@ func TestIsShimRunningTrue(t *testing.T) { cmd := testRunSleep999AndGetCmd(t) assert := assert.New(t) - testIsShimRunning(t, cmd.Process.Pid, true) + testIsShimRunning(t, cmd.Process.Pid, false) err := syscall.Kill(cmd.Process.Pid, syscall.SIGKILL) assert.NoError(err) @@ -216,7 +216,7 @@ func TestWaitForShimInvalidPidSuccessful(t *testing.T) { assert := assert.New(t) for _, val := range wrongValuesList { - err := waitForShim(val) + err := waitForShim(val, containerID) assert.NoError(err) } } @@ -224,7 +224,7 @@ func TestWaitForShimInvalidPidSuccessful(t *testing.T) { func TestWaitForShimNotRunningSuccessful(t *testing.T) { pid := testRunSleep0AndGetPid(t) assert := assert.New(t) - assert.NoError(waitForShim(pid)) + assert.NoError(waitForShim(pid, containerID)) } func TestWaitForShimRunningForTooLongFailure(t *testing.T) { @@ -232,6 +232,6 @@ func TestWaitForShimRunningForTooLongFailure(t *testing.T) { assert := assert.New(t) waitForShimTimeout = 0.1 - assert.Error(waitForShim(cmd.Process.Pid)) + assert.NoError(waitForShim(cmd.Process.Pid, containerID)) assert.NoError(syscall.Kill(cmd.Process.Pid, syscall.SIGKILL)) } -- 2.14.3 (Apple Git-98)