kata-containers/agent/patches/0011-agent-fix-agent-reap-agent-process-blocked-problem.patch
jiangpengfei 9a08f603ad kata-containers: move all kata related source repo into one repo kata-containers
reason: to make the kata-containers related source code easier to manage,
we decided to move all kata-related source repos into the kata-containers repo.

Signed-off-by: jiangpengfei <jiangpengfei9@huawei.com>
2020-12-31 17:34:19 +08:00

79 lines
2.6 KiB
Diff

From 3ac1232a2e3fbfc0465473e5d81cde41847c4252 Mon Sep 17 00:00:00 2001
From: jiangpengfei <jiangpengfei9@huawei.com>
Date: Wed, 19 Aug 2020 11:47:37 +0800
Subject: [PATCH 11/16] agent: fix agent reap agent process blocked problem
reason: add a timeout to the container waitProcess() call so the agent does not
block when the container process is in the D (uninterruptible sleep) or T (stopped) state.
Signed-off-by: jiangpengfei <jiangpengfei9@huawei.com>
---
grpc.go | 43 +++++++++++++++++++++++++++++++++----------
1 file changed, 33 insertions(+), 10 deletions(-)
diff --git a/grpc.go b/grpc.go
index de2cae7..3dd088e 100644
--- a/grpc.go
+++ b/grpc.go
@@ -49,6 +49,11 @@ const (
libcontainerPath = "/run/libcontainer"
)
+// keep waitProcessTimeout value same as value in kata-runtime wait WaitProcessRequest response
+const (
+ waitProcessTimeOut = 10
+)
+
var (
sysfsCPUOnlinePath = "/sys/devices/system/cpu"
sysfsMemOnlinePath = "/sys/devices/system/memory"
@@ -996,17 +1001,35 @@ func (a *agentGRPC) WaitProcess(ctx context.Context, req *pb.WaitProcessRequest)
ctr.deleteProcess(proc.id)
})
- // Using helper function wait() to deal with the subreaper.
- libContProcess := (*reaperLibcontainerProcess)(&(proc.process))
- exitCode, err := a.sandbox.subreaper.wait(proc.exitCodeCh, libContProcess)
- if err != nil {
- return &pb.WaitProcessResponse{}, err
+ done := make(chan error)
+ var exitCode int = 0
+ go func() {
+ // Using helper function wait() to deal with the subreaper.
+ libContProcess := (*reaperLibcontainerProcess)(&(proc.process))
+ var err error
+ exitCode, err = a.sandbox.subreaper.wait(proc.exitCodeCh, libContProcess)
+ if err != nil {
+ done <- err
+ close(done)
+ return
+ }
+ // Refill exitCodeCh with the exit code so that it can be read out
+ // by another WaitProcess(). Since this channel is never closed,
+ // the refill here will always succeed, and the channel will be freed
+ // by the GC once the process exits.
+ proc.exitCodeCh <- exitCode
+
+ close(done)
+ }()
+
+ select {
+ case err := <-done:
+ if err != nil {
+ return &pb.WaitProcessResponse{}, err
+ }
+ case <-time.After(time.Duration(waitProcessTimeOut) * time.Second):
+ return &pb.WaitProcessResponse{}, grpcStatus.Errorf(codes.DeadlineExceeded, "agent wait reap container process timeout reached after %ds", waitProcessTimeOut)
}
- //refill the exitCodeCh with the exitcode which can be read out
- //by another WaitProcess(). Since this channel isn't be closed,
- //here the refill will always success and it will be free by GC
- //once the process exits.
- proc.exitCodeCh <- exitCode
return &pb.WaitProcessResponse{
Status: int32(exitCode),
--
2.14.3 (Apple Git-98)