X Tutup
Skip to content

Commit 9afc778

Browse files
authored
Merge pull request containerd#6111 from crosbymichael/latency-metrics
[cri] add sandbox and container latency metrics
2 parents 4aea5da + 91bbaf6 commit 9afc778

File tree

10 files changed

+111
-0
lines changed

10 files changed

+111
-0
lines changed

pkg/cri/server/container_create.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
102102
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
103103
}
104104

105+
start := time.Now()
105106
// Run container using the same runtime with sandbox.
106107
sandboxInfo, err := sandbox.Container.Info(ctx)
107108
if err != nil {
@@ -278,6 +279,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
278279
return nil, errors.Wrapf(err, "failed to add container %q into store", id)
279280
}
280281

282+
containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start)
283+
281284
return &runtime.CreateContainerResponse{ContainerId: id}, nil
282285
}
283286

pkg/cri/server/container_list.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package server
1818

1919
import (
20+
"time"
21+
2022
"golang.org/x/net/context"
2123

2224
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@@ -26,6 +28,7 @@ import (
2628

2729
// ListContainers lists all containers matching the filter.
2830
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
31+
start := time.Now()
2932
// List all containers from store.
3033
containersInStore := c.containerStore.List()
3134

@@ -35,6 +38,8 @@ func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContaine
3538
}
3639

3740
containers = c.filterCRIContainers(containers, r.GetFilter())
41+
42+
containerListTimer.UpdateSince(start)
3843
return &runtime.ListContainersResponse{Containers: containers}, nil
3944
}
4045

pkg/cri/server/container_remove.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package server
1818

1919
import (
20+
"time"
21+
2022
"github.com/containerd/containerd"
2123
"github.com/containerd/containerd/errdefs"
2224
"github.com/containerd/containerd/log"
@@ -30,6 +32,7 @@ import (
3032

3133
// RemoveContainer removes the container.
3234
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
35+
start := time.Now()
3336
container, err := c.containerStore.Get(r.GetContainerId())
3437
if err != nil {
3538
if !errdefs.IsNotFound(err) {
@@ -40,6 +43,10 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
4043
return &runtime.RemoveContainerResponse{}, nil
4144
}
4245
id := container.ID
46+
i, err := container.Container.Info(ctx)
47+
if err != nil {
48+
return nil, errors.Wrap(err, "get container info")
49+
}
4350

4451
// Forcibly stop the containers if they are in running or unknown state
4552
state := container.Status.Get().State()
@@ -99,6 +106,8 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
99106

100107
c.containerNameIndex.ReleaseByKey(id)
101108

109+
containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start)
110+
102111
return &runtime.RemoveContainerResponse{}, nil
103112
}
104113

pkg/cri/server/container_start.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,17 @@ import (
4040

4141
// StartContainer starts the container.
4242
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
43+
start := time.Now()
4344
cntr, err := c.containerStore.Get(r.GetContainerId())
4445
if err != nil {
4546
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
4647
}
4748

49+
info, err := cntr.Container.Info(ctx)
50+
if err != nil {
51+
return nil, errors.Wrap(err, "get container info")
52+
}
53+
4854
id := cntr.ID
4955
meta := cntr.Metadata
5056
container := cntr.Container
@@ -170,6 +176,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
170176
// It handles the TaskExit event and update container state after this.
171177
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)
172178

179+
containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start)
180+
173181
return &runtime.StartContainerResponse{}, nil
174182
}
175183

pkg/cri/server/container_stop.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535

3636
// StopContainer stops a running container with a grace period (i.e., timeout).
3737
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
38+
start := time.Now()
3839
// Get container config from container store.
3940
container, err := c.containerStore.Get(r.GetContainerId())
4041
if err != nil {
@@ -45,6 +46,13 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
4546
return nil, err
4647
}
4748

49+
i, err := container.Container.Info(ctx)
50+
if err != nil {
51+
return nil, errors.Wrap(err, "get container info")
52+
}
53+
54+
containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start)
55+
4856
return &runtime.StopContainerResponse{}, nil
4957
}
5058

pkg/cri/server/metrics.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package server
18+
19+
import (
20+
metrics "github.com/docker/go-metrics"
21+
)
22+
23+
// Package-level latency timers for CRI sandbox and container operations.
// They are assigned in init() and updated by the CRI service handlers with
// UpdateSince(start). metrics.Timer values carry no labels; metrics.LabeledTimer
// values are declared with a single "runtime" label and are resolved with
// WithValues(<runtime>) before being updated.
//
// NOTE(review): the "runtime" label value is not taken from the same source at
// every call site visible in this change — container/sandbox create pass
// ociRuntime.Type, container remove/start/stop pass Info().Runtime.Name, and
// sandbox remove/stop pass sandbox.RuntimeHandler. Confirm these yield
// comparable values, otherwise series for one runtime will be split.
var (
24+
sandboxListTimer metrics.Timer
25+
sandboxCreateNetworkTimer metrics.Timer
26+
// NOTE(review): unlike every sibling, this name has no "Timer" suffix.
sandboxDeleteNetwork metrics.Timer
27+
28+
sandboxRuntimeCreateTimer metrics.LabeledTimer
29+
sandboxRuntimeStopTimer metrics.LabeledTimer
30+
sandboxRemoveTimer metrics.LabeledTimer
31+
32+
containerListTimer metrics.Timer
33+
containerRemoveTimer metrics.LabeledTimer
34+
containerCreateTimer metrics.LabeledTimer
35+
containerStopTimer metrics.LabeledTimer
36+
containerStartTimer metrics.LabeledTimer
37+
)
38+
39+
// init creates the "containerd"/"cri" metrics namespace, allocates every
// package-level sandbox and container latency timer from it, and then
// registers the namespace with the go-metrics package.
//
// The handlers in this change call UpdateSince only on their success path
// (after all error returns), so failed operations are not recorded.
func init() {
40+
// these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle.
41+
// The third argument (constant labels) is nil — presumably no labels are
// shared by all metrics in this namespace; confirm against go-metrics.
ns := metrics.NewNamespace("containerd", "cri", nil)
42+
43+
// Unlabeled timers: one series per operation.
sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes")
44+
sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox")
45+
sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network")
46+
47+
// Labeled timers: each declares a single "runtime" label, supplied by the
// caller through WithValues.
sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime")
48+
sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime")
49+
sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime")
50+
51+
containerListTimer = ns.NewTimer("container_list", "time to list containers")
52+
containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime")
53+
containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime")
54+
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
55+
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
56+
57+
// Register after all timers have been created on the namespace.
metrics.Register(ns)
58+
}

pkg/cri/server/sandbox_list.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package server
1818

1919
import (
20+
"time"
21+
2022
"golang.org/x/net/context"
2123
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2224

@@ -25,6 +27,7 @@ import (
2527

2628
// ListPodSandbox returns a list of Sandbox.
2729
func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
30+
start := time.Now()
2831
// List all sandboxes from store.
2932
sandboxesInStore := c.sandboxStore.List()
3033
var sandboxes []*runtime.PodSandbox
@@ -36,6 +39,8 @@ func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb
3639
}
3740

3841
sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())
42+
43+
sandboxListTimer.UpdateSince(start)
3944
return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
4045
}
4146

pkg/cri/server/sandbox_remove.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package server
1818

1919
import (
20+
"time"
21+
2022
"github.com/containerd/containerd"
2123
"github.com/containerd/containerd/errdefs"
2224
"github.com/containerd/containerd/log"
@@ -30,6 +32,7 @@ import (
3032
// RemovePodSandbox removes the sandbox. If there are running containers in the
3133
// sandbox, they should be forcibly removed.
3234
func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
35+
start := time.Now()
3336
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
3437
if err != nil {
3538
if !errdefs.IsNotFound(err) {
@@ -108,5 +111,7 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
108111
// Release the sandbox name reserved for the sandbox.
109112
c.sandboxNameIndex.ReleaseByKey(id)
110113

114+
sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start)
115+
111116
return &runtime.RemovePodSandboxResponse{}, nil
112117
}

pkg/cri/server/sandbox_run.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"path/filepath"
2323
goruntime "runtime"
2424
"strings"
25+
"time"
2526

2627
"github.com/containerd/containerd"
2728
containerdio "github.com/containerd/containerd/cio"
@@ -123,6 +124,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
123124
}
124125

125126
if podNetwork {
127+
netStart := time.Now()
126128
// If it is not in host network namespace then create a namespace and set the sandbox
127129
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
128130
// namespaces. If the pod is in host network namespace then both are empty and should not
@@ -163,8 +165,10 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
163165
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
164166
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
165167
}
168+
sandboxCreateNetworkTimer.UpdateSince(netStart)
166169
}
167170

171+
runtimeStart := time.Now()
168172
// Create sandbox container.
169173
// NOTE: sandboxContainerSpec SHOULD NOT have side
170174
// effect, e.g. accessing/creating files, so that we can test
@@ -348,6 +352,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
348352
// but we don't care about sandbox TaskOOM right now, so it is fine.
349353
c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh)
350354

355+
sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart)
356+
351357
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
352358
}
353359

pkg/cri/server/sandbox_stop.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
5454
// Stop all containers inside the sandbox. This terminates the container forcibly,
5555
// and container may still be created, so production should not rely on this behavior.
5656
// TODO(random-liu): Introduce a state in sandbox to avoid future container creation.
57+
stop := time.Now()
5758
containers := c.containerStore.List()
5859
for _, container := range containers {
5960
if container.SandboxID != id {
@@ -77,9 +78,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
7778
return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
7879
}
7980
}
81+
sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop)
8082

8183
// Teardown network for sandbox.
8284
if sandbox.NetNS != nil {
85+
netStop := time.Now()
8386
// Use empty netns path if netns is not available. This is defined in:
8487
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
8588
if closed, err := sandbox.NetNS.Closed(); err != nil {
@@ -93,6 +96,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
9396
if err := sandbox.NetNS.Remove(); err != nil {
9497
return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
9598
}
99+
sandboxDeleteNetwork.UpdateSince(netStop)
96100
}
97101

98102
log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)

0 commit comments

Comments
 (0)
X Tutup