Skip to content

Commit f5bc6da

Browse files
committed
cli: add otel sdk tracing and metric providers to the core cli
This adds the code used by buildx and compose into the default CLI program to help normalize the usage of these APIs and allow code reuse between projects. It also allows these projects to benefit from improvements or changes that may be made by another team. At the moment, these APIs are a pretty thin layer on the OTEL SDK. It configures an additional exporter to a docker endpoint that's used for usage collection and is only active if the option is configured in docker desktop. This also upgrades the OTEL version to v1.19 which is the one being used by buildkit, buildx, compose, etc. Signed-off-by: Jonathan A. Sternberg <jonathan.sternberg@docker.com>
1 parent 99eb502 commit f5bc6da

File tree

192 files changed

+39642
-3
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

192 files changed

+39642
-3
lines changed

cli/command/cli.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"github.com/pkg/errors"
3737
"github.com/spf13/cobra"
3838
notaryclient "github.com/theupdateframework/notary/client"
39+
"go.opentelemetry.io/otel/sdk/resource"
3940
)
4041

4142
const defaultInitTimeout = 2 * time.Second
@@ -65,6 +66,7 @@ type Cli interface {
6566
ContextStore() store.Store
6667
CurrentContext() string
6768
DockerEndpoint() docker.Endpoint
69+
TelemetryClient
6870
}
6971

7072
// DockerCli is an instance of the docker command line client.
@@ -85,6 +87,9 @@ type DockerCli struct {
8587
dockerEndpoint docker.Endpoint
8688
contextStoreConfig store.Config
8789
initTimeout time.Duration
90+
res *resource.Resource
91+
resOpts []resource.Option
92+
resOnce sync.Once
8893

8994
// baseCtx is the base context used for internal operations. In the future
9095
// this may be replaced by explicitly passing a context to functions that

cli/command/telemetry.go

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
package command
2+
3+
import (
4+
"context"
5+
"os"
6+
"path/filepath"
7+
"time"
8+
9+
"github.com/docker/distribution/uuid"
10+
"go.opentelemetry.io/otel"
11+
"go.opentelemetry.io/otel/metric"
12+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
13+
"go.opentelemetry.io/otel/sdk/metric/metricdata"
14+
"go.opentelemetry.io/otel/sdk/resource"
15+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
16+
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
17+
"go.opentelemetry.io/otel/trace"
18+
)
19+
20+
// TracerProvider is an extension of the trace.TracerProvider interface for CLI programs.
21+
type TracerProvider interface {
22+
trace.TracerProvider
23+
ForceFlush(ctx context.Context) error
24+
Shutdown(ctx context.Context) error
25+
}
26+
27+
// MeterProvider is an extension of the metric.MeterProvider interface for CLI programs.
28+
type MeterProvider interface {
29+
metric.MeterProvider
30+
ForceFlush(ctx context.Context) error
31+
Shutdown(ctx context.Context) error
32+
}
33+
34+
// TelemetryClient provides the methods for using OTEL tracing or metrics.
35+
type TelemetryClient interface {
36+
// Resource returns the OTEL Resource configured with this TelemetryClient.
37+
// This resource may be created lazily, but the resource should be the same
38+
// each time this function is invoked.
39+
Resource() *resource.Resource
40+
41+
// TracerProvider returns a TracerProvider. This TracerProvider will be configured
42+
// with the default tracing components for a CLI program along with any options given
43+
// for the SDK.
44+
TracerProvider(opts ...sdktrace.TracerProviderOption) TracerProvider
45+
46+
// MeterProvider returns a MeterProvider. This MeterProvider will be configured
47+
// with the default metric components for a CLI program along with any options given
48+
// for the SDK.
49+
MeterProvider(opts ...sdkmetric.Option) MeterProvider
50+
}
51+
52+
func (cli *DockerCli) Resource() *resource.Resource {
53+
cli.resOnce.Do(func() {
54+
if cli.res == nil {
55+
opts := []resource.Option{
56+
resource.WithDetectors(serviceNameDetector{}),
57+
resource.WithAttributes(
58+
// Use a unique instance id so OTEL knows that each invocation
59+
// of the CLI is its own instance. Without this, downstream
60+
// OTEL processors may think the same process is restarting
61+
// continuously.
62+
semconv.ServiceInstanceID(uuid.Generate().String()),
63+
),
64+
resource.WithFromEnv(),
65+
resource.WithTelemetrySDK(),
66+
}
67+
if len(cli.resOpts) > 0 {
68+
opts = append(opts, cli.resOpts...)
69+
}
70+
71+
res, err := resource.New(context.Background(), opts...)
72+
if err != nil {
73+
otel.Handle(err)
74+
}
75+
cli.res = res
76+
}
77+
// Clear the resource options since they'll never be used again and to allow
78+
// the garbage collector to retrieve that memory.
79+
cli.resOpts = nil
80+
})
81+
return cli.res
82+
}
83+
84+
func (cli *DockerCli) TracerProvider(opts ...sdktrace.TracerProviderOption) TracerProvider {
85+
exp, err := dockerSpanExporter(cli)
86+
if err != nil {
87+
otel.Handle(err)
88+
}
89+
defaultOpts := []sdktrace.TracerProviderOption{
90+
sdktrace.WithResource(cli.Resource()),
91+
sdktrace.WithBatcher(exp),
92+
}
93+
return sdktrace.NewTracerProvider(append(defaultOpts, opts...)...)
94+
}
95+
96+
func (cli *DockerCli) MeterProvider(opts ...sdkmetric.Option) MeterProvider {
97+
exp, err := dockerMetricExporter(cli)
98+
if err != nil {
99+
otel.Handle(err)
100+
}
101+
defaultOpts := []sdkmetric.Option{
102+
sdkmetric.WithResource(cli.Resource()),
103+
sdkmetric.WithReader(newCLIReader(exp)),
104+
}
105+
return sdkmetric.NewMeterProvider(append(defaultOpts, opts...)...)
106+
}
107+
108+
// WithResourceOptions configures additional options for the default resource. The default
109+
// resource will continue to include its default options.
110+
func WithResourceOptions(opts ...resource.Option) CLIOption {
111+
return func(cli *DockerCli) error {
112+
cli.resOpts = append(cli.resOpts, opts...)
113+
return nil
114+
}
115+
}
116+
117+
// WithResource overwrites the default resource and prevents its creation.
118+
func WithResource(res *resource.Resource) CLIOption {
119+
return func(cli *DockerCli) error {
120+
cli.res = res
121+
return nil
122+
}
123+
}
124+
125+
type serviceNameDetector struct{}
126+
127+
func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
128+
return resource.StringDetector(
129+
semconv.SchemaURL,
130+
semconv.ServiceNameKey,
131+
func() (string, error) {
132+
return filepath.Base(os.Args[0]), nil
133+
},
134+
).Detect(ctx)
135+
}
136+
137+
// cliReader is an implementation of Reader that will automatically
138+
// report to a designated Exporter when Shutdown is called.
139+
type cliReader struct {
140+
sdkmetric.Reader
141+
exporter sdkmetric.Exporter
142+
}
143+
144+
func newCLIReader(exp sdkmetric.Exporter) sdkmetric.Reader {
145+
reader := sdkmetric.NewManualReader(
146+
sdkmetric.WithTemporalitySelector(deltaTemporality),
147+
)
148+
return &cliReader{
149+
Reader: reader,
150+
exporter: exp,
151+
}
152+
}
153+
154+
const shutdownTimeout = 2 * time.Second
155+
156+
func (r *cliReader) Shutdown(ctx context.Context) error {
157+
var rm metricdata.ResourceMetrics
158+
if err := r.Reader.Collect(ctx, &rm); err != nil {
159+
return err
160+
}
161+
162+
// Place a pretty tight constraint on the actual reporting.
163+
// We don't want CLI metrics to prevent the CLI from exiting
164+
// so if there's some kind of issue we need to abort pretty
165+
// quickly.
166+
ctx, cancel := context.WithTimeout(ctx, shutdownTimeout)
167+
defer cancel()
168+
169+
return r.exporter.Export(ctx, &rm)
170+
}
171+
172+
// deltaTemporality sets the Temporality of every instrument to delta.
173+
//
174+
// This isn't really needed since we create a unique resource on each invocation,
175+
// but it can help with cardinality concerns for downstream processors since they can
176+
// perform aggregation for a time interval and then discard the data once that time
177+
// period has passed. Cumulative temporality would imply to the downstream processor
178+
// that they might receive a successive point and they may unnecessarily keep state
179+
// they really shouldn't.
180+
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
181+
return metricdata.DeltaTemporality
182+
}

cli/command/telemetry_docker.go

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package command
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/url"
7+
"path"
8+
9+
"github.com/pkg/errors"
10+
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
11+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
12+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
13+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
14+
)
15+
16+
const otelConfigFieldName = "otel"
17+
18+
// dockerExporterOtlpEndpoint retrieves the OTLP endpoint used for the docker reporter
19+
// from the current context.
20+
func dockerExporterOtlpEndpoint(cli Cli) (endpoint string, secure bool, err error) {
21+
meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext())
22+
if err != nil {
23+
return "", false, err
24+
}
25+
26+
var otelCfg interface{}
27+
switch m := meta.Metadata.(type) {
28+
case DockerContext:
29+
otelCfg = m.AdditionalFields[otelConfigFieldName]
30+
case map[string]interface{}:
31+
otelCfg = m[otelConfigFieldName]
32+
}
33+
34+
if otelCfg == nil {
35+
return "", false, nil
36+
}
37+
38+
otelMap, ok := otelCfg.(map[string]interface{})
39+
if !ok {
40+
return "", false, errors.Errorf(
41+
"unexpected type for field %q: %T (expected: %T)",
42+
otelConfigFieldName,
43+
otelCfg,
44+
otelMap,
45+
)
46+
}
47+
48+
// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
49+
endpoint, ok = otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
50+
if !ok {
51+
return "", false, nil
52+
}
53+
54+
// Parse the endpoint. The docker config expects the endpoint to be
55+
// in the form of a URL to match the environment variable, but this
56+
// option doesn't correspond directly to WithEndpoint.
57+
//
58+
// We pretend we're the same as the environment reader.
59+
u, err := url.Parse(endpoint)
60+
if err != nil {
61+
return "", false, errors.Errorf("docker otel endpoint is invalid: %s", err)
62+
}
63+
64+
switch u.Scheme {
65+
case "unix":
66+
// Unix sockets are a bit weird. OTEL seems to imply they
67+
// can be used as an environment variable and are handled properly,
68+
// but they don't seem to be as the behavior of the environment variable
69+
// is to strip the scheme from the endpoint, but the underlying implementation
70+
// needs the scheme to use the correct resolver.
71+
//
72+
// We'll just handle this in a special way and add the unix:// back to the endpoint.
73+
endpoint = fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))
74+
case "https":
75+
secure = true
76+
fallthrough
77+
case "http":
78+
endpoint = path.Join(u.Host, u.Path)
79+
}
80+
return endpoint, secure, nil
81+
}
82+
83+
func dockerSpanExporter(cli Cli) (sdktrace.SpanExporter, error) {
84+
endpoint, secure, err := dockerExporterOtlpEndpoint(cli)
85+
if err != nil {
86+
return nil, err
87+
}
88+
89+
opts := []otlptracegrpc.Option{
90+
otlptracegrpc.WithEndpoint(endpoint),
91+
}
92+
if !secure {
93+
opts = append(opts, otlptracegrpc.WithInsecure())
94+
}
95+
96+
exp, err := otlptracegrpc.New(context.Background(), opts...)
97+
if err != nil {
98+
return nil, err
99+
}
100+
return exp, nil
101+
}
102+
103+
func dockerMetricExporter(cli Cli) (sdkmetric.Exporter, error) {
104+
endpoint, secure, err := dockerExporterOtlpEndpoint(cli)
105+
if err != nil {
106+
return nil, err
107+
}
108+
109+
opts := []otlpmetricgrpc.Option{
110+
otlpmetricgrpc.WithEndpoint(endpoint),
111+
}
112+
if !secure {
113+
opts = append(opts, otlpmetricgrpc.WithInsecure())
114+
}
115+
116+
exp, err := otlpmetricgrpc.New(context.Background(), opts...)
117+
if err != nil {
118+
return nil, err
119+
}
120+
return exp, nil
121+
}

vendor.mod

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ require (
3838
github.com/tonistiigi/go-rosetta v0.0.0-20200727161949-f79598599c5d
3939
github.com/xeipuuv/gojsonschema v1.2.0
4040
go.opentelemetry.io/otel v1.19.0
41+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
42+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0
43+
go.opentelemetry.io/otel/metric v1.19.0
44+
go.opentelemetry.io/otel/sdk v1.19.0
45+
go.opentelemetry.io/otel/sdk/metric v1.19.0
46+
go.opentelemetry.io/otel/trace v1.19.0
4147
golang.org/x/sync v0.6.0
4248
golang.org/x/sys v0.16.0
4349
golang.org/x/term v0.15.0
@@ -52,6 +58,7 @@ require (
5258
github.com/Microsoft/go-winio v0.6.1 // indirect
5359
github.com/Microsoft/hcsshim v0.11.4 // indirect
5460
github.com/beorn7/perks v1.0.1 // indirect
61+
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
5562
github.com/cespare/xxhash/v2 v2.2.0 // indirect
5663
github.com/containerd/log v0.1.0 // indirect
5764
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
@@ -62,6 +69,7 @@ require (
6269
github.com/go-logr/stdr v1.2.2 // indirect
6370
github.com/golang/protobuf v1.5.3 // indirect
6471
github.com/gorilla/mux v1.8.1 // indirect
72+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
6573
github.com/inconshreveable/mousetrap v1.1.0 // indirect
6674
github.com/klauspost/compress v1.17.4 // indirect
6775
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
@@ -78,13 +86,15 @@ require (
7886
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
7987
go.etcd.io/etcd/raft/v3 v3.5.6 // indirect
8088
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
81-
go.opentelemetry.io/otel/metric v1.19.0 // indirect
82-
go.opentelemetry.io/otel/trace v1.19.0 // indirect
89+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect
90+
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
91+
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
8392
golang.org/x/crypto v0.17.0 // indirect
8493
golang.org/x/mod v0.14.0 // indirect
8594
golang.org/x/net v0.19.0 // indirect
8695
golang.org/x/time v0.3.0 // indirect
8796
golang.org/x/tools v0.16.0 // indirect
97+
google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98 // indirect
8898
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
8999
google.golang.org/grpc v1.58.3 // indirect
90100
google.golang.org/protobuf v1.31.0 // indirect

0 commit comments

Comments
 (0)