Skip to content

Commit 89db01e

Browse files
committed
cli: add otel sdk tracing and metric providers to the core cli
This adds the code used by buildx and compose into the default CLI program to help normalize the usage of these APIs and allow code reuse between projects. It also allows these projects to benefit from improvements or changes that may be made by another team. At the moment, these APIs are a pretty thin layer on the OTEL SDK. It configures an additional exporter to a docker endpoint that's used for usage collection and is only active if the option is configured in docker desktop. This also upgrades the OTEL version to v1.19 which is the one being used by buildkit, buildx, compose, etc. Signed-off-by: Jonathan A. Sternberg <jonathan.sternberg@docker.com>
1 parent b4d0328 commit 89db01e

236 files changed

Lines changed: 42128 additions & 1505 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cli/command/cli.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ type Cli interface {
6565
ContextStore() store.Store
6666
CurrentContext() string
6767
DockerEndpoint() docker.Endpoint
68+
TelemetryClient
6869
}
6970

7071
// DockerCli is an instance the docker command line client.
@@ -85,6 +86,7 @@ type DockerCli struct {
8586
dockerEndpoint docker.Endpoint
8687
contextStoreConfig store.Config
8788
initTimeout time.Duration
89+
res telemetryResource
8890

8991
// baseCtx is the base context used for internal operations. In the future
9092
// this may be replaced by explicitly passing a context to functions that

cli/command/telemetry.go

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
package command
2+
3+
import (
4+
"context"
5+
"os"
6+
"path/filepath"
7+
"sync"
8+
"time"
9+
10+
"github.com/docker/distribution/uuid"
11+
"go.opentelemetry.io/otel"
12+
"go.opentelemetry.io/otel/metric"
13+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
14+
"go.opentelemetry.io/otel/sdk/metric/metricdata"
15+
"go.opentelemetry.io/otel/sdk/resource"
16+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
17+
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
18+
"go.opentelemetry.io/otel/trace"
19+
)
20+
21+
const exportTimeout = 50 * time.Millisecond
22+
23+
// TracerProvider is an extension of the trace.TracerProvider interface for CLI programs.
24+
type TracerProvider interface {
25+
trace.TracerProvider
26+
ForceFlush(ctx context.Context) error
27+
Shutdown(ctx context.Context) error
28+
}
29+
30+
// MeterProvider is an extension of the metric.MeterProvider interface for CLI programs.
31+
type MeterProvider interface {
32+
metric.MeterProvider
33+
ForceFlush(ctx context.Context) error
34+
Shutdown(ctx context.Context) error
35+
}
36+
37+
// TelemetryClient provides the methods for using OTEL tracing or metrics.
38+
type TelemetryClient interface {
39+
// Resource returns the OTEL Resource configured with this TelemetryClient.
40+
// This resource may be created lazily, but the resource should be the same
41+
// each time this function is invoked.
42+
Resource() *resource.Resource
43+
44+
// TracerProvider returns a TracerProvider. This TracerProvider will be configured
45+
// with the default tracing components for a CLI program along with any options given
46+
// for the SDK.
47+
TracerProvider(ctx context.Context, opts ...sdktrace.TracerProviderOption) TracerProvider
48+
49+
// MeterProvider returns a MeterProvider. This MeterProvider will be configured
50+
// with the default metric components for a CLI program along with any options given
51+
// for the SDK.
52+
MeterProvider(ctx context.Context, opts ...sdkmetric.Option) MeterProvider
53+
}
54+
55+
func (cli *DockerCli) Resource() *resource.Resource {
56+
return cli.res.Get()
57+
}
58+
59+
func (cli *DockerCli) TracerProvider(ctx context.Context, opts ...sdktrace.TracerProviderOption) TracerProvider {
60+
allOpts := make([]sdktrace.TracerProviderOption, 0, len(opts)+2)
61+
allOpts = append(allOpts, sdktrace.WithResource(cli.Resource()))
62+
allOpts = append(allOpts, dockerSpanExporter(ctx, cli)...)
63+
allOpts = append(allOpts, opts...)
64+
return sdktrace.NewTracerProvider(allOpts...)
65+
}
66+
67+
func (cli *DockerCli) MeterProvider(ctx context.Context, opts ...sdkmetric.Option) MeterProvider {
68+
allOpts := make([]sdkmetric.Option, 0, len(opts)+2)
69+
allOpts = append(allOpts, sdkmetric.WithResource(cli.Resource()))
70+
allOpts = append(allOpts, dockerMetricExporter(ctx, cli)...)
71+
allOpts = append(allOpts, opts...)
72+
return sdkmetric.NewMeterProvider(allOpts...)
73+
}
74+
75+
// WithResourceOptions configures additional options for the default resource. The default
76+
// resource will continue to include its default options.
77+
func WithResourceOptions(opts ...resource.Option) CLIOption {
78+
return func(cli *DockerCli) error {
79+
cli.res.AppendOptions(opts...)
80+
return nil
81+
}
82+
}
83+
84+
// WithResource overwrites the default resource and prevents its creation.
85+
func WithResource(res *resource.Resource) CLIOption {
86+
return func(cli *DockerCli) error {
87+
cli.res.Set(res)
88+
return nil
89+
}
90+
}
91+
92+
type telemetryResource struct {
93+
res *resource.Resource
94+
opts []resource.Option
95+
once sync.Once
96+
}
97+
98+
func (r *telemetryResource) Set(res *resource.Resource) {
99+
r.res = res
100+
}
101+
102+
func (r *telemetryResource) Get() *resource.Resource {
103+
r.once.Do(r.init)
104+
return r.res
105+
}
106+
107+
func (r *telemetryResource) init() {
108+
if r.res != nil {
109+
r.opts = nil
110+
return
111+
}
112+
113+
opts := append(r.defaultOptions(), r.opts...)
114+
res, err := resource.New(context.Background(), opts...)
115+
if err != nil {
116+
otel.Handle(err)
117+
}
118+
r.res = res
119+
120+
// Clear the resource options since they'll never be used again and to allow
121+
// the garbage collector to retrieve that memory.
122+
r.opts = nil
123+
}
124+
125+
func (r *telemetryResource) defaultOptions() []resource.Option {
126+
return []resource.Option{
127+
resource.WithDetectors(serviceNameDetector{}),
128+
resource.WithAttributes(
129+
// Use a unique instance id so OTEL knows that each invocation
130+
// of the CLI is its own instance. Without this, downstream
131+
// OTEL processors may think the same process is restarting
132+
// continuously.
133+
semconv.ServiceInstanceID(uuid.Generate().String()),
134+
),
135+
resource.WithFromEnv(),
136+
resource.WithTelemetrySDK(),
137+
}
138+
}
139+
140+
func (r *telemetryResource) AppendOptions(opts ...resource.Option) {
141+
if r.res != nil {
142+
return
143+
}
144+
r.opts = append(r.opts, opts...)
145+
}
146+
147+
type serviceNameDetector struct{}
148+
149+
func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
150+
return resource.StringDetector(
151+
semconv.SchemaURL,
152+
semconv.ServiceNameKey,
153+
func() (string, error) {
154+
return filepath.Base(os.Args[0]), nil
155+
},
156+
).Detect(ctx)
157+
}
158+
159+
// cliReader is an implementation of Reader that will automatically
160+
// report to a designated Exporter when Shutdown is called.
161+
type cliReader struct {
162+
sdkmetric.Reader
163+
exporter sdkmetric.Exporter
164+
}
165+
166+
func newCLIReader(exp sdkmetric.Exporter) sdkmetric.Reader {
167+
reader := sdkmetric.NewManualReader(
168+
sdkmetric.WithTemporalitySelector(deltaTemporality),
169+
)
170+
return &cliReader{
171+
Reader: reader,
172+
exporter: exp,
173+
}
174+
}
175+
176+
func (r *cliReader) Shutdown(ctx context.Context) error {
177+
var rm metricdata.ResourceMetrics
178+
if err := r.Reader.Collect(ctx, &rm); err != nil {
179+
return err
180+
}
181+
182+
// Place a pretty tight constraint on the actual reporting.
183+
// We don't want CLI metrics to prevent the CLI from exiting
184+
// so if there's some kind of issue we need to abort pretty
185+
// quickly.
186+
ctx, cancel := context.WithTimeout(ctx, exportTimeout)
187+
defer cancel()
188+
189+
return r.exporter.Export(ctx, &rm)
190+
}
191+
192+
// deltaTemporality sets the Temporality of every instrument to delta.
193+
//
194+
// This isn't really needed since we create a unique resource on each invocation,
195+
// but it can help with cardinality concerns for downstream processors since they can
196+
// perform aggregation for a time interval and then discard the data once that time
197+
// period has passed. Cumulative temporality would imply to the downstream processor
198+
// that they might receive a successive point and they may unnecessarily keep state
199+
// they really shouldn't.
200+
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
201+
return metricdata.DeltaTemporality
202+
}

cli/command/telemetry_docker.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package command
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/url"
7+
"path"
8+
9+
"github.com/pkg/errors"
10+
"go.opentelemetry.io/otel"
11+
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
12+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
13+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
14+
sdktrace "go.opentelemetry.io/otel/sdk/trace"
15+
)
16+
17+
const otelContextFieldName = "otel"
18+
19+
// dockerExporterOTLPEndpoint retrieves the OTLP endpoint used for the docker reporter
20+
// from the current context.
21+
func dockerExporterOTLPEndpoint(cli Cli) (endpoint string, secure bool) {
22+
meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext())
23+
if err != nil {
24+
otel.Handle(err)
25+
return "", false
26+
}
27+
28+
var otelCfg any
29+
switch m := meta.Metadata.(type) {
30+
case DockerContext:
31+
otelCfg = m.AdditionalFields[otelContextFieldName]
32+
case map[string]any:
33+
otelCfg = m[otelContextFieldName]
34+
}
35+
36+
if otelCfg == nil {
37+
return "", false
38+
}
39+
40+
otelMap, ok := otelCfg.(map[string]any)
41+
if !ok {
42+
otel.Handle(errors.Errorf(
43+
"unexpected type for field %q: %T (expected: %T)",
44+
otelContextFieldName,
45+
otelCfg,
46+
otelMap,
47+
))
48+
return "", false
49+
}
50+
51+
// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
52+
endpoint, ok = otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
53+
if !ok {
54+
return "", false
55+
}
56+
57+
// Parse the endpoint. The docker config expects the endpoint to be
58+
// in the form of a URL to match the environment variable, but this
59+
// option doesn't correspond directly to WithEndpoint.
60+
//
61+
// We pretend we're the same as the environment reader.
62+
u, err := url.Parse(endpoint)
63+
if err != nil {
64+
otel.Handle(errors.Errorf("docker otel endpoint is invalid: %s", err))
65+
return "", false
66+
}
67+
68+
switch u.Scheme {
69+
case "unix":
70+
// Unix sockets are a bit weird. OTEL seems to imply they
71+
// can be used as an environment variable and are handled properly,
72+
// but they don't seem to be as the behavior of the environment variable
73+
// is to strip the scheme from the endpoint, but the underlying implementation
74+
// needs the scheme to use the correct resolver.
75+
//
76+
// We'll just handle this in a special way and add the unix:// back to the endpoint.
77+
endpoint = fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))
78+
case "https":
79+
secure = true
80+
fallthrough
81+
case "http":
82+
endpoint = path.Join(u.Host, u.Path)
83+
}
84+
return endpoint, secure
85+
}
86+
87+
func dockerSpanExporter(ctx context.Context, cli Cli) []sdktrace.TracerProviderOption {
88+
endpoint, secure := dockerExporterOTLPEndpoint(cli)
89+
if endpoint == "" {
90+
return nil
91+
}
92+
93+
opts := []otlptracegrpc.Option{
94+
otlptracegrpc.WithEndpoint(endpoint),
95+
}
96+
if !secure {
97+
opts = append(opts, otlptracegrpc.WithInsecure())
98+
}
99+
100+
exp, err := otlptracegrpc.New(ctx, opts...)
101+
if err != nil {
102+
otel.Handle(err)
103+
return nil
104+
}
105+
return []sdktrace.TracerProviderOption{sdktrace.WithBatcher(exp, sdktrace.WithExportTimeout(exportTimeout))}
106+
}
107+
108+
func dockerMetricExporter(ctx context.Context, cli Cli) []sdkmetric.Option {
109+
endpoint, secure := dockerExporterOTLPEndpoint(cli)
110+
if endpoint == "" {
111+
return nil
112+
}
113+
114+
opts := []otlpmetricgrpc.Option{
115+
otlpmetricgrpc.WithEndpoint(endpoint),
116+
}
117+
if !secure {
118+
opts = append(opts, otlpmetricgrpc.WithInsecure())
119+
}
120+
121+
exp, err := otlpmetricgrpc.New(ctx, opts...)
122+
if err != nil {
123+
otel.Handle(err)
124+
return nil
125+
}
126+
return []sdkmetric.Option{sdkmetric.WithReader(newCLIReader(exp))}
127+
}

vendor.mod

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ require (
3838
github.com/tonistiigi/go-rosetta v0.0.0-20200727161949-f79598599c5d
3939
github.com/xeipuuv/gojsonschema v1.2.0
4040
go.opentelemetry.io/otel v1.21.0
41+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.44.0
42+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0
43+
go.opentelemetry.io/otel/metric v1.21.0
44+
go.opentelemetry.io/otel/sdk v1.21.0
45+
go.opentelemetry.io/otel/sdk/metric v1.21.0
46+
go.opentelemetry.io/otel/trace v1.21.0
4147
golang.org/x/sync v0.6.0
4248
golang.org/x/sys v0.16.0
4349
golang.org/x/term v0.15.0
@@ -52,16 +58,18 @@ require (
5258
github.com/Microsoft/go-winio v0.6.1 // indirect
5359
github.com/Microsoft/hcsshim v0.11.4 // indirect
5460
github.com/beorn7/perks v1.0.1 // indirect
61+
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
5562
github.com/cespare/xxhash/v2 v2.2.0 // indirect
5663
github.com/containerd/log v0.1.0 // indirect
5764
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
5865
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
5966
github.com/docker/go-metrics v0.0.1 // indirect
6067
github.com/felixge/httpsnoop v1.0.4 // indirect
61-
github.com/go-logr/logr v1.3.0 // indirect
68+
github.com/go-logr/logr v1.4.1 // indirect
6269
github.com/go-logr/stdr v1.2.2 // indirect
6370
github.com/golang/protobuf v1.5.4 // indirect
6471
github.com/gorilla/mux v1.8.1 // indirect
72+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
6573
github.com/inconshreveable/mousetrap v1.1.0 // indirect
6674
github.com/klauspost/compress v1.17.4 // indirect
6775
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
@@ -78,14 +86,15 @@ require (
7886
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
7987
go.etcd.io/etcd/raft/v3 v3.5.6 // indirect
8088
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect
81-
go.opentelemetry.io/otel/metric v1.21.0 // indirect
82-
go.opentelemetry.io/otel/trace v1.21.0 // indirect
89+
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 // indirect
90+
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
8391
golang.org/x/crypto v0.17.0 // indirect
8492
golang.org/x/mod v0.14.0 // indirect
8593
golang.org/x/net v0.19.0 // indirect
8694
golang.org/x/time v0.3.0 // indirect
8795
golang.org/x/tools v0.16.0 // indirect
96+
google.golang.org/genproto/googleapis/api v0.0.0-20231002182017-d307bd883b97 // indirect
8897
google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b // indirect
89-
google.golang.org/grpc v1.59.0 // indirect
98+
google.golang.org/grpc v1.60.1 // indirect
9099
google.golang.org/protobuf v1.33.0 // indirect
91100
)

0 commit comments

Comments
 (0)