Skip to content

Commit 222a994

Browse files
authored
Merge pull request #7188 from twz123/watch-termination-grace-period
Enable API server's watch termination grace period by default
2 parents 4d7d4f9 + adaa2c8 commit 222a994

5 files changed

Lines changed: 108 additions & 6 deletions

File tree

cmd/controller/controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ func (c *command) start(ctx context.Context, flags *config.ControllerOptions, de
308308
LogLevel: c.LogLevels.KubeAPIServer,
309309
Storage: storageBackend,
310310
EnableKonnectivity: enableKonnectivity,
311+
StopTimeout: flags.APIServerStopTimeout,
311312

312313
// If k0s reconciles the kubernetes endpoint, the API server shouldn't do it.
313314
DisableEndpointReconciler: enableK0sEndpointReconciler,

cmd/controller/controller_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Examples:
5050
Note: Token can be passed either as a CLI argument, a flag, or an environment variable
5151
5252
Flags:
53+
--api-server-stop-timeout duration time to wait for the API server to stop
5354
-c, --config string config file, use '-' to read the config from stdin (default `+defaultConfigPath+`)
5455
--cri-socket string container runtime socket to use, default to internal containerd. Format: [remote|docker]:[path-to-socket]
5556
--data-dir string Data Directory for k0s. DO NOT CHANGE for an existing setup, things will break! (default `+defaultDataDir+`)
@@ -82,3 +83,21 @@ Flags:
8283
-v, --verbose Verbose logging (default true)
8384
`, out.String())
8485
}
86+
87+
func TestControllerCmd_Flags(t *testing.T) {
88+
if runtime.GOOS != "linux" {
89+
t.Skip("Running controllers is only supported on Linux")
90+
}
91+
92+
t.Run("api-server-stop-timeout", func(t *testing.T) {
93+
expected := `invalid argument "0s" for "--api-server-stop-timeout" flag: must be positive`
94+
var stdout, stderr strings.Builder
95+
underTest := cmd.NewRootCmd()
96+
underTest.SetArgs([]string{"controller", "--api-server-stop-timeout", "0s"})
97+
underTest.SetOut(&stdout)
98+
underTest.SetErr(&stderr)
99+
assert.ErrorContains(t, underTest.Execute(), expected)
100+
assert.Empty(t, stdout.String())
101+
assert.Equal(t, "Error: "+expected+"\n", stderr.String())
102+
})
103+
}

cmd/install/controller_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ With the controller subcommand you can setup a single node cluster by running:
4343
4444
4545
Flags:
46+
--api-server-stop-timeout duration time to wait for the API server to stop
4647
-c, --config string config file, use '-' to read the config from stdin (default `+defaultConfigPath+`)
4748
--cri-socket string container runtime socket to use, default to internal containerd. Format: [remote|docker]:[path-to-socket]
4849
--data-dir string Data Directory for k0s. DO NOT CHANGE for an existing setup, things will break! (default `+defaultDataDir+`)

pkg/component/controller/apiserver.go

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"path/filepath"
1717
"strconv"
1818
"strings"
19+
"time"
1920

2021
"github.com/sirupsen/logrus"
2122

@@ -38,6 +39,7 @@ type APIServer struct {
3839
Storage manager.Component
3940
EnableKonnectivity bool
4041
DisableEndpointReconciler bool
42+
StopTimeout time.Duration
4143

4244
supervisor *supervisor.Supervisor
4345
executablePath string
@@ -154,19 +156,59 @@ func (a *APIServer) Start(ctx context.Context) error {
154156
args["endpoint-reconciler-type"] = "none"
155157
}
156158

159+
stopTimeout := a.StopTimeout
160+
161+
// If the timeout hasn't been specified, do a
162+
// best guess based on the API server flags.
163+
if stopTimeout <= 0 {
164+
requestTimeout := 1 * time.Minute
165+
if value, ok := args["request-timeout"]; ok {
166+
if parsed, err := time.ParseDuration(value); err == nil {
167+
requestTimeout = parsed
168+
}
169+
}
170+
171+
watchTerminationGrace := 0 * time.Second
172+
if value, ok := args["shutdown-watch-termination-grace-period"]; ok {
173+
if parsed, err := time.ParseDuration(value); err == nil {
174+
watchTerminationGrace = parsed
175+
}
176+
}
177+
178+
stopTimeout = max(requestTimeout, watchTerminationGrace) + (2 * time.Second)
179+
180+
// Clamp the timeout between 5 and 20 seconds. We can't wait for too long
181+
// currently because the init system will likely kill the process otherwise.
182+
stopTimeout = max(5*time.Second, min(stopTimeout, 20*time.Second))
183+
}
184+
185+
// Enable the API server's watch-drain facility on shutdown, if that flag
186+
// hasn't been specified by the user. Without this flag, the API server will
187+
// almost always encounter the request timeout if anything is connected to
188+
// it via client-go watches. These have a timeout of between five and ten
189+
// minutes. Note that other types of long-running requests, such as log
190+
// streams, can still prevent a timely shutdown. However, there's not much
191+
// that can be done about them apart from setting a short request timeout.
192+
if _, ok := args["shutdown-watch-termination-grace-period"]; !ok {
193+
if gracePeriod := stopTimeout - 2*time.Second; gracePeriod > 0 {
194+
args["shutdown-watch-termination-grace-period"] = gracePeriod.String()
195+
}
196+
}
197+
157198
var apiServerArgs []string
158199
for name, value := range args {
159200
apiServerArgs = append(apiServerArgs, fmt.Sprintf("--%s=%s", name, value))
160201
}
161202
apiServerArgs = append(apiServerArgs, a.ClusterConfig.Spec.API.RawArgs...)
162203

163204
a.supervisor = &supervisor.Supervisor{
164-
Name: kubeAPIComponentName,
165-
BinPath: a.executablePath,
166-
RunDir: a.K0sVars.RunDir,
167-
DataDir: a.K0sVars.DataDir,
168-
Args: apiServerArgs,
169-
UID: a.uid,
205+
Name: kubeAPIComponentName,
206+
BinPath: a.executablePath,
207+
RunDir: a.K0sVars.RunDir,
208+
DataDir: a.K0sVars.DataDir,
209+
Args: apiServerArgs,
210+
UID: a.uid,
211+
TimeoutStop: stopTimeout,
170212
}
171213

172214
etcdArgs, err := getEtcdArgs(a.ClusterConfig.Spec.Storage, a.K0sVars)

pkg/config/cli.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package config
55

66
import (
7+
"errors"
78
"fmt"
89
"runtime"
910
"slices"
@@ -57,6 +58,7 @@ type ControllerOptions struct {
5758
EnableMetricsScraper bool
5859
KubeControllerManagerExtraArgs string
5960
FeatureGates featuregate.FeatureGates
61+
APIServerStopTimeout time.Duration
6062

6163
enableWorker, singleNode bool
6264
}
@@ -302,6 +304,7 @@ func GetControllerFlags(controllerOpts *ControllerOptions) *pflag.FlagSet {
302304
flagset.StringVar(&controllerOpts.KubeControllerManagerExtraArgs, "kube-controller-manager-extra-args", "", "extra args for kube-controller-manager")
303305
flagset.BoolVar(&controllerOpts.InitOnly, "init-only", false, "only initialize controller and exit")
304306
flagset.Var(&controllerOpts.FeatureGates, "feature-gates", "feature gates to enable (comma separated list of key=value pairs)")
307+
flagset.Var((*positiveDurationFlag)(&controllerOpts.APIServerStopTimeout), "api-server-stop-timeout", "time to wait for the API server to stop")
305308
return flagset
306309
}
307310

@@ -334,3 +337,39 @@ func GetCmdOpts(cobraCmd command) (*CLIOptions, error) {
334337
K0sVars: k0sVars,
335338
}, nil
336339
}
340+
341+
// positiveDurationFlag is a command line flag value backed by a
// [time.Duration] that only accepts strictly positive durations. A value of
// zero (or less) is treated as "not set".
type positiveDurationFlag time.Duration

// Type implements [pflag.Value]. It reports the type name "duration".
func (f *positiveDurationFlag) Type() string {
	return "duration"
}

// String implements [pflag.Value]. It returns the empty string when no
// positive duration is stored, and the duration's string form otherwise.
func (f *positiveDurationFlag) String() string {
	// Flag libraries may call String on a zero-valued receiver, such as a
	// nil pointer. Treat that like an unset flag instead of panicking.
	if f == nil {
		return ""
	}

	d := time.Duration(*f)
	if d <= 0 {
		return ""
	}

	return d.String()
}

// Set implements [pflag.Value]. The empty string resets the flag to zero.
// Any other value must be parseable by [time.ParseDuration] and be strictly
// positive; otherwise an error is returned and the stored value is left
// untouched.
func (f *positiveDurationFlag) Set(value string) error {
	if value == "" {
		*f = 0
		return nil
	}

	parsed, err := time.ParseDuration(value)
	if err != nil {
		return err
	}

	if parsed <= 0 {
		return errors.New("must be positive")
	}

	*f = positiveDurationFlag(parsed)
	return nil
}

0 commit comments

Comments
 (0)