diff --git a/pkg/apis/core/common/constants.go b/pkg/apis/core/common/constants.go index 3f3c0a6746..dd8879288a 100644 --- a/pkg/apis/core/common/constants.go +++ b/pkg/apis/core/common/constants.go @@ -24,6 +24,8 @@ const ( ClusterReady ClusterConditionType = "Ready" // ClusterOffline means the cluster is temporarily down or not reachable ClusterOffline ClusterConditionType = "Offline" + // ClusterConfigMalformed means the cluster's configuration may be malformed. + ClusterConfigMalformed ClusterConditionType = "ConfigMalformed" ) const ( diff --git a/pkg/apis/core/v1beta1/validation/validation.go b/pkg/apis/core/v1beta1/validation/validation.go index 283f0189a9..e9ab57be6b 100644 --- a/pkg/apis/core/v1beta1/validation/validation.go +++ b/pkg/apis/core/v1beta1/validation/validation.go @@ -255,7 +255,7 @@ func validateDisabledTLSValidations(disabledTLSValidations []v1beta1.TLSValidati func validateClusterCondition(cc *v1beta1.ClusterCondition, path *field.Path) field.ErrorList { var allErrs field.ErrorList - allErrs = append(allErrs, validateEnumStrings(path.Child("type"), string(cc.Type), []string{string(common.ClusterReady), string(common.ClusterOffline)})...) + allErrs = append(allErrs, validateEnumStrings(path.Child("type"), string(cc.Type), []string{string(common.ClusterReady), string(common.ClusterOffline), string(common.ClusterConfigMalformed)})...) allErrs = append(allErrs, validateEnumStrings(path.Child("status"), string(cc.Status), []string{string(corev1.ConditionTrue), string(corev1.ConditionFalse), string(corev1.ConditionUnknown)})...) 
if cc.LastProbeTime.IsZero() { diff --git a/pkg/controller/kubefedcluster/clusterclient.go b/pkg/controller/kubefedcluster/clusterclient.go index 03066f716a..b936109569 100644 --- a/pkg/controller/kubefedcluster/clusterclient.go +++ b/pkg/controller/kubefedcluster/clusterclient.go @@ -46,14 +46,16 @@ const ( LabelZoneRegion = "failure-domain.beta.kubernetes.io/region" // Common ClusterConditions for KubeFedClusterStatus - ClusterReady = "ClusterReady" - HealthzOk = "/healthz responded with ok" - ClusterNotReady = "ClusterNotReady" - HealthzNotOk = "/healthz responded without ok" - ClusterNotReachableReason = "ClusterNotReachable" - ClusterNotReachableMsg = "cluster is not reachable" - ClusterReachableReason = "ClusterReachable" - ClusterReachableMsg = "cluster is reachable" + ClusterReady = "ClusterReady" + HealthzOk = "/healthz responded with ok" + ClusterNotReady = "ClusterNotReady" + HealthzNotOk = "/healthz responded without ok" + ClusterNotReachableReason = "ClusterNotReachable" + ClusterNotReachableMsg = "cluster is not reachable" + ClusterReachableReason = "ClusterReachable" + ClusterReachableMsg = "cluster is reachable" + ClusterConfigMalformedReason = "ClusterConfigMalformed" + ClusterConfigMalformedMsg = "cluster's configuration may be malformed" ) // ClusterClient provides methods for determining the status and zones of a @@ -67,17 +69,14 @@ type ClusterClient struct { // The kubeClient is used to configure the ClusterClient's internal client // with information from a kubeconfig stored in a kubernetes secret. 
func NewClusterClientSet(c *fedv1b1.KubeFedCluster, client generic.Client, fedNamespace string, timeout time.Duration) (*ClusterClient, error) { + var clusterClientSet = ClusterClient{clusterName: c.Name} clusterConfig, err := util.BuildClusterConfig(c, client, fedNamespace) if err != nil { - return nil, err + return &clusterClientSet, err } clusterConfig.Timeout = timeout - var clusterClientSet = ClusterClient{clusterName: c.Name} - clusterClientSet.kubeClient = kubeclientset.NewForConfigOrDie((restclient.AddUserAgent(clusterConfig, UserAgentName))) - if clusterClientSet.kubeClient == nil { - return nil, nil - } - return &clusterClientSet, nil + clusterClientSet.kubeClient, err = kubeclientset.NewForConfig(restclient.AddUserAgent(clusterConfig, UserAgentName)) + return &clusterClientSet, err } // GetClusterHealthStatus gets the kubernetes cluster health status by requesting "/healthz" @@ -124,6 +123,21 @@ func (c *ClusterClient) GetClusterHealthStatus() (*fedv1b1.KubeFedClusterStatus, LastProbeTime: currentTime, LastTransitionTime: &currentTime, } + clusterConfigMalformedReason := ClusterConfigMalformedReason + clusterConfigMalformedMsg := ClusterConfigMalformedMsg + newClusterConfigMalformedCondition := fedv1b1.ClusterCondition{ + Type: fedcommon.ClusterConfigMalformed, + Status: corev1.ConditionTrue, + Reason: &clusterConfigMalformedReason, + Message: &clusterConfigMalformedMsg, + LastProbeTime: currentTime, + LastTransitionTime: &currentTime, + } + if c.kubeClient == nil { + clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterConfigMalformedCondition) + metrics.RegisterKubefedClusterTotal(metrics.ClusterNotReady, c.clusterName) + return &clusterStatus, nil + } body, err := c.kubeClient.DiscoveryClient.RESTClient().Get().AbsPath("/healthz").Do(context.Background()).Raw() if err != nil { runtime.HandleError(errors.Wrapf(err, "Failed to do cluster health check for cluster %q", c.clusterName)) diff --git a/pkg/controller/kubefedcluster/controller.go 
b/pkg/controller/kubefedcluster/controller.go index 797c70e0e8..c0a94f38f0 100644 --- a/pkg/controller/kubefedcluster/controller.go +++ b/pkg/controller/kubefedcluster/controller.go @@ -175,7 +175,7 @@ func (cc *ClusterController) addToClusterSet(obj *fedv1b1.KubeFedCluster) { cc.mu.Lock() defer cc.mu.Unlock() clusterData := cc.clusterDataMap[obj.Name] - if clusterData != nil && clusterData.clusterKubeClient != nil { + if clusterData != nil && clusterData.clusterKubeClient.kubeClient != nil { return } @@ -183,10 +183,9 @@ func (cc *ClusterController) addToClusterSet(obj *fedv1b1.KubeFedCluster) { // create the restclient of cluster restClient, err := NewClusterClientSet(obj, cc.client, cc.fedNamespace, cc.clusterHealthCheckConfig.Timeout) - if err != nil || restClient == nil { + if err != nil || restClient.kubeClient == nil { cc.RecordError(obj, "MalformedClusterConfig", errors.Wrap(err, "The configuration for this cluster may be malformed")) - klog.Errorf("The configuration for cluster %s may be malformed", obj.Name) - return + klog.Errorf("The configuration for cluster %q may be malformed: %v", obj.Name, err) } cc.clusterDataMap[obj.Name] = &ClusterData{clusterKubeClient: restClient, cachedObj: obj.DeepCopy()} } @@ -217,7 +216,7 @@ func (cc *ClusterController) updateClusterStatus() error { cluster := obj.DeepCopy() clusterData := cc.clusterDataMap[cluster.Name] cc.mu.RUnlock() - if clusterData == nil { + if clusterData == nil || clusterData.clusterKubeClient.kubeClient == nil { // Retry adding cluster client cc.addToClusterSet(cluster) cc.mu.RLock() diff --git a/pkg/controller/sync/status/status.go b/pkg/controller/sync/status/status.go index 7e34da6c11..c22daf48c1 100644 --- a/pkg/controller/sync/status/status.go +++ b/pkg/controller/sync/status/status.go @@ -173,6 +173,7 @@ func IsRecoverableError(status PropagationStatus) bool { DeletionFailed, LabelRemovalFailed, RetrievalFailed, + ClientRetrievalFailed, CreationTimedOut, UpdateTimedOut, DeletionTimedOut,