diff --git a/core/src/main/java/feast/core/grpc/HealthServiceImpl.java b/core/src/main/java/feast/core/grpc/HealthServiceImpl.java new file mode 100644 index 00000000000..3bd2f8748fe --- /dev/null +++ b/core/src/main/java/feast/core/grpc/HealthServiceImpl.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2020 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.grpc; + +import feast.core.service.AccessManagementService; +import io.grpc.Status; +import io.grpc.health.v1.HealthGrpc.HealthImplBase; +import io.grpc.health.v1.HealthProto.HealthCheckRequest; +import io.grpc.health.v1.HealthProto.HealthCheckResponse; +import io.grpc.health.v1.HealthProto.HealthCheckResponse.ServingStatus; +import io.grpc.stub.StreamObserver; +import lombok.extern.slf4j.Slf4j; +import org.lognet.springboot.grpc.GRpcService; +import org.springframework.beans.factory.annotation.Autowired; + +@Slf4j +@GRpcService +public class HealthServiceImpl extends HealthImplBase { + private final AccessManagementService accessManagementService; + + @Autowired + public HealthServiceImpl(AccessManagementService accessManagementService) { + this.accessManagementService = accessManagementService; + } + + @Override + public void check( + HealthCheckRequest request, StreamObserver responseObserver) { + try { + accessManagementService.listProjects(); + responseObserver.onNext( + 
HealthCheckResponse.newBuilder().setStatus(ServingStatus.SERVING).build()); + responseObserver.onCompleted(); + } catch (Exception e) { + log.error("Health Check: unable to retrieve projects.\nError: %s", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } + } +} diff --git a/core/src/main/java/feast/core/http/HealthController.java b/core/src/main/java/feast/core/http/HealthController.java deleted file mode 100644 index 2451ed793ed..00000000000 --- a/core/src/main/java/feast/core/http/HealthController.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.http; - -import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR; - -import java.sql.Connection; -import java.sql.SQLException; -import javax.sql.DataSource; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestMethod; -import org.springframework.web.bind.annotation.RestController; - -/** Web http for pod health-check endpoints. 
*/ -@Slf4j -@RestController -public class HealthController { - - private final DataSource db; - - @Autowired - public HealthController(DataSource datasource) { - this.db = datasource; - } - - /** - * /ping endpoint checks if the application is ready to serve traffic by checking if it is able to - * access the metadata db. - */ - @RequestMapping(value = "/ping", method = RequestMethod.GET) - public ResponseEntity ping() { - return ResponseEntity.ok("pong"); - } - - /** - * /healthz endpoint checks if the application is healthy by checking if the application still has - * access to the metadata db. - */ - @RequestMapping(value = "/healthz", method = RequestMethod.GET) - public ResponseEntity healthz() { - try (Connection conn = db.getConnection()) { - if (conn.isValid(10)) { - return ResponseEntity.ok("healthy"); - } - log.error("Unable to reach DB"); - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) - .body("Unable to establish connection with DB"); - } catch (SQLException e) { - log.error("Unable to reach DB: {}", e); - return ResponseEntity.status(INTERNAL_SERVER_ERROR).body(e.getMessage()); - } - } -} diff --git a/core/src/test/java/feast/core/http/HealthControllerTest.java b/core/src/test/java/feast/core/http/HealthControllerTest.java deleted file mode 100644 index 2fcd622f34a..00000000000 --- a/core/src/test/java/feast/core/http/HealthControllerTest.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.http; - -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.*; - -import java.sql.Connection; -import java.sql.SQLException; -import javax.sql.DataSource; -import org.junit.Test; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; - -public class HealthControllerTest { - @Test - public void ping() { - HealthController healthController = new HealthController(null); - assertEquals(ResponseEntity.ok("pong"), healthController.ping()); - } - - @Test - public void healthz() { - assertEquals(ResponseEntity.ok("healthy"), mockHealthyController().healthz()); - assertEquals( - ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) - .body("Unable to establish connection with DB"), - mockUnhealthyControllerBecauseInvalidConn().healthz()); - assertEquals( - ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("mocked sqlexception"), - mockUnhealthyControllerBecauseSQLException().healthz()); - } - - private HealthController mockHealthyController() { - DataSource mockDataSource = mock(DataSource.class); - Connection mockConnection = mock(Connection.class); - try { - when(mockConnection.isValid(any(int.class))).thenReturn(Boolean.TRUE); - when(mockDataSource.getConnection()).thenReturn(mockConnection); - } catch (Exception e) { - e.printStackTrace(); - } - return new HealthController(mockDataSource); - } - - private HealthController mockUnhealthyControllerBecauseInvalidConn() { - DataSource mockDataSource = mock(DataSource.class); - Connection mockConnection = mock(Connection.class); - try { - when(mockConnection.isValid(any(int.class))).thenReturn(Boolean.FALSE); - when(mockDataSource.getConnection()).thenReturn(mockConnection); - } catch (Exception ignored) { - } - return new HealthController(mockDataSource); - } - - private HealthController 
mockUnhealthyControllerBecauseSQLException() { - DataSource mockDataSource = mock(DataSource.class); - Connection mockConnection = mock(Connection.class); - try { - when(mockDataSource.getConnection()).thenThrow(new SQLException("mocked sqlexception")); - } catch (SQLException ignored) { - } - return new HealthController(mockDataSource); - } -} diff --git a/infra/charts/feast/charts/feast-core/templates/deployment.yaml b/infra/charts/feast/charts/feast-core/templates/deployment.yaml index 1f4fd996efa..179e3a6a094 100644 --- a/infra/charts/feast/charts/feast-core/templates/deployment.yaml +++ b/infra/charts/feast/charts/feast-core/templates/deployment.yaml @@ -125,9 +125,8 @@ spec: {{- if .Values.livenessProbe.enabled }} livenessProbe: - httpGet: - path: /healthz - port: {{ .Values.service.http.targetPort }} + exec: + command: ["/usr/bin/grpc-health-probe", "-addr=:{{ .Values.service.grpc.targetPort }}"] initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.livenessProbe.periodSeconds }} successThreshold: {{ .Values.livenessProbe.successThreshold }} @@ -137,9 +136,8 @@ spec: {{- if .Values.readinessProbe.enabled }} readinessProbe: - httpGet: - path: /healthz - port: {{ .Values.service.http.targetPort }} + exec: + command: ["/usr/bin/grpc-health-probe", "-addr=:{{ .Values.service.grpc.targetPort }}"] initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.readinessProbe.periodSeconds }} successThreshold: {{ .Values.readinessProbe.successThreshold }} diff --git a/infra/charts/feast/charts/feast-core/values.yaml b/infra/charts/feast/charts/feast-core/values.yaml index 5032e8d87ae..cc7bb49f0f9 100644 --- a/infra/charts/feast/charts/feast-core/values.yaml +++ b/infra/charts/feast/charts/feast-core/values.yaml @@ -53,9 +53,11 @@ prometheus: # prometheus.enabled -- Flag to enable scraping of Feast Core metrics enabled: true +# By default we disable the liveness probe, since if the DB fails 
restarting core will not result +# in application healing. livenessProbe: # livenessProbe.enabled -- Flag to enabled the probe - enabled: true + enabled: false # livenessProbe.initialDelaySeconds -- Delay before the probe is initiated initialDelaySeconds: 60 # livenessProbe.periodSeconds -- How often to perform the probe diff --git a/infra/docker/core/Dockerfile b/infra/docker/core/Dockerfile index 7e469ed7f61..c7ba81a4134 100644 --- a/infra/docker/core/Dockerfile +++ b/infra/docker/core/Dockerfile @@ -25,15 +25,26 @@ RUN mvn --also-make --projects core,ingestion -Drevision=$REVISION \ RUN apt-get -qq update && apt-get -y install unar && \ unar /build/core/target/feast-core-$REVISION.jar -o /build/core/target/ +# +# Download grpc_health_probe to run health check for Feast Core +# https://kubernetes.io/blog/2018/10/01/health-checking-grpc-servers-on-kubernetes/ +# +RUN wget -q https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.3.1/grpc_health_probe-linux-amd64 \ + -O /usr/bin/grpc-health-probe && \ + chmod +x /usr/bin/grpc-health-probe + # ============================================================ # Build stage 2: Production # ============================================================ FROM openjdk:11-jre as production ARG REVISION=dev + COPY --from=builder /build/core/target/feast-core-$REVISION.jar /opt/feast/feast-core.jar # Required for staging jar dependencies when submitting Dataflow jobs. COPY --from=builder /build/core/target/feast-core-$REVISION /opt/feast/feast-core +COPY --from=builder /usr/bin/grpc-health-probe /usr/bin/grpc-health-probe + CMD ["java",\ "-Xms2048m",\ "-Xmx2048m",\