diff --git a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml index c209a75ee58..ec04acd3013 100644 --- a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml @@ -9,6 +9,13 @@ metadata: chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} release: {{ .Release.Name }} heritage: {{ .Release.Service }} + annotations: + {{- if .Values.prometheus.enabled }} + {{ $config := index .Values "application.yaml" }} + prometheus.io/path: /metrics + prometheus.io/port: "{{ $config.server.port }}" + prometheus.io/scrape: "true" + {{- end }} spec: replicas: {{ .Values.replicaCount }} selector: diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml index 5780eda4e42..8f109a09059 100644 --- a/infra/charts/feast/charts/feast-serving/values.yaml +++ b/infra/charts/feast/charts/feast-serving/values.yaml @@ -63,6 +63,8 @@ application.yaml: grpc: port: 6566 enable-reflection: true + server: + port: 8080 spring: main: web-application-type: none @@ -177,6 +179,9 @@ ingress: # - host: chart-example.local # port: http +prometheus: + enabled: true + resources: {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little diff --git a/serving/pom.xml b/serving/pom.xml index a5d4ae4f493..9de7d24b29e 100644 --- a/serving/pom.xml +++ b/serving/pom.xml @@ -168,6 +168,30 @@ 0.31.0 + + + io.prometheus + simpleclient + 0.8.0 + + + + io.prometheus + simpleclient_hotspot + 0.8.0 + + + + io.prometheus + simpleclient_servlet + 0.8.0 + + + io.prometheus + simpleclient_spring_boot + 0.8.0 + + com.google.cloud diff --git a/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java b/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java index b475369c4db..ead8e67b73d 100644 --- a/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java +++ b/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java @@ -3,12 +3,16 @@ import feast.serving.FeastProperties; import io.opentracing.Tracer; import io.opentracing.noop.NoopTracerFactory; +import io.prometheus.client.hotspot.DefaultExports; +import io.prometheus.client.exporter.MetricsServlet; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.web.servlet.ServletRegistrationBean; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @Configuration public class InstrumentationConfig { + private FeastProperties feastProperties; @Autowired @@ -16,6 +20,12 @@ public InstrumentationConfig(FeastProperties feastProperties) { this.feastProperties = feastProperties; } + @Bean + public ServletRegistrationBean servletRegistrationBean() { + DefaultExports.initialize(); + return new ServletRegistrationBean(new MetricsServlet(), "/metrics"); + } + @Bean public Tracer tracer() { if (!feastProperties.getTracing().isEnabled()) { diff --git a/serving/src/main/java/feast/serving/service/BigQueryServingService.java b/serving/src/main/java/feast/serving/service/BigQueryServingService.java index 9d93d4dd06c..be8ccf7fee1 100644 --- a/serving/src/main/java/feast/serving/service/BigQueryServingService.java +++ b/serving/src/main/java/feast/serving/service/BigQueryServingService.java @@ -1,6 +1,8 @@ package feast.serving.service; import static feast.serving.util.BigQueryUtil.getTimestampLimitQuery; +import static feast.serving.util.Metrics.requestCount; +import static feast.serving.util.Metrics.requestLatency; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; @@ -37,6 +39,7 @@ import feast.serving.ServingAPIProto.JobType; import feast.serving.util.BigQueryUtil; import io.grpc.Status; +import io.prometheus.client.Histogram.Timer; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -100,11 +103,13 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest getF */ @Override public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeaturesRequest) { - + Timer getBatchFeaturesTimer = requestLatency.labels("getBatchFeatures").startTimer(); List featureSetSpecs = getFeaturesRequest.getFeatureSetsList().stream() - .map(featureSet -> - specService.getFeatureSet(featureSet.getName(), featureSet.getVersion()) + .map(featureSet -> { + requestCount.labels(featureSet.getName()).inc(); + return specService.getFeatureSet(featureSet.getName(), featureSet.getVersion()); + } ) .collect(Collectors.toList()); @@ -233,6 +238,7 @@ public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeat }) .start(); + getBatchFeaturesTimer.observeDuration(); return GetBatchFeaturesResponse.newBuilder().setJob(feastJob).build(); } diff --git a/serving/src/main/java/feast/serving/service/CachedSpecService.java b/serving/src/main/java/feast/serving/service/CachedSpecService.java index df08fb7b384..dd1be0bf7d8 100644 --- a/serving/src/main/java/feast/serving/service/CachedSpecService.java +++ b/serving/src/main/java/feast/serving/service/CachedSpecService.java @@ -16,6 +16,7 @@ import feast.core.StoreProto.Store.Subscription; import feast.serving.exception.SpecRetrievalException; import io.grpc.StatusRuntimeException; +import io.prometheus.client.Gauge; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -40,6 +41,15 @@ public class CachedSpecService { private final LoadingCache featureSetSpecCache; private Store store; + private static Gauge featureSetsCount = Gauge.build().name("feature_set_count") + .subsystem("feast_serving") + .help("number of feature sets served by this instance") + .register(); + private static Gauge cacheLastUpdated = Gauge.build().name("cache_last_updated") + .subsystem("feast_serving") + .help("epoch time of the last time the cache was updated") + .register(); + public CachedSpecService(CoreSpecService coreService, Path configPath) { this.configPath = configPath; this.coreService = coreService; @@ -102,6 +112,9 @@ public void populateCache() { this.store = updateStore(readConfig(configPath)); Map featureSetSpecMap = getFeatureSetSpecMap(); featureSetSpecCache.putAll(featureSetSpecMap); + + featureSetsCount.set(featureSetSpecCache.size()); + cacheLastUpdated.set(System.currentTimeMillis()); } public void scheduledPopulateCache() { diff --git a/serving/src/main/java/feast/serving/service/RedisServingService.java b/serving/src/main/java/feast/serving/service/RedisServingService.java index 69feec724fb..aec8a4bfb4c 100644 --- a/serving/src/main/java/feast/serving/service/RedisServingService.java +++ b/serving/src/main/java/feast/serving/service/RedisServingService.java @@ -16,6 +16,11 @@ package feast.serving.service; +import static feast.serving.util.Metrics.missingKeyCount; +import static feast.serving.util.Metrics.requestLatency; +import static feast.serving.util.Metrics.requestCount; +import static feast.serving.util.Metrics.staleKeyCount; + import com.google.common.collect.Maps; import com.google.protobuf.AbstractMessageLite; import com.google.protobuf.Duration; @@ -41,6 +46,7 @@ import io.grpc.Status; import io.opentracing.Scope; import io.opentracing.Tracer; +import io.prometheus.client.Histogram.Timer; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -61,7 +67,9 @@ public RedisServingService(JedisPool jedisPool, CachedSpecService specService, T this.tracer = tracer; } - /** {@inheritDoc} */ + /** + * {@inheritDoc} + */ @Override public GetFeastServingInfoResponse getFeastServingInfo( GetFeastServingInfoRequest getFeastServingInfoRequest) { @@ -70,10 +78,13 @@ public GetFeastServingInfoResponse getFeastServingInfo( .build(); } - /** {@inheritDoc} */ + /** + * {@inheritDoc} + */ @Override public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest request) { try (Scope scope = tracer.buildSpan("Redis-getOnlineFeatures").startActive(true)) { + Timer getOnlineFeaturesTimer = requestLatency.labels("getOnlineFeatures").startTimer(); GetOnlineFeaturesResponse.Builder getOnlineFeaturesResponseBuilder = GetOnlineFeaturesResponse.newBuilder(); @@ -114,6 +125,7 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest requ featureValuesMap.values().stream() .map(m -> FieldValues.newBuilder().putAllFields(m).build()) .collect(Collectors.toList()); + getOnlineFeaturesTimer.observeDuration(); return getOnlineFeaturesResponseBuilder.addAllFieldValues(fieldValues).build(); } } @@ -166,9 +178,11 @@ private RedisKey makeRedisKey( for (int i = 0; i < featureSetEntityNames.size(); i++) { String entityName = featureSetEntityNames.get(i); - if (!fieldsMap.containsKey(entityName)){ + if (!fieldsMap.containsKey(entityName)) { throw Status.INVALID_ARGUMENT - .withDescription(String.format("Entity row fields \"%s\" does not contain required entity field \"%s\"", fieldsMap.keySet().toString(), entityName)) + .withDescription(String + .format("Entity row fields \"%s\" does not contain required entity field \"%s\"", + fieldsMap.keySet().toString(), entityName)) .asRuntimeException(); } @@ -186,33 +200,45 @@ private void sendAndProcessMultiGet( throws InvalidProtocolBufferException { List jedisResps = sendMultiGet(redisKeys); - + Timer processResponseTimer = requestLatency.labels("processResponse") + .startTimer(); try (Scope scope = tracer.buildSpan("Redis-processResponse").startActive(true)) { String featureSetId = String.format("%s:%d", featureSetRequest.getName(), featureSetRequest.getVersion()); + Map nullValues = featureSetRequest.getFeatureNamesList().stream() .collect( Collectors.toMap( name -> featureSetId + ":" + name, name -> Value.newBuilder().build())); + for (int i = 0; i < jedisResps.size(); i++) { EntityRow entityRow = entityRows.get(i); Map featureValues = featureValuesMap.get(entityRow); + byte[] jedisResponse = jedisResps.get(i); if (jedisResponse == null) { + missingKeyCount.labels(featureSetRequest.getName()).inc(); featureValues.putAll(nullValues); continue; } + FeatureRow featureRow = FeatureRow.parseFrom(jedisResponse); + boolean stale = isStale(featureSetRequest, entityRow, featureRow); if (stale) { + staleKeyCount.labels(featureSetRequest.getName()).inc(); featureValues.putAll(nullValues); continue; } + + requestCount.labels(featureSetRequest.getName()).inc(); featureRow.getFieldsList().stream() .filter(f -> featureSetRequest.getFeatureNamesList().contains(f.getName())) .forEach(f -> featureValues.put(featureSetId + ":" + f.getName(), f.getValue())); } + } finally { + processResponseTimer.observeDuration(); } } @@ -237,6 +263,7 @@ private boolean isStale( */ private List sendMultiGet(List keys) { try (Scope scope = tracer.buildSpan("Redis-sendMultiGet").startActive(true)) { + Timer sendMultiGetTimer = requestLatency.labels("sendMultiGet").startTimer(); try (Jedis jedis = jedisPool.getResource()) { byte[][] binaryKeys = keys.stream() @@ -249,6 +276,8 @@ private List sendMultiGet(List keys) { .withDescription("Unable to retrieve feature from Redis") .withCause(e) .asRuntimeException(); + } finally { + sendMultiGetTimer.observeDuration(); } } } diff --git a/serving/src/main/java/feast/serving/util/Metrics.java b/serving/src/main/java/feast/serving/util/Metrics.java new file mode 100644 index 00000000000..a4463cf6897 --- /dev/null +++ b/serving/src/main/java/feast/serving/util/Metrics.java @@ -0,0 +1,37 @@ +package feast.serving.util; + +import io.prometheus.client.Counter; +import io.prometheus.client.Histogram; +import io.prometheus.client.Summary; + +public class Metrics { + + public static final Histogram requestLatency = Histogram.build() + .buckets(2, 4, 6, 8, 10, 15, 20, 25, 30, 35, 50) + .name("request_latency_ms") + .subsystem("feast_serving") + .help("Request latency in milliseconds.") + .labelNames("method") + .register(); + + public static final Counter requestCount = Counter.build() + .name("request_feature_count") + .subsystem("feast_serving") + .help("number of feature rows requested") + .labelNames("feature_set_name") + .register(); + + public static final Counter missingKeyCount = Counter.build() + .name("missing_feature_count") + .subsystem("feast_serving") + .help("number requested feature rows that were not found") + .labelNames("feature_set_name") + .register(); + + public static final Counter staleKeyCount = Counter.build() + .name("stale_feature_count") + .subsystem("feast_serving") + .help("number requested feature rows that were stale") + .labelNames("feature_set_name") + .register(); +} diff --git a/serving/src/main/resources/application.yml b/serving/src/main/resources/application.yml index 6ef68d710cf..95d3a3e9c41 100644 --- a/serving/src/main/resources/application.yml +++ b/serving/src/main/resources/application.yml @@ -55,6 +55,6 @@ grpc: server: # The port number on which the Tomcat webserver that serves REST API endpoints should listen - # It is set by default to 8080 so it does not conflict with Tomcat webserver on Feast Core + # It is set by default to 8081 so it does not conflict with Tomcat webserver on Feast Core # if both Feast Core and Serving are running on the same machine port: ${SERVER_PORT:8081} \ No newline at end of file