diff --git a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml
index c209a75ee58..ec04acd3013 100644
--- a/infra/charts/feast/charts/feast-serving/templates/deployment.yaml
+++ b/infra/charts/feast/charts/feast-serving/templates/deployment.yaml
@@ -9,6 +9,13 @@ metadata:
chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
+ annotations:
+ {{- if .Values.prometheus.enabled }}
+ {{ $config := index .Values "application.yaml" }}
+ prometheus.io/path: /metrics
+ prometheus.io/port: "{{ $config.server.port }}"
+ prometheus.io/scrape: "true"
+ {{- end }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml
index 5780eda4e42..8f109a09059 100644
--- a/infra/charts/feast/charts/feast-serving/values.yaml
+++ b/infra/charts/feast/charts/feast-serving/values.yaml
@@ -63,6 +63,8 @@ application.yaml:
grpc:
port: 6566
enable-reflection: true
+ server:
+ port: 8080
spring:
main:
web-application-type: none
@@ -177,6 +179,9 @@ ingress:
# - host: chart-example.local
# port: http
+prometheus:
+ enabled: true
+
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
diff --git a/serving/pom.xml b/serving/pom.xml
index a5d4ae4f493..9de7d24b29e 100644
--- a/serving/pom.xml
+++ b/serving/pom.xml
@@ -168,6 +168,30 @@
0.31.0
+
+
+ io.prometheus
+ simpleclient
+ 0.8.0
+
+
+
+ io.prometheus
+ simpleclient_hotspot
+ 0.8.0
+
+
+
+ io.prometheus
+ simpleclient_servlet
+ 0.8.0
+
+
+ io.prometheus
+ simpleclient_spring_boot
+ 0.8.0
+
+
com.google.cloud
diff --git a/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java b/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java
index b475369c4db..ead8e67b73d 100644
--- a/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java
+++ b/serving/src/main/java/feast/serving/configuration/InstrumentationConfig.java
@@ -3,12 +3,16 @@
import feast.serving.FeastProperties;
import io.opentracing.Tracer;
import io.opentracing.noop.NoopTracerFactory;
+import io.prometheus.client.hotspot.DefaultExports;
+import io.prometheus.client.exporter.MetricsServlet;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.web.servlet.ServletRegistrationBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class InstrumentationConfig {
+
private FeastProperties feastProperties;
@Autowired
@@ -16,6 +20,12 @@ public InstrumentationConfig(FeastProperties feastProperties) {
this.feastProperties = feastProperties;
}
+ @Bean
+ public ServletRegistrationBean servletRegistrationBean() {
+ DefaultExports.initialize();
+ return new ServletRegistrationBean(new MetricsServlet(), "/metrics");
+ }
+
@Bean
public Tracer tracer() {
if (!feastProperties.getTracing().isEnabled()) {
diff --git a/serving/src/main/java/feast/serving/service/BigQueryServingService.java b/serving/src/main/java/feast/serving/service/BigQueryServingService.java
index 9d93d4dd06c..be8ccf7fee1 100644
--- a/serving/src/main/java/feast/serving/service/BigQueryServingService.java
+++ b/serving/src/main/java/feast/serving/service/BigQueryServingService.java
@@ -1,6 +1,8 @@
package feast.serving.service;
import static feast.serving.util.BigQueryUtil.getTimestampLimitQuery;
+import static feast.serving.util.Metrics.requestCount;
+import static feast.serving.util.Metrics.requestLatency;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
@@ -37,6 +39,7 @@
import feast.serving.ServingAPIProto.JobType;
import feast.serving.util.BigQueryUtil;
import io.grpc.Status;
+import io.prometheus.client.Histogram.Timer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -100,11 +103,13 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest getF
*/
@Override
public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeaturesRequest) {
-
+ Timer getBatchFeaturesTimer = requestLatency.labels("getBatchFeatures").startTimer();
List featureSetSpecs =
getFeaturesRequest.getFeatureSetsList().stream()
- .map(featureSet ->
- specService.getFeatureSet(featureSet.getName(), featureSet.getVersion())
+ .map(featureSet -> {
+ requestCount.labels(featureSet.getName()).inc();
+ return specService.getFeatureSet(featureSet.getName(), featureSet.getVersion());
+ }
)
.collect(Collectors.toList());
@@ -233,6 +238,7 @@ public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeat
})
.start();
+ getBatchFeaturesTimer.observeDuration();
return GetBatchFeaturesResponse.newBuilder().setJob(feastJob).build();
}
diff --git a/serving/src/main/java/feast/serving/service/CachedSpecService.java b/serving/src/main/java/feast/serving/service/CachedSpecService.java
index df08fb7b384..dd1be0bf7d8 100644
--- a/serving/src/main/java/feast/serving/service/CachedSpecService.java
+++ b/serving/src/main/java/feast/serving/service/CachedSpecService.java
@@ -16,6 +16,7 @@
import feast.core.StoreProto.Store.Subscription;
import feast.serving.exception.SpecRetrievalException;
import io.grpc.StatusRuntimeException;
+import io.prometheus.client.Gauge;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -40,6 +41,15 @@ public class CachedSpecService {
private final LoadingCache featureSetSpecCache;
private Store store;
+ private static Gauge featureSetsCount = Gauge.build().name("feature_set_count")
+ .subsystem("feast_serving")
+ .help("number of feature sets served by this instance")
+ .register();
+ private static Gauge cacheLastUpdated = Gauge.build().name("cache_last_updated")
+ .subsystem("feast_serving")
+ .help("epoch time of the last time the cache was updated")
+ .register();
+
public CachedSpecService(CoreSpecService coreService, Path configPath) {
this.configPath = configPath;
this.coreService = coreService;
@@ -102,6 +112,9 @@ public void populateCache() {
this.store = updateStore(readConfig(configPath));
Map featureSetSpecMap = getFeatureSetSpecMap();
featureSetSpecCache.putAll(featureSetSpecMap);
+
+ featureSetsCount.set(featureSetSpecCache.size());
+ cacheLastUpdated.set(System.currentTimeMillis());
}
public void scheduledPopulateCache() {
diff --git a/serving/src/main/java/feast/serving/service/RedisServingService.java b/serving/src/main/java/feast/serving/service/RedisServingService.java
index 69feec724fb..aec8a4bfb4c 100644
--- a/serving/src/main/java/feast/serving/service/RedisServingService.java
+++ b/serving/src/main/java/feast/serving/service/RedisServingService.java
@@ -16,6 +16,11 @@
package feast.serving.service;
+import static feast.serving.util.Metrics.missingKeyCount;
+import static feast.serving.util.Metrics.requestLatency;
+import static feast.serving.util.Metrics.requestCount;
+import static feast.serving.util.Metrics.staleKeyCount;
+
import com.google.common.collect.Maps;
import com.google.protobuf.AbstractMessageLite;
import com.google.protobuf.Duration;
@@ -41,6 +46,7 @@
import io.grpc.Status;
import io.opentracing.Scope;
import io.opentracing.Tracer;
+import io.prometheus.client.Histogram.Timer;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -61,7 +67,9 @@ public RedisServingService(JedisPool jedisPool, CachedSpecService specService, T
this.tracer = tracer;
}
- /** {@inheritDoc} */
+ /**
+ * {@inheritDoc}
+ */
@Override
public GetFeastServingInfoResponse getFeastServingInfo(
GetFeastServingInfoRequest getFeastServingInfoRequest) {
@@ -70,10 +78,13 @@ public GetFeastServingInfoResponse getFeastServingInfo(
.build();
}
- /** {@inheritDoc} */
+ /**
+ * {@inheritDoc}
+ */
@Override
public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest request) {
try (Scope scope = tracer.buildSpan("Redis-getOnlineFeatures").startActive(true)) {
+ Timer getOnlineFeaturesTimer = requestLatency.labels("getOnlineFeatures").startTimer();
GetOnlineFeaturesResponse.Builder getOnlineFeaturesResponseBuilder =
GetOnlineFeaturesResponse.newBuilder();
@@ -114,6 +125,7 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest requ
featureValuesMap.values().stream()
.map(m -> FieldValues.newBuilder().putAllFields(m).build())
.collect(Collectors.toList());
+ getOnlineFeaturesTimer.observeDuration();
return getOnlineFeaturesResponseBuilder.addAllFieldValues(fieldValues).build();
}
}
@@ -166,9 +178,11 @@ private RedisKey makeRedisKey(
for (int i = 0; i < featureSetEntityNames.size(); i++) {
String entityName = featureSetEntityNames.get(i);
- if (!fieldsMap.containsKey(entityName)){
+ if (!fieldsMap.containsKey(entityName)) {
throw Status.INVALID_ARGUMENT
- .withDescription(String.format("Entity row fields \"%s\" does not contain required entity field \"%s\"", fieldsMap.keySet().toString(), entityName))
+ .withDescription(String
+ .format("Entity row fields \"%s\" does not contain required entity field \"%s\"",
+ fieldsMap.keySet().toString(), entityName))
.asRuntimeException();
}
@@ -186,33 +200,45 @@ private void sendAndProcessMultiGet(
throws InvalidProtocolBufferException {
List jedisResps = sendMultiGet(redisKeys);
-
+ Timer processResponseTimer = requestLatency.labels("processResponse")
+ .startTimer();
try (Scope scope = tracer.buildSpan("Redis-processResponse").startActive(true)) {
String featureSetId =
String.format("%s:%d", featureSetRequest.getName(), featureSetRequest.getVersion());
+
Map nullValues =
featureSetRequest.getFeatureNamesList().stream()
.collect(
Collectors.toMap(
name -> featureSetId + ":" + name, name -> Value.newBuilder().build()));
+
for (int i = 0; i < jedisResps.size(); i++) {
EntityRow entityRow = entityRows.get(i);
Map featureValues = featureValuesMap.get(entityRow);
+
byte[] jedisResponse = jedisResps.get(i);
if (jedisResponse == null) {
+ missingKeyCount.labels(featureSetRequest.getName()).inc();
featureValues.putAll(nullValues);
continue;
}
+
FeatureRow featureRow = FeatureRow.parseFrom(jedisResponse);
+
boolean stale = isStale(featureSetRequest, entityRow, featureRow);
if (stale) {
+ staleKeyCount.labels(featureSetRequest.getName()).inc();
featureValues.putAll(nullValues);
continue;
}
+
+ requestCount.labels(featureSetRequest.getName()).inc();
featureRow.getFieldsList().stream()
.filter(f -> featureSetRequest.getFeatureNamesList().contains(f.getName()))
.forEach(f -> featureValues.put(featureSetId + ":" + f.getName(), f.getValue()));
}
+ } finally {
+ processResponseTimer.observeDuration();
}
}
@@ -237,6 +263,7 @@ private boolean isStale(
*/
private List sendMultiGet(List keys) {
try (Scope scope = tracer.buildSpan("Redis-sendMultiGet").startActive(true)) {
+ Timer sendMultiGetTimer = requestLatency.labels("sendMultiGet").startTimer();
try (Jedis jedis = jedisPool.getResource()) {
byte[][] binaryKeys =
keys.stream()
@@ -249,6 +276,8 @@ private List sendMultiGet(List keys) {
.withDescription("Unable to retrieve feature from Redis")
.withCause(e)
.asRuntimeException();
+ } finally {
+ sendMultiGetTimer.observeDuration();
}
}
}
diff --git a/serving/src/main/java/feast/serving/util/Metrics.java b/serving/src/main/java/feast/serving/util/Metrics.java
new file mode 100644
index 00000000000..a4463cf6897
--- /dev/null
+++ b/serving/src/main/java/feast/serving/util/Metrics.java
@@ -0,0 +1,37 @@
+package feast.serving.util;
+
+import io.prometheus.client.Counter;
+import io.prometheus.client.Histogram;
+import io.prometheus.client.Summary;
+
+public class Metrics {
+
+ public static final Histogram requestLatency = Histogram.build()
+ .buckets(2, 4, 6, 8, 10, 15, 20, 25, 30, 35, 50)
+ .name("request_latency_ms")
+ .subsystem("feast_serving")
+ .help("Request latency in milliseconds.")
+ .labelNames("method")
+ .register();
+
+ public static final Counter requestCount = Counter.build()
+ .name("request_feature_count")
+ .subsystem("feast_serving")
+ .help("number of feature rows requested")
+ .labelNames("feature_set_name")
+ .register();
+
+ public static final Counter missingKeyCount = Counter.build()
+ .name("missing_feature_count")
+ .subsystem("feast_serving")
+ .help("number requested feature rows that were not found")
+ .labelNames("feature_set_name")
+ .register();
+
+ public static final Counter staleKeyCount = Counter.build()
+ .name("stale_feature_count")
+ .subsystem("feast_serving")
+ .help("number requested feature rows that were stale")
+ .labelNames("feature_set_name")
+ .register();
+}
diff --git a/serving/src/main/resources/application.yml b/serving/src/main/resources/application.yml
index 6ef68d710cf..95d3a3e9c41 100644
--- a/serving/src/main/resources/application.yml
+++ b/serving/src/main/resources/application.yml
@@ -55,6 +55,6 @@ grpc:
server:
# The port number on which the Tomcat webserver that serves REST API endpoints should listen
- # It is set by default to 8080 so it does not conflict with Tomcat webserver on Feast Core
+ # It is set by default to 8081 so it does not conflict with Tomcat webserver on Feast Core
# if both Feast Core and Serving are running on the same machine
port: ${SERVER_PORT:8081}
\ No newline at end of file