Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fs = feast.Client('feast.example.com')
customer_features = ['CreditScore', 'Balance', 'Age', 'NumOfProducts', 'IsActive']

# Training your model (typically from a notebook or pipeline)
data = fs.get_batch_features(customer_features, customer_entities)
data = fs.get_historical_features(customer_features, customer_entities)
my_model = ml.fit(data)

# Serving predictions (when serving the model in production)
Expand Down
10 changes: 5 additions & 5 deletions docs/user-guide/feature-retrieval.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ features = self._features + [self._target]
# Retrieve training dataset from Feast. The "entity_df" is a dataframe that contains
# timestamps and entity keys. In this case, it is a dataframe with two columns.
# One timestamp column, and one customer id column
dataset = client.get_batch_features(
dataset = client.get_historical_features(
feature_refs=features,
entity_rows=entity_df
)
Expand All @@ -59,7 +59,7 @@ df = dataset.to_dataframe()
```

{% hint style="info" %}
When no project is specified when retrieving features with `get_batch_features()`, Feast infers that the features specified belong to the `default` project. To retrieve from another project, specify the `project` parameter when retrieving features.
When no project is specified when retrieving features with `get_historical_features()`, Feast infers that the features specified belong to the `default` project. To retrieve from another project, specify the `project` parameter when retrieving features.
{% endhint %}

In the above example, Feast does a point-in-time-correct query from a single feature set. For each timestamp and entity key combination that is provided by `entity_df`, Feast determines the values of all the features in the `features` list at that respective point in time, joins those feature values to that specific entity value and timestamp, and repeats this process for all timestamps.
Expand Down Expand Up @@ -88,7 +88,7 @@ features = [
]


dataset = client.get_batch_features(
dataset = client.get_historical_features(
feature_refs=features, # this is a list of feature references
entity_rows=entity_df # This is the entity dataframe above
)
Expand All @@ -111,10 +111,10 @@ Point-in-time-correct joins also prevents the occurrence of feature leakage by t

Feast is able to compute [TFDV](https://tensorflow.google.cn/tfx/tutorials/data_validation/tfdv_basic) compatible statistics over data retrieved from historical stores. The statistics can be used in conjunction with feature schemas and TFDV to verify the integrity of your retrieved dataset, or with [Facets](https://github.com/PAIR-code/facets) to visualize the distribution.

The computation of statistics is not enabled by default. To indicate to Feast that the statistics are to be computed for a given historical retrieval request, pass `compute_statistics=True` to `get_batch_features`.
The computation of statistics is not enabled by default. To indicate to Feast that the statistics are to be computed for a given historical retrieval request, pass `compute_statistics=True` to `get_historical_features`.

```python
dataset = client.get_batch_features(
dataset = client.get_historical_features(
feature_refs=features,
entity_rows=entity_df,
compute_statistics=True
Expand Down
4 changes: 2 additions & 2 deletions examples/basic/basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"By calling the `get_batch_features` method we are able to retrieve a `job` object. This object can be used to retrieve the resulting training dataset that is exported by Feast. \n",
"By calling the `get_historical_features` method we are able to retrieve a `job` object. This object can be used to retrieve the resulting training dataset that is exported by Feast. \n",
"\n",
"The dataset that is returned will contain feature values for each entity and timestamp combination in `entity_rows`."
]
Expand All @@ -585,7 +585,7 @@
},
"outputs": [],
"source": [
"job = batch_client.get_batch_features(\n",
"job = batch_client.get_historical_features(\n",
Comment thread
terryyylim marked this conversation as resolved.
" feature_refs=[\n",
" f\"daily_transactions\", \n",
" f\"total_transactions\", \n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7195,7 +7195,7 @@
" features = self._features + [self._target]\n",
"\n",
" # Retrieve training dataset from Feast\n",
" dataset = self._feast_batch_client.get_batch_features(\n",
" dataset = self._feast_batch_client.get_historical_features(\n",
" feature_refs=features,\n",
" entity_rows=entity_df).to_dataframe()\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ spec:
entity_rows_df = df.copy(deep=True).rename(columns={"datetime": "event_timestamp"})[["event_timestamp", "customer_id"]]
pandavro.to_avro("entity_rows.avro", entity_rows_df)

batch_serving_job = client.get_batch_features(
batch_serving_job = client.get_historical_features(
entity_rows="file://entity_rows.avro",
feature_refs=[
f"{project}/daily_transactions:1",
Expand Down
22 changes: 21 additions & 1 deletion sdk/python/feast/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import tempfile
import time
import uuid
import warnings
from collections import OrderedDict
from math import ceil
from typing import Dict, List, Optional, Tuple, Union
Expand Down Expand Up @@ -547,6 +548,25 @@ def get_batch_features(
entity_rows: Union[pd.DataFrame, str],
compute_statistics: bool = False,
project: str = None,
) -> RetrievalJob:
"""
Deprecated. Please see get_historical_features.
"""
warnings.warn(
"The method get_batch_features() is being deprecated. Please use the identical get_historical_features(). "
"Feast 0.7 and onwards will not support get_batch_features().",
DeprecationWarning,
)
return self.get_historical_features(
feature_refs, entity_rows, compute_statistics, project
)

def get_historical_features(
self,
feature_refs: List[str],
entity_rows: Union[pd.DataFrame, str],
compute_statistics: bool = False,
project: str = None,
Comment thread
terryyylim marked this conversation as resolved.
) -> RetrievalJob:
"""
Retrieves historical features from a Feast Serving deployment.
Expand Down Expand Up @@ -585,7 +605,7 @@ def get_batch_features(
>>> "customer": [1001, 1002, 1003],
>>> }
>>> )
>>> feature_retrieval_job = feast_client.get_batch_features(
>>> feature_retrieval_job = feast_client.get_historical_features(
>>> feature_refs, entity_rows, project="my_project")
>>> df = feature_retrieval_job.to_dataframe()
>>> print(df)
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ def test_stop_ingest_job(self, mocked_client, mocker):
"mocked_client",
[pytest.lazy_fixture("mock_client"), pytest.lazy_fixture("secure_mock_client")],
)
def test_get_batch_features(self, mocked_client, mocker):
def test_get_historical_features(self, mocked_client, mocker):

mocked_client._serving_service_stub = Serving.ServingServiceStub(
grpc.insecure_channel("")
Expand Down Expand Up @@ -705,7 +705,7 @@ def test_get_batch_features(self, mocked_client, mocker):
# NOTE: Feast Serving does not allow for feature references
# that specify the same feature in the same request.
with patch("google.cloud.storage.Client"):
response = mocked_client.get_batch_features(
response = mocked_client.get_historical_features(
entity_rows=pd.DataFrame(
{
"datetime": [
Expand Down
28 changes: 14 additions & 14 deletions tests/e2e/bq/bq-batch-retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_batch_apply_all_featuresets(client):
@pytest.mark.direct_runner
@pytest.mark.dataflow_runner
@pytest.mark.run(order=10)
def test_batch_get_batch_features_with_file(client):
def test_batch_get_historical_features_with_file(client):
file_fs1 = client.get_feature_set(name="file_feature_set")

N_ROWS = 10
Expand Down Expand Up @@ -193,7 +193,7 @@ def test_batch_get_batch_features_with_file(client):
time.sleep(10)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows="file://file_feature_set.avro",
feature_refs=["feature_value1"],
project=PROJECT_NAME,
Expand All @@ -213,7 +213,7 @@ def check():
@pytest.mark.direct_runner
@pytest.mark.dataflow_runner
@pytest.mark.run(order=11)
def test_batch_get_batch_features_with_gs_path(client, gcs_path):
def test_batch_get_historical_features_with_gs_path(client, gcs_path):
gcs_fs1 = client.get_feature_set(name="gcs_feature_set")

N_ROWS = 10
Expand Down Expand Up @@ -252,7 +252,7 @@ def test_batch_get_batch_features_with_gs_path(client, gcs_path):
blob.upload_from_filename(file_name)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=f"{gcs_path}{ts}/*",
feature_refs=["feature_value2"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -296,7 +296,7 @@ def test_batch_order_by_creation_time(client):
client.ingest(proc_time_fs, correct_df)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=incorrect_df[["datetime", "entity_id"]],
feature_refs=["feature_value3"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -337,7 +337,7 @@ def test_batch_additional_columns_in_entity_table(client):
)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=entity_df,
feature_refs=["feature_value4"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -392,7 +392,7 @@ def test_batch_point_in_time_correctness_join(client):
client.ingest(historical_fs, historical_df)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=entity_df,
feature_refs=["feature_value5"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -443,7 +443,7 @@ def test_batch_multiple_featureset_joins(client):
# Test retrieve with different variations of the string feature refs
# ie feature set inference for feature refs without specified feature set
def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=entity_df,
feature_refs=["feature_value6", "feature_set_2:other_feature_value7"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -480,7 +480,7 @@ def test_batch_no_max_age(client):
client.ingest(no_max_age_fs, features_8_df)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=features_8_df[["datetime", "entity_id"]],
feature_refs=["feature_value8"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -564,7 +564,7 @@ def test_update_featureset_apply_featureset_and_ingest_first_subset(
client.ingest(feature_set=update_fs, source=subset_df)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5],
feature_refs=["update_feature1", "update_feature2"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -627,7 +627,7 @@ def test_update_featureset_update_featureset_and_ingest_second_subset(
time.sleep(30)

def check():
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:],
feature_refs=["update_feature1", "update_feature3", "update_feature4"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -659,7 +659,7 @@ def check():
@pytest.mark.run(order=22)
def test_update_featureset_retrieve_all_fields(client, update_featureset_dataframe):
with pytest.raises(Exception):
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=update_featureset_dataframe[["datetime", "entity_id"]],
feature_refs=[
"update_feature1",
Expand All @@ -675,7 +675,7 @@ def test_update_featureset_retrieve_all_fields(client, update_featureset_datafra
@pytest.mark.fs_update
@pytest.mark.run(order=23)
def test_update_featureset_retrieve_valid_fields(client, update_featureset_dataframe):
feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=update_featureset_dataframe[["datetime", "entity_id"]],
feature_refs=["update_feature1", "update_feature3", "update_feature4"],
project=PROJECT_NAME,
Expand Down Expand Up @@ -751,7 +751,7 @@ def test_batch_dataset_statistics(client):
break
time.sleep(30)

feature_retrieval_job = client.get_batch_features(
feature_retrieval_job = client.get_historical_features(
entity_rows=entity_df,
feature_refs=["feature_value6", "feature_set_2:other_feature_value7"],
project=PROJECT_NAME,
Expand Down