feast-dev · feast-ci-bot · Jun 30, 2020 · Jun 30, 2020 · Jun 30, 2020 · Jun 30, 2020
diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ fs = feast.Client('feast.example.com')
 customer_features = ['CreditScore', 'Balance', 'Age', 'NumOfProducts', 'IsActive']
 
 # Training your model (typically from a notebook or pipeline)
-data = fs.get_batch_features(customer_features, customer_entities)
+data = fs.get_historical_features(customer_features, customer_entities)
 my_model = ml.fit(data)
 
 # Serving predictions (when serving the model in production)

@@ -48,7 +48,7 @@ features = self._features + [self._target]
 # Retrieve training dataset from Feast. The "entity_df" is a dataframe that contains
 # timestamps and entity keys. In this case, it is a dataframe with two columns.
 # One timestamp column, and one customer id column
-dataset = client.get_batch_features(
+dataset = client.get_historical_features(
     feature_refs=features,
     entity_rows=entity_df
 )
@@ -59,7 +59,7 @@ df = dataset.to_dataframe()
 ```
 
 {% hint style="info" %}
-When no project is specified when retrieving features with `get_batch_features()`, Feast infers that the features specified belong to the `default` project. To retrieve from another project, specify the `default` parameter when retrieving features.
+When no project is specified when retrieving features with `get_historical_features()`, Feast infers that the features specified belong to the `default` project. To retrieve from another project, specify the `project` parameter when retrieving features.
 {% endhint %}
 
 In the above example, Feast does a point in time correct query from a single feature set. For each timestamp and entity key combination that is provided by `entity_df`, Feast determines the values of all the features in the `features` list at that respective point in time and then joins features values to that specific entity value and timestamp, and repeats this process for all timestamps.
@@ -88,7 +88,7 @@ features = [
  ]
 
 
-dataset = client.get_batch_features(
+dataset = client.get_historical_features(
         feature_refs=features, # this is a list of feature references
         entity_rows=entity_df # This is the entity dataframe above
     )
@@ -111,10 +111,10 @@ Point-in-time-correct joins also prevents the occurrence of feature leakage by t
 
 Feast is able to compute [TFDV](https://tensorflow.google.cn/tfx/tutorials/data_validation/tfdv_basic) compatible statistics over data retrieved from historical stores. The statistics can be used in conjunction with feature schemas and TFDV to verify the integrity of your retrieved dataset, or to [Facets](https://github.com/PAIR-code/facets) to visualize the distribution.
 
-The computation of statistics is not enabled by default. To indicate to Feast that the statistics are to be computed for a given historical retrieval request, pass `compute_statistics=True` to `get_batch_features`.
+The computation of statistics is not enabled by default. To indicate to Feast that the statistics are to be computed for a given historical retrieval request, pass `compute_statistics=True` to `get_historical_features`.
 
 ```python
-dataset = client.get_batch_features(
+dataset = client.get_historical_features(
     feature_refs=features, 
     entity_rows=entity_df 
     compute_statistics=True

@@ -572,7 +572,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "By calling the `get_batch_features` method we are able to retrieve a `job` object. This object can be used to retrieve the resulting training dataset that is exported by Feast. \n",
+    "By calling the `get_historical_features` method we are able to retrieve a `job` object. This object can be used to retrieve the resulting training dataset that is exported by Feast. \n",
     "\n",
     "The dataset that is returned will contain feature values for each entity and timestamp combination in `entity_rows`."
    ]
@@ -585,7 +585,7 @@
    },
    "outputs": [],
    "source": [
-    "job = batch_client.get_batch_features(\n",
+    "job = batch_client.get_historical_features(\n",
     "                            feature_refs=[\n",
     "                                f\"daily_transactions\", \n",
     "                                f\"total_transactions\", \n",

@@ -7195,7 +7195,7 @@
     "        features = self._features + [self._target]\n",
     "\n",
     "        # Retrieve training dataset from Feast\n",
-    "        dataset = self._feast_batch_client.get_batch_features(\n",
+    "        dataset = self._feast_batch_client.get_historical_features(\n",
     "            feature_refs=features,\n",
     "            entity_rows=entity_df).to_dataframe()\n",
     "\n",

@@ -70,7 +70,7 @@ spec:
       entity_rows_df = df.copy(deep=True).rename(columns={"datetime": "event_timestamp"})[["event_timestamp", "customer_id"]]
       pandavro.to_avro("entity_rows.avro", entity_rows_df)
 
-      batch_serving_job = client.get_batch_features(
+      batch_serving_job = client.get_historical_features(
           entity_rows="file://entity_rows.avro", 
           feature_refs=[
               f"{project}/daily_transactions:1",

@@ -20,6 +20,7 @@
 import tempfile
 import time
 import uuid
+import warnings
 from collections import OrderedDict
 from math import ceil
 from typing import Dict, List, Optional, Tuple, Union
@@ -547,6 +548,25 @@ def get_batch_features(
         entity_rows: Union[pd.DataFrame, str],
         compute_statistics: bool = False,
         project: str = None,
+    ) -> RetrievalJob:
+        """
+        Deprecated. Please see get_historical_features.
+        """
+        warnings.warn(
+            "The method get_batch_features() is being deprecated. Please use the identical get_historical_features(). "
+            "Feast 0.7 and onwards will not support get_batch_features().",
+            DeprecationWarning,
+        )
+        return self.get_historical_features(
+            feature_refs, entity_rows, compute_statistics, project
+        )
+
+    def get_historical_features(
+        self,
+        feature_refs: List[str],
+        entity_rows: Union[pd.DataFrame, str],
+        compute_statistics: bool = False,
+        project: str = None,
     ) -> RetrievalJob:
         """
         Retrieves historical features from a Feast Serving deployment.
@@ -585,7 +605,7 @@ def get_batch_features(
             >>>            "customer": [1001, 1002, 1003],
             >>>         }
             >>>     )
-            >>> feature_retrieval_job = feast_client.get_batch_features(
+            >>> feature_retrieval_job = feast_client.get_historical_features(
             >>>     feature_refs, entity_rows, project="my_project")
             >>> df = feature_retrieval_job.to_dataframe()
             >>> print(df)

@@ -610,7 +610,7 @@ def test_stop_ingest_job(self, mocked_client, mocker):
         "mocked_client",
         [pytest.lazy_fixture("mock_client"), pytest.lazy_fixture("secure_mock_client")],
     )
-    def test_get_batch_features(self, mocked_client, mocker):
+    def test_get_historical_features(self, mocked_client, mocker):
 
         mocked_client._serving_service_stub = Serving.ServingServiceStub(
             grpc.insecure_channel("")
@@ -705,7 +705,7 @@ def test_get_batch_features(self, mocked_client, mocker):
         # NOTE: Feast Serving does not allow for feature references
         # that specify the same feature in the same request.
         with patch("google.cloud.storage.Client"):
-            response = mocked_client.get_batch_features(
+            response = mocked_client.get_historical_features(
                 entity_rows=pd.DataFrame(
                     {
                         "datetime": [

diff --git a/tests/e2e/bq/bq-batch-retrieval.py b/tests/e2e/bq/bq-batch-retrieval.py
@@ -159,7 +159,7 @@ def test_batch_apply_all_featuresets(client):
 @pytest.mark.direct_runner
 @pytest.mark.dataflow_runner
 @pytest.mark.run(order=10)
-def test_batch_get_batch_features_with_file(client):
+def test_batch_get_historical_features_with_file(client):
     file_fs1 = client.get_feature_set(name="file_feature_set")
 
     N_ROWS = 10
@@ -193,7 +193,7 @@ def test_batch_get_batch_features_with_file(client):
     time.sleep(10)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows="file://file_feature_set.avro",
             feature_refs=["feature_value1"],
             project=PROJECT_NAME,
@@ -213,7 +213,7 @@ def check():
 @pytest.mark.direct_runner
 @pytest.mark.dataflow_runner
 @pytest.mark.run(order=11)
-def test_batch_get_batch_features_with_gs_path(client, gcs_path):
+def test_batch_get_historical_features_with_gs_path(client, gcs_path):
     gcs_fs1 = client.get_feature_set(name="gcs_feature_set")
 
     N_ROWS = 10
@@ -252,7 +252,7 @@ def test_batch_get_batch_features_with_gs_path(client, gcs_path):
     blob.upload_from_filename(file_name)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=f"{gcs_path}{ts}/*",
             feature_refs=["feature_value2"],
             project=PROJECT_NAME,
@@ -296,7 +296,7 @@ def test_batch_order_by_creation_time(client):
     client.ingest(proc_time_fs, correct_df)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=incorrect_df[["datetime", "entity_id"]],
             feature_refs=["feature_value3"],
             project=PROJECT_NAME,
@@ -337,7 +337,7 @@ def test_batch_additional_columns_in_entity_table(client):
     )
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=entity_df,
             feature_refs=["feature_value4"],
             project=PROJECT_NAME,
@@ -392,7 +392,7 @@ def test_batch_point_in_time_correctness_join(client):
     client.ingest(historical_fs, historical_df)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=entity_df,
             feature_refs=["feature_value5"],
             project=PROJECT_NAME,
@@ -443,7 +443,7 @@ def test_batch_multiple_featureset_joins(client):
     # Test retrieve with different variations of the string feature refs
     # ie feature set inference for feature refs without specified feature set
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=entity_df,
             feature_refs=["feature_value6", "feature_set_2:other_feature_value7"],
             project=PROJECT_NAME,
@@ -480,7 +480,7 @@ def test_batch_no_max_age(client):
     client.ingest(no_max_age_fs, features_8_df)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=features_8_df[["datetime", "entity_id"]],
             feature_refs=["feature_value8"],
             project=PROJECT_NAME,
@@ -564,7 +564,7 @@ def test_update_featureset_apply_featureset_and_ingest_first_subset(
     client.ingest(feature_set=update_fs, source=subset_df)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[:5],
             feature_refs=["update_feature1", "update_feature2"],
             project=PROJECT_NAME,
@@ -627,7 +627,7 @@ def test_update_featureset_update_featureset_and_ingest_second_subset(
         time.sleep(30)
 
     def check():
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=update_featureset_dataframe[["datetime", "entity_id"]].iloc[5:],
             feature_refs=["update_feature1", "update_feature3", "update_feature4"],
             project=PROJECT_NAME,
@@ -659,7 +659,7 @@ def check():
 @pytest.mark.run(order=22)
 def test_update_featureset_retrieve_all_fields(client, update_featureset_dataframe):
     with pytest.raises(Exception):
-        feature_retrieval_job = client.get_batch_features(
+        feature_retrieval_job = client.get_historical_features(
             entity_rows=update_featureset_dataframe[["datetime", "entity_id"]],
             feature_refs=[
                 "update_feature1",
@@ -675,7 +675,7 @@ def test_update_featureset_retrieve_all_fields(client, update_featureset_datafra
 @pytest.mark.fs_update
 @pytest.mark.run(order=23)
 def test_update_featureset_retrieve_valid_fields(client, update_featureset_dataframe):
-    feature_retrieval_job = client.get_batch_features(
+    feature_retrieval_job = client.get_historical_features(
         entity_rows=update_featureset_dataframe[["datetime", "entity_id"]],
         feature_refs=["update_feature1", "update_feature3", "update_feature4"],
         project=PROJECT_NAME,
@@ -751,7 +751,7 @@ def test_batch_dataset_statistics(client):
             break
         time.sleep(30)
 
-    feature_retrieval_job = client.get_batch_features(
+    feature_retrieval_job = client.get_historical_features(
         entity_rows=entity_df,
         feature_refs=["feature_value6", "feature_set_2:other_feature_value7"],
         project=PROJECT_NAME,