
Commit 07e87ad

Speed up tests (#977)
* Cache _list_all: we don't need the latest list. The test does not require the list of flows to be updated, so a single cached version will do fine (this call otherwise would take ~40 seconds).
* Reduce the number of verified runs. Downloading a run takes a non-negligible amount of time (est. 300 ms on my current setup). It is unnecessary to compare against all >=100 runs; a handful should do fine (perhaps even just one would).
* Increase the batch size to avoid more than 2 pages. The old batch size required, in some tests, over 40 pages to be loaded, which increased the workload unnecessarily. These changes preserve the pagination tests while lowering the number of round trips required.
* Mark test_get_run_trace as skip, since it is already covered by test_run_and_upload_randomsearch.
* Filter on dataset id server-side. Speeds up ~25x and reduces network traffic.
* Reduce the number of pages loaded. Loading a page takes ~600 ms, and testing with 3 pages is no worse than 10. This test is also an ideal candidate to be split into (1) testing the URL is generated correctly, (2) testing a pre-cached result is parsed correctly, and (3) testing the URL gives the expected response (the actual integration test).
* Simplify the model used in the swapped-parameter test. If the test is that swapped parameters work, we don't need a complicated pipeline or dataset.
* Add a CLI flag to toggle short/long scenarios (sketched below). Some tests support both, e.g. by checking only a few runs vs. all runs.
* Skip time measurement on any Windows machine.
* Invoke the --long versions on the COVERAGE job.
* Add long/short versions for some long tests.
* Check the trace can be retrieved individually, to cover for the skipping of test_get_run_trace.
* Remove old test.
* Use patch to isolate list_all caching to one test.
* Fix decorator call.
1 parent e84cdf9 commit 07e87ad
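
The --long toggle described above works roughly as follows. This is a condensed sketch of the pattern the tests/conftest.py and test-module diffs below introduce; the TestSomething class is hypothetical and only illustrates how a test branches on the flag.

import pytest


# conftest.py side of the sketch: register the flag and expose it to test classes.
def pytest_addoption(parser):
    parser.addoption(
        "--long",
        action="store_true",
        default=False,
        help="Run the long version of tests which support both short and long scenarios.",
    )


@pytest.fixture(scope="class")
def long_version(request):
    # Attach the flag to the requesting class so tests can read self.long_version.
    request.cls.long_version = request.config.getoption("--long")


# Test-module side of the sketch: opt in via the fixture and branch on the flag.
@pytest.mark.usefixtures("long_version")
class TestSomething:
    def test_few_or_many(self):
        n_checked = 100 if self.long_version else 1
        assert n_checked >= 1

Running the suite with --long (as the COVERAGE job now does through PYTEST_ARGS) exercises the full scenarios, while a plain pytest invocation takes the short path.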

8 files changed: +63 -99 lines changed


ci_scripts/test.sh

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ run_tests() {
     cd $TEST_DIR

     if [[ "$COVERAGE" == "true" ]]; then
-        PYTEST_ARGS='--cov=openml'
+        PYTEST_ARGS='--cov=openml --long'
     else
         PYTEST_ARGS=''
     fi

openml/datasets/functions.py

Lines changed: 1 addition & 1 deletion
@@ -347,7 +347,7 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
     dict
         A dictionary with items {did: bool}
     """
-    dataset_list = list_datasets(status="all")
+    dataset_list = list_datasets(status="all", data_id=dataset_ids)
    active = {}

    for did in dataset_ids:
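
For context, this change moves the filtering from the client to the server: previously check_datasets_active downloaded the complete dataset listing and looked the ids up locally, whereas passing data_id makes the server return only the requested rows (the "~25x" speedup from the commit message). A rough sketch of the two approaches, loosely mirroring check_datasets_active, with hypothetical dataset ids:

import openml

dataset_ids = [2, 61, 128]  # hypothetical ids, for illustration only

# Old behaviour: fetch the entire listing, then filter client-side.
everything = openml.datasets.list_datasets(status="all")
present_old = {did: did in everything for did in dataset_ids}

# New behaviour: the server filters on data_id and returns only these rows.
subset = openml.datasets.list_datasets(status="all", data_id=dataset_ids)
present_new = {did: did in subset for did in dataset_ids}

assert present_old == present_new  # same answer, far less data over the wire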

tests/conftest.py

Lines changed: 15 additions & 0 deletions
@@ -25,6 +25,7 @@
 import os
 import logging
 from typing import List
+import pytest

 import openml
 from openml.testing import TestBase
@@ -182,3 +183,17 @@ def pytest_sessionfinish() -> None:
     logger.info("Local files deleted")

     logger.info("{} is killed".format(worker))
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--long",
+        action="store_true",
+        default=False,
+        help="Run the long version of tests which support both short and long scenarios.",
+    )
+
+
+@pytest.fixture(scope="class")
+def long_version(request):
+    request.cls.long_version = request.config.getoption("--long")

tests/test_evaluations/test_evaluation_functions.py

Lines changed: 6 additions & 0 deletions
@@ -1,10 +1,12 @@
 # License: BSD 3-Clause
+import pytest

 import openml
 import openml.evaluations
 from openml.testing import TestBase


+@pytest.mark.usefixtures("long_version")
 class TestEvaluationFunctions(TestBase):
     _multiprocess_can_split_ = True

@@ -27,6 +29,10 @@ def _check_list_evaluation_setups(self, **kwargs):

         # Check if output and order of list_evaluations is preserved
         self.assertSequenceEqual(evals_setups["run_id"].tolist(), evals["run_id"].tolist())
+
+        if not self.long_version:
+            evals_setups = evals_setups.head(1)
+
         # Check if the hyper-parameter column is as accurate and flow_id
         for index, row in evals_setups.iterrows():
             params = openml.runs.get_run(row["run_id"]).parameter_settings

tests/test_flows/test_flow_functions.py

Lines changed: 28 additions & 17 deletions
@@ -2,18 +2,22 @@

 from collections import OrderedDict
 import copy
+import functools
 import unittest
+from unittest.mock import patch

 from distutils.version import LooseVersion
 import sklearn
 from sklearn import ensemble
 import pandas as pd
+import pytest

 import openml
 from openml.testing import TestBase
 import openml.extensions.sklearn


+@pytest.mark.usefixtures("long_version")
 class TestFlowFunctions(TestBase):
     _multiprocess_can_split_ = True

@@ -334,20 +338,27 @@ def test_get_flow_reinstantiate_model_wrong_version(self):
         assert "0.19.1" not in flow.dependencies

     def test_get_flow_id(self):
-        clf = sklearn.tree.DecisionTreeClassifier()
-        flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish()
-
-        self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id)
-        flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False)
-        self.assertIn(flow.flow_id, flow_ids)
-        self.assertGreater(len(flow_ids), 2)
-
-        # Check that the output of get_flow_id is identical if only the name is given, no matter
-        # whether exact_version is set to True or False.
-        flow_ids_exact_version_True = openml.flows.get_flow_id(name=flow.name, exact_version=True)
-        flow_ids_exact_version_False = openml.flows.get_flow_id(
-            name=flow.name, exact_version=False,
-        )
-        self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False)
-        self.assertIn(flow.flow_id, flow_ids_exact_version_True)
-        self.assertGreater(len(flow_ids_exact_version_True), 2)
+        if self.long_version:
+            list_all = openml.utils._list_all
+        else:
+            list_all = functools.lru_cache()(openml.utils._list_all)
+        with patch("openml.utils._list_all", list_all):
+            clf = sklearn.tree.DecisionTreeClassifier()
+            flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish()
+
+            self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id)
+            flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False)
+            self.assertIn(flow.flow_id, flow_ids)
+            self.assertGreater(len(flow_ids), 2)
+
+            # Check that the output of get_flow_id is identical if only the name is given, no matter
+            # whether exact_version is set to True or False.
+            flow_ids_exact_version_True = openml.flows.get_flow_id(
+                name=flow.name, exact_version=True
+            )
+            flow_ids_exact_version_False = openml.flows.get_flow_id(
+                name=flow.name, exact_version=False,
+            )
+            self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False)
+            self.assertIn(flow.flow_id, flow_ids_exact_version_True)
+            self.assertGreater(len(flow_ids_exact_version_True), 2)
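
In the short scenario above, openml.utils._list_all is memoized for the duration of this single test: functools.lru_cache wraps the original function and unittest.mock.patch swaps the wrapper in, so the repeated get_flow_id calls reuse one listing response while every other test keeps the uncached behaviour. The same pattern in isolation, with a local stand-in for the expensive call rather than the real openml function:

import functools
from unittest.mock import patch


def expensive_listing(**filters):
    # Stand-in for a slow server call (the real _list_all took ~40 seconds here).
    return {"a": 1, "b": 2}


def test_uses_cached_listing():
    # lru_cache() memoizes by call arguments; identical calls only hit the "server" once.
    cached = functools.lru_cache()(expensive_listing)
    with patch(f"{__name__}.expensive_listing", cached):
        first = expensive_listing()
        second = expensive_listing()  # served from the cache
    assert first is second

Patching the name rather than caching at import time keeps the memoization scoped to this one test, which is what the "Use patch to isolate list_all caching to one test" item in the commit message refers to.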

tests/test_runs/test_run_functions.py

Lines changed: 7 additions & 74 deletions
@@ -10,7 +10,6 @@
 import unittest.mock

 import numpy as np
-import pytest

 import openml
 import openml.exceptions
@@ -335,7 +334,7 @@ def _check_sample_evaluations(
                 for sample in range(num_sample_entrees):
                     evaluation = sample_evaluations[measure][rep][fold][sample]
                     self.assertIsInstance(evaluation, float)
-                    if not os.environ.get("CI_WINDOWS"):
+                    if not (os.environ.get("CI_WINDOWS") or os.name == "nt"):
                         # Either Appveyor is much faster than Travis
                         # and/or measurements are not as accurate.
                         # Either way, windows seems to get an eval-time
@@ -682,6 +681,8 @@ def test_run_and_upload_randomsearch(self):
             flow_expected_rsv="12172",
         )
         self.assertEqual(len(run.trace.trace_iterations), 5)
+        trace = openml.runs.get_run_trace(run.run_id)
+        self.assertEqual(len(trace.trace_iterations), 5)

     def test_run_and_upload_maskedarrays(self):
         # This testcase is important for 2 reasons:
@@ -828,31 +829,12 @@ def _test_local_evaluations(self, run):
             self.assertGreaterEqual(alt_scores[idx], 0)
             self.assertLessEqual(alt_scores[idx], 1)

-    @unittest.skipIf(
-        LooseVersion(sklearn.__version__) < "0.20",
-        reason="SimpleImputer doesn't handle mixed type DataFrame as input",
-    )
     def test_local_run_swapped_parameter_order_model(self):
+        clf = DecisionTreeClassifier()
+        australian_task = 595
+        task = openml.tasks.get_task(australian_task)

-        # construct sci-kit learn classifier
-        clf = Pipeline(
-            steps=[
-                (
-                    "imputer",
-                    make_pipeline(
-                        SimpleImputer(strategy="most_frequent"),
-                        OneHotEncoder(handle_unknown="ignore"),
-                    ),
-                ),
-                # random forest doesn't take categoricals
-                ("estimator", RandomForestClassifier()),
-            ]
-        )
-
-        # download task
-        task = openml.tasks.get_task(7)
-
-        # invoke OpenML run
+        # task and clf are purposely in the old order
         run = openml.runs.run_model_on_task(
             task, clf, avoid_duplicate_runs=False, upload_flow=False,
         )
@@ -950,55 +932,6 @@ def test_initialize_model_from_run(self):
         self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"')
         self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05")

-    @pytest.mark.flaky()
-    def test_get_run_trace(self):
-        # get_run_trace is already tested implicitly in test_run_and_publish
-        # this test is a bit additional.
-        num_iterations = 10
-        num_folds = 1
-        task_id = 119
-
-        task = openml.tasks.get_task(task_id)
-
-        # IMPORTANT! Do not sentinel this flow. is faster if we don't wait
-        # on openml server
-        clf = RandomizedSearchCV(
-            RandomForestClassifier(random_state=42, n_estimators=5),
-            {
-                "max_depth": [3, None],
-                "max_features": [1, 2, 3, 4],
-                "bootstrap": [True, False],
-                "criterion": ["gini", "entropy"],
-            },
-            num_iterations,
-            random_state=42,
-            cv=3,
-        )
-
-        # [SPEED] make unit test faster by exploiting run information
-        # from the past
-        try:
-            # in case the run did not exists yet
-            run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=True,)
-
-            self.assertEqual(
-                len(run.trace.trace_iterations), num_iterations * num_folds,
-            )
-            run = run.publish()
-            TestBase._mark_entity_for_removal("run", run.run_id)
-            TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
-            self._wait_for_processed_run(run.run_id, 400)
-            run_id = run.run_id
-        except openml.exceptions.OpenMLRunsExistError as e:
-            # The only error we expect, should fail otherwise.
-            run_ids = [int(run_id) for run_id in e.run_ids]
-            self.assertGreater(len(run_ids), 0)
-            run_id = random.choice(list(run_ids))
-
-        # now the actual unit test ...
-        run_trace = openml.runs.get_run_trace(run_id)
-        self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds)
-
     @unittest.skipIf(
         LooseVersion(sklearn.__version__) < "0.20",
         reason="SimpleImputer doesn't handle mixed type DataFrame as input",

tests/test_tasks/test_task_functions.py

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ def test_list_tasks_paginate(self):
             self._check_task(tasks[tid])

     def test_list_tasks_per_type_paginate(self):
-        size = 10
+        size = 40
         max = 100
         task_types = [
             TaskType.SUPERVISED_CLASSIFICATION,

tests/test_utils/test_utils.py

Lines changed: 4 additions & 5 deletions
@@ -11,7 +11,6 @@

 class OpenMLTaskTest(TestBase):
     _multiprocess_can_split_ = True
-    _batch_size = 25

     def mocked_perform_api_call(call, request_method):
         # TODO: JvR: Why is this not a staticmethod?
@@ -33,7 +32,7 @@ def test_list_all_few_results_available(self, _perform_api_call):

     def test_list_all_for_datasets(self):
         required_size = 127  # default test server reset value
-        datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size)
+        datasets = openml.datasets.list_datasets(batch_size=100, size=required_size)

         self.assertEqual(len(datasets), required_size)
         for did in datasets:
@@ -53,13 +52,13 @@ def test_list_datasets_with_high_size_parameter(self):

     def test_list_all_for_tasks(self):
         required_size = 1068  # default test server reset value
-        tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size)
+        tasks = openml.tasks.list_tasks(batch_size=1000, size=required_size)

         self.assertEqual(len(tasks), required_size)

     def test_list_all_for_flows(self):
         required_size = 15  # default test server reset value
-        flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size)
+        flows = openml.flows.list_flows(batch_size=25, size=required_size)

         self.assertEqual(len(flows), required_size)

@@ -73,7 +72,7 @@ def test_list_all_for_setups(self):

     def test_list_all_for_runs(self):
         required_size = 21
-        runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size)
+        runs = openml.runs.list_runs(batch_size=25, size=required_size)

         # might not be on test server after reset, please rerun test at least once if fails
         self.assertEqual(len(runs), required_size)
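
The batch-size changes above map directly onto the number of paginated API calls: listings are fetched in chunks of batch_size, so roughly ceil(size / batch_size) requests are needed. With the old class-wide _batch_size = 25, the 1068 tasks on the test server needed 43 round trips (the "over 40 pages" from the commit message); with batch_size=1000 it is at most 2, which still exercises pagination. A quick check of the arithmetic:

import math


def n_pages(size, batch_size):
    # Number of paginated requests needed to fetch `size` results in chunks of `batch_size`.
    return math.ceil(size / batch_size)


assert n_pages(1068, 25) == 43    # old _batch_size for the tasks listing
assert n_pages(1068, 1000) == 2   # new batch_size: at most two pages
assert n_pages(127, 100) == 2     # datasets listing: still two pages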
