Skip to content

Commit a629562

Browse files
authored
Improve unit tests (#985)
* randomize test order * reduce noise in the output to better see the issues * deprecate format argument to OpenMLDataset * fix file upload * further reduce warnings * fix test which failed due to deleting a dataset on the test server * re-add test randomization (due to rebase) * try if random test order causes all problems by removing it * improve lbfgs test * distribute tests better * reduce randomness in lbfgs test * add requested commits
1 parent 51eaff6 commit a629562

File tree

11 files changed

+63
-53
lines changed

11 files changed

+63
-53
lines changed

.github/workflows/ubuntu-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
- name: Run tests
5252
run: |
5353
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
54-
pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread -sv $codecov
54+
pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov
5555
- name: Check for files left behind by test
5656
if: ${{ always() }}
5757
run: |

appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ build: false
4545

4646
test_script:
4747
- "cd C:\\projects\\openml-python"
48-
- "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread -sv"
48+
- "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread --dist load -sv"

openml/datasets/dataset.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import numpy as np
1414
import pandas as pd
1515
import scipy.sparse
16-
from warnings import warn
1716

1817
from openml.base import OpenMLBase
1918
from .data_feature import OpenMLDataFeature
@@ -34,7 +33,7 @@ class OpenMLDataset(OpenMLBase):
3433
Name of the dataset.
3534
description : str
3635
Description of the dataset.
37-
format : str
36+
data_format : str
3837
Format of the dataset which can be either 'arff' or 'sparse_arff'.
3938
cache_format : str
4039
Format for caching the dataset which can be either 'feather' or 'pickle'.
@@ -103,7 +102,6 @@ def __init__(
103102
self,
104103
name,
105104
description,
106-
format=None,
107105
data_format="arff",
108106
cache_format="pickle",
109107
dataset_id=None,
@@ -178,16 +176,8 @@ def find_invalid_characters(string, pattern):
178176
)
179177

180178
self.cache_format = cache_format
181-
if format is None:
182-
self.format = data_format
183-
else:
184-
warn(
185-
"The format parameter in the init will be deprecated "
186-
"in the future."
187-
"Please use data_format instead",
188-
DeprecationWarning,
189-
)
190-
self.format = format
179+
# Has to be called format, otherwise there will be an XML upload error
180+
self.format = data_format
191181
self.creator = creator
192182
self.contributor = contributor
193183
self.collection_date = collection_date
@@ -456,12 +446,11 @@ def _parse_data_from_arff(
456446
col.append(
457447
self._unpack_categories(X[column_name], categories_names[column_name])
458448
)
459-
elif attribute_dtype[column_name] in ('floating',
460-
'integer'):
449+
elif attribute_dtype[column_name] in ("floating", "integer"):
461450
X_col = X[column_name]
462451
if X_col.min() >= 0 and X_col.max() <= 255:
463452
try:
464-
X_col_uint = X_col.astype('uint8')
453+
X_col_uint = X_col.astype("uint8")
465454
if (X_col == X_col_uint).all():
466455
col.append(X_col_uint)
467456
continue

openml/extensions/sklearn/extension.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1748,7 +1748,7 @@ def _prediction_to_probabilities(
17481748
proba_y.shape[1], len(task.class_labels),
17491749
)
17501750
warnings.warn(message)
1751-
openml.config.logger.warn(message)
1751+
openml.config.logger.warning(message)
17521752

17531753
for i, col in enumerate(task.class_labels):
17541754
# adding missing columns with 0 probability

openml/flows/flow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
229229

230230
if not self.description:
231231
logger = logging.getLogger(__name__)
232-
logger.warn("Flow % has empty description", self.name)
232+
logger.warning("Flow % has empty description", self.name)
233233

234234
flow_parameters = []
235235
for key in self.parameters:

openml/study/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def get_study(
5858
"of things have changed since then. Please use `get_suite('OpenML100')` instead."
5959
)
6060
warnings.warn(message, DeprecationWarning)
61-
openml.config.logger.warn(message)
61+
openml.config.logger.warning(message)
6262
study = _get_study(study_id, entity_type="task")
6363
return cast(OpenMLBenchmarkSuite, study) # type: ignore
6464
else:

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def delete_remote_files(tracker) -> None:
126126
openml.utils._delete_entity(entity_type, entity)
127127
logger.info("Deleted ({}, {})".format(entity_type, entity))
128128
except Exception as e:
129-
logger.warn("Cannot delete ({},{}): {}".format(entity_type, entity, e))
129+
logger.warning("Cannot delete ({},{}): {}".format(entity_type, entity, e))
130130

131131

132132
def pytest_sessionstart() -> None:

tests/test_datasets/test_dataset.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# License: BSD 3-Clause
22

33
from time import time
4-
from warnings import filterwarnings, catch_warnings
54

65
import numpy as np
76
import pandas as pd
@@ -120,11 +119,11 @@ def test_get_data_no_str_data_for_nparrays(self):
120119

121120
def _check_expected_type(self, dtype, is_cat, col):
122121
if is_cat:
123-
expected_type = 'category'
124-
elif not col.isna().any() and (col.astype('uint8') == col).all():
125-
expected_type = 'uint8'
122+
expected_type = "category"
123+
elif not col.isna().any() and (col.astype("uint8") == col).all():
124+
expected_type = "uint8"
126125
else:
127-
expected_type = 'float64'
126+
expected_type = "float64"
128127

129128
self.assertEqual(dtype.name, expected_type)
130129

@@ -192,14 +191,6 @@ def test_get_data_with_ignore_attributes(self):
192191
self.assertEqual(rval.shape, (898, 38))
193192
self.assertEqual(len(categorical), 38)
194193

195-
def test_dataset_format_constructor(self):
196-
197-
with catch_warnings():
198-
filterwarnings("error")
199-
self.assertRaises(
200-
DeprecationWarning, openml.OpenMLDataset, "Test", "Test", format="arff"
201-
)
202-
203194
def test_get_data_with_nonexisting_class(self):
204195
# This class is using the anneal dataset with labels [1, 2, 3, 4, 5, 'U']. However,
205196
# label 4 does not exist and we test that the features 5 and 'U' are correctly mapped to

tests/test_datasets/test_dataset_functions.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import random
55
from itertools import product
66
from unittest import mock
7+
import shutil
78

89
import arff
910
import time
@@ -373,9 +374,9 @@ def test_get_dataset_by_name(self):
373374
def test_get_dataset_uint8_dtype(self):
374375
dataset = openml.datasets.get_dataset(1)
375376
self.assertEqual(type(dataset), OpenMLDataset)
376-
self.assertEqual(dataset.name, 'anneal')
377+
self.assertEqual(dataset.name, "anneal")
377378
df, _, _, _ = dataset.get_data()
378-
self.assertEqual(df['carbon'].dtype, 'uint8')
379+
self.assertEqual(df["carbon"].dtype, "uint8")
379380

380381
def test_get_dataset(self):
381382
# This is the only non-lazy load to ensure default behaviour works.
@@ -1154,27 +1155,31 @@ def test_publish_fetch_ignore_attribute(self):
11541155
# test if publish was successful
11551156
self.assertIsInstance(dataset.id, int)
11561157

1158+
downloaded_dataset = self._wait_for_dataset_being_processed(dataset.id)
1159+
self.assertEqual(downloaded_dataset.ignore_attribute, ignore_attribute)
1160+
1161+
def _wait_for_dataset_being_processed(self, dataset_id):
11571162
downloaded_dataset = None
11581163
# fetching from server
11591164
# loop till timeout or fetch not successful
1160-
max_waiting_time_seconds = 400
1165+
max_waiting_time_seconds = 600
11611166
# time.time() works in seconds
11621167
start_time = time.time()
11631168
while time.time() - start_time < max_waiting_time_seconds:
11641169
try:
1165-
downloaded_dataset = openml.datasets.get_dataset(dataset.id)
1170+
downloaded_dataset = openml.datasets.get_dataset(dataset_id)
11661171
break
11671172
except Exception as e:
11681173
# returned code 273: Dataset not processed yet
11691174
# returned code 362: No qualities found
11701175
TestBase.logger.error(
1171-
"Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e))
1176+
"Failed to fetch dataset:{} with '{}'.".format(dataset_id, str(e))
11721177
)
11731178
time.sleep(10)
11741179
continue
11751180
if downloaded_dataset is None:
1176-
raise ValueError("TIMEOUT: Failed to fetch uploaded dataset - {}".format(dataset.id))
1177-
self.assertEqual(downloaded_dataset.ignore_attribute, ignore_attribute)
1181+
raise ValueError("TIMEOUT: Failed to fetch uploaded dataset - {}".format(dataset_id))
1182+
return downloaded_dataset
11781183

11791184
def test_create_dataset_row_id_attribute_error(self):
11801185
# meta-information
@@ -1347,7 +1352,7 @@ def test_get_dataset_cache_format_feather(self):
13471352
self.assertEqual(len(categorical), X.shape[1])
13481353
self.assertEqual(len(attribute_names), X.shape[1])
13491354

1350-
def test_data_edit(self):
1355+
def test_data_edit_non_critical_field(self):
13511356
# Case 1
13521357
# All users can edit non-critical fields of datasets
13531358
desc = (
@@ -1368,14 +1373,31 @@ def test_data_edit(self):
13681373
edited_dataset = openml.datasets.get_dataset(did)
13691374
self.assertEqual(edited_dataset.description, desc)
13701375

1376+
def test_data_edit_critical_field(self):
13711377
# Case 2
13721378
# only owners (or admin) can edit all critical fields of datasets
1373-
# this is a dataset created by CI, so it is editable by this test
1374-
did = 315
1375-
result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2")
1379+
# for this, we need to first clone a dataset to do changes
1380+
did = fork_dataset(1)
1381+
self._wait_for_dataset_being_processed(did)
1382+
result = edit_dataset(did, default_target_attribute="shape", ignore_attribute="oil")
13761383
self.assertEqual(did, result)
1377-
edited_dataset = openml.datasets.get_dataset(did)
1378-
self.assertEqual(edited_dataset.ignore_attribute, ["col_2"])
1384+
1385+
n_tries = 10
1386+
# we need to wait for the edit to be reflected on the server
1387+
for i in range(n_tries):
1388+
edited_dataset = openml.datasets.get_dataset(did)
1389+
try:
1390+
self.assertEqual(edited_dataset.default_target_attribute, "shape", edited_dataset)
1391+
self.assertEqual(edited_dataset.ignore_attribute, ["oil"], edited_dataset)
1392+
break
1393+
except AssertionError as e:
1394+
if i == n_tries - 1:
1395+
raise e
1396+
time.sleep(10)
1397+
# Delete the cache dir to get the newer version of the dataset
1398+
shutil.rmtree(
1399+
os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did))
1400+
)
13791401

13801402
def test_data_edit_errors(self):
13811403
# Check server exception when no field to edit is provided

tests/test_runs/test_run_functions.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ def determine_grid_size(param_grid):
442442
# suboptimal (slow), and not guaranteed to work if evaluation
443443
# engine is behind.
444444
# TODO: mock this? We have the arff already on the server
445-
self._wait_for_processed_run(run.run_id, 400)
445+
self._wait_for_processed_run(run.run_id, 600)
446446
try:
447447
model_prime = openml.runs.initialize_model_from_trace(
448448
run_id=run.run_id, repeat=0, fold=0,
@@ -519,7 +519,7 @@ def _run_and_upload_regression(
519519
)
520520

521521
def test_run_and_upload_logistic_regression(self):
522-
lr = LogisticRegression(solver="lbfgs")
522+
lr = LogisticRegression(solver="lbfgs", max_iter=1000)
523523
task_id = self.TEST_SERVER_TASK_SIMPLE[0]
524524
n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1]
525525
n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2]
@@ -605,7 +605,8 @@ def get_ct_cf(nominal_indices, numeric_indices):
605605
LooseVersion(sklearn.__version__) < "0.20",
606606
reason="columntransformer introduction in 0.20.0",
607607
)
608-
def test_run_and_upload_knn_pipeline(self):
608+
@unittest.mock.patch("warnings.warn")
609+
def test_run_and_upload_knn_pipeline(self, warnings_mock):
609610

610611
cat_imp = make_pipeline(
611612
SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore")
@@ -635,11 +636,18 @@ def test_run_and_upload_knn_pipeline(self):
635636
n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS[1]
636637
n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS[2]
637638
self._run_and_upload_classification(pipeline2, task_id, n_missing_vals, n_test_obs, "62501")
639+
# The warning raised is:
640+
# The total space of parameters 8 is smaller than n_iter=10.
641+
# Running 8 iterations. For exhaustive searches, use GridSearchCV.'
642+
# It is raised three times because we once run the model to upload something and then run
643+
# it again twice to compare that the predictions are reproducible.
644+
self.assertEqual(warnings_mock.call_count, 3)
638645

639646
def test_run_and_upload_gridsearch(self):
640647
gridsearch = GridSearchCV(
641648
BaggingClassifier(base_estimator=SVC()),
642649
{"base_estimator__C": [0.01, 0.1, 10], "base_estimator__gamma": [0.01, 0.1, 10]},
650+
cv=3,
643651
)
644652
task_id = self.TEST_SERVER_TASK_SIMPLE[0]
645653
n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1]

0 commit comments

Comments (0)