diff --git a/.github/workflows/ubuntu-test.yml b/.github/workflows/ubuntu-test.yml
index 21f0e106c..41cc155ac 100644
--- a/.github/workflows/ubuntu-test.yml
+++ b/.github/workflows/ubuntu-test.yml
@@ -29,6 +29,8 @@ jobs:
     steps:
     - uses: actions/checkout@v2
+      with:
+        fetch-depth: 2
     - name: Setup Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
@@ -51,7 +53,7 @@ jobs:
     - name: Run tests
      run: |
        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
-        pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov
+        pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov --reruns 5 --reruns-delay 1
     - name: Check for files left behind by test
      if: ${{ always() }}
      run: |
@@ -67,5 +69,6 @@ jobs:
      if: matrix.code-cov && always()
      uses: codecov/codecov-action@v1
      with:
+        files: coverage.xml
        fail_ci_if_error: true
-        verbose: true
+        verbose: true
\ No newline at end of file
diff --git a/openml/config.py b/openml/config.py
index 8daaa2d5c..a39b72d48 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -211,15 +211,6 @@ def _setup(config=None):
     else:
         cache_exists = True
 
-    if cache_exists:
-        _create_log_handlers()
-    else:
-        _create_log_handlers(create_file_handler=False)
-        openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir
-        )
-
     if config is None:
         config = _parse_config(config_file)
 
@@ -240,6 +231,15 @@ def _get(config, key):
     connection_n_retries = int(_get(config, "connection_n_retries"))
     max_retries = int(_get(config, "max_retries"))
 
+    if cache_exists:
+        _create_log_handlers()
+    else:
+        _create_log_handlers(create_file_handler=False)
+        openml_logger.warning(
+            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
+            "not working properly." % config_dir
+        )
+
     cache_directory = os.path.expanduser(short_cache_dir)
     # create the cache subdirectory
     if not os.path.exists(cache_directory):
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 4442f798c..026dc356d 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -1744,6 +1744,8 @@ def _prediction_to_probabilities(
                 user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime
 
             modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000
+            if hasattr(model_copy, "refit_time_"):
+                modelfit_dur_walltime += model_copy.refit_time_ * 1000
 
             if can_measure_wallclocktime:
                 user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 6558bb4eb..d7daa7242 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -271,7 +271,6 @@ def run_flow_on_task(
 
     # execute the run
     res = _run_task_get_arffcontent(
-        flow=flow,
         model=flow.model,
         task=task,
         extension=flow.extension,
@@ -432,7 +431,6 @@ def run_exists(task_id: int, setup_id: int) -> Set[int]:
 
 
 def _run_task_get_arffcontent(
-    flow: OpenMLFlow,
     model: Any,
     task: OpenMLTask,
     extension: "Extension",
@@ -476,7 +474,6 @@ def _run_task_get_arffcontent(
     job_rvals = Parallel(verbose=0, n_jobs=n_jobs)(
         delayed(_run_task_get_arffcontent_parallel_helper)(
             extension=extension,
-            flow=flow,
             fold_no=fold_no,
             model=model,
             rep_no=rep_no,
@@ -613,7 +610,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
 
 def _run_task_get_arffcontent_parallel_helper(
     extension: "Extension",
-    flow: OpenMLFlow,
     fold_no: int,
     model: Any,
     rep_no: int,
@@ -661,12 +657,13 @@ def _run_task_get_arffcontent_parallel_helper(
     else:
         raise NotImplementedError(task.task_type)
     config.logger.info(
-        "Going to execute flow '%s' on task %d for repeat %d fold %d sample %d.",
-        flow.name,
-        task.task_id,
-        rep_no,
-        fold_no,
-        sample_no,
+        "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format(
+            str(model),
+            openml.datasets.get_dataset(task.dataset_id).name,
+            rep_no,
+            fold_no,
+            sample_no,
+        )
     )
     pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold(
         model=model,
diff --git a/setup.py b/setup.py
index 22a77bcbc..81f3a546c 100644
--- a/setup.py
+++ b/setup.py
@@ -68,6 +68,7 @@
             "pyarrow",
             "pre-commit",
             "pytest-cov",
+            "pytest-rerunfailures",
             "mypy",
         ],
         "examples": [
diff --git a/tests/test_evaluations/__init__.py b/tests/test_evaluations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 4dc8744f1..c1f88bcda 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1254,7 +1254,7 @@ def test_paralizable_check(self):
         # using this param distribution should raise an exception
         illegal_param_dist = {"base__n_jobs": [-1, 0, 1]}
         # using this param distribution should not raise an exception
-        legal_param_dist = {"base__max_depth": [2, 3, 4]}
+        legal_param_dist = {"n_estimators": [2, 3, 4]}
 
         legal_models = [
             sklearn.ensemble.RandomForestClassifier(),
@@ -1282,12 +1282,19 @@ def test_paralizable_check(self):
 
         can_measure_cputime_answers = [True, False, False, True, False, False, True, False, False]
         can_measure_walltime_answers = [True, True, False, True, True, False, True, True, False]
+        if LooseVersion(sklearn.__version__) < "0.20":
+            has_refit_time = [False, False, False, False, False, False, False, False, False]
+        else:
+            has_refit_time = [False, False, False, False, False, False, True, True, False]
 
-        for model, allowed_cputime, allowed_walltime in zip(
-            legal_models, can_measure_cputime_answers, can_measure_walltime_answers
+        X, y = sklearn.datasets.load_iris(return_X_y=True)
+        for model, allowed_cputime, allowed_walltime, refit_time in zip(
+            legal_models, can_measure_cputime_answers, can_measure_walltime_answers, has_refit_time
         ):
             self.assertEqual(self.extension._can_measure_cputime(model), allowed_cputime)
             self.assertEqual(self.extension._can_measure_wallclocktime(model), allowed_walltime)
+            model.fit(X, y)
+            self.assertEqual(refit_time, hasattr(model, "refit_time_"))
 
         for model in illegal_models:
             with self.assertRaises(PyOpenMLError):
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index fdbbc1e76..4593f8b64 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -10,6 +10,7 @@
 import unittest.mock
 
 import numpy as np
+import joblib
 from joblib import parallel_backend
 
 import openml
@@ -1187,13 +1188,10 @@ def test__run_task_get_arffcontent(self):
         num_folds = 10
         num_repeats = 1
 
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         clf = make_pipeline(
             OneHotEncoder(handle_unknown="ignore"), SGDClassifier(loss="log", random_state=1)
         )
         res = openml.runs.functions._run_task_get_arffcontent(
-            flow=flow,
             extension=self.extension,
             model=clf,
             task=task,
@@ -1404,8 +1402,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
         # Check that _run_task_get_arffcontent works when one of the class
         # labels only declared in the arff file, but is not present in the
         # actual data
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         task = openml.tasks.get_task(2)  # anneal; crossvalidation
 
         from sklearn.compose import ColumnTransformer
@@ -1420,7 +1416,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
         )  # build a sklearn classifier
         data_content, _, _, _ = _run_task_get_arffcontent(
-            flow=flow,
             model=model,
             task=task,
             extension=self.extension,
@@ -1442,8 +1437,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
         # Check that _run_task_get_arffcontent works when one of the class
         # labels only declared in the arff file, but is not present in the
         # actual data
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         task = openml.tasks.get_task(2)  # anneal; crossvalidation
         # task_id=2 on test server has 38 columns with 6 numeric columns
         cont_idx = [3, 4, 8, 32, 33, 34]
@@ -1465,7 +1458,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
         )  # build a sklearn classifier
         data_content, _, _, _ = _run_task_get_arffcontent(
-            flow=flow,
             model=model,
             task=task,
             extension=self.extension,
@@ -1581,20 +1573,18 @@ def test_format_prediction_task_regression(self):
         LooseVersion(sklearn.__version__) < "0.21",
         reason="couldn't perform local tests successfully w/o bloating RAM",
     )
-    @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._run_model_on_fold")
+    @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs")
     def test__run_task_get_arffcontent_2(self, parallel_mock):
         """ Tests if a run executed in parallel is collated correctly. """
         task = openml.tasks.get_task(7)  # Supervised Classification on kr-vs-kp
         x, y = task.get_X_and_y(dataset_format="dataframe")
         num_instances = x.shape[0]
         line_length = 6 + len(task.class_labels)
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         clf = SGDClassifier(loss="log", random_state=1)
         n_jobs = 2
-        with parallel_backend("loky", n_jobs=n_jobs):
+        backend = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing"
+        with parallel_backend(backend, n_jobs=n_jobs):
             res = openml.runs.functions._run_task_get_arffcontent(
-                flow=flow,
                 extension=self.extension,
                 model=clf,
                 task=task,
@@ -1606,6 +1596,9 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
         # function _run_model_on_fold is being mocked out. However, for a new spawned worker, it
         # is not and the mock call_count should remain 0 while the subsequent check of actual
         # results should also hold, only on successful distribution of tasks to workers.
+        # _prevent_optimize_n_jobs() is executed within _run_model_on_fold(); mocking it does not
+        # affect the rest of the pipeline, but its call count adequately indicates whether
+        # _run_model_on_fold() is being called.
         self.assertEqual(parallel_mock.call_count, 0)
         self.assertIsInstance(res[0], list)
         self.assertEqual(len(res[0]), num_instances)
@@ -1638,13 +1631,12 @@ def test_joblib_backends(self, parallel_mock):
         x, y = task.get_X_and_y(dataset_format="dataframe")
         num_instances = x.shape[0]
         line_length = 6 + len(task.class_labels)
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
 
+        backend_choice = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing"
         for n_jobs, backend, len_time_stats, call_count in [
-            (1, "loky", 7, 10),
-            (2, "loky", 4, 10),
-            (-1, "loky", 1, 10),
+            (1, backend_choice, 7, 10),
+            (2, backend_choice, 4, 10),
+            (-1, backend_choice, 1, 10),
             (1, "threading", 7, 20),
             (-1, "threading", 1, 30),
             (1, "sequential", 7, 40),
@@ -1668,7 +1660,6 @@ def test_joblib_backends(self, parallel_mock):
             )
             with parallel_backend(backend, n_jobs=n_jobs):
                 res = openml.runs.functions._run_task_get_arffcontent(
-                    flow=flow,
                     extension=self.extension,
                     model=clf,
                     task=task,
diff --git a/tests/test_study/__init__.py b/tests/test_study/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index eef874b15..e028ba2bd 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -4,6 +4,7 @@
 import openml.study
 from openml.testing import TestBase
 import pandas as pd
+import pytest
 
 
 class TestStudyFunctions(TestBase):
@@ -113,6 +114,7 @@ def test_publish_benchmark_suite(self):
         self.assertEqual(study_downloaded.status, "deactivated")
         # can't delete study, now it's not longer in preparation
 
+    @pytest.mark.flaky()
     def test_publish_study(self):
         # get some random runs to attach
         run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10)
@@ -133,8 +135,8 @@ def test_publish_study(self):
             run_ids=list(run_list.keys()),
         )
         study.publish()
-        # not tracking upload for delete since _delete_entity called end of function
-        # asserting return status from openml.study.delete_study()
+        TestBase._mark_entity_for_removal("study", study.id)
+        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
         self.assertGreater(study.id, 0)
         study_downloaded = openml.study.get_study(study.id)
         self.assertEqual(study_downloaded.alias, fixt_alias)