|
61 | 61 | #################################################################################################### |
62 | 62 | # And then split the data based on this: |
63 | 63 |
|
64 | | -X, y, _, _ = task.get_dataset().get_data(task.target_name) |
65 | | -X_train = X.loc[train_indices] |
66 | | -y_train = y[train_indices] |
67 | | -X_test = X.loc[test_indices] |
68 | | -y_test = y[test_indices] |
| 64 | +X, y = task.get_X_and_y(dataset_format="dataframe") |
| 65 | +X_train = X.iloc[train_indices] |
| 66 | +y_train = y.iloc[train_indices] |
| 67 | +X_test = X.iloc[test_indices] |
| 68 | +y_test = y.iloc[test_indices] |
69 | 69 |
|
70 | 70 | print( |
71 | 71 | "X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format( |
|
78 | 78 |
|
79 | 79 | task_id = 3 |
80 | 80 | task = openml.tasks.get_task(task_id) |
| 81 | +X, y = task.get_X_and_y(dataset_format="dataframe") |
81 | 82 | n_repeats, n_folds, n_samples = task.get_split_dimensions() |
82 | 83 | print( |
83 | 84 | "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( |
|
93 | 94 | train_indices, test_indices = task.get_train_test_split_indices( |
94 | 95 | repeat=repeat_idx, fold=fold_idx, sample=sample_idx, |
95 | 96 | ) |
96 | | - X_train = X.loc[train_indices] |
97 | | - y_train = y[train_indices] |
98 | | - X_test = X.loc[test_indices] |
99 | | - y_test = y[test_indices] |
| 97 | + X_train = X.iloc[train_indices] |
| 98 | + y_train = y.iloc[train_indices] |
| 99 | + X_test = X.iloc[test_indices] |
| 100 | + y_test = y.iloc[test_indices] |
100 | 101 |
|
101 | 102 | print( |
102 | 103 | "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " |
|
116 | 117 |
|
117 | 118 | task_id = 1767 |
118 | 119 | task = openml.tasks.get_task(task_id) |
| 120 | +X, y = task.get_X_and_y(dataset_format="dataframe") |
119 | 121 | n_repeats, n_folds, n_samples = task.get_split_dimensions() |
120 | 122 | print( |
121 | 123 | "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( |
|
131 | 133 | train_indices, test_indices = task.get_train_test_split_indices( |
132 | 134 | repeat=repeat_idx, fold=fold_idx, sample=sample_idx, |
133 | 135 | ) |
134 | | - X_train = X.loc[train_indices] |
135 | | - y_train = y[train_indices] |
136 | | - X_test = X.loc[test_indices] |
137 | | - y_test = y[test_indices] |
| 136 | + X_train = X.iloc[train_indices] |
| 137 | + y_train = y.iloc[train_indices] |
| 138 | + X_test = X.iloc[test_indices] |
| 139 | + y_test = y.iloc[test_indices] |
138 | 140 |
|
139 | 141 | print( |
140 | 142 | "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " |
|
154 | 156 |
|
155 | 157 | task_id = 1702 |
156 | 158 | task = openml.tasks.get_task(task_id) |
| 159 | +X, y = task.get_X_and_y(dataset_format="dataframe") |
157 | 160 | n_repeats, n_folds, n_samples = task.get_split_dimensions() |
158 | 161 | print( |
159 | 162 | "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( |
|
169 | 172 | train_indices, test_indices = task.get_train_test_split_indices( |
170 | 173 | repeat=repeat_idx, fold=fold_idx, sample=sample_idx, |
171 | 174 | ) |
172 | | - X_train = X.loc[train_indices] |
173 | | - y_train = y[train_indices] |
174 | | - X_test = X.loc[test_indices] |
175 | | - y_test = y[test_indices] |
| 175 | + X_train = X.iloc[train_indices] |
| 176 | + y_train = y.iloc[train_indices] |
| 177 | + X_test = X.iloc[test_indices] |
| 178 | + y_test = y.iloc[test_indices] |
176 | 179 |
|
177 | 180 | print( |
178 | 181 | "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " |
|
0 commit comments