Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions tpot2/config/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat

dual = n_samples<=n_features

dual = TRUE_SPECIAL_STRING if dual else FALSE_SPECIAL_STRING
dual = FALSE_SPECIAL_STRING

space = {"solver":"saga",
"max_iter":1000,
Expand All @@ -21,6 +21,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat
penalty = Categorical('penalty', ['l1', 'l2',"elasticnet"], default='l2')
C = Float('C', (0.01, 1e5), log=True)
l1_ratio = Float('l1_ratio', (0.0, 1.0))
class_weight = Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced'])

l1_ratio_condition = EqualsCondition(l1_ratio, penalty, 'elasticnet')

Expand All @@ -29,7 +30,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat


cs = ConfigurationSpace(space)
cs.add_hyperparameters([penalty, C, l1_ratio])
cs.add_hyperparameters([penalty, C, l1_ratio, class_weight])
cs.add_conditions([l1_ratio_condition])

return cs
Expand Down Expand Up @@ -84,6 +85,7 @@ def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state):
'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)),
'max_features': Categorical("max_features", [NONE_SPECIAL_STRING, 'sqrt', 'log2']),
'min_weight_fraction_leaf': 0.0,
'class_weight' : Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced']),
}


Expand All @@ -94,7 +96,7 @@ def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state):
space = space
)


#TODO Does not support predict_proba
def get_LinearSVC_ConfigurationSpace(random_state):
space = {"dual":"auto"}

Expand All @@ -120,12 +122,13 @@ def get_SVC_ConfigurationSpace(random_state):
'max_iter': 3000,
'probability':TRUE_SPECIAL_STRING}

kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid'])
kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid', 'linear'])
C = Float('C', (0.01, 1e5), log=True)
degree = Integer("degree", bounds=(1, 5))
gamma = Float("gamma", bounds=(1e-5, 8), log=True)
shrinking = Categorical("shrinking", [True, False])
coef0 = Float("coef0", bounds=(-1, 1))
class_weight = Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced'])

degree_condition = EqualsCondition(degree, kernel, 'poly')
gamma_condition = InCondition(gamma, kernel, ['rbf', 'poly'])
Expand All @@ -136,7 +139,7 @@ def get_SVC_ConfigurationSpace(random_state):


cs = ConfigurationSpace(space)
cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking])
cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking, class_weight])
cs.add_conditions([degree_condition, gamma_condition, coef0_condition])

return cs
Expand Down Expand Up @@ -187,12 +190,11 @@ def get_XGBClassifier_ConfigurationSpace(random_state,):
def get_LGBMClassifier_ConfigurationSpace(random_state,):

space = {
'objective': 'binary',
'metric': 'binary_logloss',
'boosting_type': Categorical("boosting_type", ['gbdt', 'dart', 'goss']),
'num_leaves': Integer("num_leaves", bounds=(2, 256)),
'max_depth': Integer("max_depth", bounds=(1, 10)),
'n_estimators': Integer("n_estimators", bounds=(10, 100)),
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
'verbose':-1,
'n_jobs': 1,
}
Expand All @@ -213,6 +215,7 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state):
'min_samples_split': Integer("min_samples_split", bounds=(2, 20)),
'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)),
'bootstrap': Categorical("bootstrap", [True, False]),
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
'n_jobs': 1,
}

Expand All @@ -228,7 +231,7 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state):
def get_SGDClassifier_ConfigurationSpace(random_state):

space = {
'loss': Categorical("loss", ['squared_hinge', 'modified_huber']), #don't include hinge because we have LinearSVC, don't include log because we have LogisticRegression
'loss': Categorical("loss", ['modified_huber']), #don't include hinge because we have LinearSVC, don't include log because we have LogisticRegression. TODO 'squared_hinge'? doesn't support predict proba
'penalty': 'elasticnet',
'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True),
'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)),
Expand Down
4 changes: 2 additions & 2 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@
"selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",],
"selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"],
"selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"],
"classifiers" : ["LGBMRegressor", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],
"classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],


"transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"],
Expand Down
56 changes: 51 additions & 5 deletions tpot2/search_spaces/pipelines/dynamic_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,62 @@ def _mutate_step(self, rng=None):


def _crossover(self, other, rng=None):
    """Recombine this pipeline with ``other`` using one crossover strategy.

    The two strategies (``_crossover_swap_random_steps`` and
    ``_crossover_inner_step``) are tried in a random order; the first one
    that reports success ends the attempt.

    Parameters
    ----------
    other : pipeline individual exposing the same crossover methods.
    rng : None, int seed, or numpy.random.Generator
        Source of randomness. ``None`` creates a fresh unseeded generator.

    Returns
    -------
    bool
        True if any strategy succeeded, False if all of them declined.
    """
    # default_rng() returns an existing Generator unchanged, so the caller's
    # rng is honored instead of being replaced by a fresh unseeded one.
    rng = np.random.default_rng(rng)

    cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
    rng.shuffle(cx_funcs)

    # any() short-circuits, so strategies after the first success never run.
    return any(cx_func(other, rng) for cx_func in cx_funcs)

def _crossover_swap_random_steps(self, other, rng):
    """Exchange a random subset of steps between this pipeline and ``other``.

    The positions are drawn independently for each pipeline, so a step may
    move to a different position. Both pipelines keep their lengths.

    Parameters
    ----------
    other : pipeline individual with a ``pipeline`` sequence of steps.
    rng : None, int seed, or numpy.random.Generator

    Returns
    -------
    bool
        False when either pipeline has fewer than two steps, True otherwise.
    """
    if len(self.pipeline) < 2 or len(other.pipeline) < 2:
        return False

    # Honor the caller's generator (default_rng passes a Generator through).
    rng = np.random.default_rng(rng)

    # Cap the swap at half of the shorter pipeline, but never below one step.
    max_steps = max(min(len(self.pipeline), len(other.pipeline)) // 2, 1)

    # rng.integers excludes the upper bound and rejects low == high, so the
    # single-step case has to be handled explicitly.
    if max_steps == 1:
        n_steps_to_swap = 1
    else:
        n_steps_to_swap = rng.integers(1, max_steps)

    # NOTE: the old tail replacement
    # (self.pipeline[idx:] = deepcopy(other.pipeline[idx2:])) is intentionally
    # gone; it overwrote part of self before the swap and changed its length.
    other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
    self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)

    # Swap pair by pair so this works without array-style index assignment.
    for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
        self.pipeline[self_idx], other.pipeline[other_idx] = (
            other.pipeline[other_idx],
            self.pipeline[self_idx],
        )

    return True

def _crossover_swap_step(self, other, rng):
    """Swap one randomly chosen step with the step at the same position in ``other``.

    Parameters
    ----------
    other : pipeline individual with a ``pipeline`` sequence of steps.
    rng : None, int seed, or numpy.random.Generator

    Returns
    -------
    bool
        False when the pipelines differ in length or have fewer than two
        steps, True after a swap.
    """
    n_steps = len(self.pipeline)
    if n_steps != len(other.pipeline) or n_steps < 2:
        return False

    # Honor the caller's generator (default_rng passes a Generator through).
    rng = np.random.default_rng(rng)

    # The lower bound is 1, so position 0 is never selected.
    idx = rng.integers(1, n_steps)

    self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
    return True

def _crossover_inner_step(self, other, rng):
    """Cross over individual steps with the same-position steps of ``other``.

    Each aligned pair of steps is selected with probability 0.5; for a
    selected pair the step's own ``crossover`` method is invoked.

    Parameters
    ----------
    other : pipeline individual with a ``pipeline`` sequence of steps.
    rng : None, int seed, or numpy.random.Generator

    Returns
    -------
    bool
        True if at least one step-level crossover reported success.
    """
    # Honor the caller's generator (default_rng passes a Generator through).
    rng = np.random.default_rng(rng)

    crossover_success = False
    # zip() stops at the shorter pipeline, so pipelines of different lengths
    # no longer raise IndexError on other.pipeline[idx].
    for own_step, other_step in zip(self.pipeline, other.pipeline):
        if rng.random() < 0.5 and own_step.crossover(other_step, rng):
            crossover_success = True

    return crossover_success

def export_pipeline(self, **graph_pipeline_args):
return [step.export_pipeline(**graph_pipeline_args) for step in self.pipeline]
Expand Down
11 changes: 9 additions & 2 deletions tpot2/search_spaces/pipelines/dynamicunion.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,19 @@ def _crossover_swap_random_steps(self, other, rng):
max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
max_steps = max(max_steps, 1)

n_steps_to_swap = rng.integers(1, max_steps)
if max_steps == 1:
n_steps_to_swap = 1
else:
n_steps_to_swap = rng.integers(1, max_steps)

other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)

self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]
# self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]

for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx]

return True


Expand Down
32 changes: 23 additions & 9 deletions tpot2/search_spaces/pipelines/sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ def _crossover(self, other, rng=None):
#swap a random step in the pipeline with the corresponding step in the other pipeline
if len(self.pipeline) != len(other.pipeline):
return False

if len(self.pipeline) < 2:
return False

rng = np.random.default_rng()
cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step]
Expand All @@ -51,8 +48,6 @@ def _crossover_swap_step(self, other, rng):
if len(self.pipeline) != len(other.pipeline):
return False

if len(self.pipeline) < 2:
return False

rng = np.random.default_rng()
idx = rng.integers(1,len(self.pipeline))
Expand All @@ -61,12 +56,29 @@ def _crossover_swap_step(self, other, rng):
return True

def _crossover_swap_random_steps(self, other, rng):
    """Swap a random subset of steps with the same-position steps of ``other``.

    Parameters
    ----------
    other : pipeline individual with a ``pipeline`` sequence of steps.
    rng : None, int seed, or numpy.random.Generator

    Returns
    -------
    bool
        False when the pipelines differ in length or have fewer than two
        steps, True after swapping.
    """
    n_steps = len(self.pipeline)
    if n_steps != len(other.pipeline) or n_steps < 2:
        return False

    # Honor the caller's generator (default_rng passes a Generator through).
    rng = np.random.default_rng(rng)

    # Swap at most half of the pipeline, but never fewer than one step.
    max_steps = max(n_steps // 2, 1)

    # rng.integers excludes the upper bound and rejects low == high, so the
    # single-step case has to be handled explicitly.
    if max_steps == 1:
        n_steps_to_swap = 1
    else:
        n_steps_to_swap = rng.integers(1, max_steps)

    # NOTE: the earlier boolean-mask swap (pipeline[idxs] with a mask array)
    # is intentionally gone; positions are swapped one at a time instead.
    indexes_to_swap = rng.choice(n_steps, n_steps_to_swap, replace=False)

    for idx in indexes_to_swap:
        self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]

    return True

def _crossover_swap_segment(self, other, rng):
Expand Down Expand Up @@ -105,6 +117,8 @@ def unique_id(self):
l = [step.unique_id() for step in self.pipeline]
l = ["SequentialPipeline"] + l
return TupleIndex(tuple(l))




class SequentialPipeline(SklearnIndividualGenerator):
Expand Down
20 changes: 16 additions & 4 deletions tpot2/search_spaces/pipelines/union.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,23 @@ def _crossover_swap_step(self, other, rng):

def _crossover_swap_random_steps(self, other, rng):
    """Exchange a random subset of steps between this pipeline and ``other``.

    The positions are drawn independently for each pipeline, so a step may
    land at a different position. Both pipelines keep their lengths.

    Parameters
    ----------
    other : pipeline individual with a ``pipeline`` sequence of steps.
    rng : None, int seed, or numpy.random.Generator

    Returns
    -------
    bool
        False when either pipeline is empty (nothing to swap), True otherwise.
    """
    # rng.choice cannot draw a sample from an empty range, so decline early.
    if len(self.pipeline) == 0 or len(other.pipeline) == 0:
        return False

    # Honor the caller's generator (default_rng passes a Generator through).
    rng = np.random.default_rng(rng)

    # Cap the swap at half of the shorter pipeline, but never below one step.
    max_steps = max(min(len(self.pipeline), len(other.pipeline)) // 2, 1)

    # rng.integers excludes the upper bound and rejects low == high, so the
    # single-step case has to be handled explicitly.
    if max_steps == 1:
        n_steps_to_swap = 1
    else:
        n_steps_to_swap = rng.integers(1, max_steps)

    # NOTE: the earlier boolean-mask swap (pipeline[idxs] with a mask array)
    # is intentionally gone; steps are swapped pair by pair instead.
    other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
    self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)

    for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
        self.pipeline[self_idx], other.pipeline[other_idx] = (
            other.pipeline[other_idx],
            self.pipeline[self_idx],
        )

    return True

def _crossover_inner_step(self, other, rng):
Expand Down