Note
Go to the end to download the full example code
AdaBoost Optimization
Hyperparameter optimization for the AdaBoost ensemble algorithm.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import plotly
from mloptimizer.interfaces import HyperparameterSpaceBuilder, GeneticSearch
from mloptimizer.application.reporting.plots import plotly_search_space, plotly_logbook
Load and prepare the dataset
# Announce the load, fetch the dataset bundle, then pull out the feature
# matrix and the target vector as separate names.
print("Loading Breast Cancer dataset...")
data = load_breast_cancer()
X = data.data
y = data.target
print(f"Dataset shape: {X.shape}")
Loading Breast Cancer dataset...
Dataset shape: (569, 30)
Split the data
Define the hyperparameter space
# Ask the builder for its default search space for AdaBoostClassifier
# instead of declaring each hyperparameter range by hand.
hyperparam_space = HyperparameterSpaceBuilder.get_default_space(estimator_class=AdaBoostClassifier)
Configure and run the genetic optimization
# Settings forwarded verbatim to GeneticSearch: a small budget
# (5 generations, population of 8, 2 elites) with a fixed seed.
genetic_params = dict(
    generations=5,
    population_size=8,
    n_elites=2,
    seed=42,
    use_mlflow=False,
    use_parallel=False,
)

# The search is configured with cv=3 and scoring='accuracy'.
opt = GeneticSearch(
    estimator_class=AdaBoostClassifier,
    hyperparam_space=hyperparam_space,
    scoring='accuracy',
    cv=3,
    **genetic_params,
)

print("Starting AdaBoostClassifier optimization...")
# Only the training split is passed to the search.
opt.fit(X_train, y_train)
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/checkouts/master/examples/plot_adaboost.py:47: UserWarning: Expected mutations per offspring is very low (0.32). With mutpb=0.8, indpb=0.2, and 2 hyperparameters, the population will converge prematurely. Recommended: mutpb >= 0.8, indpb >= 0.2 (gives ~0.3 mutations/offspring).
opt = GeneticSearch(
Starting AdaBoostClassifier optimization...
Genetic execution: 0%| | 0/6 [00:00<?, ?it/s, best fitness=?]
Genetic execution: 17%|█▋ | 1/6 [00:00<00:01, 4.05it/s, best fitness=0.967]
Genetic execution: 17%|█▋ | 1/6 [00:01<00:05, 1.09s/it, best fitness=0.967]
Genetic execution: 17%|█▋ | 1/6 [00:01<00:07, 1.56s/it, best fitness=0.967]
Genetic execution: 17%|█▋ | 1/6 [00:01<00:09, 1.89s/it, best fitness=0.969]
Genetic execution: 33%|███▎ | 2/6 [00:04<00:08, 2.24s/it, best fitness=0.969]
Genetic execution: 33%|███▎ | 2/6 [00:04<00:08, 2.24s/it, best fitness=0.974]
Genetic execution: 50%|█████ | 3/6 [00:06<00:06, 2.28s/it, best fitness=0.974]
Genetic execution: 67%|██████▋ | 4/6 [00:09<00:04, 2.33s/it, best fitness=0.974]
Genetic execution: 83%|████████▎ | 5/6 [00:10<00:02, 2.09s/it, best fitness=0.974]
Genetic execution: 100%|██████████| 6/6 [00:12<00:00, 1.94s/it, best fitness=0.974]
Genetic execution: 100%|██████████| 6/6 [00:14<00:00, 2.36s/it, best fitness=0.974]
Evaluate the optimized model
# Score the best estimator found by the search on the held-out test split.
best_clf = opt.best_estimator_
y_pred = best_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
# average='binary' reports F1 for the positive class of a two-class target.
test_f1 = f1_score(y_test, y_pred, average='binary')

# Plain string literal: the message has no placeholders, so the f-prefix
# was dropped; the printed text is unchanged.
print("\nOptimization completed!")
print(f"Best parameters: {opt.best_params_}")
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test F1: {test_f1:.4f}")
Optimization completed!
Best parameters: {'algorithm': 'SAMME', 'estimator': None, 'learning_rate': 0.63, 'n_estimators': 67, 'random_state': 42}
Test accuracy: 0.9561
Test F1: 0.9660
Visualize the search space
# Restrict the recorded populations to the two tuned hyperparameters plus
# the fitness column, then plot that subset.
population_df = opt.populations_
top_params = ['n_estimators', 'learning_rate', 'fitness']
df_filtered = population_df[top_params]
g_search_space = plotly_search_space(df_filtered, top_params)

# width=None with autosize=True leaves the width to the responsive
# config passed to show(); only the height is fixed.
search_space_layout = {
    'title': "AdaBoostClassifier Hyperparameter Search Space",
    'autosize': True,
    'width': None,
    'height': 650,
}
g_search_space.update_layout(**search_space_layout)
plotly.io.show(g_search_space, config={'responsive': True})
Visualize the optimization evolution
# Plot the search's logbook together with the recorded populations.
g_logbook = plotly_logbook(opt.logbook_, population_df)

# Same responsive sizing as the search-space figure, with a shorter height.
logbook_layout = {
    'title': "AdaBoostClassifier Optimization Evolution",
    'autosize': True,
    'width': None,
    'height': 500,
}
g_logbook.update_layout(**logbook_layout)
plotly.io.show(g_logbook, config={'responsive': True})
Analyze optimization performance
# Summarize the cost of the search: evaluation count, population history
# size, total wall time, and the average time per evaluation.
print("\n=== Optimization Performance ===")
n_trials = opt.n_trials_
print(f"Unique evaluations performed: {n_trials}")
history_size = len(opt.populations_)
print(f"Total individuals in population history: {history_size}")
elapsed = opt.optimization_time_
print(f"Optimization time: {elapsed:.4f} seconds")
seconds_per_trial = opt.optimization_time_ / opt.n_trials_
print(f"Time per evaluation: {seconds_per_trial:.4f} seconds")
=== Optimization Performance ===
Unique evaluations performed: 33
Total individuals in population history: 48
Optimization time: 14.1626 seconds
Time per evaluation: 0.4292 seconds
Total running time of the script: (0 minutes 15.149 seconds)