Logistic Regression Optimization

Hyperparameter optimization for Logistic Regression with regularization.

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import plotly
from mloptimizer.interfaces import HyperparameterSpaceBuilder, GeneticSearch
from mloptimizer.application.reporting.plots import plotly_search_space, plotly_logbook

Load and prepare the dataset

# Load the Breast Cancer dataset bundled with scikit-learn and separate the
# feature matrix from the target labels.
print("Loading Breast Cancer dataset...")
data = load_breast_cancer()
X = data.data
y = data.target

# Print the feature-matrix shape as a quick sanity check.
print(f"Dataset shape: {X.shape}")

Loading Breast Cancer dataset...
Dataset shape: (569, 30)

Split the data

# Hold out 20% of the samples for the final evaluation. Stratifying on y keeps
# the class proportions alike in both parts, and the fixed random_state makes
# the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

Define the hyperparameter space

# Use mloptimizer's default search space for LogisticRegression instead of
# declaring each hyperparameter by hand. Per the optimizer repr printed after
# fitting, this space fixes max_iter=1000 and evolves C, penalty and solver.
hyperparam_space = HyperparameterSpaceBuilder.get_default_space(
    estimator_class=LogisticRegression
)

Configure and run the genetic optimization

# Genetic-algorithm settings, gathered in one dict so they can be unpacked
# into the GeneticSearch constructor below.
genetic_params = {
    "generations": 5,
    "population_size": 8,
    "n_elites": 2,
    "seed": 42,
    "use_mlflow": False,
    "use_parallel": False,
}

# Candidates are scored with 3-fold cross-validation on accuracy.
opt = GeneticSearch(
    estimator_class=LogisticRegression,
    hyperparam_space=hyperparam_space,
    cv=3,
    scoring="accuracy",
    **genetic_params,
)

# Only the training split is used for the search; the test split stays
# untouched until the final evaluation.
print("Starting Logistic Regression optimization...")
opt.fit(X_train, y_train)

/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/checkouts/master/examples/plot_logistic_regression.py:47: UserWarning: Expected mutations per offspring is very low (0.48). With mutpb=0.8, indpb=0.2, and 3 hyperparameters, the population will converge prematurely. Recommended: mutpb >= 0.8, indpb >= 0.2 (gives ~0.5 mutations/offspring).
  opt = GeneticSearch(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/checkouts/master/examples/plot_logistic_regression.py:47: UserWarning: Some hyperparameters have very small integer ranges (< 10 distinct values): 'penalty' (2 values: 0 to 1), 'solver' (2 values: 0 to 1). Small ranges limit search granularity. Consider increasing the range or scale for float types.
  opt = GeneticSearch(
Starting Logistic Regression optimization...

Genetic execution:   0%|          | 0/6 [00:00<?, ?it/s, best fitness=?]
Genetic execution:  17%|█▋        | 1/6 [00:00<00:00, 88.88it/s, best fitness=0.947]
Genetic execution:  17%|█▋        | 1/6 [00:00<00:02,  2.06it/s, best fitness=0.96] /home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Genetic execution:  33%|███▎      | 2/6 [00:02<00:04,  1.03s/it, best fitness=0.96]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Genetic execution:  50%|█████     | 3/6 [00:04<00:04,  1.43s/it, best fitness=0.96]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Genetic execution:  67%|██████▋   | 4/6 [00:06<00:03,  1.73s/it, best fitness=0.96]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Genetic execution:  83%|████████▎ | 5/6 [00:08<00:01,  1.90s/it, best fitness=0.96]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Genetic execution: 100%|██████████| 6/6 [00:10<00:00,  2.01s/it, best fitness=0.96]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/linear_model/_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  warnings.warn(

Genetic execution: 100%|██████████| 6/6 [00:12<00:00,  2.16s/it, best fitness=0.96]

GeneticSearch(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=True),
              estimator_class=<class 'sklearn.linear_model._logistic.LogisticRegression'>,
              generations=5,
              hyperparam_space=HyperparameterSpace(fixed_hyperparams={'max_iter': 1000}, evolvable_hyperparams={'C': Hyperparam('C', 1, 1000, 'float', 100), 'penalty': Hyperparam('penalty', 0, 1, 'list', ['l1', 'l2']), 'solver': Hyperparam('solver', 0, 1, 'list', ['liblinear', 'saga'])}),
              n_elites=2, population_size=8, scoring='accuracy', seed=42,
              use_parallel=False)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Evaluate the optimized model

# Score the best estimator found by the search on the held-out test split,
# which was never passed to opt.fit().
best_clf = opt.best_estimator_
y_pred = best_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_f1 = f1_score(y_test, y_pred, average='binary')

# Plain string literal: the message has no placeholders, so the f-prefix the
# example previously used here was redundant (output is unchanged).
print("\nOptimization completed!")
print(f"Best parameters: {opt.best_params_}")
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test F1: {test_f1:.4f}")

Optimization completed!
Best parameters: {'C': 7.6, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 1000, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l1', 'random_state': 42, 'solver': 'liblinear', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
Test accuracy: 0.9825
Test F1: 0.9861

Visualize the search space

# Every individual evaluated during the search; indexed by column name below,
# so presumably a pandas DataFrame -- confirm against mloptimizer docs.
population_df = opt.populations_
# Columns to plot: the three evolved hyperparameters plus the fitness score.
top_params = ['C', 'penalty', 'solver', 'fitness']
df_filtered = population_df[top_params]
g_search_space = plotly_search_space(df_filtered, top_params)
# width=None with autosize=True is meant to let the figure follow the width of
# its container; only the height is pinned.
g_search_space.update_layout(
    title="Logistic Regression Hyperparameter Search Space",
    autosize=True,
    width=None,
    height=650
)
plotly.io.show(g_search_space, config={'responsive': True})

Visualize the optimization evolution

# Plot how the search progressed, built from the optimizer's logbook and the
# population history collected in population_df.
g_logbook = plotly_logbook(opt.logbook_, population_df)
# Same responsive sizing as the search-space figure: no fixed width, only the
# height is pinned.
g_logbook.update_layout(
    title="Logistic Regression Optimization Evolution",
    autosize=True,
    width=None,
    height=500
)
plotly.io.show(g_logbook, config={'responsive': True})

Analyze optimization performance

# Summarize the cost of the search: evaluation count, size of the recorded
# population history, total time, and the average time per evaluation.
print("\n=== Optimization Performance ===")
n_evaluations = opt.n_trials_
elapsed_seconds = opt.optimization_time_
print(f"Unique evaluations performed: {n_evaluations}")
print(f"Total individuals in population history: {len(opt.populations_)}")
print(f"Optimization time: {elapsed_seconds:.4f} seconds")
print(f"Time per evaluation: {elapsed_seconds / n_evaluations:.4f} seconds")

=== Optimization Performance ===
Unique evaluations performed: 33
Total individuals in population history: 48
Optimization time: 12.9662 seconds
Time per evaluation: 0.3929 seconds

Total running time of the script: (0 minutes 14.071 seconds)

Gallery generated by Sphinx-Gallery