Note
Go to the end to download the full example code
XGBoost optimization with MLflow tracking#
A complete example showing hyperparameter optimization for XGBoost with MLflow integration for experiment tracking.
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import xgboost as xgb
import numpy as np
import plotly
import os
from mloptimizer.interfaces import HyperparameterSpaceBuilder, GeneticSearch
from mloptimizer.application.reporting.plots import (plotly_search_space, plotly_logbook,
plot_logbook)
import matplotlib.pyplot as plt
Load and prepare a complex classification dataset#
# Fetch the Forest CoverType data and keep a reproducible 2000-row subset
# so the example runs quickly.
print("Loading Forest CoverType dataset...")
data = fetch_covtype()
X = data.data
y = data.target - 1  # Shift the labels so that they start from 0

# Seed NumPy's global RNG, then draw 2000 distinct row positions.
np.random.seed(42)
sample_indices = np.random.choice(X.shape[0], size=2000, replace=False)
X, y = X[sample_indices], y[sample_indices]

print(f"Dataset shape: {X.shape}")
print(f"Number of classes: {np.unique(y).size}")
Loading Forest CoverType dataset...
Dataset shape: (2000, 54)
Number of classes: 7
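Split the data
# NOTE: the split code is missing from this page and is reconstructed here.
# test_size=0.2 is consistent with the reported test accuracy (281/400 = 0.7025);
# random_state=42 is an assumption.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)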
Define the XGBoost hyperparameter space using HyperparameterSpaceBuilder#
We can build a custom hyperparameter space by adding individual parameters. This gives fine-grained control over the search space for each hyperparameter.
# Build the XGBoost search space one parameter at a time. The parameters are
# added in the same order as before.
# NOTE(review): float parameters appear to be given as integer bounds that are
# divided by `scale` (e.g. 10..30 with scale=100 -> 0.10..0.30) - confirm
# against the HyperparameterSpaceBuilder documentation.
space_builder = HyperparameterSpaceBuilder()
space_builder = space_builder.add_int_param('max_depth', min_value=2, max_value=10)
space_builder = space_builder.add_float_param(
    'learning_rate', min_value=10, max_value=30, scale=100
)
space_builder = space_builder.add_int_param('n_estimators', min_value=50, max_value=300)
space_builder = space_builder.add_float_param(
    'subsample', min_value=60, max_value=100, scale=100
)
space_builder = space_builder.add_float_param(
    'colsample_bytree', min_value=60, max_value=100, scale=100
)
hyperparam_space = space_builder.build()
Configure and run the genetic optimization WITH MLFLOW#
Genetic Algorithm Configuration:
- generations: Number of evolutionary iterations
- population_size: Number of configurations per generation
- n_elites: Number of best individuals preserved each generation
- seed: Random seed for reproducibility
- use_mlflow: Enable MLflow experiment tracking
Note: The values are kept small so that the documentation builds quickly. For production use, increase to 20 or more generations.
# Settings for the genetic algorithm itself (kept small for a quick run).
genetic_params = dict(
    generations=5,
    population_size=8,
    n_elites=2,
    seed=42,
    use_mlflow=True,
    use_parallel=False,
)

# Settings describing what is optimized and how each candidate is scored.
search_settings = dict(
    estimator_class=xgb.XGBClassifier,
    hyperparam_space=hyperparam_space,
    cv=3,
    scoring='accuracy',
    disable_file_output=False,
)

opt = GeneticSearch(**search_settings, **genetic_params)

print("Starting XGBoost optimization with MLflow tracking...")
print(f"use_mlflow parameter: {opt.use_mlflow}")

# Launch the search on the training split.
opt.fit(X_train, y_train)
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/checkouts/master/examples/plot_xgboost_example.py:74: UserWarning: Some hyperparameters have very small integer ranges (< 10 distinct values): 'max_depth' (9 values: 2 to 10). Small ranges limit search granularity. Consider increasing the range or scale for float types.
opt = GeneticSearch(
Starting XGBoost optimization with MLflow tracking...
use_mlflow parameter: True
Genetic execution: 0%| | 0/6 [00:00<?, ?it/s, best fitness=?]2026/04/06 23:08:08 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/04/06 23:08:08 INFO mlflow.store.db.utils: Updating database tables
2026/04/06 23:08:09 INFO mlflow.tracking.fluent: Experiment with name 'mloptimizer' does not exist. Creating a new experiment.
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/mlflow/types/utils.py:452: UserWarning: Hint: Inferred schema contains integer column(s). Integer columns in Python cannot represent missing values. If your input data contains missing values at inference time, it will be encoded as floats and will cause a schema enforcement error. The best way to avoid this problem is to infer the model schema based on a realistic data sample (training dataset) that includes missing values. Alternatively, you can declare integer columns as doubles (float64) whenever these columns may have missing values. See `Handling Integers With Missing Values <https://www.mlflow.org/docs/latest/models.html#handling-integers-with-missing-values>`_ for more details.
warnings.warn(
Genetic execution: 17%|█▋ | 1/6 [00:02<00:13, 2.75s/it, best fitness=?]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 17%|█▋ | 1/6 [00:04<00:13, 2.75s/it, best fitness=0.708]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 17%|█▋ | 1/6 [00:05<00:13, 2.75s/it, best fitness=0.723]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 33%|███▎ | 2/6 [00:10<00:21, 5.42s/it, best fitness=0.723]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 50%|█████ | 3/6 [00:16<00:17, 5.70s/it, best fitness=0.723]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 50%|█████ | 3/6 [00:18<00:17, 5.70s/it, best fitness=0.726]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 67%|██████▋ | 4/6 [00:20<00:10, 5.39s/it, best fitness=0.726]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 67%|██████▋ | 4/6 [00:21<00:10, 5.39s/it, best fitness=0.726]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 83%|████████▎ | 5/6 [00:26<00:05, 5.44s/it, best fitness=0.726]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 83%|████████▎ | 5/6 [00:26<00:05, 5.44s/it, best fitness=0.73] /home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 83%|████████▎ | 5/6 [00:29<00:05, 5.44s/it, best fitness=0.732]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 100%|██████████| 6/6 [00:32<00:00, 5.46s/it, best fitness=0.732]/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/model_selection/_split.py:776: UserWarning: The least populated class in y has only 2 members, which is less than n_splits=3.
warnings.warn(
Genetic execution: 100%|██████████| 6/6 [00:36<00:00, 6.09s/it, best fitness=0.732]
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Get results and evaluate
# Score the best estimator found by the search on the held-out test split.
best_clf = opt.best_estimator_
y_pred = best_clf.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)

# Some classes are never predicted on this small subset. zero_division=0
# yields the same scores as the default ("warn") but does not emit
# UndefinedMetricWarning for those classes.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

print("\nOptimization completed!")
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test precision: {precision:.4f}")
print(f"Test recall: {recall:.4f}")
print(f"Test F1: {f1:.4f}")
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Optimization completed!
Test accuracy: 0.7025
Test precision: 0.6866
Test recall: 0.7025
Test F1: 0.6895
Generate visualizations
# History of evaluated individuals recorded by the search; reused by the
# later plotting and analysis steps.
population_df = opt.populations_

# Interactive search-space plot restricted to the columns listed below.
top_params = ['max_depth', 'learning_rate', 'n_estimators', 'subsample', 'fitness']
df_filtered = population_df[top_params]
g_search_space = plotly_search_space(df_filtered, top_params)

# width=None together with autosize lets the figure follow its container.
search_space_layout = dict(
    title="XGBoost Hyperparameter Search Space - CoverType Dataset",
    autosize=True,
    width=None,
    height=650,
)
g_search_space.update_layout(**search_space_layout)
plotly.io.show(g_search_space, config=dict(responsive=True))
Simple logbook visualization
# Draw the logbook with mloptimizer's non-interactive plot_logbook helper
# (presumably a matplotlib/seaborn figure - confirm against the library).
g_logbook_s = plot_logbook(opt.logbook_)
# Display the current matplotlib figure(s).
plt.show()

/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
/home/docs/checkouts/readthedocs.org/user_builds/mloptimizer/envs/master/lib/python3.11/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Evolution logbook visualization
# Interactive view of the logbook, built from the logbook and the
# population history.
g_logbook = plotly_logbook(opt.logbook_, population_df)

# width=None together with autosize lets the figure follow its container.
logbook_layout = dict(
    title="XGBoost Optimization Evolution - CoverType Dataset",
    autosize=True,
    width=None,
    height=500,
)
g_logbook.update_layout(**logbook_layout)
plotly.io.show(g_logbook, config=dict(responsive=True))
Analyze optimization results
# Summarize the run: evaluation counts, timing, and the change in average
# fitness between the first and the last generation.
print("\n=== Optimization Analysis ===")
print(f"Unique evaluations performed: {opt.n_trials_}")
print(f"Total individuals in population history: {len(population_df)}")
print(f"Optimization time: {opt.optimization_time_:.4f} seconds")
seconds_per_trial = opt.optimization_time_ / opt.n_trials_
print(f"Time per evaluation: {seconds_per_trial:.4f} seconds")
print(f"Generations completed: {opt.generations}")

# NOTE(review): the run reports 6 progress steps and 48 (= 6 x 8) history
# rows for 5 generations - confirm that population 1 is really the initial
# generation and `opt.generations` the last one.
generation_of = population_df['population']
initial_gen = population_df[generation_of == 1]
final_gen = population_df[generation_of == opt.generations]

initial_avg_fitness = initial_gen['fitness'].mean()
final_avg_fitness = final_gen['fitness'].mean()
improvement = final_avg_fitness - initial_avg_fitness

print(f"Average fitness improvement: {improvement:.4f}")
print(f"Initial average fitness: {initial_avg_fitness:.4f}")
print(f"Final average fitness: {final_avg_fitness:.4f}")
=== Optimization Analysis ===
Unique evaluations performed: 38
Total individuals in population history: 48
Optimization time: 37.7855 seconds
Time per evaluation: 0.9944 seconds
Generations completed: 5
Average fitness improvement: 0.0112
Initial average fitness: 0.7138
Final average fitness: 0.7250
# Access generated files
print("\n=== Generated Files ===")
graphics_path = opt._optimizer_service.optimizer.tracker.graphics_path
results_path = opt._optimizer_service.optimizer.tracker.results_path
print(f"Graphics path: {graphics_path}")
if os.path.exists(graphics_path):
print("Graphics files:", [f for f in os.listdir(graphics_path) if f.endswith('.html')])
print(f"Results path: {results_path}")
if os.path.exists(results_path):
print("Results files:", [f for f in os.listdir(results_path) if f.endswith('.csv')])
=== Generated Files ===
Graphics path: ./20260406_230807_XGBClassifier/graphics
Graphics files: ['logbook.html', 'search_space.html']
Results path: ./20260406_230807_XGBClassifier/results
Results files: ['logbook.csv', 'populations.csv']
MLflow UI Instructions#
To inspect the results recorded during the optimization, you can launch the MLflow user interface from a terminal.
Starting the MLflow UI
Open a console and run:
mlflow ui --port 5000
Then open a web browser and go to: http://localhost:5000
In the MLflow UI you can
View all optimization runs in the experiment
Compare hyperparameters and metrics across runs
See the evolution of fitness scores across generations
Inspect logs and stored artifacts (TODO)
Track model performance and optimization progress
Total running time of the script: (1 minutes 26.251 seconds)