Quick Visualization for Hyperparameter Optimization Analysis
Optuna provides various visualization features in optuna.visualization to analyze optimization results visually.
This tutorial walks you through this module by visualizing the optimization history of a multi-layer perceptron for MNIST implemented in PyTorch.
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import optuna
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice
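The functions in optuna.visualization draw their figures with plotly, so plotly must be installed. optuna.visualization.is_available() reports whether the plotting backend can be used; a quick check before running the rest of the tutorial:

# Fails fast if plotly (>= 4.0.0) is not installed.
assert optuna.visualization.is_available()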
SEED = 42
BATCH_SIZE = 256
DEVICE = torch.device("cpu")
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
DIR = ".."
# Reduce the number of samples for faster build.
N_TRAIN_SAMPLES = BATCH_SIZE * 30
N_VALID_SAMPLES = BATCH_SIZE * 10
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
Before defining the objective function, prepare some utility functions for training and evaluation.
def train_model(model, optimizer, train_loader):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if batch_idx * BATCH_SIZE >= N_TRAIN_SAMPLES:
            break
        data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
def eval_model(model, valid_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            if batch_idx * BATCH_SIZE >= N_VALID_SAMPLES:
                break
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    accuracy = correct / min(len(valid_loader.dataset), N_VALID_SAMPLES)
    return accuracy
Define the objective function.
def objective(trial):
    train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(
            DIR, train=True, download=True, transform=torchvision.transforms.ToTensor()
        ),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(
            DIR, train=False, download=True, transform=torchvision.transforms.ToTensor()
        ),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    layers = []
    in_features = 28 * 28
    for i in range(3):
        # Optimize the number of units of each layer and the initial learning rate.
        out_features = trial.suggest_int("n_units_l{}".format(i), 4, 128)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        in_features = out_features
    layers.append(nn.Linear(in_features, 10))
    layers.append(nn.LogSoftmax(dim=1))
    model = nn.Sequential(*layers).to(DEVICE)

    # Sample the initial learning rate from [1e-5, 1e-1] in log space.
    optimizer = torch.optim.Adam(
        model.parameters(), trial.suggest_float("lr_init", 1e-5, 1e-1, log=True)
    )

    for step in range(10):
        model.train()
        train_model(model, optimizer, train_loader)

        accuracy = eval_model(model, valid_loader)

        # Report intermediate objective value.
        trial.report(accuracy, step)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.TrialPruned()

    return accuracy
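Before launching a full study, it can help to smoke-test the objective once with hand-picked hyperparameters. A sketch (not part of the original tutorial) using optuna.trial.FixedTrial, which replays fixed parameter values and treats report()/should_prune() as no-ops; the values below are arbitrary:

# Smoke-test the objective with fixed, arbitrary hyperparameters.
fixed_trial = optuna.trial.FixedTrial(
    {"n_units_l0": 64, "n_units_l1": 32, "n_units_l2": 16, "lr_init": 1e-3}
)
print("accuracy with fixed parameters:", objective(fixed_trial))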
Run hyperparameter optimization with optuna.pruners.MedianPruner.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=100, timeout=600)
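Before plotting, it is worth summarizing the study itself. A short sketch (not in the original tutorial) that counts how many trials the MedianPruner stopped early and prints the best result:

from optuna.trial import TrialState

pruned_trials = [t for t in study.trials if t.state == TrialState.PRUNED]
complete_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]

print("Number of finished trials:", len(study.trials))
print("Number of pruned trials:", len(pruned_trials))
print("Number of complete trials:", len(complete_trials))
print("Best accuracy:", study.best_value)
print("Best hyperparameters:", study.best_params)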
Plot functions
Visualize the optimization history. See plot_optimization_history() for the details.
plot_optimization_history(study)
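Each function in optuna.visualization returns a plotly Figure, so outside a notebook you display or save it yourself. A sketch (the file names are only examples; write_image additionally requires the kaleido package):

fig = plot_optimization_history(study)
fig.show()  # opens in a browser, or renders inline in a notebook
fig.write_html("optimization_history.html")  # interactive, self-contained HTML
# fig.write_image("optimization_history.png")  # static export; needs kaleido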
Visualize the learning curves of the trials. See plot_intermediate_values() for the details.
plot_intermediate_values(study)
Visualize high-dimensional parameter relationships. See plot_parallel_coordinate() for the details.
plot_parallel_coordinate(study)
Select parameters to visualize.
plot_parallel_coordinate(study, params=["lr_init", "n_units_l0"])
Visualize hyperparameter relationships. See plot_contour() for the details.
plot_contour(study)
Select parameters to visualize.
plot_contour(study, params=["n_units_l0", "n_units_l1"])
Visualize individual hyperparameters as a slice plot. See plot_slice() for the details.
plot_slice(study)
Select parameters to visualize.
plot_slice(study, params=["n_units_l0", "n_units_l1"])
Visualize parameter importances. See plot_param_importances() for the details.
plot_param_importances(study)
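By default the importances are computed with a fANOVA-based evaluator. Assuming your Optuna version exposes the evaluator argument (check your version's API reference), you can swap in the tree-based MeanDecreaseImpurityImportanceEvaluator:

from optuna.importance import MeanDecreaseImpurityImportanceEvaluator

# Alternative to the default fANOVA evaluator; scores parameters by a
# random forest's mean decrease in impurity.
plot_param_importances(study, evaluator=MeanDecreaseImpurityImportanceEvaluator())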
Visualize the empirical distribution function (EDF) of the objective value. See plot_edf() for the details.
plot_edf(study)
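An EDF is most useful for comparing studies. Assuming your Optuna version accepts a list of studies in plot_edf (an assumption; older versions take a single study only), a sketch comparing the TPE study above against a random-search baseline:

# Hypothetical comparison study using random search instead of TPE.
study_random = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.RandomSampler(seed=SEED),
    pruner=optuna.pruners.MedianPruner(),
)
study_random.optimize(objective, n_trials=20, timeout=600)

plot_edf([study, study_random])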
Total running time of the script: ( 7 minutes 12.554 seconds)