"""This module is responsible for tuning the learning rate and batch size for
training a module."""
import ignite_simple # pylint: disable=unused-import
import typing
import importlib
from ignite_simple.hyperparams import HyperparameterSettings
from ignite_simple.analarams import AnalysisSettings
from ignite_simple.vary_bs_loader import BatchSizeVaryingDataLoader
from ignite_simple.range_finder import smooth_window_size, find_with_derivs
import ignite_simple.utils as utils
import ignite_simple.trainer
import torch
import torch.utils.data as data
import numpy as np
from ignite.engine import Events
import os
import uuid
import multiprocessing as mp
import logging
import time
import scipy.signal
import scipy.special
import json
import math
# Short alias used by the event handlers below; it is called as
# `_valldr(dataset, num_to_val)` and the result is fed to the evaluator.
_valldr = utils.create_partial_loader
# Exposed as a module attribute so it can be referenced by name through
# loader tuples of the form (__name__, '_task_loader', args, kwargs).
_task_loader = utils.task_loader
# Upper bound on how many training samples are evaluated after an iteration
# during the sweeps; callers additionally cap it by the dataset size.
NUM_TO_VAL_MAX = 64 * 3
def _store_lr_and_perf(lrs, perfs, cur_iter, num_to_val, tnr,
                       state: ignite_simple.trainer.TrainState):
    """Event handler which records the learning rate and the performance
    measured on a partial loader over the training set for the current
    iteration of a learning rate sweep.

    If the evaluated loss is nan, the remainder of both arrays (from the
    current position onward) is filled with nan and the trainer is asked to
    terminate.

    :param lrs: array receiving the learning rate for each iteration
    :param perfs: array receiving the performance for each iteration
    :param cur_iter: single-element list holding the current write position
    :param num_to_val: number of points handed to the partial loader
    :param tnr: the trainer engine; only ``terminate()`` is used
    :param state: the training state
    """
    state.evaluator.run(_valldr(state.train_set, num_to_val))
    metrics = state.evaluator.state.metrics
    position = cur_iter[0]

    if not math.isnan(metrics['loss']):
        lrs[position] = state.lr_scheduler.get_param()
        perfs[position] = metrics['perf']
        return

    # the network diverged; nothing after this point is meaningful
    lrs[position:] = float('nan')
    perfs[position:] = float('nan')
    tnr.terminate()
def _increment(cur, tnr, state):
    """Event handler which advances the counter stored in the first slot of
    ``cur`` by one. ``tnr`` and ``state`` are accepted only to match the
    handler signature and are not used.
    """
    cur[0] = cur[0] + 1
def _lr_vs_perf(model_loader, dataset_loader, loss_loader, outfile,
                accuracy_style, lr_start, lr_end, batch_size,
                cycle_time_epochs):
    """Trains a model for half a cycle while recording the learning rate and
    performance after every iteration, then saves the arrays ``lrs`` and
    ``perfs`` to ``outfile`` as a compressed npz.

    :param model_loader: forwarded to the trainer settings
    :param dataset_loader: invoked here for the training set size and
        forwarded to the task loader
    :param loss_loader: forwarded to the trainer settings
    :param outfile: where the npz file is written
    :param accuracy_style: forwarded to the trainer settings
    :param lr_start: forwarded to the trainer settings
    :param lr_end: forwarded to the trainer settings
    :param batch_size: the batch size handed to the task loader
    :param cycle_time_epochs: the cycle time in epochs; training runs for
        half of this (integer division)
    """
    train_set, _ = utils.invoke(dataset_loader)
    sweep_epochs = cycle_time_epochs // 2
    total_iters = (len(train_set) // batch_size) * sweep_epochs

    lrs = np.zeros(total_iters)
    perfs = np.zeros(total_iters)
    cur_iter = [0]
    num_to_val = min(NUM_TO_VAL_MAX, len(train_set))

    loader_spec = (
        __name__, '_task_loader',
        (dataset_loader, batch_size, True, True), dict()
    )
    # record first, then advance the shared position
    record_handler = (
        Events.ITERATION_COMPLETED,
        (__name__, '_store_lr_and_perf',
         (lrs, perfs, cur_iter, num_to_val), dict())
    )
    advance_handler = (
        Events.ITERATION_COMPLETED,
        (__name__, '_increment', (cur_iter,), dict())
    )

    tnr_settings = ignite_simple.trainer.TrainSettings(
        accuracy_style, model_loader, loss_loader, loader_spec,
        (record_handler, advance_handler),
        None, lr_start, lr_end, cycle_time_epochs, sweep_epochs
    )
    ignite_simple.trainer.train(tnr_settings)

    np.savez_compressed(outfile, lrs=lrs, perfs=perfs)
def _task_loader_bs(dataset_loader, batch_start, batch_end, epochs):
    """Task loader for the batch size sweep: invokes the dataset loader and
    wraps the training set in a BatchSizeVaryingDataLoader built from
    ``batch_start``, ``batch_end`` and ``epochs``.

    :returns: (train_set, val_set, train_loader)
    """
    train_set, val_set = utils.invoke(dataset_loader)
    return (
        train_set,
        val_set,
        BatchSizeVaryingDataLoader(train_set, batch_start, batch_end, epochs)
    )
def _store_bs_and_perf(bss, perfs, cur, num_to_val, tnr,
                       state: ignite_simple.trainer.TrainState):
    """Event handler for the batch size sweep. Evaluates the model and folds
    the performance into a running mean for the batch size currently being
    tracked; when the loader reports a different batch size, the mean for the
    finished batch size is written to ``perfs`` and tracking restarts.

    :param bss: array of the distinct batch sizes, in the order encountered
    :param perfs: array receiving the mean performance per batch size
    :param cur: two-element list; ``cur[0]`` is the tuple
        (batch size, sum of performances, number of performances) and
        ``cur[1]`` is the index into ``bss`` / ``perfs`` being accumulated
    :param num_to_val: number of points handed to the partial loader
    :param tnr: the trainer engine (unused)
    :param state: the training state

    :raises Exception: if the tracked batch size disagrees with ``bss`` or
        the sweep moves past the last entry of ``bss``
    """
    state.evaluator.run(_valldr(state.train_set, num_to_val))
    perf = state.evaluator.state.metrics['perf']

    cur_bs, running_total, running_count = cur[0]
    cur_ind = cur[1]
    if bss[cur_ind] != cur_bs:
        raise Exception(f'bss[{cur_ind}] = {bss[cur_ind]}, cur_bs={cur_bs}')

    bs_this = int(state.train_loader.last_iter.last_batch_size)
    if bs_this != cur_bs:
        # the previous batch size is finished: flush its mean and start
        # accumulating for the new one
        perfs[cur_ind] = running_total / running_count
        cur[0] = (bs_this, perf, 1)
        cur[1] = cur_ind + 1
        if cur[1] >= bss.shape[0]:
            raise Exception(f'after batch size {cur_bs} got batch size {bs_this} - out of range')
        return

    cur[0] = (cur_bs, running_total + perf, running_count + 1)
def _store_last_bs(perfs, cur, tnr, state):
    """Event handler which writes the mean performance of the batch size
    still being accumulated in ``cur`` into ``perfs`` at index ``cur[1]``.
    ``tnr`` and ``state`` are accepted only to match the handler signature.
    """
    tracked = cur[0]
    # tracked is (batch size, sum of performances, number of performances)
    mean_perf = tracked[1] / tracked[2]
    perfs[cur[1]] = mean_perf
def _batch_vs_perf(model_loader, dataset_loader, loss_loader, outfile,
                   accuracy_style, batch_start, batch_end, lr_start, lr_end,
                   cycle_time_epochs):
    """Trains a model while the batch size is varied by a
    BatchSizeVaryingDataLoader, recording the mean performance for each
    distinct batch size, then saves ``bss`` and ``perfs`` to ``outfile`` as a
    compressed npz.

    :param model_loader: forwarded to the trainer settings
    :param dataset_loader: invoked here to obtain the training set and
        forwarded to the task loader
    :param loss_loader: forwarded to the trainer settings
    :param outfile: where the npz file is written
    :param accuracy_style: forwarded to the trainer settings
    :param batch_start: forwarded to the BatchSizeVaryingDataLoader
    :param batch_end: forwarded to the BatchSizeVaryingDataLoader
    :param lr_start: forwarded to the trainer settings
    :param lr_end: forwarded to the trainer settings
    :param cycle_time_epochs: half of this (integer division) is the number
        of epochs handed to the BatchSizeVaryingDataLoader
    """
    train_set, _ = utils.invoke(dataset_loader)

    # N = 0.5 * k * (k + 1)
    # => k^2 + k - 2N = 0
    # -> k = (-1 + sqrt(1 + 8N)) / 2

    sweep_epochs = cycle_time_epochs // 2

    # Walk the loader once without training to learn which batch sizes will
    # be produced; consecutive repeats collapse into a single entry.
    probe_loader = BatchSizeVaryingDataLoader(
        train_set, batch_start, batch_end, sweep_epochs)
    distinct_sizes = []
    dry = probe_loader.dry_iter()
    for _ in dry:
        size = int(dry.last_batch_size)
        if distinct_sizes and size == distinct_sizes[-1]:
            continue
        distinct_sizes.append(size)

    bss = np.array(distinct_sizes)
    perfs = np.zeros(bss.shape, dtype='float32')
    # [(batch size, sum of perfs, number of perfs), index into bss]
    cur = [(bss[0], 0, 0), 0]
    num_to_val = min(NUM_TO_VAL_MAX, len(train_set))

    loader_spec = (
        __name__, '_task_loader_bs',
        (dataset_loader, batch_start, batch_end, sweep_epochs), dict()
    )
    accumulate_handler = (
        Events.ITERATION_COMPLETED,
        (__name__, '_store_bs_and_perf',
         (bss, perfs, cur, num_to_val), dict())
    )
    flush_handler = (
        Events.COMPLETED,
        (__name__, '_store_last_bs', (perfs, cur), dict())
    )

    tnr_settings = ignite_simple.trainer.TrainSettings(
        accuracy_style, model_loader, loss_loader, loader_spec,
        (accumulate_handler, flush_handler),
        None, lr_start, lr_end, 2, 1
    )
    ignite_simple.trainer.train(tnr_settings)

    np.savez_compressed(outfile, bss=bss, perfs=perfs)
def _store_perf(perfs, cur, num_to_val, tnr, state):
    """Event handler which evaluates the model on a partial loader over the
    training set and stores the resulting performance in ``perfs`` at the
    index held in ``cur[0]``. ``tnr`` is unused.
    """
    state.evaluator.run(_valldr(state.train_set, num_to_val))
    measured = state.evaluator.state.metrics['perf']
    perfs[cur[0]] = measured
def _train_with_perf(model_loader, dataset_loader, loss_loader, outfile,
                     accuracy_style, batch_size, lr_start, lr_end,
                     cycle_time_epochs, epochs, with_raw):
    """Trains a model with a fixed batch size and saves its final performance
    (evaluated with ``num_to_val`` set to the size of the training set) to
    ``outfile`` under ``final_perf``. If ``with_raw`` is truthy, the
    performance after every iteration is stored as well under ``perf``.

    :param model_loader: forwarded to the trainer settings
    :param dataset_loader: invoked here for the training set size and
        forwarded to the task loader
    :param loss_loader: forwarded to the trainer settings
    :param outfile: where the npz file is written
    :param accuracy_style: forwarded to the trainer settings
    :param batch_size: the batch size handed to the task loader
    :param lr_start: forwarded to the trainer settings
    :param lr_end: forwarded to the trainer settings
    :param cycle_time_epochs: forwarded to the trainer settings
    :param epochs: forwarded to the trainer settings; also used to size the
        per-iteration array
    :param with_raw: whether per-iteration performance is also recorded
    """
    train_set, _ = utils.invoke(dataset_loader)
    dataset_size = len(train_set)

    final_perf = np.zeros(1)
    handlers = [
        (Events.COMPLETED,
         (__name__, '_store_perf',
          (final_perf, [0], dataset_size), dict()))
    ]

    if with_raw:
        perf = np.zeros((dataset_size // batch_size) * epochs)
        ind = [0]
        # record first, then advance the shared position
        handlers.append(
            (Events.ITERATION_COMPLETED,
             (__name__, '_store_perf',
              (perf, ind, min(NUM_TO_VAL_MAX, dataset_size)), dict()))
        )
        handlers.append(
            (Events.ITERATION_COMPLETED,
             (__name__, '_increment', (ind,), dict()))
        )

    loader_spec = (
        __name__, '_task_loader',
        (dataset_loader, batch_size, True, True), dict()
    )
    tnr_settings = ignite_simple.trainer.TrainSettings(
        accuracy_style, model_loader, loss_loader, loader_spec,
        handlers, None, lr_start, lr_end, cycle_time_epochs, epochs
    )
    ignite_simple.trainer.train(tnr_settings)

    if with_raw:
        np.savez_compressed(outfile, final_perf=final_perf, perf=perf)
    else:
        np.savez_compressed(outfile, final_perf=final_perf)
def _run_and_collate(fn, kwargs, cores,
                     min_iters) -> typing.Dict[str, np.ndarray]:
    """Runs ``fn`` at least ``min_iters`` times and stacks what each run
    saved. Every run receives ``kwargs`` plus an ``outfile`` keyword naming
    the npz file it must write inside a freshly created scratch folder.

    Runs happen in rounds: each round starts ``cores - 1`` child processes
    and performs one more run in the calling process, so the total number of
    runs is a multiple of ``cores`` when ``cores`` is at least 1.

    The children of a round are always joined and the scratch folder is
    always removed, even when a run raises.

    :param fn: the callable to run; must accept ``outfile`` as a keyword
    :param kwargs: keyword arguments for every run (copied per run)
    :param cores: how many runs are performed concurrently per round; values
        below 1 are treated as 1
    :param min_iters: the minimum number of runs

    :returns: for every key stored by the runs, the per-run arrays stacked
        along a new first axis; empty if no run was requested

    :raises FileNotFoundError: if a run did not produce its output file
    """
    # guards against a non-terminating loop when cores <= 0
    num_children = max(cores - 1, 0)

    folder = str(uuid.uuid4())
    os.makedirs(folder)
    try:
        i = 0
        while i < min_iters:
            procs = []
            try:
                for procid in range(i, i + num_children):
                    proc_kwargs = kwargs.copy()
                    proc_kwargs['outfile'] = os.path.join(
                        folder, f'{procid}.npz')
                    proc = mp.Process(target=fn, kwargs=proc_kwargs)
                    proc.start()
                    procs.append(proc)
                i += num_children

                # the calling process does one run itself instead of idling
                my_kwargs = kwargs.copy()
                my_kwargs['outfile'] = os.path.join(folder, f'{i}.npz')
                fn(**my_kwargs)
                i += 1
            finally:
                # never leave children behind, even if the local run failed
                for proc in procs:
                    proc.join()

        all_lists = dict()
        for j in range(i):
            fname = os.path.join(folder, f'{j}.npz')
            with np.load(fname) as infile:
                for key, val in infile.items():
                    all_lists.setdefault(key, []).append(val)
            os.remove(fname)
    finally:
        # drop whatever is left (only non-empty after a failure)
        for leftover in os.listdir(folder):
            os.remove(os.path.join(folder, leftover))
        os.rmdir(folder)

    return {key: np.stack(val) for key, val in all_lists.items()}
def _select_lr_from(model_loader, dataset_loader, loss_loader,
                    accuracy_style, outfile, cores, settings,
                    store_up_to, logger, cycle_time_epochs,
                    batch_size, lr_start, lr_end
                    ) -> typing.Tuple[float, float, int, int]:
    """Sweeps the learning rate between ``lr_start`` and ``lr_end`` over
    several trials, selects a range of learning rates from the result, and
    saves the intermediate arrays to ``outfile`` as a compressed npz.

    :param model_loader: forwarded to each trial
    :param dataset_loader: forwarded to each trial
    :param loss_loader: forwarded to each trial
    :param accuracy_style: forwarded to each trial
    :param outfile: where the npz with the sweep analysis is written
    :param cores: how many trials are run concurrently
    :param settings: only ``lr_min_inits`` (the minimum number of trials) is
        read here
    :param store_up_to: unused here
    :param logger: the logger to report progress to
    :param cycle_time_epochs: forwarded to each trial
    :param batch_size: the batch size used by each trial
    :param lr_start: the learning rate at the start of the sweep
    :param lr_end: the learning rate at the end of the sweep

    :returns: (lr_min, lr_max, window_size, num_trials), where the first two
        are whatever ``find_with_derivs`` selects (presumably floats),
        window_size is the smoothing window and num_trials the number of
        trials collated
    """
    result = _run_and_collate(
        _lr_vs_perf, {
            'model_loader': model_loader,
            'dataset_loader': dataset_loader,
            'loss_loader': loss_loader,
            'accuracy_style': accuracy_style,
            'lr_start': lr_start,
            'lr_end': lr_end,
            'batch_size': batch_size,
            'cycle_time_epochs': cycle_time_epochs
        }, cores, settings.lr_min_inits
    )

    logger.debug('Organizing and interpreting learning rate sweep...')

    lrs = result['lrs']
    lr_perfs = result['perfs']

    if np.isnan(lrs.sum()):
        # some trial was cut short (nan-filled); keep only the iterations
        # before the first one for which any trial holds a nan
        clip_at = np.isnan(lrs.sum(0)).argmax()
        lrs = lrs[:, :clip_at]
        lr_perfs = lr_perfs[:, :clip_at]

    # a single row is kept as the shared x-axis; this assumes every trial
    # visits the same learning rates
    lrs = lrs[0]

    num_trials = lr_perfs.shape[0]
    window_size = smooth_window_size(lrs.shape[0])

    lr_smoothed_perfs = scipy.signal.savgol_filter(
        lr_perfs, window_size, 1)

    # errstate restores the floating point error handling even if
    # logsumexp raises
    with np.errstate(under='ignore'):
        lse_smoothed_lr_perfs = scipy.special.logsumexp(
            lr_smoothed_perfs, axis=0
        )

    lse_smoothed_lr_perf_then_derivs = np.gradient(lse_smoothed_lr_perfs)

    lr_perf_derivs = np.gradient(lr_perfs, axis=-1)
    smoothed_lr_perf_derivs = scipy.signal.savgol_filter(
        lr_perfs, window_size, 1, deriv=1)
    mean_smoothed_lr_perf_derivs = smoothed_lr_perf_derivs.mean(0)

    lr_min, lr_max = find_with_derivs(lrs, lse_smoothed_lr_perf_then_derivs)

    np.savez_compressed(
        outfile,
        lrs=lrs, perfs=lr_perfs,
        smoothed_perfs=lr_smoothed_perfs,
        lse_smoothed_perfs=lse_smoothed_lr_perfs,
        perf_derivs=lr_perf_derivs,
        smoothed_perf_derivs=smoothed_lr_perf_derivs,
        mean_smoothed_perf_derivs=mean_smoothed_lr_perf_derivs,
        lse_smoothed_perf_then_derivs=lse_smoothed_lr_perf_then_derivs,
        lr_range=np.array([lr_min, lr_max]))

    logger.info('Learning rate range: [%s, %s) (found from %s trials)',
                lr_min, lr_max, num_trials)

    return lr_min, lr_max, window_size, num_trials
def _select_batch_size_from(model_loader, dataset_loader, loss_loader,
                            accuracy_style, mainfolder, cores, settings,
                            store_up_to, logger, cycle_time_epochs, bss,
                            collated_smoothed_bs_perf_derivs,
                            bs_min, bs_max, lr_min_over_batch,
                            lr_max_over_batch
                            ) -> typing.Tuple[int, np.ndarray, int]:
    """Picks a batch size from the range ``[bs_min, bs_max)`` by fully
    training models at a handful of candidate batch sizes and comparing the
    log-sum-exp of their final performances across trials. The comparison is
    saved to ``bs_sampled.npz`` inside ``mainfolder``.

    If the range holds more candidates than ``settings.batch_pts``, that many
    are sampled without replacement, weighted by a softmax over
    ``collated_smoothed_bs_perf_derivs``.

    :param model_loader: forwarded to each training run
    :param dataset_loader: forwarded to each training run
    :param loss_loader: forwarded to each training run
    :param accuracy_style: forwarded to each training run
    :param mainfolder: the folder ``bs_sampled.npz`` is written to
    :param cores: the maximum number of concurrently running child processes;
        values below 1 are treated as 1
    :param settings: ``batch_pts`` and ``batch_pt_min_inits`` are read
    :param store_up_to: ``hparam_selection_specific_imgs`` decides whether
        per-iteration performance is recorded as well
    :param logger: the logger to report progress to
    :param cycle_time_epochs: used both as the cycle time and as the number
        of epochs of every training run
    :param bss: the distinct batch sizes of the batch size sweep
    :param collated_smoothed_bs_perf_derivs: one value per entry of ``bss``
        used to weight the sampling
    :param bs_min: the smallest batch size to consider; must occur in bss
    :param bs_max: the exclusive upper end of the range; must occur in bss
    :param lr_min_over_batch: multiplied by the batch size to obtain the
        starting learning rate of a run
    :param lr_max_over_batch: multiplied by the batch size to obtain the
        ending learning rate of a run

    :returns: (best batch size, batch sizes compared, trials per batch size).
        When the range holds a single batch size nothing is trained and the
        result is ``(bs_min, empty array, 0)``.
    """
    settings: HyperparameterSettings
    store_up_to: AnalysisSettings

    bs_min_ind = int((bss == bs_min).argmax())
    bs_max_ind = int((bss == bs_max).argmax())

    incl_raw = store_up_to.hparam_selection_specific_imgs

    if bs_min_ind == bs_max_ind:
        logger.info('Only found a single good batch size, using that without '
                    + 'further investigation')
        # keep the shape of the regular return value so that callers can
        # always unpack three values
        return bs_min, np.array([], dtype=int), 0

    if bs_max_ind - bs_min_ind <= settings.batch_pts:
        logger.debug('Found %s good batch sizes and willing to try up to %s, '
                     + 'so testing all of them.', bs_max_ind - bs_min_ind,
                     settings.batch_pts)
        test_pts = bss[bs_min_ind:bs_max_ind]
    else:
        probs = collated_smoothed_bs_perf_derivs[bs_min_ind:bs_max_ind]

        # errstate restores the floating point error handling even if
        # something in here raises
        with np.errstate(under='ignore'):
            probs = scipy.special.softmax(probs)

            # make sure no candidate has a vanishing chance of being drawn;
            # fall back to a uniform distribution if that does not settle
            iters = 0
            while (probs < 1e-6).sum() != 0:
                if iters > 10:
                    probs[:] = 1 / probs.shape[0]
                    break
                probs[probs < 1e-6] = 1e-6
                probs = scipy.special.softmax(probs)
                iters += 1

        test_pts = np.random.choice(
            np.arange(bs_min_ind, bs_max_ind), settings.batch_pts,
            replace=False, p=probs)
        test_pts = bss[test_pts]

    logger.debug('Comparing batch sizes: %s', test_pts)

    # here we could naively just loop over the test_pts, but this will be
    # a very inefficient use of our cores if we are on fast settings and
    # have many cores. Furthermore, some batch sizes will almost certainly
    # run faster than others. So alas, in the name of performance, this is
    # going to look a lot like _run_and_collate but dissimilar enough to not be
    # worth calling it

    folder = str(uuid.uuid4())
    os.makedirs(folder)

    loops = 0  # number spawned // test_pts.shape[0]
    last_loop_printed = 0
    cur_ind = 0  # in test_pts
    current_processes = []
    target_num_loops = max(
        settings.batch_pt_min_inits,
        cores // test_pts.shape[0]
    )
    # with cores <= 0 the wait below could never make room for a process
    max_processes = max(cores, 1)

    while loops < target_num_loops:
        while len(current_processes) >= max_processes:
            if last_loop_printed < loops:
                logger.debug('On loop %s/%s',
                             loops + 1, target_num_loops)
                last_loop_printed = loops
            time.sleep(0.1)

            # iterate backwards so popping does not shift unvisited entries
            for i in range(len(current_processes) - 1, -1, -1):
                if not current_processes[i].is_alive():
                    current_processes.pop(i)

        fname = os.path.join(folder, f'{cur_ind}_{loops}.npz')
        bs = int(test_pts[cur_ind])
        proc = mp.Process(
            target=_train_with_perf,
            args=(
                model_loader, dataset_loader, loss_loader, fname,
                accuracy_style, bs, lr_min_over_batch * bs,
                lr_max_over_batch * bs, cycle_time_epochs,
                cycle_time_epochs, incl_raw
            )
        )
        proc.start()
        current_processes.append(proc)

        cur_ind += 1
        if cur_ind >= test_pts.shape[0]:
            cur_ind = 0
            loops += 1

    logger.debug('Waiting for %s currently running trials to end...',
                 len(current_processes))

    for proc in current_processes:
        proc.join()

    logger.debug('Organizing and interpreting batch size performance info...')

    all_final_perfs = np.zeros((test_pts.shape[0], loops))
    all_final_lse_perfs = np.zeros(test_pts.shape[0])

    raws_dict = dict()

    for i, bs in enumerate(test_pts):
        trials = []
        trials_raw = [] if incl_raw else None
        for trial in range(loops):
            fname = os.path.join(folder, f'{i}_{trial}.npz')
            with np.load(fname) as infile:
                final_perf = infile['final_perf']
                if np.isnan(final_perf).sum() > 0:
                    logger.debug('Found some nans, treating them as inf bad')
                    final_perf[np.isnan(final_perf)] = 0

                trials.append(final_perf)

                if incl_raw:
                    perf = infile['perf']
                    if np.isnan(perf).sum() > 0:
                        logger.debug('Found some nans in raw perfs')
                        perf[np.isnan(perf)] = 0

                    trials_raw.append(perf)
            os.remove(fname)

        trials = np.concatenate(trials)
        with np.errstate(under='ignore'):
            lse_trials = scipy.special.logsumexp(trials)

        all_final_perfs[i] = trials
        all_final_lse_perfs[i] = lse_trials

        if incl_raw:
            trials_raw = np.stack(trials_raw)
            smoothed_trials_raw = scipy.signal.savgol_filter(
                trials_raw, smooth_window_size(trials_raw.shape[1]), 1
            )
            with np.errstate(under='ignore'):
                lse_smoothed_trials_raw = scipy.special.logsumexp(
                    smoothed_trials_raw, axis=0)

            raws_dict[f'raw_{bs}'] = trials_raw
            raws_dict[f'smoothed_raw_{bs}'] = smoothed_trials_raw
            raws_dict[f'lse_smoothed_raw_{bs}'] = lse_smoothed_trials_raw

    os.rmdir(folder)

    best_ind = np.argmax(all_final_lse_perfs)
    best_bs = int(test_pts[best_ind])
    np.savez_compressed(
        os.path.join(mainfolder, 'bs_sampled.npz'),
        bss=test_pts, final=all_final_perfs, lse_final=all_final_lse_perfs,
        **raws_dict
    )

    logger.info('Found best batch size of those tested: %s', best_bs)

    return best_bs, test_pts, loops
def tune(model_loader: typing.Tuple[str, str, tuple, dict],
         dataset_loader: typing.Tuple[str, str, tuple, dict],
         loss_loader: typing.Tuple[str, str, tuple, dict],
         accuracy_style: str,
         folder: str, cores: int,
         settings: HyperparameterSettings,
         store_up_to: AnalysisSettings,
         logger: typing.Optional[logging.Logger] = None):
    r"""Finds the optimal learning rate and batch size for the specified model
    on the specified dataset trained with the given loss. Stores the following
    information:

    .. code:: none

        folder/
            final.json
                {'lr_start': float, 'lr_end': float, 'batch_size': int,
                 'cycle_size_epochs': int, 'epochs': int}
            misc.json
                Variables that went into the final output. Typically selected
                via heuristics, constants, or come from the hyperparameter
                settings. Some may be deduced from the numpy array files
                directly. Besides the keys below, the corresponding entries
                for the batch size sweep, the batch size sampling and the
                second learning rate sweep are stored.

                {
                    'initial_batch_size': int,
                    'initial_cycle_time': int,
                    'initial_min_lr': float,
                    'initial_max_lr': float,
                    'initial_lr_num_to_val': int,
                    'initial_lr_num_trials': int,
                    'initial_lr_window_size': int,
                    'initial_lr_sweep_result_min': float,
                    'initial_lr_sweep_result_max': float,
                    'second_min_lr': float,
                    'second_max_lr': float
                }

            lr_vs_perf.npz
                lrs=np.ndarray[number of batches]
                perfs=np.ndarray[trials, number of batches]
                smoothed_perfs=np.ndarray[trials, number of batches]
                lse_smoothed_perfs=np.ndarray[number of batches]
                perf_derivs=np.ndarray[trials, number_of_batches]
                smoothed_perf_derivs=np.ndarray[trials, number of batches]
                mean_smoothed_perf_derivs=np.ndarray[number of batches]
                lse_smoothed_perf_then_derivs=np.ndarray[number of batches]
                    lse = log sum exp. when there are many trials, the mean
                    gets overly pessimistic from bad initializations,
                    so LSE is more stable. however, we can't do lse on the
                    smoothed derivatives because then derivatives will tend
                    to be positive everywhere, so we have to smooth first,
                    then take lse, then take derivative
                lr_range=np.ndarray[2]
                    min, max for the good range of learning rates
            bs_vs_perf.npz (bs=batch_size)
                Where a single batch is tried multiple times, we take the
                mean over those times to ensure bss contains only unique
                values and hence can be treated like lrs

                bss=np.ndarray[number of batches]
                perfs=np.ndarray[trials, number of batches]
                smoothed_perfs=np.ndarray[trials, number of batches]
                lse_smoothed_perfs=np.ndarray[number of batches]
                perf_derivs=np.ndarray[trials, number_of_batches]
                smoothed_perf_derivs=np.ndarray[trials, number of batches]
                mean_smoothed_perf_derivs=np.ndarray[number of batches]
                lse_smoothed_perf_then_derivs=np.ndarray[number of batches]
                bs_range=np.ndarray[2]
                    min, max for the good range of batch sizes
            lr_vs_perf2.npz
                only stored if settings.rescan_lr_after_bs and the selected
                batch size differs from the initial one. looks exactly
                like lr_vs_perf.npz, except these runs are performed with
                the newly selected batch size
            bs_sampled.npz
                only stored if settings.batch_pts > 1 and more than one good
                batch size was found
                bss=np.ndarray[num bs attempted]
                final=np.ndarray[num bs attempted, trials]
                    final performance for batch size i for each trial
                lse_final=np.ndarray[num bs attempted]
                    final logsumexp performance for each batch size, argmax
                    is the selected batch size. If you want this to nicely
                    be below the maximum, subtract log(trials) and note
                    this does not affect the argmax
                raw_i=np.ndarray[trials, number of batches]
                    only if store_up_to.hparam_selection_specific_imgs,
                    same for the *_raw_i

                    i is a sampled batch size and raw_i[t, j] is the
                    performance of the model after iteration j for
                    batch size i on trial t.
                smoothed_raw_i=np.ndarray[trials, number of batches]
                lse_smoothed_raw_i=np.ndarray[number of batches]

    :param model_loader: describes which module and corresponding attribute can
        be passed what arguments and keyword arguments to produce the
        nn.Module with a random initialization which can be trained

        .. code:: python

            model_loader = ('torch.nn', 'Linear', (20, 10),
                            {'bias': True})

    :param dataset_loader: describes which module and corresponding attribute
        can be passed what arguments and keyword arguments to produce the
        training dataset and validation dataset.

    :param loss_loader: describes which module and corresponding attribute can
        be passed what arguments and keyword arguments to produce the nn.Module
        that converts (y_pred, y) to a scalar which should be minimized

    :param accuracy_style: forwarded unchanged to the trainer settings of
        every training run

    :param folder: where to save the output to; created here and must not
        exist yet

    :param cores: how many cores to use; 1 for just the main process

    :param settings: the settings to use to tune the learning rate and batch
        size

    :param store_up_to: the information stored should be at least what is
        required to produce this analysis

    :param logger: the logger to report progress to; defaults to the logger
        named after this module
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    os.makedirs(folder)

    train_set, _ = ignite_simple.utils.invoke(dataset_loader)

    logger.info('Performing initial learning rate sweep...')
    init_batch_size = 64
    # smaller datasets get longer cycles: between 4 and 10 epochs
    init_cycle_time = int(np.clip(150000 // len(train_set), 2, 5) * 2)

    lr_min, lr_max, lr_initial_window_size, lr_initial_trials = (
        _select_lr_from(
            model_loader, dataset_loader, loss_loader, accuracy_style,
            os.path.join(folder, 'lr_vs_perf.npz'), cores, settings,
            store_up_to, logger, init_cycle_time, init_batch_size,
            settings.lr_start, settings.lr_end
        )
    )
    initial_lr_sweep_result_min, initial_lr_sweep_result_max = lr_min, lr_max
    initial_lr_num_to_val = min(NUM_TO_VAL_MAX, len(train_set))

    logger.info('Performing initial batch size sweep...')

    # The trick is the increasing the batch size requires a corresponding
    # increase in learning rate. We don't want to include the lr range
    # except insofar as taking that into account as otherwise these
    # results would be even muddier than they already are
    lr_avg_over_batch = ((lr_min + lr_max) / 2) / init_batch_size
    bs_sweep_lr_min = lr_avg_over_batch * settings.batch_start
    bs_sweep_lr_max = lr_avg_over_batch * settings.batch_end

    result = _run_and_collate(
        _batch_vs_perf, {
            'model_loader': model_loader,
            'dataset_loader': dataset_loader,
            'loss_loader': loss_loader,
            'accuracy_style': accuracy_style,
            'batch_start': settings.batch_start,
            'batch_end': settings.batch_end,
            'lr_start': bs_sweep_lr_min,
            'lr_end': bs_sweep_lr_max,
            'cycle_time_epochs': init_cycle_time
        }, cores, settings.batch_rn_min_inits
    )

    logger.debug('Organizing and interpreting batch size sweep...')

    # one row is kept as the shared x-axis; this assumes every trial visits
    # the same batch sizes
    bss = result['bss'][0]
    bs_perfs = result['perfs']
    bs_sweep_trials = int(bs_perfs.shape[0])

    window_size = smooth_window_size(bs_perfs.shape[1])
    smoothed_bs_perf = scipy.signal.savgol_filter(
        bs_perfs, window_size, 1
    )

    # errstate restores the floating point error handling even if
    # logsumexp raises
    with np.errstate(under='ignore'):
        lse_smoothed_bs_perf = scipy.special.logsumexp(
            smoothed_bs_perf, axis=0
        )

    lse_smoothed_bs_perf_then_derivs = np.gradient(
        lse_smoothed_bs_perf, axis=0)

    bs_perf_derivs = np.gradient(bs_perfs, axis=-1)
    smoothed_bs_perf_derivs = scipy.signal.savgol_filter(
        bs_perfs, window_size, 1, deriv=1)
    mean_smoothed_bs_perf_derivs = smoothed_bs_perf_derivs.mean(0)

    bs_min, bs_max = find_with_derivs(bss, lse_smoothed_bs_perf_then_derivs)
    bs_min, bs_max = int(bs_min), int(bs_max)

    logger.info('Batch size range: [%s, %s) (found from %s trials)',
                bs_min, bs_max, bs_perfs.shape[0])

    np.savez_compressed(
        os.path.join(folder, 'bs_vs_perf.npz'),
        bss=bss, perfs=bs_perfs,
        perf_derivs=bs_perf_derivs,
        smoothed_perfs=smoothed_bs_perf,
        lse_smoothed_perfs=lse_smoothed_bs_perf,
        smoothed_perf_derivs=smoothed_bs_perf_derivs,
        mean_smoothed_perf_derivs=mean_smoothed_bs_perf_derivs,
        lse_smoothed_perf_then_derivs=lse_smoothed_bs_perf_then_derivs,
        bs_range=np.array([bs_min, bs_max]))

    if settings.batch_pts > 1:
        batch_size, batch_pts_checked, num_batch_loops = _select_batch_size_from(
            model_loader, dataset_loader, loss_loader, accuracy_style, folder,
            cores, settings, store_up_to, logger, init_cycle_time, bss,
            lse_smoothed_bs_perf_then_derivs, bs_min, bs_max,
            lr_min / init_batch_size, lr_max / init_batch_size)
    else:
        batch_size = (bs_min + bs_max) // 2
        batch_pts_checked = []
        num_batch_loops = -1
        logger.info('Choosing mean batch size: %s', batch_size)

    if settings.rescan_lr_after_bs and batch_size != init_batch_size:
        logger.info('Finding learning rate range on new batch size...')
        second_min_lr = (settings.lr_start / init_batch_size) * batch_size
        second_max_lr = (settings.lr_end / init_batch_size) * batch_size
        # the bounds above are scaled for the new batch size, so the sweep
        # itself has to run with that batch size as well
        lr_min, lr_max, second_lr_window_size, second_lr_num_trials = _select_lr_from(
            model_loader, dataset_loader, loss_loader, accuracy_style,
            os.path.join(folder, 'lr_vs_perf2.npz'), cores, settings,
            store_up_to, logger, init_cycle_time, batch_size,
            second_min_lr, second_max_lr
        )
    else:
        second_min_lr = float('nan')
        second_max_lr = float('nan')
        second_lr_window_size = float('nan')
        second_lr_num_trials = float('nan')
        # no rescan: carry the range found at the initial batch size over to
        # the selected one by scaling it linearly with the batch size
        lr_min = (lr_min / init_batch_size) * batch_size
        lr_max = (lr_max / init_batch_size) * batch_size

    with open(os.path.join(folder, 'final.json'), 'w') as outfile:
        json.dump({'lr_start': lr_min, 'lr_end': lr_max,
                   'batch_size': batch_size,
                   'cycle_size_epochs': init_cycle_time,
                   'epochs': init_cycle_time * 4}, outfile)

    with open(os.path.join(folder, 'misc.json'), 'w') as outfile:
        json.dump(
            {
                'initial_batch_size': init_batch_size,
                'initial_cycle_time': init_cycle_time,
                'initial_min_lr': settings.lr_start,
                'initial_max_lr': settings.lr_end,
                'initial_lr_num_to_val': initial_lr_num_to_val,
                'initial_lr_num_trials': lr_initial_trials,
                'initial_lr_window_size': lr_initial_window_size,
                'initial_lr_sweep_result_min': initial_lr_sweep_result_min,
                'initial_lr_sweep_result_max': initial_lr_sweep_result_max,
                'initial_avg_lr': (initial_lr_sweep_result_min + initial_lr_sweep_result_max) / 2,
                'initial_min_batch': settings.batch_start,
                'initial_max_batch': settings.batch_end,
                'initial_batch_num_to_val': initial_lr_num_to_val,
                'initial_batch_num_trials': bs_sweep_trials,
                'batch_sweep_result_min': bs_min,
                'batch_sweep_result_max': bs_max,
                'batch_sweep_result': batch_size,
                'batch_sweep_num_pts': len(batch_pts_checked),
                'batch_sweep_pts_list': list(int(i) for i in batch_pts_checked),
                'batch_sweep_trials_each': num_batch_loops,
                'second_min_lr': second_min_lr,
                'second_max_lr': second_max_lr,
                'second_lr_num_trials': second_lr_num_trials,
                'second_lr_window_size': second_lr_window_size,
                'lr_sweep_result_min': lr_min,
                'lr_sweep_result_max': lr_max,
            },
            outfile
        )

    logger.debug('Tuning completed successfully')