Source code for ignite_simple.tuner

"""This module is responsible for tuning the learning rate and batch size for
training a module."""
import ignite_simple  # pylint: disable=unused-import
import typing
import importlib
from ignite_simple.hyperparams import HyperparameterSettings
from ignite_simple.analarams import AnalysisSettings
from ignite_simple.vary_bs_loader import BatchSizeVaryingDataLoader
from ignite_simple.range_finder import smooth_window_size, find_with_derivs
import ignite_simple.utils as utils
import ignite_simple.trainer
import torch
import torch.utils.data as data
import numpy as np
from ignite.engine import Events
import os
import uuid
import multiprocessing as mp
import logging
import time
import scipy.signal
import scipy.special
import json
import math

_valldr = utils.create_partial_loader
_task_loader = utils.task_loader

NUM_TO_VAL_MAX = 64 * 3
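
# Illustrative sketch, not part of ignite_simple: the loader tuples used
# throughout this module have the form (module_name, attribute_name, args,
# kwargs). utils.invoke is assumed to resolve them roughly as below; this
# helper exists only to document that convention and is never called here.
def _example_invoke(loader):
    module_name, attr, args, kwargs = loader
    fn = getattr(importlib.import_module(module_name), attr)
    return fn(*args, **kwargs)
# e.g. _example_invoke(('torch.nn', 'Linear', (20, 10), {'bias': True}))
# would construct torch.nn.Linear(20, 10, bias=True).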

def _store_lr_and_perf(lrs, perfs, cur_iter, num_to_val, tnr,
                       state: ignite_simple.trainer.TrainState):
    valldr = _valldr(state.train_set, num_to_val)
    state.evaluator.run(valldr)

    loss = state.evaluator.state.metrics['loss']
    if math.isnan(loss):
        # our network has died!
        lrs[cur_iter[0]:] = float('nan')
        perfs[cur_iter[0]:] = float('nan')
        tnr.terminate()
        return

    lrs[cur_iter[0]] = state.lr_scheduler.get_param()
    perfs[cur_iter[0]] = state.evaluator.state.metrics['perf']

def _increment(cur, tnr, state):
    cur[0] += 1


def _lr_vs_perf(model_loader, dataset_loader, loss_loader, outfile,
                accuracy_style, lr_start, lr_end, batch_size,
                cycle_time_epochs):
    train_set, _ = utils.invoke(dataset_loader)

    num_train_iters = (len(train_set) // batch_size) * (cycle_time_epochs // 2)

    cur_iter = [0]
    num_to_val = min(NUM_TO_VAL_MAX, len(train_set))

    lrs = np.zeros(num_train_iters)
    perfs = np.zeros(num_train_iters)

    tnr_settings = ignite_simple.trainer.TrainSettings(
        accuracy_style, model_loader, loss_loader,
        (__name__, '_task_loader',
         (dataset_loader, batch_size, True, True), dict()),
        (
            (Events.ITERATION_COMPLETED,
             (__name__, '_store_lr_and_perf',
              (lrs, perfs, cur_iter, num_to_val), dict())),
            (Events.ITERATION_COMPLETED,
             (__name__, '_increment', (cur_iter,), dict()))
        ),
        None,
        lr_start,
        lr_end,
        cycle_time_epochs,
        cycle_time_epochs // 2
    )
    ignite_simple.trainer.train(tnr_settings)
    np.savez_compressed(outfile, lrs=lrs, perfs=perfs)
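
# Usage sketch (illustrative, never called by this module): the file written
# by _lr_vs_perf can be inspected directly with numpy. The default filename
# below is hypothetical.
def _example_inspect_lr_sweep(outfile='lr_sweep.npz'):
    with np.load(outfile) as data:
        lrs, perfs = data['lrs'], data['perfs']
    # lrs[i] is the learning rate in effect at iteration i, perfs[i] the
    # validation performance measured right after that iteration.
    return lrs, perfs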

def _task_loader_bs(dataset_loader, batch_start, batch_end, epochs):
    train_set, val_set = utils.invoke(dataset_loader)
    train_loader = BatchSizeVaryingDataLoader(
        train_set, batch_start, batch_end, epochs)
    return train_set, val_set, train_loader


def _store_bs_and_perf(bss, perfs, cur, num_to_val, tnr,
                       state: ignite_simple.trainer.TrainState):
    valldr = _valldr(state.train_set, num_to_val)
    state.evaluator.run(valldr)
    perf = state.evaluator.state.metrics['perf']

    cur_bs, cur_sum, cur_num = cur[0]
    cur_ind = cur[1]

    if bss[cur_ind] != cur_bs:
        raise Exception(f'bss[{cur_ind}] = {bss[cur_ind]}, cur_bs={cur_bs}')

    bs_this = int(state.train_loader.last_iter.last_batch_size)

    if bs_this == cur_bs:
        cur_sum += perf
        cur_num += 1
        cur[0] = (cur_bs, cur_sum, cur_num)
    else:
        avg = cur_sum / cur_num
        perfs[cur_ind] = avg
        cur[0] = (bs_this, perf, 1)
        cur[1] = cur_ind + 1

        if cur[1] >= bss.shape[0]:
            raise Exception(f'after batch size {cur_bs} got batch size {bs_this} - out of range')

def _store_last_bs(perfs, cur, tnr, state):
    perfs[cur[1]] = cur[0][1] / cur[0][2]

def _batch_vs_perf(model_loader, dataset_loader, loss_loader, outfile,
                   accuracy_style, batch_start, batch_end, lr_start, lr_end,
                   cycle_time_epochs):
    train_set, _ = utils.invoke(dataset_loader)

    # N = 0.5 * k * (k + 1)
    # => k^2 + k - 2N = 0
    # -> k = (-1 + sqrt(1 + 8N)) / 2
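    # Worked example (added for clarity): if the batch size grows by one each
    # batch, then with N = 10 samples per epoch, 1 + 2 + 3 + 4 = 10, and
    # indeed k = (-1 + sqrt(1 + 80)) / 2 = 4 batches cover the epoch.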
    epochs = cycle_time_epochs // 2
    train_loader = BatchSizeVaryingDataLoader(
        train_set, batch_start, batch_end, epochs)

    unique_batch_sizes = []
    miter = train_loader.dry_iter()
    for _ in miter:
        bs = int(miter.last_batch_size)
        if not unique_batch_sizes or bs != unique_batch_sizes[-1]:
            unique_batch_sizes.append(bs)

    bss = np.array(unique_batch_sizes)
    perfs = np.zeros(bss.shape, dtype='float32')

    cur = [(bss[0], 0, 0), 0]
    num_to_val = min(NUM_TO_VAL_MAX, len(train_set))

    tnr_settings = ignite_simple.trainer.TrainSettings(
        accuracy_style, model_loader, loss_loader,
        (__name__, '_task_loader_bs',
         (dataset_loader, batch_start, batch_end, epochs), dict()),
        (
            (Events.ITERATION_COMPLETED,
             (__name__, '_store_bs_and_perf',
              (bss, perfs, cur, num_to_val), dict())),
            (Events.COMPLETED,
             (__name__, '_store_last_bs', (perfs, cur), dict()))
        ),
        None,
        lr_start,
        lr_end,
        2,
        1
    )
    ignite_simple.trainer.train(tnr_settings)
    np.savez_compressed(outfile, bss=bss, perfs=perfs)

def _store_perf(perfs, cur, num_to_val, tnr, state):
    valldr = _valldr(state.train_set, num_to_val)
    state.evaluator.run(valldr)
    perfs[cur[0]] = state.evaluator.state.metrics['perf']

def _train_with_perf(model_loader, dataset_loader, loss_loader, outfile,
                     accuracy_style, batch_size, lr_start, lr_end,
                     cycle_time_epochs, epochs, with_raw):
    train_set, _ = utils.invoke(dataset_loader)

    final_perf = np.zeros(1)
    final_ind = [0]
    handlers = [
        (Events.COMPLETED,
         (__name__, '_store_perf',
          (final_perf, final_ind, len(train_set)), dict()))
    ]
    if with_raw:
        num_iters = (len(train_set) // batch_size) * epochs
        num_to_val = min(NUM_TO_VAL_MAX, len(train_set))
        perf = np.zeros(num_iters)
        ind = [0]
        handlers.extend([
            (Events.ITERATION_COMPLETED,
             (__name__, '_store_perf',
              (perf, ind, num_to_val), dict())),
            (Events.ITERATION_COMPLETED,
             (__name__, '_increment', (ind,), dict()))
        ])

    tnr_settings = ignite_simple.trainer.TrainSettings(
        accuracy_style, model_loader, loss_loader,
        (__name__, '_task_loader',
         (dataset_loader, batch_size, True, True), dict()),
        handlers, None, lr_start, lr_end, cycle_time_epochs, epochs
    )
    ignite_simple.trainer.train(tnr_settings)

    to_save = {'final_perf': final_perf}
    if with_raw:
        to_save['perf'] = perf
    np.savez_compressed(outfile, **to_save)

def _run_and_collate(fn, kwargs, cores,
                     min_iters) -> typing.Dict[str, np.ndarray]:
    folder = str(uuid.uuid4())
    os.makedirs(folder)

    i = 0
    while i < min_iters:
        procs = []
        for procid in range(i, i + cores - 1):
            proc_kwargs = kwargs.copy()
            proc_kwargs['outfile'] = os.path.join(folder, f'{procid}.npz')
            proc = mp.Process(target=fn, kwargs=proc_kwargs)
            proc.start()
            procs.append(proc)
        i += cores - 1

        my_kwargs = kwargs.copy()
        my_kwargs['outfile'] = os.path.join(folder, f'{i}.npz')
        fn(**my_kwargs)
        i += 1

        for proc in procs:
            proc.join()

    all_lists = dict()
    with np.load(os.path.join(folder, '0.npz')) as infile:
        for key, val in infile.items():
            all_lists[key] = [val]

    os.remove(os.path.join(folder, '0.npz'))

    for j in range(1, i):
        fname = os.path.join(folder, f'{j}.npz')
        with np.load(fname) as infile:
            for key, val in infile.items():
                all_lists[key].append(val)
        os.remove(fname)

    os.rmdir(folder)
    return dict((key, np.stack(val)) for key, val in all_lists.items())
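
# Usage sketch (illustrative only): _run_and_collate expects ``fn`` to accept
# an ``outfile`` keyword and write an .npz there; it then stacks each saved
# array across runs, so every value in the returned dict gains a leading
# "trial" axis. The toy function below is an assumption used only to show the
# contract and is never called by this module.
def _example_collate_contract():
    def toy(outfile, n):
        np.savez_compressed(outfile, xs=np.arange(n))

    collated = _run_and_collate(toy, {'n': 4}, cores=1, min_iters=3)
    # collated['xs'].shape == (3, 4): three runs stacked along axis 0
    return collated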

def _select_lr_from(model_loader, dataset_loader, loss_loader,
                    accuracy_style, outfile, cores, settings,
                    store_up_to, logger, cycle_time_epochs,
                    batch_size, lr_start,
                    lr_end) -> typing.Tuple[float, float, int, int]:
    result = _run_and_collate(
        _lr_vs_perf, {
            'model_loader': model_loader,
            'dataset_loader': dataset_loader,
            'loss_loader': loss_loader,
            'accuracy_style': accuracy_style,
            'lr_start': lr_start,
            'lr_end': lr_end,
            'batch_size': batch_size,
            'cycle_time_epochs': cycle_time_epochs
        }, cores, settings.lr_min_inits
    )

    logger.debug('Organizing and interpreting learning rate sweep...')

    lrs = result['lrs']
    lr_perfs = result['perfs']
    if np.isnan(lrs.sum()):
        clip_at = np.isnan(lrs.sum(0)).argmax()
        lrs = lrs[:, :clip_at]
        lr_perfs = lr_perfs[:, :clip_at]

    lrs = lrs[0]
    num_trials = lr_perfs.shape[0]
    window_size = smooth_window_size(lrs.shape[0])

    lr_smoothed_perfs = scipy.signal.savgol_filter(
        lr_perfs, window_size, 1)

    old_settings = np.seterr(under='ignore')
    lse_smoothed_lr_perfs = scipy.special.logsumexp(
        lr_smoothed_perfs, axis=0
    )
    np.seterr(**old_settings)
    lse_smoothed_lr_perf_then_derivs = np.gradient(lse_smoothed_lr_perfs)
    lr_perf_derivs = np.gradient(lr_perfs, axis=-1)
    smoothed_lr_perf_derivs = scipy.signal.savgol_filter(
        lr_perfs, window_size, 1, deriv=1)
    mean_smoothed_lr_perf_derivs = smoothed_lr_perf_derivs.mean(0)

    lr_min, lr_max = find_with_derivs(lrs, lse_smoothed_lr_perf_then_derivs)

    np.savez_compressed(
        outfile,
        lrs=lrs, perfs=lr_perfs,
        smoothed_perfs=lr_smoothed_perfs,
        lse_smoothed_perfs=lse_smoothed_lr_perfs,
        perf_derivs=lr_perf_derivs,
        smoothed_perf_derivs=smoothed_lr_perf_derivs,
        mean_smoothed_perf_derivs=mean_smoothed_lr_perf_derivs,
        lse_smoothed_perf_then_derivs=lse_smoothed_lr_perf_then_derivs,
        lr_range=np.array([lr_min, lr_max]))

    logger.info('Learning rate range: [%s, %s) (found from %s trials)',
                lr_min, lr_max, num_trials)
    return lr_min, lr_max, window_size, num_trials
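
# Illustrative sketch of the ordering used above: smooth each trial, then
# logsumexp across trials, then differentiate. Taking the logsumexp of
# per-trial derivatives instead would bias the derivatives toward being
# positive everywhere, which is why the derivative is taken last. This
# standalone function is an assumption for demonstration and is not used by
# the tuner.
def _example_smooth_lse_deriv(perfs, window_size):
    # perfs: (trials, points) array of raw performances
    smoothed = scipy.signal.savgol_filter(perfs, window_size, 1)
    lse = scipy.special.logsumexp(smoothed, axis=0)  # collapse trials
    return np.gradient(lse)  # derivative w.r.t. the sweep position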


def _select_batch_size_from(model_loader, dataset_loader, loss_loader,
                            accuracy_style, mainfolder, cores, settings,
                            store_up_to, logger, cycle_time_epochs, bss,
                            collated_smoothed_bs_perf_derivs,
                            bs_min, bs_max, lr_min_over_batch,
                            lr_max_over_batch
                            ) -> typing.Tuple[int, np.ndarray, int]:
    settings: HyperparameterSettings
    store_up_to: AnalysisSettings

    bs_min_ind = int((bss == bs_min).argmax())
    bs_max_ind = int((bss == bs_max).argmax())

    incl_raw = store_up_to.hparam_selection_specific_imgs

    if bs_min_ind == bs_max_ind:
        logger.info('Only found a single good batch size, using that without '
                    + 'further investigation')
        return bs_min

    if bs_max_ind - bs_min_ind <= settings.batch_pts:
        logger.debug('Found %s good batch sizes and willing to try up to %s, '
                     + 'so testing all of them.', bs_max_ind - bs_min_ind,
                     settings.batch_pts)
        test_pts = bss[bs_min_ind:bs_max_ind]
    else:
        probs = collated_smoothed_bs_perf_derivs[bs_min_ind:bs_max_ind]
        old_settings = np.seterr(under='ignore')
        probs = scipy.special.softmax(probs)

        iters = 0
        while (probs < 1e-6).sum() != 0:
            if iters > 10:
                probs[:] = 1 / probs.shape[0]
                break
            probs[probs < 1e-6] = 1e-6
            probs = scipy.special.softmax(probs)
            iters += 1

        np.seterr(**old_settings)

        test_pts = np.random.choice(
            np.arange(bs_min_ind, bs_max_ind), settings.batch_pts,
            replace=False, p=probs)
        test_pts = bss[test_pts]

        logger.debug('Comparing batch sizes: %s', test_pts)

    # here we could naively just loop over the test_pts, but this will be
    # a very inefficient use of our cores if we are on fast settings and
    # have many cores. Furthermore, some batch sizes will almost certainly
    # run faster than others. So alas, in the name of performance, this is
    # going to look a lot like _run_and_collate but dissimilar enough to not be
    # worth calling it

    folder = str(uuid.uuid4())
    os.makedirs(folder)

    loops = 0  # number spawned // test_pts.shape[0]
    last_loop_printed = 0
    cur_ind = 0  # in test_pts
    current_processes = []
    target_num_loops = max(
        settings.batch_pt_min_inits,
        cores // test_pts.shape[0]
    )
    while loops < target_num_loops:
        while len(current_processes) == cores:
            if last_loop_printed < loops:
                logger.debug('On loop %s/%s',
                             loops + 1, target_num_loops)
                last_loop_printed = loops
            time.sleep(0.1)

            for i in range(len(current_processes) - 1, -1, -1):
                if not current_processes[i].is_alive():
                    current_processes.pop(i)

        fname = os.path.join(folder, f'{cur_ind}_{loops}.npz')
        bs = int(test_pts[cur_ind])
        proc = mp.Process(
            target=_train_with_perf,
            args=(
                model_loader, dataset_loader, loss_loader, fname,
                accuracy_style, bs, lr_min_over_batch * bs,
                lr_max_over_batch * bs, cycle_time_epochs,
                cycle_time_epochs, incl_raw
            )
        )
        proc.start()
        current_processes.append(proc)

        cur_ind += 1
        if cur_ind >= test_pts.shape[0]:
            cur_ind = 0
            loops += 1

    logger.debug('Waiting for %s currently running trials to end...',
                 len(current_processes))

    for proc in current_processes:
        proc.join()

    logger.debug('Organizing and interpreting batch size performance info...')

    all_final_perfs = np.zeros((test_pts.shape[0], loops))
    all_final_lse_perfs = np.zeros(test_pts.shape[0])

    raws_dict = dict()

    for i, bs in enumerate(test_pts):
        trials = []
        trials_raw = [] if incl_raw else None
        for trial in range(loops):
            fname = os.path.join(folder, f'{i}_{trial}.npz')
            with np.load(fname) as infile:
                final_perf = infile['final_perf']
                if np.isnan(final_perf).sum() > 0:
                    logger.debug('Found some NaNs; treating them as '
                                 'infinitely bad')
                    final_perf[np.isnan(final_perf)] = 0
                trials.append(final_perf)

                if incl_raw:
                    perf = infile['perf']
                    if np.isnan(perf).sum() > 0:
                        logger.debug('Found some nans in raw perfs')
                        perf[np.isnan(perf)] = 0
                    trials_raw.append(perf)
            os.remove(fname)
        trials = np.concatenate(trials)

        old_settings = np.seterr(under='ignore')
        lse_trials = scipy.special.logsumexp(trials)
        np.seterr(**old_settings)

        all_final_perfs[i] = trials
        all_final_lse_perfs[i] = lse_trials

        if incl_raw:
            trials_raw = np.stack(trials_raw)
            smoothed_trials_raw = scipy.signal.savgol_filter(
                trials_raw, smooth_window_size(trials_raw.shape[1]), 1
            )
            old_settings = np.seterr(under='ignore')
            lse_smoothed_trials_raw = scipy.special.logsumexp(
                smoothed_trials_raw, axis=0)
            np.seterr(**old_settings)

            raws_dict[f'raw_{bs}'] = trials_raw
            raws_dict[f'smoothed_raw_{bs}'] = smoothed_trials_raw
            raws_dict[f'lse_smoothed_raw_{bs}'] = lse_smoothed_trials_raw

    os.rmdir(folder)

    best_ind = np.argmax(all_final_lse_perfs)
    best_bs = int(test_pts[best_ind])

    np.savez_compressed(
        os.path.join(mainfolder, 'bs_sampled.npz'),
        bss=test_pts, final=all_final_perfs, lse_final=all_final_lse_perfs,
        **raws_dict
    )

    logger.info('Found best batch size of those tested: %s', best_bs)

    return best_bs, test_pts, loops
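
# Illustrative sketch of the probability clean-up performed above: candidate
# batch sizes are drawn without replacement via np.random.choice, which
# requires every probability to be meaningfully non-zero. This standalone
# version (an assumption, not called by the module) mirrors that
# renormalization loop in isolation.
def _example_sampling_probs(derivs, floor=1e-6, max_iters=10):
    probs = scipy.special.softmax(derivs)
    iters = 0
    while (probs < floor).sum() != 0:
        if iters > max_iters:
            probs[:] = 1 / probs.shape[0]  # give up: uniform distribution
            break
        probs[probs < floor] = floor
        probs = scipy.special.softmax(probs)
        iters += 1
    return probs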

def tune(model_loader: typing.Tuple[str, str, tuple, dict],
         dataset_loader: typing.Tuple[str, str, tuple, dict],
         loss_loader: typing.Tuple[str, str, tuple, dict],
         accuracy_style: str, folder: str, cores: int,
         settings: HyperparameterSettings, store_up_to: AnalysisSettings,
         logger: logging.Logger = None):
    r"""Finds the optimal learning rate and batch size for the specified model
    on the specified dataset trained with the given loss. Stores the following
    information:

    .. code:: none

        folder/
            final.json
                {'lr_start': float, 'lr_end': float, 'batch_size': float,
                 'cycle_size_epochs': int, 'epochs': int}
            misc.json
                Variables that went into the final output. Typically selected
                via heuristics, constants, or come from the hyperparameter
                settings. Some may be deduced from the numpy array files
                directly.

                {
                    'initial_batch_size': int,
                    'initial_cycle_time': int,
                    'initial_min_lr': float,
                    'initial_max_lr': float,
                    'initial_lr_num_to_val': int,
                    'initial_lr_num_trials': int,
                    'initial_lr_window_size': int,
                    'initial_lr_sweep_result_min': float,
                    'initial_lr_sweep_result_max': float,
                    'second_min_lr': float,
                    'second_max_lr': float
                }
            lr_vs_perf.npz
                lrs=np.ndarray[number of batches]
                perfs=np.ndarray[trials, number of batches]
                smoothed_perfs=np.ndarray[trials, number of batches]
                lse_smoothed_perfs=np.ndarray[trials, number of batches]
                perf_derivs=np.ndarray[trials, number_of_batches]
                smoothed_perf_derivs=np.ndarray[trials, number of batches]
                mean_smoothed_perf_derivs=np.ndarray[number of batches]
                lse_smoothed_perf_then_derivs=np.ndarray[number of batches]
                    lse = log sum exp. When there are many trials, the mean
                    gets overly pessimistic from bad initializations, so LSE
                    is more stable. However, we can't take the LSE of the
                    smoothed derivatives because then the derivatives tend to
                    be positive everywhere, so we have to smooth first, then
                    take the LSE, then take the derivative.
                lr_range=np.ndarray[2]
                    min, max for the good range of learning rates
            bs_vs_perf.npz (bs=batch_size)
                Where a single batch size is tried multiple times, we take the
                mean over those times so that bss contains only unique values
                and hence can be treated like lrs.

                bss=np.ndarray[number of batches]
                perfs=np.ndarray[trials, number of batches]
                smoothed_perfs=np.ndarray[number of batches]
                lse_smoothed_perfs=np.ndarray[number of batches]
                perf_derivs=np.ndarray[trials, number_of_batches]
                smoothed_perf_derivs=np.ndarray[trials, number of batches]
                mean_smoothed_perf_derivs=np.ndarray[number of batches]
                lse_smoothed_perf_then_derivs=np.ndarray[number of batches]
                bs_range=np.ndarray[2]
                    min, max for the good range of batch sizes
            lr_vs_perf2.npz
                Only stored if settings.rescan_lr_after_bs. Looks exactly like
                lr_vs_perf.npz, except these runs are performed with the newly
                selected batch size.
            bs_sampled.npz
                Only stored if settings.batch_pts > 0.

                bss=np.ndarray[num bs attempted]
                final=np.ndarray[num bs attempted, trials]
                    final performance for batch size i for each trial
                lse_final=np.ndarray[num bs attempted]
                    final logsumexp performance for each batch size; the
                    argmax is the selected batch size. If you want this to
                    nicely be below the maximum, subtract log(trials), and
                    note that this does not affect the argmax.
                raw_i=np.ndarray[trials, number of batches]
                    only if store_up_to.hparam_selection_specific_imgs, same
                    for the *_raw_i. i is a sampled batch size and raw_i[t, j]
                    is the performance of the model after iteration j for
                    batch size i on trial t.
                smoothed_raw_i=np.ndarray[trials, number of batches]
                lse_smoothed_raw_i=np.ndarray[number of batches]

    :param model_loader: describes which module and corresponding attribute
        can be passed what arguments and keyword arguments to produce the
        nn.Module with a random initialization which can be trained

        .. code:: python

            model_loader = ('torch.nn', 'Linear', (20, 10), {'bias': True})

    :param dataset_loader: describes which module and corresponding attribute
        can be passed what arguments and keyword arguments to produce the
        training dataset and validation dataset.
    :param loss_loader: describes which module and corresponding attribute
        can be passed what arguments and keyword arguments to produce the
        nn.Module that converts (y_pred, y) to a scalar which should be
        minimized
    :param folder: where to save the output to
    :param cores: how many cores to use; 1 for just the main process
    :param settings: the settings to use to tune the learning rate and batch
        size
    :param store_up_to: the information stored should be at least what is
        required to produce this analysis
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    os.makedirs(folder)

    train_set, _ = ignite_simple.utils.invoke(dataset_loader)

    logger.info('Performing initial learning rate sweep...')

    init_batch_size = 64
    init_cycle_time = int(np.clip(150000 // len(train_set), 2, 5) * 2)

    lr_min, lr_max, lr_initial_window_size, lr_initial_trials = (
        _select_lr_from(
            model_loader, dataset_loader, loss_loader, accuracy_style,
            os.path.join(folder, 'lr_vs_perf.npz'), cores, settings,
            store_up_to, logger, init_cycle_time, init_batch_size,
            settings.lr_start, settings.lr_end
        )
    )
    initial_lr_sweep_result_min, initial_lr_sweep_result_max = lr_min, lr_max
    initial_lr_num_to_val = min(NUM_TO_VAL_MAX, len(train_set))

    logger.info('Performing initial batch size sweep...')

    # The trick is that increasing the batch size requires a corresponding
    # increase in learning rate. We don't want to include the lr range
    # except insofar as taking that into account, as otherwise these
    # results would be even muddier than they already are.
    lr_avg_over_batch = ((lr_min + lr_max) / 2) / init_batch_size
    bs_sweep_lr_min = lr_avg_over_batch * settings.batch_start
    bs_sweep_lr_max = lr_avg_over_batch * settings.batch_end

    result = _run_and_collate(
        _batch_vs_perf, {
            'model_loader': model_loader,
            'dataset_loader': dataset_loader,
            'loss_loader': loss_loader,
            'accuracy_style': accuracy_style,
            'batch_start': settings.batch_start,
            'batch_end': settings.batch_end,
            'lr_start': bs_sweep_lr_min,
            'lr_end': bs_sweep_lr_max,
            'cycle_time_epochs': init_cycle_time
        }, cores, settings.batch_rn_min_inits
    )

    logger.debug('Organizing and interpreting batch size sweep...')

    bss = result['bss'][0]
    bs_perfs = result['perfs']
    bs_sweep_trials = int(bs_perfs.shape[0])

    window_size = smooth_window_size(bs_perfs.shape[1])

    smoothed_bs_perf = scipy.signal.savgol_filter(
        bs_perfs, window_size, 1
    )
    old_settings = np.seterr(under='ignore')
    lse_smoothed_bs_perf = scipy.special.logsumexp(
        smoothed_bs_perf, axis=0
    )
    np.seterr(**old_settings)
    lse_smoothed_bs_perf_then_derivs = np.gradient(
        lse_smoothed_bs_perf, axis=0)
    bs_perf_derivs = np.gradient(bs_perfs, axis=-1)
    smoothed_bs_perf_derivs = scipy.signal.savgol_filter(
        bs_perfs, window_size, 1, deriv=1)
    mean_smoothed_bs_perf_derivs = smoothed_bs_perf_derivs.mean(0)

    bs_min, bs_max = find_with_derivs(bss, lse_smoothed_bs_perf_then_derivs)
    bs_min, bs_max = int(bs_min), int(bs_max)

    logger.info('Batch size range: [%s, %s) (found from %s trials)',
                bs_min, bs_max, bs_perfs.shape[0])

    np.savez_compressed(
        os.path.join(folder, 'bs_vs_perf.npz'),
        bss=bss, perfs=bs_perfs, perf_derivs=bs_perf_derivs,
        smoothed_perfs=smoothed_bs_perf,
        smoothed_perf_derivs=smoothed_bs_perf_derivs,
        mean_smoothed_perf_derivs=mean_smoothed_bs_perf_derivs,
        lse_smoothed_perf_then_derivs=lse_smoothed_bs_perf_then_derivs,
        bs_range=np.array([bs_min, bs_max]))

    if settings.batch_pts > 1:
        batch_size, batch_pts_checked, num_batch_loops = (
            _select_batch_size_from(
                model_loader, dataset_loader, loss_loader, accuracy_style,
                folder, cores, settings, store_up_to, logger,
                init_cycle_time, bss, lse_smoothed_bs_perf_then_derivs,
                bs_min, bs_max, lr_min / init_batch_size,
                lr_max / init_batch_size)
        )
    else:
        batch_size = (bs_min + bs_max) // 2
        batch_pts_checked = []
        num_batch_loops = -1
        logger.info('Choosing mean batch size: %s', batch_size)

    if settings.rescan_lr_after_bs and batch_size != init_batch_size:
        logger.info('Finding learning rate range on new batch size...')
        second_min_lr = (settings.lr_start / init_batch_size) * batch_size
        second_max_lr = (settings.lr_end / init_batch_size) * batch_size
        lr_min, lr_max, second_lr_window_size, second_lr_num_trials = (
            _select_lr_from(
                model_loader, dataset_loader, loss_loader, accuracy_style,
                os.path.join(folder, 'lr_vs_perf2.npz'), cores, settings,
                store_up_to, logger, init_cycle_time, init_batch_size,
                second_min_lr, second_max_lr
            )
        )
    else:
        second_min_lr = float('nan')
        second_max_lr = float('nan')
        second_lr_window_size = float('nan')
        second_lr_num_trials = float('nan')
        lr_min = (lr_min / init_batch_size) * batch_size
        lr_max = (lr_max / init_batch_size) * batch_size

    with open(os.path.join(folder, 'final.json'), 'w') as outfile:
        json.dump({'lr_start': lr_min, 'lr_end': lr_max,
                   'batch_size': batch_size,
                   'cycle_size_epochs': init_cycle_time,
                   'epochs': init_cycle_time * 4}, outfile)

    with open(os.path.join(folder, 'misc.json'), 'w') as outfile:
        json.dump(
            {
                'initial_batch_size': init_batch_size,
                'initial_cycle_time': init_cycle_time,
                'initial_min_lr': settings.lr_start,
                'initial_max_lr': settings.lr_end,
                'initial_lr_num_to_val': initial_lr_num_to_val,
                'initial_lr_num_trials': lr_initial_trials,
                'initial_lr_window_size': lr_initial_window_size,
                'initial_lr_sweep_result_min': initial_lr_sweep_result_min,
                'initial_lr_sweep_result_max': initial_lr_sweep_result_max,
                'initial_avg_lr': (
                    initial_lr_sweep_result_min
                    + initial_lr_sweep_result_max) / 2,
                'initial_min_batch': settings.batch_start,
                'initial_max_batch': settings.batch_end,
                'initial_batch_num_to_val': initial_lr_num_to_val,
                'initial_batch_num_trials': bs_sweep_trials,
                'batch_sweep_result_min': bs_min,
                'batch_sweep_result_max': bs_max,
                'batch_sweep_result': batch_size,
                'batch_sweep_num_pts': len(batch_pts_checked),
                'batch_sweep_pts_list': list(
                    int(i) for i in batch_pts_checked),
                'batch_sweep_trials_each': num_batch_loops,
                'second_min_lr': second_min_lr,
                'second_max_lr': second_max_lr,
                'second_lr_num_trials': second_lr_num_trials,
                'second_lr_window_size': second_lr_window_size,
                'lr_sweep_result_min': lr_min,
                'lr_sweep_result_max': lr_max,
            },
            outfile
        )

    logger.debug('Tuning completed successfully')
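
# End-to-end usage sketch. Everything below is commented out and illustrative:
# the module paths, accuracy_style value, and the elided settings objects are
# assumptions, not documented entry points; consult the package docs for the
# real HyperparameterSettings / AnalysisSettings constructors.
#
#     import json, os, logging
#     from ignite_simple import tuner
#
#     tuner.tune(
#         model_loader=('my_project.models', 'build_model', tuple(), dict()),
#         dataset_loader=('my_project.data', 'load_datasets', tuple(), dict()),
#         loss_loader=('torch.nn', 'CrossEntropyLoss', tuple(), dict()),
#         accuracy_style='classification',  # assumed value
#         folder='hparams', cores=4,
#         settings=..., store_up_to=...,  # HyperparameterSettings, AnalysisSettings
#         logger=logging.getLogger('tuning'))
#
#     with open(os.path.join('hparams', 'final.json')) as infile:
#         print(json.load(infile))  # lr_start, lr_end, batch_size, ...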