Source code for ignite_simple.range_finder

"""This module is responsible for finding a good range of values within which
a measure is increasing most rapidly. This works best when the y-values have
already been smoothed."""

import typing
import numpy as np
import scipy.signal

[docs]def smooth_window_size(npts: int) -> int: """Returns the a heuristic for the window size for smoothing an array with the given number of points. Specifically, this is 1/10 the number of points or 101, whichever is smaller. :param int npts: the number of data points you have :returns: suggested window size for smoothing the data """ result = int(npts) // 10 if result >= 100: return 101 if result < 3: return 3 if result % 2 == 0: result += 1 return result
[docs]def autosmooth(arr: np.ndarray, axis: int = -1): """Causes scipy.signal.savgol_filter on the given data for the given dimension with the default settings. :param arr: the array to smooth :param dim: the smooth dimension """ window_size = smooth_window_size(arr.shape[axis]) return scipy.signal.savgol_filter(arr, window_size, 1, axis=axis)
[docs]def nonzero_intervals(vec: np.ndarray) -> np.ndarray: """Find which intervals in the given vector are non-zero. Adapted from https://stackoverflow.com/a/27642744 :param np.ndarray vec: the vector to scan for non-zero intervals :returns: a list [x1, x2, ..., xn] such that [xi, xi+1) is a nonzero interval in vec """ if not vec.shape: return [] edges, = np.nonzero(np.diff((vec == 0).astype('int32'))) edge_vec = [edges + 1] if vec[0] != 0: edge_vec.insert(0, [0]) if vec[-1] != 0: edge_vec.append([len(vec)]) return np.concatenate(edge_vec)
[docs]def trim_range_derivs(xs: np.ndarray, derivs: np.ndarray, x_st_ind: int, x_en_ind: int) -> typing.Tuple[int, int]: """Given that we have found a good interval in the xs for which the derivs is positive, it may be possible that the bulk of the integral can be maintained while reducing the width of the integral. This function returns a new interval which is a subset of the given interval for which the bulk of the integral is maintained. :param np.ndarray xs: with shape `(points,)`, the x-values corresponding to the derivatives :param np.ndarray derivs: with shape `(points,)`, the relevant derivatives :param int x_st_ind: the index in xs the good interval starts at :param int x_en_ind: the index in xs the good interval ends at :returns: `(x_st_ind, x_end_ind)` which is a subset of the passed interval """ xs_in_range = xs[x_st_ind:x_en_ind] in_range = derivs[x_st_ind:x_en_ind] max_in_range = in_range.max() intvl_width = xs_in_range[-1] - xs_in_range[0] intvl_intgrl = np.trapz(in_range, xs_in_range) for new_floor_perc in np.linspace(0.5, 0, num=50, endpoint=False): new_floor = max_in_range * new_floor_perc new_valid = in_range > new_floor new_valid_rev = new_valid[::-1] first_true = np.argmax(new_valid) last_true = new_valid.shape[0] - np.argmax(new_valid_rev) # now first_true:last_true is the correct range new_intvl_xs = xs_in_range[first_true:last_true] new_intvl_derivs = in_range[first_true:last_true] new_intvl_width = new_intvl_xs[-1] - new_intvl_xs[0] new_intvl_perc = new_intvl_width / intvl_width if new_intvl_perc > (1 - new_floor_perc * 1.2): # e.g. we shaved off 10% of max and only lost 5% of width, # not meaningfully helpful for reducing interval size continue new_intvl_intgrl = np.trapz(new_intvl_derivs, new_intvl_xs) new_intvl_intgrl_perc = new_intvl_intgrl / intvl_intgrl if new_intvl_intgrl_perc < (1 - new_floor_perc * 1.2): # e.g. we shaved off 10% of max and lost 20% of integral, # not meaningfully helpful for reducing interval size continue if new_intvl_intgrl_perc < (1 - new_intvl_perc * 1.2): # e.g. we shaved off 15% of width and lost 20% of integral, # not meaningfully helpful for reducing interval size continue # now e.g. shaved off 10% of max, lost 15% width but only # 5% of the integral return (x_st_ind + first_true, x_st_ind + last_true) return (x_st_ind, x_en_ind)
[docs]def find_with_derivs(xs: np.ndarray, derivs: np.ndarray) -> typing.Tuple[float, float]: """Finds the range in derivs wherein the derivative is always positive. From these intervals, this returns specifically the one with the greatest integral. :param np.ndarray xs: with shape `(points,)`, the x-values corresponding to the derivatives :param np.ndarray derivs: with shape `(points,)`, the relevant derivatives :returns: the `(min, max)` for the best interval in xs that has positive derivative in ys. Where multiple such intervals exist, this is the one with the greatest integral """ derivs = derivs.copy() derivs[derivs < 0] = 0 candidates = nonzero_intervals(derivs) if not candidates.shape or candidates.shape[0] < 2: return xs[0], xs[-1] best_change = 0 best_candidate = -1 for i in range(candidates.shape[0] - 1): st = candidates[i] en = candidates[i + 1] change = np.trapz(derivs[st:en], xs[st:en]) if change > best_change: best_candidate = i best_change = change x_st_ind = candidates[best_candidate] x_en_ind = candidates[best_candidate + 1] x_st_ind, x_en_ind = trim_range_derivs(xs, derivs, x_st_ind, x_en_ind) return (xs[x_st_ind], xs[x_en_ind - 1])