Source code for mvtk.sobol

import numpy
import public


def choose(x, N, nprng=None):
    if nprng is None:
        nprng = numpy.random.RandomState(0)
    return x[nprng.choice(numpy.arange(len(x), dtype="int"), N)]


[docs]@public.add def sobol(model, data, N=None, nprng=None): """Total and first order Sobol sensitivity indices. https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis. Args: model (function): Maps data to scores data (ndarray): Data matrix. Each row is a sample vector. N (int): sample size for monte carlo estimate of sobol indices. Should be less than or equal to the number of rows of data. If None, entire dataset is used. nprng (RandomState): Optional numpy RandomState. returns: Total and first order Sobol sensitivity indices. Each index is expressed as an array of length equal to the number of features in the supplied data matrix. """ if nprng is None: nprng = numpy.random.RandomState(0) if N is None: A = data.copy() B = data.copy() nprng.shuffle(A) nprng.shuffle(B) N = len(data) elif N > len(data): raise ValueError("Sample size must be less than or equal to size of dataset") else: A, B = (choose(data, N, nprng) for _ in range(2)) d = data.shape[1] total = [] first_order = [] for i in range(d): C = A[:, i].copy() A[:, i] = B[:, i] diff = model(A) A[:, i] = C diff -= model(A) first_order.append(model(B).dot(diff) / N) total.append(diff.dot(diff) / (2 * N)) variance_y = model(numpy.vstack((A, B))).std() ** 2 total = numpy.asarray(total) / variance_y first_order = numpy.asarray(first_order) / variance_y return total, first_order