import numpy
import public
def choose(x, N, nprng=None):
if nprng is None:
nprng = numpy.random.RandomState(0)
return x[nprng.choice(numpy.arange(len(x), dtype="int"), N)]
[docs]@public.add
def sobol(model, data, N=None, nprng=None):
"""Total and first order Sobol sensitivity indices.
https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis.
Args:
model (function): Maps data to scores
data (ndarray): Data matrix. Each row is a sample vector.
N (int): sample size for monte carlo estimate of sobol
indices. Should be less than or equal to the number of rows
of data. If None, entire dataset is used.
nprng (RandomState): Optional numpy RandomState.
returns:
Total and first order Sobol sensitivity indices. Each index
is expressed as an array of length equal to the number of
features in the supplied data matrix.
"""
if nprng is None:
nprng = numpy.random.RandomState(0)
if N is None:
A = data.copy()
B = data.copy()
nprng.shuffle(A)
nprng.shuffle(B)
N = len(data)
elif N > len(data):
raise ValueError("Sample size must be less than or equal to size of dataset")
else:
A, B = (choose(data, N, nprng) for _ in range(2))
d = data.shape[1]
total = []
first_order = []
for i in range(d):
C = A[:, i].copy()
A[:, i] = B[:, i]
diff = model(A)
A[:, i] = C
diff -= model(A)
first_order.append(model(B).dot(diff) / N)
total.append(diff.dot(diff) / (2 * N))
variance_y = model(numpy.vstack((A, B))).std() ** 2
total = numpy.asarray(total) / variance_y
first_order = numpy.asarray(first_order) / variance_y
return total, first_order