Source code for mvtk.supervisor.divergence.utils

import itertools
import numpy
import scipy
import public

from mvtk.supervisor.utils import parallel


@public.add
def get_drift_series(metric, baseline, test):
    """Evaluate ``metric`` for every element of ``test`` against ``baseline``.

    :param metric: callable invoked as ``metric(element, baseline)``
    :param baseline: object passed unchanged as the second argument of
        every ``metric`` call
    :param test: collection of elements handed to ``parallel``
    :return: the results returned by ``parallel``, converted with
        ``numpy.asarray``
    """

    def score(sample):
        return metric(sample, baseline)

    scores = parallel(score, test)
    return numpy.asarray(scores)
@public.add
def get_distance_matrix(metric, sample_distributions, show_progress=False):
    """Build a square matrix of pairwise ``metric`` values.

    ``metric`` is evaluated once for each unordered pair of distinct
    entries of ``sample_distributions``; each value is written at
    ``[i, j]`` with ``i < j`` and then mirrored by adding the transpose.
    The diagonal is never written and stays at zero.

    :param metric: callable invoked as ``metric(first, second)``
    :param sample_distributions: sized, iterable collection of samples
    :param show_progress: forwarded to ``parallel`` as ``show_progress``
    :return: ``numpy.ndarray`` of shape ``(n, n)`` where ``n`` is
        ``len(sample_distributions)``
    """
    size = len(sample_distributions)
    distance_matrix = numpy.zeros((size, size))

    # Each job is [(i, j), (sample_i, sample_j)]; combinations() over the
    # enumerated samples yields every pair exactly once with i < j.
    jobs = [
        [(i, j), (first, second)]
        for (i, first), (j, second) in itertools.combinations(
            enumerate(sample_distributions), 2
        )
    ]

    def evaluate(job):
        position, samples = job
        return position, metric(samples[0], samples[1])

    for position, distance in parallel(
        evaluate, jobs, show_progress=show_progress
    ):
        distance_matrix[position] = distance

    # Only the upper triangle has been filled; adding the transpose
    # copies it into the lower triangle.
    distance_matrix += distance_matrix.T
    return distance_matrix
@public.add
def sparse_wrapper(v):
    """Wrap ``v`` in a subclass of its own type that indexes rows densely.

    A subclass of ``type(v)`` is created on each call. Indexing the
    wrapper with a single integer returns the first row of
    ``toarray()`` of the selected item; any other index is delegated
    unchanged to the parent class. ``len()`` of the wrapper is
    ``shape[0]``.

    :param v: object whose type can be subclassed and constructed from an
        instance of itself, and whose indexing results provide
        ``toarray()`` (presumably a ``scipy.sparse`` matrix -- the only
        caller visible here passes ``scipy.sparse.spmatrix`` instances)
    :return: instance of the generated ``_SparseWrapper`` subclass built
        from ``v``
    """

    class _SparseWrapper(type(v)):
        def __getitem__(self, i):
            ret = super().__getitem__(i)
            # NumPy integer scalars (e.g. numpy.int64 produced by index
            # arrays or random sampling) are not ``int`` instances, but
            # they select a single row just like a plain int and must
            # therefore yield the same dense 1-D result.
            if isinstance(i, (int, numpy.integer)):
                return ret.toarray()[0]
            return ret

        def __len__(self):
            return self.shape[0]

    return _SparseWrapper(v)
def to_array_like(v):
    """Return an array-like representation of ``v``.

    :param v: any object
    :return: ``v.values`` when ``v`` has a ``values`` attribute;
        ``sparse_wrapper(v)`` when ``v`` is a ``scipy.sparse.spmatrix``;
        otherwise ``v`` itself, unchanged
    """
    # A bare ``import scipy`` does not load the ``sparse`` submodule on
    # every SciPy release, so import it explicitly before referencing it.
    import scipy.sparse

    if hasattr(v, "values"):
        return v.values
    if isinstance(v, scipy.sparse.spmatrix):
        return sparse_wrapper(v)
    return v
@public.add
def arrayify(item):
    """Normalise ``item`` into a collection of array-likes.

    Objects exposing ``shape`` are first converted with
    ``to_array_like``. A two-dimensional result is wrapped in a
    single-element list and a one-dimensional result is passed through
    ``numpy.atleast_3d``. Everything else (objects without ``shape``, or
    whose converted shape has any other number of dimensions) is
    iterated and each element converted with ``to_array_like``.

    :param item: ndarray or a list of ndarray, or a dataframe, a series
        or a list of dataframes or series
    :return: a list of array-likes, or the array produced by
        ``numpy.atleast_3d``
    """
    if not hasattr(item, "shape"):
        return list(map(to_array_like, item))

    converted = to_array_like(item)
    rank = len(converted.shape)
    if rank == 2:
        return [converted]
    if rank == 1:
        return numpy.atleast_3d(converted)
    # Neither 1-D nor 2-D: treat the object as a sequence of items.
    return [to_array_like(element) for element in item]