Source code for mvtk.bias_variance.bias_variance_parallel

import ray
import numpy as np
import public

from sklearn.utils import resample

from . import bias_variance_mse, get_values, train_and_predict


def _prepare_X_and_y(X_train_values, y_train_values, prepare_X, prepare_y_train):
    """Apply the feature and label transforms to one training sample.

    Args:
        X_train_values: training features to transform
        y_train_values: training labels to transform
        prepare_X (function): transform applied to the features
        prepare_y_train (function): transform applied to the labels

    Returns:
        tuple of (transformed features, transformed labels)
    """
    # Features are transformed first, then labels, matching call order.
    X_prepared = prepare_X(X_train_values)
    y_prepared = prepare_y_train(y_train_values)
    return X_prepared, y_prepared


@public.add
def bias_variance_compute_parallel(
    estimator,
    X_train,
    y_train,
    X_test,
    y_test,
    prepare_X=lambda x: x,
    prepare_y_train=lambda x: x,
    iterations=200,
    random_state=None,
    decomp_fn=bias_variance_mse,
    fit_kwargs=None,
    predict_kwargs=None,
):
    r"""Compute the bias-variance decomposition in parallel

    One ray task is submitted per iteration. Each task trains the estimator
    on a bootstrap sample of the training data and predicts on the test
    features; the collected predictions are then passed to ``decomp_fn``.

    Args:
        estimator (EstimatorWrapper): estimator wrapped with a class extending
            EstimatorWrapper
        X_train: features for training
        y_train: ground truth labels for training
        X_test: features for testing
        y_test: ground truth labels for testing
        prepare_X (function, optional): function to transform feature datasets
            before calling fit and predict methods
        prepare_y_train (function, optional): function to transform training
            ground truth labels before calling fit method
        iterations (int, optional): number of iterations for the
            training/testing
        random_state (int, optional): random state for bootstrap sampling.
            Any integer (including NumPy integer scalars) is converted once
            into a ``numpy.random.RandomState`` that is shared by all
            iterations; any other non-None value is forwarded unchanged to
            ``sklearn.utils.resample``.
        decomp_fn (function, optional): bias-variance decomposition function
        fit_kwargs (dict, optional): kwargs to pass to the fit method
        predict_kwargs (dict, optional): kwargs to pass to the predict method

    Returns:
        The result of ``decomp_fn(predictions, y_test_values)``; as originally
        documented: (average loss, average bias, average variance,
        net variance)"""
    # Function-scope import keeps this block self-contained.
    import numbers

    if predict_kwargs is None:
        predict_kwargs = {}
    if fit_kwargs is None:
        fit_kwargs = {}

    # Convert *any* integral seed (not only builtin ``int``) into a single
    # RandomState. If a raw integer seed reached ``resample`` directly, every
    # iteration would be re-seeded identically and all bootstrap samples
    # would be the same, which defeats the variance estimate.
    if isinstance(random_state, numbers.Integral):
        random_state = np.random.RandomState(seed=random_state)

    X_train_values = get_values(X_train)
    y_train_values = get_values(y_train)
    X_test_values = get_values(X_test)
    # Test features are transformed once here and reused by every task.
    X_test_prepared = prepare_X(X_test_values)

    if random_state is None:
        # Unseeded: each remote task draws its own bootstrap sample.
        result = [
            bootstrap_train_and_predict_ray.remote(
                estimator,
                X_train_values,
                y_train_values,
                X_test_prepared,
                prepare_X,
                prepare_y_train,
                fit_kwargs,
                predict_kwargs,
            )
            for _ in range(iterations)
        ]
    else:
        # Seeded: resampling and the prepare_* transforms run here, in the
        # calling process, so the shared random state advances from one
        # iteration to the next before each task is submitted.
        result = [
            train_and_predict_ray.remote(
                estimator,
                *_prepare_X_and_y(
                    *resample(
                        X_train_values, y_train_values, random_state=random_state
                    ),
                    prepare_X,
                    prepare_y_train
                ),
                X_test_prepared,
                fit_kwargs,
                predict_kwargs
            )
            for _ in range(iterations)
        ]

    # ray.get blocks until every submitted task has produced its predictions.
    predictions = np.array(ray.get(result))
    y_test_values = get_values(y_test)
    return decomp_fn(predictions, y_test_values)


@ray.remote
def train_and_predict_ray(
    estimator,
    X_train_values,
    y_train_values,
    X_test_prepared,
    fit_kwargs=None,
    predict_kwargs=None,
):
    r"""Ray remote wrapper that trains an estimator and returns its predictions

    Delegates directly to ``train_and_predict`` with the given arguments.

    Args:
        estimator (EstimatorWrapper): estimator wrapped with a class extending
            EstimatorWrapper
        X_train_values: numpy array of features for training
        y_train_values: numpy array of ground truth labels for training
        X_test_prepared: features for testing which has been processed by
            prepare_X function
        fit_kwargs (dict, optional): kwargs to pass to the fit method
        predict_kwargs (dict, optional): kwargs to pass to the predict method

    Returns:
        predictions"""
    data_args = (estimator, X_train_values, y_train_values, X_test_prepared)
    return train_and_predict(
        *data_args, fit_kwargs=fit_kwargs, predict_kwargs=predict_kwargs
    )


@ray.remote
def bootstrap_train_and_predict_ray(
    estimator,
    X_train_values,
    y_train_values,
    X_test_prepared,
    prepare_X=lambda x: x,
    prepare_y_train=lambda x: x,
    fit_kwargs=None,
    predict_kwargs=None,
):
    r"""Ray remote wrapper that draws a bootstrap sample of the training data,
    trains an estimator on it and returns its predictions

    Args:
        estimator (EstimatorWrapper): estimator wrapped with a class extending
            EstimatorWrapper
        X_train_values: numpy array of features for training
        y_train_values: numpy array of ground truth labels for training
        X_test_prepared: features for testing which has been processed by
            prepare_X function
        prepare_X (function, optional): function to transform feature datasets
            before calling fit and predict methods
        prepare_y_train (function, optional): function to transform train
            ground truth labels before calling fit method
        fit_kwargs (dict, optional): kwargs to pass to the fit method
        predict_kwargs (dict, optional): kwargs to pass to the predict method

    Returns:
        predictions"""
    # Normalise missing kwargs to empty dicts before forwarding them.
    fit_kwargs = {} if fit_kwargs is None else fit_kwargs
    predict_kwargs = {} if predict_kwargs is None else predict_kwargs

    # No random_state is given to resample, so this draw is unseeded.
    bootstrap_X, bootstrap_y = resample(X_train_values, y_train_values)
    return train_and_predict(
        estimator,
        bootstrap_X,
        bootstrap_y,
        X_test_prepared,
        prepare_X,
        prepare_y_train,
        fit_kwargs,
        predict_kwargs,
    )