# Source code for skmultilearn.base.base

import copy
import numpy as np
from ..utils import get_matrix_in_format, matrix_creation_function_for_format
from scipy.sparse import issparse, csr_matrix
from sklearn.base import BaseEstimator, ClassifierMixin


class MLClassifierBase(BaseEstimator, ClassifierMixin):
    """Base class providing API and common functions for all multi-label classifiers.

    Subclasses are expected to provide the attributes read by the helpers
    below:

    Attributes
    ----------
    copyable_attrs : list of str
        Names of the instance attributes exposed through :meth:`get_params`
        and accepted by :meth:`set_params`. Initialised to an empty list.
    require_dense : indexable of two booleans
        Not set by this base class. ``require_dense[0]`` is read by
        :meth:`ensure_input_format` and ``require_dense[1]`` by
        :meth:`ensure_output_format` to decide whether dense data should be
        returned.
    """

    def __init__(self):
        super(MLClassifierBase, self).__init__()
        self.copyable_attrs = []

    def generate_data_subset(self, y, subset, axis):
        """Subset rows or columns from a sparse matrix.

        Parameters
        ----------
        y : scipy.sparse matrix
            Matrix of binary label vectors; must provide ``tocsc()`` /
            ``tocsr()``.
        subset : array-like of integers
            Indices to select along the requested axis.
        axis : integer
            0 to select rows, 1 to select columns (labels).

        Returns
        -------
        scipy.sparse matrix or None
            The rows of ``y`` (as CSR) when ``axis == 0``, the columns of
            ``y`` (as CSC) when ``axis == 1``. Any other ``axis`` value
            yields ``None``.
        """
        # The conversion picks the layout that is indexed along the
        # requested axis: CSC for columns, CSR for rows.
        if axis == 1:
            return y.tocsc()[:, subset]
        if axis == 0:
            return y.tocsr()[subset, :]
        # Unknown axis: kept as a silent None for backward compatibility.
        return None

    def ensure_input_format(self, X, sparse_format='csr', enforce_sparse=False):
        """Ensure the desired input format.

        Converts ``X`` so that it follows the density/sparsity requirement
        stored in ``self.require_dense[0]``.

        Parameters
        ----------
        X : array-like or sparse matrix
            An input feature matrix.
        sparse_format : string or None
            Requested format of the returned scipy.sparse matrix, if a sparse
            matrix is returned. ``None`` returns an already sparse ``X``
            unchanged.
        enforce_sparse : bool
            Ignore ``require_dense`` and enforce sparsity.

        Returns
        -------
        array-like or sparse matrix
            ``X.toarray()`` (sparse input) or ``X`` itself (dense input) when
            a dense result is required; otherwise a sparse matrix in
            ``sparse_format``.
        """
        want_dense = self.require_dense[0] and not enforce_sparse

        if issparse(X):
            if want_dense:
                return X.toarray()
            if sparse_format is None:
                return X
            return get_matrix_in_format(X, sparse_format)

        if want_dense:
            # TODO: perhaps a check_array?
            return X
        return matrix_creation_function_for_format(sparse_format)(X)

    def ensure_output_format(self, matrix, sparse_format='csr', enforce_sparse=False):
        """Ensure the desired output format.

        Converts ``matrix`` so that it follows the density/sparsity
        requirement stored in ``self.require_dense[1]``.

        Parameters
        ----------
        matrix : array-like with shape = [n_samples] or [n_samples, n_outputs]; or sparse matrix, shape = [n_samples, n_outputs]
            The output (label) matrix to convert.
        sparse_format : string or None
            Requested format of the returned scipy.sparse matrix, if a sparse
            matrix is returned. ``None`` returns an already sparse ``matrix``
            unchanged.
        enforce_sparse : bool
            Ignore ``require_dense`` and enforce sparsity.

        Returns
        -------
        array-like or sparse matrix
            When a dense result is required: a 1-D array if the data has a
            single column, otherwise the dense data. Dense input that is
            already 1-D or empty is returned unchanged. When a sparse result
            is required: a sparse matrix in ``sparse_format``.
        """
        want_dense = self.require_dense[1] and not enforce_sparse

        if issparse(matrix):
            if want_dense:
                dense = matrix.toarray()
                # A single output column is flattened to a 1-D vector.
                return np.ravel(dense) if matrix.shape[1] == 1 else dense
            if sparse_format is None:
                return matrix
            return get_matrix_in_format(matrix, sparse_format)

        if want_dense:
            # Inspect the shape rather than len(matrix[0]): indexing the
            # first row fails on 1-D and empty input, and on np.matrix the
            # first row always has length 1 regardless of column count.
            shape = np.shape(matrix)
            if len(shape) == 2 and shape[1] == 1:
                return np.ravel(matrix)
            return matrix
        return matrix_creation_function_for_format(sparse_format)(matrix)

    def fit(self, X, y):
        """Abstract method to fit classifier with training data.

        Parameters
        ----------
        X : dense or sparse matrix (n_samples, n_features)
            Input features.
        y : dense or sparse matrix of {0, 1} (n_samples, n_labels)
            Binary indicator matrix with label assignments.

        Returns
        -------
        Implementations should return self - a trained instance of the
        classifier.

        Raises
        ------
        NotImplementedError
            Always; this is just an abstract method.
        """
        raise NotImplementedError("MLClassifierBase::fit()")

    def predict(self, X):
        """Abstract method to predict labels.

        Parameters
        ----------
        X : dense or sparse matrix (n_samples, n_features)
            Input features.

        Returns
        -------
        Implementations should return a sparse matrix of {0, 1}
        (n_samples, n_labels).

        Raises
        ------
        NotImplementedError
            Always; this is just an abstract method.
        """
        raise NotImplementedError("MLClassifierBase::predict()")

    def get_params(self, deep=True):
        """Get parameters to subobjects.

        Introspection of classifier for search models like cross validation
        and grid search.

        Parameters
        ----------
        deep : boolean
            If true, the parameters of every attribute that itself exposes
            ``get_params`` are appended to the output dict under
            ``<attribute>__<parameter>`` keys.

        Returns
        -------
        out : dictionary
            Maps each name in ``self.copyable_attrs`` to its current value,
            plus the nested parameters when ``deep`` is true.
        """
        out = dict()

        for attr in self.copyable_attrs:
            value = getattr(self, attr)
            out[attr] = value

            if deep and hasattr(value, 'get_params'):
                out.update(
                    (attr + '__' + k, val)
                    for k, val in value.get_params().items()
                )

        return out

    def set_params(self, **parameters):
        """Propagate parameters to subobjects.

        Set parameters as returned by `get_params`.

        Parameters
        ----------
        **parameters
            Parameter names mapped to new values. Names without ``__`` are
            set as attributes of this object; names of the form
            ``<attribute>__<parameter>`` are forwarded to the ``set_params``
            of the object stored under ``<attribute>``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If a name (or the attribute part of a nested name) is not among
            the keys returned by ``get_params``.

        @see https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py#L243
        """
        if not parameters:
            return self

        valid_params = self.get_params(deep=True)

        # First pass: parameters that belong to this object.
        for name, value in parameters.items():
            if '__' in name:
                continue
            if name not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (name, self))
            setattr(self, name, value)

        # Second pass: group nested parameters by the sub-object they target.
        grouped_sub_params = {}
        for name, value in parameters.items():
            if '__' not in name:
                continue
            object_name, sub_param = name.split('__', 1)
            if object_name not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (object_name, self))
            grouped_sub_params.setdefault(object_name, {})[sub_param] = value

        # Re-read the top-level attributes: the first pass may have replaced
        # a sub-object, and the nested parameters must go to the new one.
        # A shallow lookup is enough because only top-level objects are used.
        current_params = self.get_params(deep=False)
        for object_name, sub_object in current_params.items():
            sub_params = grouped_sub_params.get(object_name)
            if sub_params:
                sub_object.set_params(**sub_params)

        return self