# Source code for skmultilearn.problem_transform.br

import copy
import numpy as np

from scipy.sparse import hstack, issparse, lil_matrix

from ..base.problem_transformation import ProblemTransformationBase
from ..base.base import MLClassifierBase


class BinaryRelevance(ProblemTransformationBase):
    """Performs classification per label

    Transforms a multi-label classification problem with L labels
    into L single-label separate binary classification problems
    using the same base classifier provided in the constructor. The
    prediction output is the union of all per label classifiers

    Parameters
    ----------
    classifier : :class:`~sklearn.base.BaseEstimator`
        scikit-learn compatible base classifier
    require_dense : [bool, bool], optional
        whether the base classifier requires dense representations
        for input features and classes/labels matrices in fit/predict.
        If value not provided, sparse representations are used if base
        classifier is an instance of
        :class:`~skmultilearn.base.MLClassifierBase` and dense otherwise.

    Attributes
    ----------
    model_count_ : int
        number of trained models, in this classifier equal to `n_labels`
    partition_ : List[int], shape=(`model_count_`,)
        list of label indexes (one per model), used to index the output
        space matrix, set in :meth:`_generate_partition` via :meth:`fit`
    classifiers_ : List[:class:`~sklearn.base.BaseEstimator`] of shape `model_count`
        list of classifiers trained per partition, set in :meth:`fit`

    Notes
    -----
    .. note ::

        This is one of the most basic approaches to multi-label
        classification, it ignores relationships between labels.

    Examples
    --------
    An example use case for Binary Relevance classification
    with an :class:`sklearn.svm.SVC` base classifier which supports
    sparse input:

    .. code-block:: python

        from skmultilearn.problem_transform import BinaryRelevance
        from sklearn.svm import SVC

        # initialize Binary Relevance multi-label classifier
        # with an SVM classifier
        # SVM in scikit only supports the X matrix in sparse representation

        classifier = BinaryRelevance(
            classifier = SVC(),
            require_dense = [False, True]
        )

        # train
        classifier.fit(X_train, y_train)

        # predict
        predictions = classifier.predict(X_test)

    Another way to use this classifier is to select the best scenario
    from a set of single-label classifiers used with Binary Relevance,
    this can be done using cross validation grid search. In the example
    below, the model with highest accuracy results is selected from
    either a :class:`sklearn.naive_bayes.MultinomialNB` or
    :class:`sklearn.svm.SVC` base classifier, alongside with best
    parameters for that base classifier.

    .. code-block:: python

        from skmultilearn.problem_transform import BinaryRelevance
        from sklearn.model_selection import GridSearchCV
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.svm import SVC

        parameters = [
            {
                'classifier': [MultinomialNB()],
                'classifier__alpha': [0.7, 1.0],
            },
            {
                'classifier': [SVC()],
                'classifier__kernel': ['rbf', 'linear'],
            },
        ]

        clf = GridSearchCV(BinaryRelevance(), parameters, scoring='accuracy')
        clf.fit(x, y)

        print (clf.best_params_, clf.best_score_)

        # result:
        #
        # {
        #   'classifier': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
        #   decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
        #   max_iter=-1, probability=False, random_state=None, shrinking=True,
        #   tol=0.001, verbose=False), 'classifier__kernel': 'linear'
        # } 0.17
    """

    def __init__(self, classifier=None, require_dense=None):
        super(BinaryRelevance, self).__init__(classifier, require_dense)

    def _generate_partition(self, X, y):
        """Partitions the label space into singletons

        Sets `self.partition_` (a flat list with one label index per
        model) and `self.model_count_` (equal to number of labels).

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            not used, only for API compatibility
        y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `int`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments
        """
        label_count = y.shape[1]
        self.partition_ = list(range(label_count))
        self.model_count_ = label_count

    def fit(self, X, y):
        """Fits classifier to training data

        One deep copy of the base classifier is trained per entry of
        `self.partition_`, each on the corresponding label column of `y`.

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix
        y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments

        Returns
        -------
        self
            fitted instance of self

        Notes
        -----
        .. note :: Input matrices are converted to sparse format internally if a numpy representation is passed
        """
        X = self._ensure_input_format(
            X, sparse_format='csr', enforce_sparse=True)
        y = self._ensure_output_format(
            y, sparse_format='csc', enforce_sparse=True)

        self.classifiers_ = []
        self._generate_partition(X, y)
        self._label_count = y.shape[1]

        # The feature matrix handed to the base classifiers does not depend
        # on the label, so convert it once instead of once per label.
        X_base = self._ensure_input_format(X)

        for model_index in range(self.model_count_):
            # every label gets its own independent copy of the base classifier
            classifier = copy.deepcopy(self.classifier)
            y_subset = self._generate_data_subset(
                y, self.partition_[model_index], axis=1)
            # a sparse single-column target is flattened to a 1-d array
            if issparse(y_subset) and y_subset.ndim > 1 and y_subset.shape[1] == 1:
                y_subset = np.ravel(y_subset.toarray())
            classifier.fit(X_base, self._ensure_output_format(y_subset))
            self.classifiers_.append(classifier)

        return self

    def predict(self, X):
        """Predict labels for X

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix

        Returns
        -------
        :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments
        """
        # loop-invariant: convert the input once for all per-label models
        X_base = self._ensure_input_format(X)

        predictions = [
            self._ensure_multi_label_from_single_class(
                self.classifiers_[model_index].predict(X_base))
            for model_index in range(self.model_count_)
        ]

        # per-label results are stacked column-wise, in model order
        return hstack(predictions)

    @staticmethod
    def _positive_class_column(classifier, proba):
        """Locate the column of `proba` holding P(label is assigned)

        Parameters
        ----------
        classifier : object
            fitted per-label classifier that produced `proba`
        proba : 2-d matrix with a `shape` attribute
            per-class probabilities, one column per class

        Returns
        -------
        int or None
            column index to read, or ``None`` when the classifier only
            ever saw the negative class (probability of assignment is 0)
        """
        if proba.shape[1] != 1:
            # regular binary case: columns are ordered as classes [0, 1]
            return 1

        classes = getattr(classifier, 'classes_', None)
        if classes is None or len(classes) != 1:
            # Column layout cannot be determined; keep the historical
            # behaviour (indexing column 1 raises for a 1-column matrix).
            return 1

        # Only one class was observed while fitting this label.
        return 0 if classes[0] == 1 else None

    def predict_proba(self, X):
        """Predict probabilities of label assignments for X

        If a label had a single observed class during :meth:`fit` and the
        base classifier therefore reports one probability column, the
        classifier's `classes_` attribute decides whether that column is
        the probability of assignment (class 1) or the label is reported
        with probability 0 (class 0).

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix

        Returns
        -------
        :mod:`scipy.sparse` matrix of `float in [0.0, 1.0]`, shape=(n_samples, n_labels)
            matrix with label assignment probabilities
        """
        result = lil_matrix((X.shape[0], self._label_count), dtype='float')

        # Both of these are the same for every label, so evaluate them once.
        base_is_multilabel = isinstance(self.classifier, MLClassifierBase)
        X_base = None if base_is_multilabel else self._ensure_input_format(X)

        for label_assignment, classifier in zip(self.partition_, self.classifiers_):
            if base_is_multilabel:
                # the multilabel classifier should provide a (n_samples, n_labels) matrix
                # we just need to reorder it column wise
                result[:, label_assignment] = classifier.predict_proba(X)
                continue

            # a base classifier for binary relevance returns
            # n_samples x n_classes, where n_classes = [0, 1] - 1 is the
            # probability of the label being assigned
            proba = self._ensure_multi_label_from_single_class(
                classifier.predict_proba(X_base))
            column = self._positive_class_column(classifier, proba)
            if column is not None:
                # probability that label is assigned
                result[:, label_assignment] = proba[:, column]
            # else: label never assigned in training; the column stays 0.0

        return result