import numpy as np
from builtins import range
from builtins import zip
from scipy import sparse
from .partition import LabelSpacePartitioningClassifier
class MajorityVotingClassifier(LabelSpacePartitioningClassifier):
"""Majority Voting ensemble classifier
Divides the label space using provided clusterer class, trains a provided base classifier
type classifier for each subset and assign a label to an instance
if more than half of all classifiers (majority) from clusters that contain the label
assigned the label to the instance.
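For example, if a label belongs to three clusters, two positive votes out of three
are enough to assign it (more than half), while a single vote is not; if it belongs
to only two clusters, both classifiers have to agree.
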
Parameters
----------
classifier : :class:`~sklearn.base.BaseEstimator`
the base classifier that will be used in a class, will be
automatically put under :code:`self.classifier`.
clusterer : :class:`~skmultilearn.cluster.LabelSpaceClustererBase`
object that partitions the output space, will be
automatically put under :code:`self.clusterer`.
require_dense : [bool, bool]
whether the base classifier requires [input, output] matrices
in dense representation, will be automatically
put under :code:`self.require_dense`.

Attributes
----------
model_count_ : int
number of trained models, in this classifier equal to the number of partitions
partition_ : List[List[int]], shape=(`model_count_`,)
list of lists of label indexes, used to index the output space matrix, set in :meth:`_generate_partition`
via :meth:`fit`
classifiers_ : List[:class:`~sklearn.base.BaseEstimator`], shape=(`model_count_`,)
list of classifiers trained per partition, set in :meth:`fit`

Examples
--------
Here's an example of building an overlapping ensemble of chains:

.. code :: python

from skmultilearn.ensemble import MajorityVotingClassifier
from skmultilearn.cluster import FixedLabelSpaceClusterer
from skmultilearn.problem_transform import ClassifierChain
from sklearn.naive_bayes import GaussianNB
classifier = MajorityVotingClassifier(
clusterer = FixedLabelSpaceClusterer(clusters = [[1,2,3], [0, 2, 5], [4, 5]]),
classifier = ClassifierChain(classifier=GaussianNB())
)
classifier.fit(X_train,y_train)
predictions = classifier.predict(X_test)
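
After :code:`fit`, the label subsets that were actually used are available under
:code:`classifier.partition_` and the per-subset models under :code:`classifier.classifiers_`.
The sketch below is only an illustration of the voting pool; it assumes the fixed
six-label clustering above and the fitted :code:`classifier` from the previous snippet.

.. code :: python

    import numpy as np

    # count how many cluster classifiers vote on each label
    voters = np.zeros(y_train.shape[1], dtype=int)
    for labels in classifier.partition_:
        voters[labels] += 1
    # with the clustering above this is expected to be [1, 1, 2, 1, 1, 2]:
    # labels 2 and 5 each need both of their cluster classifiers to agree
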
More advanced examples can be found in `the label relations exploration guide <../labelrelations.ipynb>`_
"""
def __init__(self, classifier=None, clusterer=None, require_dense=None):
super(MajorityVotingClassifier, self).__init__(
classifier=classifier, clusterer=clusterer, require_dense=require_dense
)
def predict(self, X):
"""Predict label assignments for X

Parameters
----------
X : numpy.ndarray or scipy.sparse.csc_matrix
input features of shape :code:`(n_samples, n_features)`

Returns
-------
scipy.sparse of float
binary indicator matrix with label assignments with shape
:code:`(n_samples, n_labels)`
"""
predictions = [
self._ensure_input_format(self._ensure_input_format(
c.predict(X)), sparse_format='csc', enforce_sparse=True)
for c in self.classifiers_
]
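# voters[i] counts how many clusters contain label i; votes accumulates the positive predictions per label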
voters = np.zeros(self._label_count, dtype='int')
votes = sparse.lil_matrix(
(predictions[0].shape[0], self._label_count), dtype='int')
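# tally every cluster classifier's positive predictions into the global per-label vote matrix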
for model in range(self.model_count_):
for label in range(len(self.partition_[model])):
votes[:, self.partition_[model][label]] = votes[
:, self.partition_[model][label]] + predictions[model][:, label]
voters[self.partition_[model][label]] += 1
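# normalise each label's votes by its voter count and round: the label is kept only when more than half of the eligible classifiers voted for it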
nonzeros = votes.nonzero()
for row, column in zip(nonzeros[0], nonzeros[1]):
votes[row, column] = np.round(
votes[row, column] / float(voters[column]))
return self._ensure_output_format(votes, enforce_sparse=False)
def predict_proba(self, X):
raise NotImplementedError("The voting scheme does not define a method for calculating probabilities")