# Source code for skmultilearn.ensemble.voting

import numpy as np
from builtins import range
from builtins import zip
from scipy import sparse

from .partition import LabelSpacePartitioningClassifier


class MajorityVotingClassifier(LabelSpacePartitioningClassifier):
    """Majority Voting ensemble classifier

    Divides the label space using provided clusterer class, trains a provided
    base classifier type classifier for each subset and assigns a label to an
    instance if more than half of all classifiers (majority) from clusters
    that contain the label assigned the label to the instance.

    Parameters
    ----------
    classifier : :class:`~sklearn.base.BaseEstimator`
        the base classifier that will be used in a class, will be
        automatically put under :code:`self.classifier`.
    clusterer : :class:`~skmultilearn.cluster.LabelSpaceClustererBase`
        object that partitions the output space, will be automatically
        put under :code:`self.clusterer`.
    require_dense : [bool, bool]
        whether the base classifier requires [input, output] matrices
        in dense representation, will be automatically
        put under :code:`self.require_dense`.

    Attributes
    ----------
    model_count_ : int
        number of trained models, in this classifier equal to the number
        of partitions
    partition_ : List[List[int]], shape=(`model_count_`,)
        list of lists of label indexes, used to index the output space
        matrix, set in :meth:`_generate_partition` via :meth:`fit`
    classifiers : List[:class:`~sklearn.base.BaseEstimator`], shape=(`model_count_`,)
        list of classifiers trained per partition, set in :meth:`fit`

    Examples
    --------
    Here's an example of building an overlapping ensemble of chains

    .. code-block:: python

        from skmultilearn.ensemble import MajorityVotingClassifier
        from skmultilearn.cluster import FixedLabelSpaceClusterer
        from skmultilearn.problem_transform import ClassifierChain
        from sklearn.naive_bayes import GaussianNB

        classifier = MajorityVotingClassifier(
            clusterer=FixedLabelSpaceClusterer(clusters=[[1, 2, 3], [0, 2, 5], [4, 5]]),
            classifier=ClassifierChain(classifier=GaussianNB())
        )
        classifier.fit(X_train, y_train)
        predictions = classifier.predict(X_test)

    More advanced examples can be found in `the label relations exploration
    guide <../labelrelations.ipynb>`_
    """

    def __init__(self, classifier=None, clusterer=None, require_dense=None):
        super(MajorityVotingClassifier, self).__init__(
            classifier=classifier, clusterer=clusterer, require_dense=require_dense
        )

    def predict(self, X):
        """Predict label assignments for X

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse.csc_matrix
            input features of shape :code:`(n_samples, n_features)`

        Returns
        -------
        scipy.sparse of float
            binary indicator matrix with label assignments with shape
            :code:`(n_samples, n_labels)`
        """
        # Per-partition predictions, coerced to sparse CSC so that column
        # slicing below is cheap.
        predictions = [
            self._ensure_input_format(self._ensure_input_format(
                c.predict(X)), sparse_format='csc', enforce_sparse=True)
            for c in self.classifiers_
        ]

        # voters[j] = how many partitions (models) contain label j, i.e. how
        # many classifiers get to vote on that label.
        voters = np.zeros(self._label_count, dtype='int')
        # votes[i, j] = number of positive votes label j received for sample i.
        # lil_matrix is used because it supports efficient incremental updates.
        votes = sparse.lil_matrix(
            (predictions[0].shape[0], self._label_count), dtype='int')
        for model in range(self.model_count_):
            for label in range(len(self.partition_[model])):
                # Column `label` of this model's prediction corresponds to the
                # global label index partition_[model][label].
                votes[:, self.partition_[model][label]] = votes[
                    :, self.partition_[model][label]] + predictions[model][:, label]
                voters[self.partition_[model][label]] += 1

        # Majority decision: round the fraction of positive votes to 0/1.
        # NOTE: np.round rounds halves to even, so an exact 50% split
        # (e.g. 1 of 2 voters) rounds to 0 — "more than half" wins.
        nonzeros = votes.nonzero()
        for row, column in zip(nonzeros[0], nonzeros[1]):
            votes[row, column] = np.round(
                votes[row, column] / float(voters[column]))

        return self._ensure_output_format(votes, enforce_sparse=False)

    def predict_proba(self, X):
        """Probability estimates are not defined for majority voting.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse matrix
            input features (unused)

        Raises
        ------
        NotImplementedError
            always — the voting scheme has no probability semantics
        """
        # BUG FIX: the original raised ``NotImplemented`` — a non-exception
        # sentinel that is not callable, so the statement actually produced
        # ``TypeError: 'NotImplementedType' object is not callable`` instead
        # of the intended exception. ``NotImplementedError`` is correct.
        raise NotImplementedError(
            "The voting scheme does not define a method for calculating probabilities")