from .voting import MajorityVotingClassifier
from ..cluster.random import RandomLabelSpaceClusterer
from ..problem_transform import LabelPowerset
from ..base import MLClassifierBase


class RakelO(MLClassifierBase):
    """Overlapping RAndom k-labELsets multi-label classifier

    Divides the label space into m subsets of size k, trains a Label Powerset
    classifier for each subset and assigns a label to an instance
    if more than half of all classifiers (majority) from clusters that contain the label
    assigned the label to the instance.

    Parameters
    ----------
    base_classifier : :class:`~sklearn.base.BaseEstimator`
        scikit-learn compatible base classifier, will be set under :code:`self.classifier.classifier`.
    base_classifier_require_dense : [bool, bool]
        whether the base classifier requires [input, output] matrices
        in dense representation, will be automatically
        set under :code:`self.classifier.require_dense`
    labelset_size : int
        the desired size of each of the partitions, parameter k according to the paper.
        The paper reports k = 3 as the best-performing value, so it is the default.
        Will be automatically set under :code:`self.labelset_size`.
    model_count : int
        the desired number of classifiers, parameter m according to the paper.
        The paper reports m = 2M (where M is the number of labels) as the best-performing value.
        Will be automatically set under :code:`self.model_count`.

    Attributes
    ----------
    classifier : :class:`~skmultilearn.ensemble.MajorityVotingClassifier`
        the voting classifier initialized with :class:`~skmultilearn.problem_transform.LabelPowerset` multi-label
        classifier with :code:`base_classifier` and :class:`~skmultilearn.cluster.random.RandomLabelSpaceClusterer`

    References
    ----------

    If you use this class please cite the paper introducing the method:

    .. code :: latex

        @ARTICLE{5567103,
            author={G. Tsoumakas and I. Katakis and I. Vlahavas},
            journal={IEEE Transactions on Knowledge and Data Engineering},
            title={Random k-Labelsets for Multilabel Classification},
            year={2011},
            volume={23},
            number={7},
            pages={1079-1089},
            doi={10.1109/TKDE.2010.164},
            ISSN={1041-4347},
            month={July},
        }

    Examples
    --------

    Here's a simple example of how to use this class with a base classifier from scikit-learn to train 6 classifiers,
    each on a quarter of the labels, which is certain to produce overlapping labelsets:

    .. code :: python

        from sklearn.naive_bayes import GaussianNB
        from skmultilearn.ensemble import RakelO

        classifier = RakelO(
            base_classifier=GaussianNB(),
            base_classifier_require_dense=[True, True],
            labelset_size=y_train.shape[1] // 4,
            model_count=6
        )

        classifier.fit(X_train, y_train)
        prediction = classifier.predict(X_test)
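
    As a follow-up, here is a minimal sketch of how such a prediction could be evaluated, assuming
    :code:`X_test` and :code:`y_test` come from an ordinary train/test split (they are not provided by this class):

    .. code :: python

        from sklearn.metrics import accuracy_score

        # subset accuracy: the fraction of samples whose complete label set is predicted exactly
        score = accuracy_score(y_test, prediction)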
"""

    def __init__(self, base_classifier=None, model_count=None, labelset_size=3, base_classifier_require_dense=None):
        super(RakelO, self).__init__()
        self.model_count = model_count
        self.labelset_size = labelset_size
        self.base_classifier = base_classifier
        self.base_classifier_require_dense = base_classifier_require_dense
        self.copyable_attrs = ['model_count', 'labelset_size',
                               'base_classifier_require_dense',
                               'base_classifier']

    def fit(self, X, y):
        """Fits classifier to training data

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix
        y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments

        Returns
        -------
        self
            fitted instance of self
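
        Examples
        --------

        A minimal, self-contained sketch on a toy dataset generated with scikit-learn; the base classifier
        and parameter values below are illustrative choices, not recommendations:

        .. code :: python

            from sklearn.datasets import make_multilabel_classification
            from sklearn.naive_bayes import GaussianNB
            from skmultilearn.ensemble import RakelO

            # toy multi-label problem with 6 labels
            X, y = make_multilabel_classification(n_samples=100, n_classes=6, random_state=0)

            classifier = RakelO(
                base_classifier=GaussianNB(),
                base_classifier_require_dense=[True, True],
                labelset_size=3,    # k from the paper
                model_count=12      # m = 2 * number of labels, as suggested in the paper
            )
            classifier.fit(X, y)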
"""
        # build the ensemble: one Label Powerset classifier per labelset, with
        # labelsets drawn at random (overlap allowed) and the per-label decisions
        # combined by majority voting
        self.classifier = MajorityVotingClassifier(
            classifier=LabelPowerset(
                classifier=self.base_classifier,
                require_dense=self.base_classifier_require_dense
            ),
            clusterer=RandomLabelSpaceClusterer(
                cluster_size=self.labelset_size,
                cluster_count=self.model_count,
                allow_overlap=True
            ),
            require_dense=[False, False]
        )
        return self.classifier.fit(X, y)

    def predict(self, X):
        """Predict labels for X

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix

        Returns
        -------
        :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments
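
        Examples
        --------

        A minimal sketch, assuming :code:`classifier` has already been fitted and :code:`X_test` is an
        illustrative test feature matrix:

        .. code :: python

            prediction = classifier.predict(X_test)
            # the result is sparse; convert to a dense numpy array if needed
            dense_prediction = prediction.toarray()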
"""
        return self.classifier.predict(X)

    def predict_proba(self, X):
        """Predict probabilities of label assignments for X

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix

        Returns
        -------
        :mod:`scipy.sparse` matrix of `float in [0.0, 1.0]`, shape=(n_samples, n_labels)
            matrix with label assignment probabilities
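
        Examples
        --------

        A minimal sketch of turning the probabilities into hard label assignments with a 0.5 threshold
        (an illustrative choice, not one this method prescribes), assuming :code:`classifier` is fitted
        and :code:`X_test` is an illustrative test feature matrix:

        .. code :: python

            probabilities = classifier.predict_proba(X_test)
            # threshold the dense probability matrix to obtain a binary indicator matrix
            assignments = (probabilities.toarray() > 0.5).astype(int)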
"""
        return self.classifier.predict_proba(X)