Source code for skmultilearn.ensemble.rakeld

import numpy as np

from .partition import LabelSpacePartitioningClassifier
from ..cluster.random import RandomLabelSpaceClusterer
from ..problem_transform import LabelPowerset
from ..base import MLClassifierBase


class RakelD(MLClassifierBase):
    """Distinct RAndom k-labELsets multi-label classifier.

    Divides the label space into equal partitions of size k, trains a Label
    Powerset classifier per partition and predicts by combining the results
    of all trained classifiers.

    Parameters
    ----------
    base_classifier : sklearn.base
        the base classifier that will be used in a class, will be
        automatically put under :code:`self.classifier` for future access.
    base_classifier_require_dense : [bool, bool]
        whether the base classifier requires [input, output] matrices
        in dense representation, will be automatically
        put under :code:`self.require_dense`
    labelset_size : int
        the desired size of each of the partitions, parameter k according
        to paper. Default is 3, according to paper it has the best results

    Attributes
    ----------
    _label_count : int
        the number of labels the classifier is fit to, set by :meth:`fit`
    model_count_ : int
        the number of sub classifiers trained, set by :meth:`fit`
    classifier_ : :class:`skmultilearn.ensemble.LabelSpacePartitioningClassifier`
        the underneath classifier that performs the label space partitioning
        using a random clusterer
        :class:`skmultilearn.cluster.random.RandomLabelSpaceClusterer`,
        set by :meth:`fit`

    References
    ----------
    If you use this class please cite the paper introducing the method:

    .. code :: latex

        @ARTICLE{5567103,
            author={G. Tsoumakas and I. Katakis and I. Vlahavas},
            journal={IEEE Transactions on Knowledge and Data Engineering},
            title={Random k-Labelsets for Multilabel Classification},
            year={2011},
            volume={23},
            number={7},
            pages={1079-1089},
            doi={10.1109/TKDE.2010.164},
            ISSN={1041-4347},
            month={July},
        }

    Examples
    --------
    Here's a simple example of how to use this class with a base classifier
    from scikit-learn to teach non-overlapping classifiers each trained on
    at most four labels:

    .. code :: python

        from sklearn.naive_bayes import GaussianNB
        from skmultilearn.ensemble import RakelD

        classifier = RakelD(
            base_classifier=GaussianNB(),
            base_classifier_require_dense=[True, True],
            labelset_size=4
        )

        classifier.fit(X_train, y_train)
        prediction = classifier.predict(X_test)
    """

    def __init__(self, base_classifier=None, labelset_size=3,
                 base_classifier_require_dense=None):
        super(RakelD, self).__init__()

        self.labelset_size = labelset_size
        self.base_classifier = base_classifier
        self.base_classifier_require_dense = base_classifier_require_dense

        # Names of the constructor parameters stored on the instance.
        self.copyable_attrs = [
            'base_classifier',
            'base_classifier_require_dense',
            'labelset_size',
        ]

    def fit(self, X, y):
        """Fit classifier to multi-label data

        Builds a :class:`LabelSpacePartitioningClassifier` that trains one
        :class:`LabelPowerset` model per non-overlapping random label
        partition, stores it under :code:`self.classifier_` and fits it.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse
            input features, can be a dense or sparse matrix of size
            :code:`(n_samples, n_features)`
        y : numpy.ndarray or scipy.sparse {0,1}
            binary indicator matrix with label assignments, shape
            :code:`(n_samples, n_labels)`

        Returns
        -------
        fitted instance of self
        """
        self._label_count = y.shape[1]

        # Force true division so the ceiling is meaningful even where ``/``
        # on two ints floors (Python 2 without ``__future__.division``);
        # otherwise too few partitions would be requested.
        self.model_count_ = int(
            np.ceil(self._label_count / float(self.labelset_size))
        )

        self.classifier_ = LabelSpacePartitioningClassifier(
            classifier=LabelPowerset(
                classifier=self.base_classifier,
                require_dense=self.base_classifier_require_dense
            ),
            clusterer=RandomLabelSpaceClusterer(
                cluster_size=self.labelset_size,
                cluster_count=self.model_count_,
                allow_overlap=False
            ),
            require_dense=[False, False]
        )
        self.classifier_.fit(X, y)

        # Return this estimator (not the wrapped one), as documented, so
        # that ``RakelD(...).fit(X, y)`` yields the RakelD instance.
        return self

    def predict(self, X):
        """Predict label assignments

        Delegates to the classifier stored in :code:`self.classifier_`,
        which is created by :meth:`fit`.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse.csc_matrix
            input features of shape :code:`(n_samples, n_features)`

        Returns
        -------
        scipy.sparse of int
            binary indicator matrix with label assignments with shape
            :code:`(n_samples, n_labels)`
        """
        return self.classifier_.predict(X)

    def predict_proba(self, X):
        """Predict label probabilities

        Delegates to the classifier stored in :code:`self.classifier_`,
        which is created by :meth:`fit`.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse.csc_matrix
            input features of shape :code:`(n_samples, n_features)`

        Returns
        -------
        scipy.sparse of float
            matrix with probability of label assignment with shape
            :code:`(n_samples, n_labels)`
        """
        return self.classifier_.predict_proba(X)