Source code for skmultilearn.cluster.matrix

from __future__ import absolute_import

import numpy as np

from .base import LabelSpaceClustererBase
from .helpers import _membership_to_list_of_communities


[docs]class MatrixLabelSpaceClusterer(LabelSpaceClustererBase): """Cluster the label space using a scikit-compatible matrix-based clusterer Parameters ---------- clusterer : sklearn.base.ClusterMixin a clonable instance of a scikit-compatible clusterer, will be automatically put under :code:`self.clusterer`. pass_input_space : bool (default is False) whether to take :code:`X` into consideration upon clustering, use only if you know that the clusterer can handle two parameters for clustering, will be automatically put under :code:`self.pass_input_space`. Example code for using this clusterer looks like this: .. code-block:: python from sklearn.ensemble import RandomForestClassifier from sklearn.cluster import KMeans from skmultilearn.problem_transform import LabelPowerset from skmultilearn.cluster import MatrixLabelSpaceClusterer from skmultilearn.ensemble import LabelSpacePartitioningClassifier # construct base forest classifier base_classifier = RandomForestClassifier(n_estimators=1030) # setup problem transformation approach with sparse matrices for random forest problem_transform_classifier = LabelPowerset(classifier=base_classifier, require_dense=[False, False]) # setup the clusterer clusterer = MatrixLabelSpaceClusterer(clusterer=KMeans(n_clusters=3)) # setup the ensemble metaclassifier classifier = LabelSpacePartitioningClassifier(problem_transform_classifier, clusterer) # train classifier.fit(X_train, y_train) # predict predictions = classifier.predict(X_test) """ def __init__(self, clusterer=None, pass_input_space=False): super(MatrixLabelSpaceClusterer, self).__init__() self.clusterer = clusterer self.pass_input_space = pass_input_space
[docs] def fit_predict(self, X, y): """Clusters the output space The clusterer's :code:`fit_predict` method is executed on either X and y.T vectors (if :code:`self.pass_input_space` is true) or just y.T to detect clusters of labels. The transposition of label space is used to align with the format expected by scikit-learn classifiers, i.e. we cluster labels with label assignment vectors as samples. Returns ------- arrray of arrays of label indexes (numpy.ndarray) label space division, each sublist represents labels that are in that community """ if self.pass_input_space: result = self.clusterer.fit_predict(X, y.transpose()) else: result = self.clusterer.fit_predict(y.transpose()) return np.array(_membership_to_list_of_communities(result, 1 + max(result)))