# Source code for skmultilearn.cluster.networkx

from __future__ import absolute_import

import community
import networkx as nx
from networkx.algorithms.community import asyn_lpa_communities
import numpy as np

from .base import LabelGraphClustererBase
from .helpers import _membership_to_list_of_communities


class NetworkXLabelGraphClusterer(LabelGraphClustererBase):
    """Cluster label space with NetworkX community detection

    This clusterer constructs a NetworkX representation of the Label Graph generated by graph builder and detects
    communities in it using methods from the NetworkX library. Detected communities are converted to
    a label space clustering.

    Parameters
    ----------
    graph_builder: a GraphBuilderBase inherited transformer
        the graph builder to provide the adjacency matrix and weight map for the underlying graph
    method: string
        the community detection method to use, this clusterer supports the following community detection methods:

        +----------------------+--------------------------------------------------------------------------------+
        | Method name string   |                             Description                                        |
        +----------------------+--------------------------------------------------------------------------------+
        | louvain_             | Detecting communities with largest modularity using incremental greedy search  |
        +----------------------+--------------------------------------------------------------------------------+
        | label_propagation_   | Detecting communities from multiple async label propagation on the graph       |
        +----------------------+--------------------------------------------------------------------------------+

        Any value other than ``'louvain'`` selects asynchronous label propagation.

        .. _louvain: https://python-louvain.readthedocs.io/en/latest/
        .. _label_propagation: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.label_propagation.asyn_lpa_communities.html


    Attributes
    ----------
    graph_ : networkx.Graph
        the networkx Graph object containing the graph representation of graph builder's adjacency matrix and weights
    weights_ : { 'weight' : list of values in edge order of graph edges }
        edge weights stored in a format recognizable by the networkx module, the list is replaced
        with :code:`None` when the graph builder is not weighted

    References
    ----------
    If you use this clusterer please cite the NetworkX paper and the clustering paper:

    .. code :: latex

        @inproceedings{networkx,
            author = {Hagberg, Aric and Swart, Pieter and Schult, Daniel},
            year = {2008},
            month = {01},
            title = {Exploring Network Structure, Dynamics, and Function Using NetworkX},
            booktitle = {Proceedings of the 7th Python in Science Conference}
        }

        @article{blondel2008fast,
          title={Fast unfolding of communities in large networks},
          author={Blondel, Vincent D and Guillaume, Jean-Loup and Lambiotte, Renaud and Lefebvre, Etienne},
          journal={Journal of statistical mechanics: theory and experiment},
          volume={2008},
          number={10},
          pages={P10008},
          year={2008},
          publisher={IOP Publishing}
        }


    Examples
    --------

    An example code for using this clusterer with a classifier looks like this:

    .. code-block:: python

        from sklearn.ensemble import RandomForestClassifier
        from skmultilearn.problem_transform import LabelPowerset
        from skmultilearn.cluster import NetworkXLabelGraphClusterer, LabelCooccurrenceGraphBuilder
        from skmultilearn.ensemble import LabelSpacePartitioningClassifier

        # construct base forest classifier
        base_classifier = RandomForestClassifier(n_estimators=1000)

        # construct a graph builder that will include
        # label relations weighted by how many times they
        # co-occurred in the data, without self-edges
        graph_builder = LabelCooccurrenceGraphBuilder(
            weighted = True,
            include_self_edges = False
        )

        # setup problem transformation approach with sparse matrices for random forest
        problem_transform_classifier = LabelPowerset(classifier=base_classifier,
            require_dense=[False, False])

        # setup the clusterer to use, we selected the modularity-based approach
        clusterer = NetworkXLabelGraphClusterer(graph_builder=graph_builder, method='louvain')

        # setup the ensemble metaclassifier
        classifier = LabelSpacePartitioningClassifier(problem_transform_classifier, clusterer)

        # train
        classifier.fit(X_train, y_train)

        # predict
        predictions = classifier.predict(X_test)

    For more use cases see `the label relations exploration guide <../labelrelations.ipynb>`_.

    """

    def __init__(self, graph_builder, method):
        """Initializes the clusterer

        Parameters
        ----------
        graph_builder: a GraphBuilderBase inherited transformer
            Class used to provide an underlying graph for NetworkX
        method: string
            name of the community detection method, :code:`'louvain'` selects the Louvain method,
            any other value selects asynchronous label propagation
        """
        super(NetworkXLabelGraphClusterer, self).__init__(graph_builder)
        self.method = method

    @staticmethod
    def _as_partition_array(communities):
        """Packs an iterable of communities into a numpy array without relying on ragged conversion

        Parameters
        ----------
        communities : iterable of iterables of int
            label indexes grouped per detected community

        Returns
        -------
        numpy.ndarray
            a regular array built by :code:`np.array` when all communities have the same size
            (or there are none), otherwise a one-dimensional object array holding one list per community
        """
        communities = [list(members) for members in communities]

        if len(set(len(members) for members in communities)) <= 1:
            # rectangular (or empty) input converts cleanly, same result as a plain np.array call
            return np.array(communities)

        # Communities of different sizes form a ragged sequence: np.array() on it is deprecated
        # and raises ValueError on NumPy >= 1.24, so the object array is filled explicitly.
        partition = np.empty(len(communities), dtype=object)
        for index, members in enumerate(communities):
            partition[index] = members
        return partition

    def fit_predict(self, X, y):
        """Performs clustering on y and returns list of label lists

        Builds a label graph using the provided graph builder's `transform` method
        on `y` and then detects communities using the selected `method`.

        Sets :code:`self.weights_` and :code:`self.graph_`.

        Parameters
        ----------
        X : None
            currently unused, left for scikit compatibility
        y : scipy.sparse
            label space of shape :code:`(n_samples, n_labels)`

        Returns
        -------
        array of arrays of label indexes (numpy.ndarray)
            label space division, each sublist represents labels that are in that community;
            a two-dimensional array when all communities have the same size, otherwise
            a one-dimensional object array of lists
        """
        edge_map = self.graph_builder.transform(y)

        if self.graph_builder.is_weighted:
            self.weights_ = dict(weight=list(edge_map.values()))
        else:
            self.weights_ = dict(weight=None)

        label_count = y.shape[1]

        self.graph_ = nx.Graph()
        # every label has to be a node, the louvain branch below looks up
        # each label index in the partition computed on this graph
        self.graph_.add_nodes_from(range(label_count))

        for edge, weight in edge_map.items():
            self.graph_.add_edge(edge[0], edge[1], weight=weight)

        if self.method == 'louvain':
            partition_dict = community.best_partition(self.graph_)
            memberships = [partition_dict[i] for i in range(label_count)]
            communities = _membership_to_list_of_communities(
                memberships,
                1 + max(memberships)
            )
        else:
            # any other method name falls back to asynchronous label propagation
            communities = asyn_lpa_communities(self.graph_, 'weight')

        return self._as_partition_array(communities)