Source code for skmultilearn.cluster.networkx
from __future__ import absolute_import
import community
import networkx as nx
from networkx.algorithms.community import asyn_lpa_communities
import numpy as np
from .base import LabelGraphClustererBase
from .helpers import _membership_to_list_of_communities
class NetworkXLabelGraphClusterer(LabelGraphClustererBase):
    """Cluster label space with NetworkX community detection

    This clusterer constructs a NetworkX representation of the Label Graph generated by graph builder and detects
    communities in it using methods from the NetworkX library. Detected communities are converted to
    a label space clustering.

    Parameters
    ----------
    graph_builder: a GraphBuilderBase inherited transformer
        the graph builder to provide the adjacency matrix and weight map for the underlying graph
    method: string
        the community detection method to use, this clusterer supports the following community detection methods:

        +----------------------+--------------------------------------------------------------------------------+
        | Method name string   | Description                                                                    |
        +----------------------+--------------------------------------------------------------------------------+
        | louvain_             | Detecting communities with largest modularity using incremental greedy search  |
        +----------------------+--------------------------------------------------------------------------------+
        | label_propagation_   | Detecting communities from multiple async label propagation on the graph       |
        +----------------------+--------------------------------------------------------------------------------+

        Any value other than ``'louvain'`` selects label propagation.

        .. _louvain: https://python-louvain.readthedocs.io/en/latest/
        .. _label_propagation: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.label_propagation.asyn_lpa_communities.html

    Attributes
    ----------
    graph_ : networkx.Graph
        the networkx Graph object containing the graph representation of graph builder's adjacency matrix and weights
    weights_ : { 'weight' : list of values in edge order of graph edges }
        edge weights stored in a format recognizable by the networkx module

    References
    ----------
    If you use this clusterer please cite the NetworkX paper and the clustering paper:

    .. code :: latex

        @unknown{networkx,
            author = {Hagberg, Aric and Swart, Pieter and S Chult, Daniel},
            year = {2008},
            month = {01},
            title = {Exploring Network Structure, Dynamics, and Function Using NetworkX},
            booktitle = {Proceedings of the 7th Python in Science Conference}
        }

        @article{blondel2008fast,
            title={Fast unfolding of communities in large networks},
            author={Blondel, Vincent D and Guillaume, Jean-Loup and Lambiotte, Renaud and Lefebvre, Etienne},
            journal={Journal of statistical mechanics: theory and experiment},
            volume={2008},
            number={10},
            pages={P10008},
            year={2008},
            publisher={IOP Publishing}
        }

    Examples
    --------
    An example code for using this clusterer with a classifier looks like this:

    .. code-block:: python

        from sklearn.ensemble import RandomForestClassifier
        from skmultilearn.problem_transform import LabelPowerset
        from skmultilearn.cluster import NetworkXLabelGraphClusterer, LabelCooccurrenceGraphBuilder
        from skmultilearn.ensemble import LabelSpacePartitioningClassifier

        # construct base forest classifier
        base_classifier = RandomForestClassifier(n_estimators=1000)

        # construct a graph builder that will include
        # label relations weighted by how many times they
        # co-occurred in the data, without self-edges
        graph_builder = LabelCooccurrenceGraphBuilder(
            weighted = True,
            include_self_edges = False
        )

        # setup problem transformation approach with sparse matrices for random forest
        problem_transform_classifier = LabelPowerset(classifier=base_classifier,
            require_dense=[False, False])

        # setup the clusterer to use, we selected the modularity-based approach
        clusterer = NetworkXLabelGraphClusterer(graph_builder=graph_builder, method='louvain')

        # setup the ensemble metaclassifier
        classifier = LabelSpacePartitioningClassifier(problem_transform_classifier, clusterer)

        # train
        classifier.fit(X_train, y_train)

        # predict
        predictions = classifier.predict(X_test)

    For more use cases see `the label relations exploration guide <../labelrelations.ipynb>`_.
    """

    def __init__(self, graph_builder, method):
        """Initializes the clusterer

        Parameters
        ----------
        graph_builder: a GraphBuilderBase inherited transformer
            Class used to provide an underlying graph for NetworkX
        method: string
            name of the community detection method, stored as :code:`self.method`
            and consulted by :code:`fit_predict`
        """
        super(NetworkXLabelGraphClusterer, self).__init__(graph_builder)
        self.method = method

    @staticmethod
    def _communities_to_array(communities):
        """Convert a sequence of label communities into a numpy array

        Communities usually differ in size. Recent NumPy releases refuse to
        build an array from such ragged nested sequences implicitly, so in that
        case a one-dimensional object array holding one list per community is
        built explicitly. When every community has the same size (or there are
        none) the plain :code:`np.array` conversion is kept, which yields the
        same regular array as before.

        Parameters
        ----------
        communities : iterable of iterables of label indexes
            one inner iterable per detected community

        Returns
        -------
        numpy.ndarray
            the communities as an array, one entry (row) per community
        """
        communities = [list(members) for members in communities]
        sizes = set(len(members) for members in communities)

        if len(sizes) <= 1:
            # rectangular or empty input converts without ambiguity
            return np.array(communities)

        # fill element by element so each list is stored as a single object
        # instead of being broadcast across the array
        result = np.empty(len(communities), dtype=object)
        for index, members in enumerate(communities):
            result[index] = members
        return result

    def fit_predict(self, X, y):
        """Performs clustering on y and returns list of label lists

        Builds a label graph using the provided graph builder's `transform` method
        on `y` and then detects communities using the selected `method`.

        Sets :code:`self.weights_` and :code:`self.graph_`.

        Parameters
        ----------
        X : None
            currently unused, left for scikit compatibility
        y : scipy.sparse
            label space of shape :code:`(n_samples, n_labels)`

        Returns
        -------
        array of arrays of label indexes (numpy.ndarray)
            label space division, each sublist represents labels that are in that community
        """
        edge_map = self.graph_builder.transform(y)

        if self.graph_builder.is_weighted:
            self.weights_ = dict(weight=list(edge_map.values()))
        else:
            self.weights_ = dict(weight=None)

        label_count = y.shape[1]

        # every label becomes a node, even if no edge touches it
        self.graph_ = nx.Graph()
        self.graph_.add_nodes_from(range(label_count))
        for edge, weight in edge_map.items():
            self.graph_.add_edge(edge[0], edge[1], weight=weight)

        if self.method == 'louvain':
            partition_dict = community.best_partition(self.graph_)
            # community id of each label, in label index order
            memberships = [partition_dict[label] for label in range(label_count)]
            communities = _membership_to_list_of_communities(
                memberships,
                1 + max(memberships)
            )
        else:
            # any other method name falls back to asynchronous label propagation
            communities = [
                list(members)
                for members in asyn_lpa_communities(self.graph_, 'weight')
            ]

        return self._communities_to_array(communities)