# Source code for skmultilearn.embedding.clems
from sklearn.neighbors import NearestNeighbors
from sklearn.base import BaseEstimator
from copy import copy
from ._mdsw import _MDSW
import numpy as np
import scipy.sparse as sp
# inspired by implementation by Kuan-Hao Huang
# https://github.com/ej0cl6/csmlc
class CLEMS(BaseEstimator):
    """Embed the label space with a cost-sensitive, MDS-based embedder

    Every distinct label combination found in the training data is embedded
    with the weighted multidimensional scaling implemented by ``_MDSW``, using
    the square root of a user-supplied cost between label vectors as the
    target dissimilarity. The embedding dimension is set to the number of
    labels at fit time.

    Parameters
    ----------
    measure: Callable
        a cost function executed on two label vectors; each argument is a
        single row of the label matrix. The resulting cost must be
        non-negative because its square root is taken.
    is_score: boolean
        set to True if measure is a score function (higher value is better),
        False if loss function (lower is better). A score is converted to a
        cost as ``1 - measure(a, b)``.
    params: dict or None
        parameters passed to the embedder, don't use the ``n_components``,
        ``n_uq``, ``uq_weight`` and ``dissimilarity`` parameters, this class
        will set them at fit

    Attributes
    ----------
    knn_ : NearestNeighbors
        1-nearest-neighbour index fitted on the unique label combinations
    embedder_ : _MDSW
        the fitted embedder


    Example code for using this embedder looks like this:

    .. code-block:: python

        from skmultilearn.embedding import CLEMS, EmbeddingClassifier
        from sklearn.ensemble import RandomForestRegressor
        from skmultilearn.adapt import MLkNN
        from sklearn.metrics import accuracy_score

        clf = EmbeddingClassifier(
            CLEMS(accuracy_score, True),
            RandomForestRegressor(n_estimators=10),
            MLkNN(k=5)
        )

        clf.fit(X_train, y_train)

        predictions = clf.predict(X_test)
    """

    def __init__(self, measure, is_score=False, params=None):
        # Constructor arguments are stored unmodified under their own names:
        # BaseEstimator.get_params() (and therefore repr() and clone()) reads
        # each of them back with getattr(). The score -> cost conversion is
        # deferred to _cost() instead of wrapping `measure` in a lambda here.
        self.measure = measure
        self.is_score = is_score

        if params is None:
            params = {}

        self.params = params

    def _cost(self, first, second):
        """Return the cost between two label vectors, converting scores to costs"""
        value = self.measure(first, second)
        if self.is_score:
            return 1 - value
        return value

    def fit(self, X, y):
        """Fits the embedder to data

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix
        y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments

        Returns
        -------
        self
            fitted instance of self
        """
        self.fit_transform(X, y)
        return self

    def fit_transform(self, X, y):
        """Fit the embedder and transform the output space

        Parameters
        ----------
        X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features)
            input feature matrix
        y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels)
            binary indicator matrix with label assignments

        Returns
        -------
        X, y_embedded
            results of the embedding, input and output space; ``X`` is
            returned unchanged and ``y_embedded`` has one row per sample
        """
        # get unique label combinations
        if sp.issparse(y):
            idx = np.unique(y.tolil().rows, return_index=True)[1]
        else:
            idx = np.unique(y, axis=0, return_index=True)[1]

        y_unique = y[idx]
        n_unique = y_unique.shape[0]

        # map every sample to the index of its own label combination and
        # count how many samples share each combination (used as weights)
        self.knn_ = NearestNeighbors(n_neighbors=1)
        self.knn_.fit(y_unique)

        nearest_points = self.knn_.kneighbors(y)[1][:, 0]
        nearest_points_counts = np.unique(nearest_points, return_counts=True)[1]

        # calculate delta matrix
        # Only the two off-diagonal n_unique x n_unique blocks are filled:
        # entry (i, n_unique + j) holds sqrt(cost(y_i, y_j)) and is mirrored
        # to (n_unique + j, i). The measure is evaluated for both (i, j) and
        # (j, i) because it is not assumed to be symmetric.
        # NOTE(review): following the referenced csmlc implementation, the
        # first n_unique points presumably represent the embedded label
        # vectors and the last n_unique the candidate vectors - confirm
        # against _MDSW.
        delta = np.zeros((2 * n_unique, 2 * n_unique))
        for i in range(n_unique):
            for j in range(n_unique):
                delta[i, n_unique + j] = np.sqrt(
                    self._cost(y_unique[None, i], y_unique[None, j])
                )
                delta[n_unique + j, i] = delta[i, n_unique + j]

        # calculate MDS embedding
        # shallow-copy so the user-supplied dict is never mutated; tolerate
        # params having been reset to None through set_params()
        params = copy(self.params) if self.params is not None else {}
        params['n_components'] = y.shape[1]
        params['n_uq'] = n_unique
        params['uq_weight'] = nearest_points_counts
        params['dissimilarity'] = "precomputed"
        self.embedder_ = _MDSW(**params)

        y_unique_embedded = self.embedder_.fit(delta).embedding_

        # Each sample receives the embedding of its own label combination.
        # Indexing with the label-space neighbours computed above replaces a
        # nearest-neighbour query that compared raw label vectors against
        # embedding-space vectors, which are different spaces.
        return X, y_unique_embedded[nearest_points]