Source code for skmultilearn.embedding.classifier

from skmultilearn.base import ProblemTransformationBase
import numpy as np
import scipy.sparse as sp
from copy import copy

[docs]class EmbeddingClassifier(ProblemTransformationBase): """Embedding-based classifier Implements a general scheme presented in LNEMLC: label network embeddings for multi-label classification. The classifier embeds the label space with the embedder, trains a set of single-variate or a multi-variate regressor for embedding unseen cases and a base classifier to predict labels based on input features and the embeddings. Parameters ---------- embedder : :class:`~sklearn.base.BaseEstimator` the class to embed the label space regressor : :class:`~sklearn.base.BaseEstimator` the base regressor to predict embeddings from input features classifier : :class:`~sklearn.base.BaseEstimator` the base classifier to predict labels from input features and embeddings regressor_per_dimension : bool whether to train one joint multi-variate regressor (False) or per dimension single-variate regressor (True) require_dense : [bool, bool], optional whether the base classifier requires dense representations for input features and classes/labels matrices in fit/predict. Attributes ---------- n_regressors_ : int number of trained regressors partition_ : List[List[int]], shape=(`model_count_`,) list of lists of label indexes, used to index the output space matrix, set in :meth:`_generate_partition` via :meth:`fit` classifiers_ : List[:class:`~sklearn.base.BaseEstimator`] of shape `model_count` list of classifiers trained per partition, set in :meth:`fit` If you use this classifier please cite the relevant embedding method paper and the label network embedding for multi-label classification paper: .. code :: bibtex @article{zhang2007ml, title={ML-KNN: A lazy learning approach to multi-label learning}, author={Zhang, Min-Ling and Zhou, Zhi-Hua}, journal={Pattern recognition}, volume={40}, number={7}, pages={2038--2048}, year={2007}, publisher={Elsevier} } Example ------- An example use case for EmbeddingClassifier: .. code-block:: python from skmultilearn.embedding import SKLearnEmbedder, EmbeddingClassifier from sklearn.manifold import SpectralEmbedding from sklearn.ensemble import RandomForestRegressor from skmultilearn.adapt import MLkNN clf = EmbeddingClassifier( SKLearnEmbedder(SpectralEmbedding(n_components = 10)), RandomForestRegressor(n_estimators=10), MLkNN(k=5) ) clf.fit(X_train, y_train) predictions = clf.predict(X_test) """ def __init__(self, embedder, regressor, classifier, regressor_per_dimension=False, require_dense=None): super(EmbeddingClassifier, self).__init__() self.embedder = embedder self.regressor = regressor self.classifier = classifier self.regressor_per_dimension = regressor_per_dimension if require_dense is None: require_dense = [True, True] self.require_dense = require_dense self.copyable_attrs = ['embedder', 'regressor', 'classifier', 'regressor_per_dimension', 'require_dense']
[docs] def fit(self, X, y): """Fits classifier to training data Parameters ---------- X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features) input feature matrix y : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels) binary indicator matrix with label assignments Returns ------- self fitted instance of self """ X = self._ensure_input_format(X) y = self._ensure_input_format(y) y_embedded = self.embedder.fit_transform(X, y)[1] X_y_embedded = self._concatenate_matrices(X, y_embedded) if self.regressor_per_dimension: self.n_regressors_ = y_embedded.shape[1] self.regressors_ = [None for _ in range(self.n_regressors_)] for i in range(self.n_regressors_): self.regressors_[i] = copy(self.regressor) self.regressors_[i].fit(X, y_embedded[:, i]) else: self.n_regressors_ = 1 self.regressor.fit(X, y_embedded) self.classifier.fit(X_y_embedded, y) return self
[docs] def predict(self, X): """Predict labels for X Parameters ---------- X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features) input feature matrix Returns ------- :mod:`scipy.sparse` matrix of `{0, 1}`, shape=(n_samples, n_labels) binary indicator matrix with label assignments """ X = self._ensure_input_format(X) X_y_embedded = self._predict_embedding(X) return self.classifier.predict(X_y_embedded)
[docs] def predict_proba(self, X): """Predict probabilities of label assignments for X Parameters ---------- X : `array_like`, :class:`numpy.matrix` or :mod:`scipy.sparse` matrix, shape=(n_samples, n_features) input feature matrix Returns ------- :mod:`scipy.sparse` matrix of `float in [0.0, 1.0]`, shape=(n_samples, n_labels) matrix with label assignment probabilities """ X_y_embedded = self._predict_embedding(X) return self.classifier.predict_proba(X_y_embedded)
def _concatenate_matrices(self, X, y_embedded): X = self._ensure_input_format(X) y_embedded = self._ensure_input_format(y_embedded) if sp.issparse(X): X_y_embedded = sp.hstack([X, y_embedded]) else: X_y_embedded = np.hstack([X, y_embedded]) return X_y_embedded def _predict_embedding(self, X): if self.regressor_per_dimension: y_embedded = [self.regressors_[i].predict(X) for i in range(self.n_regressors_)] if sp.issparse(X): y_embedded=sp.csr_matrix(y_embedded).T else: y_embedded=np.matrix(y_embedded).T else: y_embedded = self.regressor.predict(X) return self._concatenate_matrices(X, y_embedded)