# Source code for skmultilearn.adapt.brknn

from builtins import range
from ..base import MLClassifierBase
from ..utils import get_matrix_in_format
from sklearn.neighbors import NearestNeighbors
import scipy.sparse as sparse
import numpy as np


class _BinaryRelevanceKNN(MLClassifierBase):
    """Binary Relevance adapted kNN Multi-Label Classifier base class.

    Subclasses must implement ``_predict_variant(X)``, which turns the
    per-label neighbour vote fractions stored in ``confidences_`` into a
    binary label assignment.

    Parameters
    ----------
    k : int
        number of neighbours

    Attributes
    ----------
    train_labelspace : scipy.sparse matrix
        the training label matrix passed to ``fit``, converted with
        ``get_matrix_in_format(y, 'csc')``
    knn_ : an instance of sklearn.neighbors.NearestNeighbors
        the nearest neighbors index fitted on the training features
    neighbors_ : array of int, one row per predicted sample
        indices of the k nearest training samples, set by ``predict``
    confidences_ : matrix of float, one row per predicted sample
        fraction of the k neighbours that carry each label, set by
        ``predict``
    """

    def __init__(self, k=10):
        super(_BinaryRelevanceKNN, self).__init__()
        self.k = k  # Number of neighbours
        # NOTE(review): presumably consumed by MLClassifierBase for
        # parameter handling/cloning -- confirm against the base class.
        self.copyable_attrs = ['k']

    def fit(self, X, y):
        """Fit classifier with training data

        Internally this method uses a sparse CSC representation for y
        (:class:`scipy.sparse.csc_matrix`).

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse
            input features, can be a dense or sparse matrix of size
            :code:`(n_samples, n_features)`
        y : numpy.ndarray or scipy.sparse {0,1}
            binary indicator matrix with label assignments.

        Returns
        -------
        self
            fitted instance of self
        """
        self.train_labelspace = get_matrix_in_format(y, 'csc')
        self._n_samples = self.train_labelspace.shape[0]
        self._n_labels = self.train_labelspace.shape[1]
        # n_neighbors must be passed by keyword: recent scikit-learn releases
        # make the NearestNeighbors constructor parameters keyword-only.
        self.knn_ = NearestNeighbors(n_neighbors=self.k).fit(X)
        return self

    def predict(self, X):
        """Predict labels for X

        Stores the neighbour indices in ``neighbors_`` and the per-label
        vote fractions in ``confidences_`` before delegating the final
        label decision to ``_predict_variant``.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse.csc_matrix
            input features of shape :code:`(n_samples, n_features)`

        Returns
        -------
        scipy.sparse of int
            binary indicator matrix with label assignments with shape
            :code:`(n_samples, n_labels)`
        """
        self.neighbors_ = self.knn_.kneighbors(
            X, n_neighbors=self.k, return_distance=False)
        # For every sample: sum the label rows of its neighbours and divide
        # by k, giving the fraction of neighbours that carry each label.
        self.confidences_ = np.vstack([
            self.train_labelspace[n, :].tocsc().sum(axis=0) / self.k
            for n in self.neighbors_
        ])
        return self._predict_variant(X)


class BRkNNaClassifier(_BinaryRelevanceKNN):
    """Binary Relevance multi-label classifier based on k-Nearest Neighbors method.

    This version of the classifier assigns the labels that are assigned
    to at least half of the neighbors.

    Parameters
    ----------
    k : int
        number of neighbours

    Attributes
    ----------
    knn_ : an instance of sklearn.NearestNeighbors
        the nearest neighbors single-label classifier used underneath
    neighbors_ : array of arrays of int, shape = (n_samples, k)
        k neighbors of each sample
    confidences_ : matrix of float, shape = (n_samples, n_labels)
        label assignment confidences


    References
    ----------

    If you use this method please cite the relevant paper:

    .. code :: bibtex

         @inproceedings{EleftheriosSpyromitros2008,
            author = {Eleftherios Spyromitros, Grigorios Tsoumakas, Ioannis Vlahavas},
            booktitle = {Proc. 5th Hellenic Conference on Artificial Intelligence (SETN 2008)},
            title = {An Empirical Study of Lazy Multilabel Classification Algorithms},
            year = {2008},
            location = {Syros, Greece}
         }

    Examples
    --------

    Here's a very simple example of using BRkNNaClassifier with a fixed
    number of neighbors:

    .. code :: python

        from skmultilearn.adapt import BRkNNaClassifier

        classifier = BRkNNaClassifier(k=3)

        # train
        classifier.fit(X_train, y_train)

        # predict
        predictions = classifier.predict(X_test)


    You can also use :class:`~sklearn.model_selection.GridSearchCV` to
    find an optimal set of parameters:

    .. code :: python

        from skmultilearn.adapt import BRkNNaClassifier
        from sklearn.model_selection import GridSearchCV

        parameters = {'k': range(1,3)}
        score = 'f1_macro'

        clf = GridSearchCV(BRkNNaClassifier(), parameters, scoring=score)
        clf.fit(X, y)

    """

    def _predict_variant(self, X):
        """Assign every label carried by at least half of the neighbours.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse
            the samples being predicted; unused here because the decision
            only depends on ``self.confidences_``

        Returns
        -------
        scipy.sparse.csr_matrix of int
            binary indicator matrix with one row per row of
            ``self.confidences_``
        """
        # TODO: find out if moving the sparsity to compute confidences_ boosts speed
        # An explicit threshold is used instead of np.rint: rint rounds
        # halves to the nearest even value, so a confidence of exactly 0.5
        # (label present in exactly half of the neighbours) became 0 and the
        # label was dropped, contradicting the "at least half" rule.
        at_least_half = np.asarray(self.confidences_) >= 0.5
        return sparse.csr_matrix(at_least_half, dtype='i8')
class BRkNNbClassifier(_BinaryRelevanceKNN):
    """Binary Relevance multi-label classifier based on k-Nearest Neighbors method.

    This version of the classifier assigns the most popular m labels of
    the neighbors, where m is the average number of labels assigned to
    the object's neighbors.

    Parameters
    ----------
    k : int
        number of neighbours

    Attributes
    ----------
    knn_ : an instance of sklearn.NearestNeighbors
        the nearest neighbors single-label classifier used underneath
    neighbors_ : array of arrays of int, shape = (n_samples, k)
        k neighbors of each sample
    confidences_ : matrix of float, shape = (n_samples, n_labels)
        label assignment confidences


    References
    ----------

    If you use this method please cite the relevant paper:

    .. code :: bibtex

         @inproceedings{EleftheriosSpyromitros2008,
            author = {Eleftherios Spyromitros, Grigorios Tsoumakas, Ioannis Vlahavas},
            booktitle = {Proc. 5th Hellenic Conference on Artificial Intelligence (SETN 2008)},
            title = {An Empirical Study of Lazy Multilabel Classification Algorithms},
            year = {2008},
            location = {Syros, Greece}
         }

    Examples
    --------

    Here's a very simple example of using BRkNNbClassifier with a fixed
    number of neighbors:

    .. code :: python

        from skmultilearn.adapt import BRkNNbClassifier

        classifier = BRkNNbClassifier(k=3)

        # train
        classifier.fit(X_train, y_train)

        # predict
        predictions = classifier.predict(X_test)


    You can also use :class:`~sklearn.model_selection.GridSearchCV` to
    find an optimal set of parameters:

    .. code :: python

        from skmultilearn.adapt import BRkNNbClassifier
        from sklearn.model_selection import GridSearchCV

        parameters = {'k': range(1,3)}
        score = 'f1_macro'

        clf = GridSearchCV(BRkNNbClassifier(), parameters, scoring=score)
        clf.fit(X, y)

    """

    def _predict_variant(self, X):
        """Assign each sample the m most frequent labels among its neighbours.

        For every sample, m is the rounded average number of labels carried
        by its neighbours; the m labels with the highest confidence are
        assigned. Ties are broken in favour of the lower label index.

        Parameters
        ----------
        X : numpy.ndarray or scipy.sparse
            the samples being predicted; only ``X.shape[0]`` is used

        Returns
        -------
        scipy.sparse.lil_matrix of int
            binary indicator matrix of shape ``(X.shape[0], n_labels)``
        """
        # Work on a plain 2-D array so row indexing yields 1-D vectors
        # regardless of whether confidences_ is an ndarray or a np.matrix.
        confidences = np.asarray(self.confidences_)
        n_samples = X.shape[0]
        prediction = sparse.lil_matrix((n_samples, self._n_labels), dtype='i8')

        for i in range(n_samples):
            neighbor_ids = self.neighbors_[i]
            # Rounded mean label count of this sample's neighbours.
            avg_labels = int(np.average(
                self.train_labelspace[neighbor_ids, :].sum(axis=1)).round())
            # Clamp to the number of available labels.
            m = min(avg_labels, self._n_labels)
            if m <= 0:
                # Neighbours carry (on average) no labels: assign none.
                # A negative-zero slice such as lst[-0:] would otherwise
                # select every label.
                continue
            # Stable descending sort so the first m entries really are the
            # m highest-confidence labels for this sample.
            ranked = np.argsort(-confidences[i], kind='mergesort')
            for j in ranked[:m]:
                prediction[i, int(j)] = 1

        return prediction