# Authors: Grzegorz Kulakowski <grzegorz7w@gmail.com>
# License: BSD 3 clause
from skmultilearn.base import MLClassifierBase
import numpy as np
import scipy.sparse as sp
from scipy.linalg import norm
from scipy.sparse.linalg import inv as inv_sparse
from scipy.linalg import inv as inv_dense
class MLTSVM(MLClassifierBase):
"""Twin multi-Label Support Vector Machines
Parameters
----------
c_k : int
the empirical risk penalty parameter that determines the trade-off between the loss terms
sor_omega: float (default is 1.0)
the smoothing parameter
threshold : int (default is 1e-6)
threshold above which a label should be assigned
lambda_param : float (default is 1.0)
the regularization parameter
max_iteration : int (default is 500)
maximum number of iterations to use in successive overrelaxation
    References
    ----------
    If you use this classifier please cite the original paper introducing the method:

    .. code:: bibtex

        @article{chen2016mltsvm,
          title={MLTSVM: a novel twin support vector machine to multi-label learning},
          author={Chen, Wei-Jie and Shao, Yuan-Hai and Li, Chun-Na and Deng, Nai-Yang},
          journal={Pattern Recognition},
          volume={52},
          pages={61--74},
          year={2016},
          publisher={Elsevier}
        }
    Examples
    --------
    Here's a very simple example of using MLTSVM with a fixed empirical risk penalty parameter:

    .. code:: python

        from skmultilearn.adapt import MLTSVM

        classifier = MLTSVM(c_k=2**-1)

        # train
        classifier.fit(X_train, y_train)

        # predict
        predictions = classifier.predict(X_test)
    You can also use :class:`~sklearn.model_selection.GridSearchCV` to find an optimal set of parameters:

    .. code:: python

        from skmultilearn.adapt import MLTSVM
        from sklearn.model_selection import GridSearchCV

        parameters = {'c_k': [2**i for i in range(-5, 5, 2)]}
        score = 'f1_macro'

        clf = GridSearchCV(MLTSVM(), parameters, scoring=score)
        clf.fit(X, y)

        print(clf.best_params_, clf.best_score_)
        # output
        # {'c_k': 0.03125} 0.347518217573
"""
def __init__(self, c_k=0, sor_omega=1.0, threshold=1e-6, lambda_param=1.0, max_iteration=500):
        super(MLTSVM, self).__init__()
self.max_iteration = max_iteration
self.threshold = threshold
self.lambda_param = lambda_param # TODO: possibility to add different lambda to different labels
self.c_k = c_k
self.sor_omega = sor_omega
self.copyable_attrs = ['c_k', 'sor_omega', 'lambda_param', 'threshold', 'max_iteration']
    def fit(self, X, Y):
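        """Fit classifier with training data

        Parameters
        ----------
        X : dense or sparse matrix of shape (n_samples, n_features)
            input feature matrix
        Y : dense or sparse binary matrix of shape (n_samples, n_labels)
            binary indicator matrix with label assignments

        Returns
        -------
        self
            fitted instance of self
        """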
n_labels = Y.shape[1]
m = X.shape[1] # Count of features
self.wk_bk = np.zeros([n_labels, m + 1], dtype=float)
if sp.issparse(X):
identity_matrix = sp.identity(m + 1)
_inv = inv_sparse
else:
identity_matrix = np.identity(m + 1)
_inv = inv_dense
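        # Append a bias column of ones so each hyperplane's bias b_k is learned as the last entry of wk_bk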
X_bias = _hstack(X, np.ones((X.shape[0], 1), dtype=X.dtype))
self.iteration_count = []
for label in range(0, n_labels):
# Calculate the parameter Q for overrelaxation
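            # H_k holds the instances that carry label k, G_k those that do not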
H_k = _get_x_class_instances(X_bias, Y, label)
G_k = _get_x_noclass_instances(X_bias, Y, label)
            Q_knoPrefixGk = _inv((H_k.T).dot(H_k) + self.lambda_param * identity_matrix).dot(G_k.T)
            Q_k = G_k.dot(Q_knoPrefixGk)
            if sp.issparse(Q_k):
                Q_k = Q_k.toarray()
            Q_k = (Q_k + Q_k.T) / 2.0  # symmetrize against numerical asymmetry
            # Solve the dual quadratic problem for this label
alpha_k = self._successive_overrelaxation(self.sor_omega, Q_k)
if sp.issparse(X):
self.wk_bk[label] = -Q_knoPrefixGk.dot(alpha_k).T
else:
self.wk_bk[label] = (-np.dot(Q_knoPrefixGk, alpha_k)).T
        self.wk_norms = norm(self.wk_bk, axis=1)
        self.threshold_ = 1.0 / np.max(self.wk_norms)
        return self
    def predict(self, X):
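        """Predict labels for X

        A label is assigned when the signed quantity -(x . w_k + b_k),
        normalized by the norm of the augmented weight vector, falls below
        the threshold computed in fit.

        Parameters
        ----------
        X : dense or sparse matrix of shape (n_samples, n_features)
            input feature matrix

        Returns
        -------
        array of shape (n_samples, n_labels)
            binary indicator matrix with label assignments
        """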
X_with_bias = _hstack(X, np.ones((X.shape[0], 1), dtype=X.dtype))
wk_norms_multiplicated = self.wk_norms[np.newaxis, :] # change to form [[wk1, wk2, ..., wkk]]
all_distances = (-X_with_bias.dot(self.wk_bk.T)) / wk_norms_multiplicated
        predicted_y = np.where(all_distances < self.threshold_, 1, 0)
        # TODO: add an option to assign the closest label when a row receives no labels at all
return predicted_y
def _successive_overrelaxation(self, omegaW, Q):
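        """Solve a label's dual problem by successive overrelaxation (SOR)

        Repeats the component-wise update

            alpha_j <- alpha_j - omegaW / Q[j, j] * (Q[j, :] . alpha - 1)

        followed by clipping alpha to the box [0, c_k], until the change in
        alpha drops below ``self.threshold``, the change stops shrinking, or
        ``self.max_iteration`` sweeps have been performed.
        """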
# Initialization
        D = np.diag(Q)  # the diagonal of Q as a 1-D vector is enough here
        D_inv = 1.0 / D  # elementwise inverse, i.e. the diagonal of D^-1
small_l = Q.shape[1]
oldnew_alpha = np.zeros([small_l, 1]) # buffer
is_not_enough = True
was_going_down = False
last_alfa_norm_change = -1
nr_iter = 0
while is_not_enough: # do while
            oldAlpha = oldnew_alpha.copy()  # snapshot so the convergence check below measures the real change
            for j in range(0, small_l):  # Gauss-Seidel sweep: components before j already hold updated values
oldnew_alpha[j] = oldAlpha[j] - omegaW * D_inv[j] * (Q[j, :].T.dot(oldnew_alpha) - 1)
oldnew_alpha = oldnew_alpha.clip(0.0, self.c_k)
alfa_norm_change = norm(oldnew_alpha - oldAlpha)
if not was_going_down and last_alfa_norm_change > alfa_norm_change:
was_going_down = True
is_not_enough = alfa_norm_change > self.threshold and \
nr_iter < self.max_iteration \
and ((not was_going_down) or last_alfa_norm_change > alfa_norm_change)
# TODO: maybe add any(oldnew_alpha != oldAlpha)
last_alfa_norm_change = alfa_norm_change
nr_iter += 1
self.iteration_count.append(nr_iter)
return oldnew_alpha
def _get_x_noclass_instances(X, Y, label_class):
    if sp.issparse(Y):
        indices = np.where(Y[:, label_class].toarray() == 0)[0]
    else:
        indices = np.where(Y[:, label_class] == 0)[0]
    return X[indices, :]
def _get_x_class_instances(X, Y, label_class):
if sp.issparse(Y):
indices = Y[:, label_class].nonzero()[0]
else:
indices = np.nonzero(Y[:, label_class])[0]
return X[indices, :]
def _hstack(X, Y):
if sp.issparse(X):
return sp.hstack([X, Y], format=X.format)
else:
return np.hstack([X, Y])
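

# A minimal smoke-test sketch, not part of the original module: it fits
# MLTSVM on small random dense data and checks the prediction shape. The
# demo data and the parameter choice (c_k = 2**-1) are illustrative
# assumptions, not values recommended by the library.
if __name__ == '__main__':
    rng = np.random.RandomState(42)
    X_demo = rng.rand(20, 5)                      # 20 samples, 5 features
    Y_demo = (rng.rand(20, 3) > 0.5).astype(int)  # 3 binary labels
    demo_clf = MLTSVM(c_k=2 ** -1)
    demo_clf.fit(X_demo, Y_demo)
    print(demo_clf.predict(X_demo).shape)         # expected: (20, 3)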