Source code for skmultilearn.base.problem_transformation
import numpy as np
from .base import MLClassifierBase
from ..utils import matrix_creation_function_for_format
from scipy.sparse import issparse, csr_matrix
class ProblemTransformationBase(MLClassifierBase):
    """Base class providing common functions for multi-label classifiers
    that follow the problem transformation approach.

    Problem transformation is the approach in which the
    original multi-label classification problem is transformed into one
    or more single-label problems, which are then solved by single-class
    or multi-class classifiers.

    Scikit-multilearn provides a number of such methods:

    - :class:`BinaryRelevance` - trains one single-label binary classifier
      per label and combines the per-label results
    - :class:`ClassifierChain` - trains one binary classifier per label,
      arranged in a chain in which each classifier also receives the labels
      handled earlier in the chain
    - :class:`LabelPowerset` - treats every distinct combination of labels
      as one class of a single multi-class problem

    Parameters
    ----------
    classifier : scikit classifier type
        The base classifier that will be used in a class, will be
        automagically put under self.classifier for future access.
    require_dense : bool or sequence of two bools, optional (default is None)
        Whether the base classifier requires input as dense arrays.
        A single boolean is expanded to ``[value, value]``; a two-element
        sequence of booleans is stored as given. When ``None``, the value
        defaults to ``[False, False]`` if ``classifier`` is itself an
        :class:`MLClassifierBase` and to ``[True, True]`` otherwise.
        NOTE(review): the two flags presumably apply to the feature matrix
        and the label matrix respectively - confirm against the callers.

    Raises
    ------
    AssertionError
        If ``require_dense`` is neither ``None``, a boolean, nor a
        two-element sequence of booleans.
    """

    def __init__(self, classifier=None, require_dense=None):
        super(ProblemTransformationBase, self).__init__()

        # NOTE(review): presumably read by MLClassifierBase to know which
        # attributes to copy/expose - confirm in the base class.
        self.copyable_attrs = ["classifier", "require_dense"]
        self.classifier = classifier

        if require_dense is None:
            # No explicit preference: only a wrapped multi-label classifier
            # is given sparse input by default.
            if isinstance(self.classifier, MLClassifierBase):
                self.require_dense = [False, False]
            else:
                self.require_dense = [True, True]
        elif isinstance(require_dense, bool):
            self.require_dense = [require_dense, require_dense]
        else:
            is_bool_pair = (
                len(require_dense) == 2
                and isinstance(require_dense[0], bool)
                and isinstance(require_dense[1], bool)
            )
            if not is_bool_pair:
                # Raised explicitly (instead of via an ``assert`` statement)
                # so the validation is not stripped under ``python -O``;
                # the exception type is kept for backward compatibility.
                raise AssertionError(
                    "require_dense must be a boolean or a sequence of "
                    "two booleans"
                )
            self.require_dense = require_dense

    def _ensure_multi_label_from_single_class(self, matrix, matrix_format='csr'):
        """Transform single class outputs to a 2D sparse matrix

        A 1-D input of ``n`` values becomes an ``(n, 1)`` column; a dense
        2-D input is converted with the constructor registered for
        ``matrix_format``; a 2-D sparse input is returned unchanged
        (neither copied nor converted to ``matrix_format``).

        Parameters
        ----------
        matrix : array-like
            input matrix to be checked
        matrix_format : str (default is csr)
            the sparse format used when a new matrix has to be created

        Returns
        -------
        scipy.sparse
            a 2-dimensional sparse matrix

        Raises
        ------
        ValueError
            If ``matrix`` is neither a list/tuple/array with at least one
            dimension nor an object reporting ``ndim == 2``.

        Notes
        -----
        NOTE(review): the 1-D path transposes the freshly built matrix, and
        scipy's transpose may return a different storage format than the
        one requested (e.g. CSR transposes to CSC). Callers that need a
        specific format should convert the result.
        """
        is_ndarray = isinstance(matrix, np.ndarray)

        # list / tuple / array with at least one dimension
        # (a 0-d array has no rows, so it falls through to the error below)
        if isinstance(matrix, (list, tuple)) or (is_ndarray and matrix.ndim > 0):
            n_rows = len(matrix)
            if is_ndarray and matrix.ndim == 2:
                # Read the shape directly: this works for empty arrays and
                # for np.matrix, whose rows are 1 x n (len(matrix[0]) == 1).
                is_2d = True
                n_cols = matrix.shape[1]
            elif n_rows > 0 and isinstance(matrix[0], (list, tuple, np.ndarray)):
                # array like of array likes
                is_2d = True
                n_cols = len(matrix[0])
            else:
                # 1d list or array (possibly empty):
                # shape is n_samples of 1 class assignment
                is_2d = False
                n_cols = 1
        # not an array but 2D, probably a (sparse) matrix
        elif getattr(matrix, 'ndim', None) == 2:
            is_2d = True
            n_rows, n_cols = matrix.shape
        # what is it?
        else:
            raise ValueError("Matrix dimensions too large (>2) or other value error")

        if not is_2d:
            # The constructor yields a single row; transpose it to a column.
            result = matrix_creation_function_for_format(matrix_format)(matrix).T
        elif issparse(matrix):
            result = matrix
        else:
            result = matrix_creation_function_for_format(matrix_format)(
                matrix, shape=(n_rows, n_cols))

        # internal sanity check on the produced shape
        assert result.shape == (n_rows, n_cols)
        return result