"""Bunch of metrics with unified interface."""
from functools import partial
from typing import Callable
from typing import Optional
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import r2_score
from sklearn.metrics import roc_auc_score
def mean_quantile_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    q: float = 0.9,
) -> float:
    """Computes Mean Quantile Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.
        q: Metric coefficient.

    Returns:
        Metric value.

    """
    err = y_pred - y_true
    s = np.sign(err)
    err = np.abs(err)
    err = np.where(s > 0, q, 1 - q) * err

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
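
# Illustrative sketch (not part of the original module): a quick check of the
# quantile (pinball) loss weighting above. Note the convention used here:
# over-predictions (y_pred > y_true) are weighted by ``q`` and
# under-predictions by ``1 - q``. The helper name is hypothetical.
def _quantile_error_example() -> float:
    """Tiny usage example for :func:`mean_quantile_error`."""
    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.5, 1.5, 3.5])
    # errors are [+0.5, -0.5, +0.5]; with q=0.9 the weights are [0.9, 0.1, 0.9],
    # giving (0.45 + 0.05 + 0.45) / 3 ≈ 0.317
    return mean_quantile_error(y_true, y_pred, q=0.9)
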
def mean_huber_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    a: float = 0.9,
) -> float:
    """Computes Mean Huber Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.
        a: Metric coefficient.

    Returns:
        Metric value.

    """
    err = y_pred - y_true
    s = np.abs(err) < a
    err = np.where(s, 0.5 * (err ** 2), a * np.abs(err) - 0.5 * (a ** 2))

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
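
# Illustrative sketch (not part of the original module): the Huber loss above is
# quadratic for residuals with |err| < a and linear beyond that threshold, which
# is what makes it robust to outliers. The helper name is hypothetical.
def _huber_error_example() -> float:
    """Tiny usage example for :func:`mean_huber_error`."""
    y_true = np.array([0.0, 0.0])
    y_pred = np.array([0.5, 5.0])
    # with a=0.9: the first residual (0.5 < 0.9) contributes 0.5 * 0.5 ** 2 = 0.125,
    # the second (5.0 >= 0.9) contributes 0.9 * 5.0 - 0.5 * 0.9 ** 2 = 4.095
    return mean_huber_error(y_true, y_pred, a=0.9)
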
def mean_fair_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    c: float = 0.9,
) -> float:
    """Computes Mean Fair Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.
        c: Metric coefficient.

    Returns:
        Metric value.

    """
    x = np.abs(y_pred - y_true) / c
    err = c ** 2 * (x - np.log(x + 1))

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
def mean_absolute_percentage_error(
    y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None
) -> float:
    """Computes Mean Absolute Percentage Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.

    Returns:
        Metric value.

    """
    err = (y_true - y_pred) / y_true
    err = np.abs(err)

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
def roc_auc_ovr(y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None):
    """ROC-AUC One-Versus-Rest.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Weights of samples.

    Returns:
        Metric value.

    """
    return roc_auc_score(y_true, y_pred, sample_weight=sample_weight, multi_class="ovr")
def rmsle(y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None):
    """Root mean squared log error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Weights of samples.

    Returns:
        Metric value.

    """
    return np.sqrt(mean_squared_log_error(y_true, y_pred, sample_weight=sample_weight))
def auc_mu(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    class_weights: Optional[np.ndarray] = None,
) -> float:
    """Compute multi-class metric AUC-Mu.

    We assume that the confusion matrix is full of ones, except for
    the diagonal elements, which are all zeroes.

    By default, a simple mean is used to average the between-class scores.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Not used.
        class_weights: The between-classes weight matrix. If ``None``,
            the standard mean will be used. It is expected to be a lower
            triangular matrix (the diagonal is also full of zeroes).
            In position (i, j), i > j, there is a partial positive score
            between the i-th and j-th classes. All elements must sum up to 1.

    Returns:
        Metric value.

    Note:
        Code was refactored from https://github.com/kleimanr/auc_mu/blob/master/auc_mu.py

    """
    if not isinstance(y_pred, np.ndarray):
        raise TypeError("Expected y_pred to be np.ndarray, got: {}".format(type(y_pred)))
    if not y_pred.ndim == 2:
        raise ValueError("Expected array with predictions to be a 2-dimensional array")
    if not isinstance(y_true, np.ndarray):
        raise TypeError("Expected y_true to be np.ndarray, got: {}".format(type(y_true)))
    if not y_true.ndim == 1:
        raise ValueError("Expected array with ground truths to be a 1-dimensional array")
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError(
            "Expected number of samples in y_true and y_pred to be the same,"
            " got {} and {}, respectively".format(y_true.shape[0], y_pred.shape[0])
        )

    uniq_labels = np.unique(y_true)
    n_samples, n_classes = y_pred.shape

    if not np.all(uniq_labels == np.arange(n_classes)):
        raise ValueError("Expected class labels to be encoded as 0, ..., n_classes - 1")

    if class_weights is None:
        class_weights = np.tri(n_classes, k=-1)
        class_weights /= class_weights.sum()

    if not isinstance(class_weights, np.ndarray):
        raise TypeError("Expected class_weights to be np.ndarray, got: {}".format(type(class_weights)))
    if not class_weights.ndim == 2:
        raise ValueError("Expected class_weights to be a 2-dimensional array")
    if not class_weights.shape == (n_classes, n_classes):
        raise ValueError("Expected class_weights size: {}, got: {}".format((n_classes, n_classes), class_weights.shape))

    # check sum?
    confusion_matrix = np.ones((n_classes, n_classes)) - np.eye(n_classes)
    auc_full = 0.0

    for class_i in range(n_classes):
        preds_i = y_pred[y_true == class_i]
        n_i = preds_i.shape[0]

        for class_j in range(class_i):
            preds_j = y_pred[y_true == class_j]
            n_j = preds_j.shape[0]
            n = n_i + n_j

            tmp_labels = np.zeros((n,), dtype=np.int32)
            tmp_labels[n_i:] = 1
            tmp_preds = np.vstack((preds_i, preds_j))
            v = confusion_matrix[class_i, :] - confusion_matrix[class_j, :]
            scores = np.dot(tmp_preds, v)
            score_ij = roc_auc_score(tmp_labels, scores)
            auc_full += class_weights[class_i, class_j] * score_ij

    return auc_full
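
# Illustrative sketch (not part of the original module): calling ``auc_mu`` on a
# tiny 3-class problem. Labels must be encoded as 0, ..., n_classes - 1 and the
# predictions must be an (n_samples, n_classes) probability matrix.
# The helper name is hypothetical.
def _auc_mu_example() -> float:
    """Tiny usage example for :func:`auc_mu`."""
    y_true = np.array([0, 1, 2, 1, 0, 2])
    y_pred = np.array(
        [
            [0.7, 0.2, 0.1],
            [0.1, 0.8, 0.1],
            [0.2, 0.2, 0.6],
            [0.3, 0.5, 0.2],
            [0.6, 0.3, 0.1],
            [0.1, 0.3, 0.6],
        ]
    )
    # with class_weights=None every unordered class pair gets an equal weight
    return auc_mu(y_true, y_pred)
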
class F1Factory:
    """Wrapper for :func:`~sklearn.metrics.f1_score` function.

    Args:
        average: Averaging type ('micro', 'macro', 'weighted').

    """

    def __init__(self, average: str = "micro"):
        self.average = average

    def __call__(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """Compute metric.

        Args:
            y_true: Ground truth target values.
            y_pred: Estimated target values.
            sample_weight: Sample weights.

        Returns:
            F1 score of the positive class in binary classification
            or weighted average of the F1 scores of each class
            for the multiclass task.

        """
        return f1_score(y_true, y_pred, sample_weight=sample_weight, average=self.average)
class BestClassBinaryWrapper:
    r"""Metric wrapper to get the best class prediction instead of probabilities.

    Predictions are cut off at ``0.5``.

    Args:
        func: Metric function. Function format:
            func(y_true, y_pred, sample_weight, \*\*kwargs).

    """

    def __init__(self, func: Callable):
        self.func = func

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None, **kwargs):
        """Calculate metric."""
        y_pred = (y_pred > 0.5).astype(np.float32)

        return self.func(y_true, y_pred, sample_weight=sample_weight, **kwargs)
class BestClassMulticlassWrapper:
    r"""Metric wrapper to get the best class prediction instead of probabilities for multiclass.

    The predicted class is obtained by argmax.

    Args:
        func: Metric function. Function format:
            func(y_true, y_pred, sample_weight, \*\*kwargs).

    """

    def __init__(self, func):
        self.func = func

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None, **kwargs):
        """Calculate metric."""
        y_pred = (y_pred.argmax(axis=1)).astype(np.float32)

        return self.func(y_true, y_pred, sample_weight=sample_weight, **kwargs)
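
# Illustrative sketch (not part of the original module): label-based metrics such as
# ``accuracy_score`` expect hard class labels, so the wrappers above convert
# probabilities first (argmax for multiclass, a 0.5 cut-off for binary).
# The helper name is hypothetical.
def _wrapped_accuracy_example() -> float:
    """Tiny usage example for :class:`BestClassMulticlassWrapper`."""
    metric = BestClassMulticlassWrapper(accuracy_score)
    y_true = np.array([0, 1, 2])
    y_pred = np.array([[0.9, 0.05, 0.05], [0.2, 0.7, 0.1], [0.3, 0.3, 0.4]])
    # argmax picks classes [0, 1, 2], so accuracy is 1.0 here
    return metric(y_true, y_pred)
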
# TODO: Add custom metrics - precision/recall/fscore at K. Fscore at best co
# TODO: Move to other module
_valid_str_binary_metric_names = {
    "auc": roc_auc_score,
    "logloss": partial(log_loss),
    "accuracy": BestClassBinaryWrapper(accuracy_score),
}

_valid_str_reg_metric_names = {
    "r2": r2_score,
    "mse": mean_squared_error,
    "mae": mean_absolute_error,
    "rmsle": rmsle,
    "fair": mean_fair_error,
    "huber": mean_huber_error,
    "quantile": mean_quantile_error,
    "mape": mean_absolute_percentage_error,
}

_valid_str_multiclass_metric_names = {
    "auc_mu": auc_mu,
    "auc": roc_auc_ovr,
    "crossentropy": partial(log_loss),
    "accuracy": BestClassMulticlassWrapper(accuracy_score),
    "f1_macro": BestClassMulticlassWrapper(F1Factory("macro")),
    "f1_micro": BestClassMulticlassWrapper(F1Factory("micro")),
    "f1_weighted": BestClassMulticlassWrapper(F1Factory("weighted")),
}

_valid_str_multireg_metric_names = {"mse": mean_squared_error, "mae": mean_absolute_error}

_valid_str_multilabel_metric_names = {"logloss": partial(log_loss)}

_valid_str_metric_names = {
    "binary": _valid_str_binary_metric_names,
    "reg": _valid_str_reg_metric_names,
    "multiclass": _valid_str_multiclass_metric_names,
    "multi:reg": _valid_str_multireg_metric_names,
    "multilabel": _valid_str_multilabel_metric_names,
}

_valid_metric_args = {"quantile": ["q"], "huber": ["a"], "fair": ["c"]}
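
# Illustrative sketch (not part of the original module): the dictionaries above act
# as a registry mapping (task name, metric name) to a callable, and
# ``_valid_metric_args`` lists the extra keyword arguments a metric accepts.
# The helper name and the chosen values are hypothetical.
def _metric_by_name_example(task: str = "reg", name: str = "quantile") -> float:
    """Look up a metric function by task and name and evaluate it on toy data."""
    metric_func = _valid_str_metric_names[task][name]
    kwargs = {}
    if name in _valid_metric_args and "q" in _valid_metric_args[name]:
        # e.g. "quantile" accepts ``q``; pass it only when the metric supports it
        kwargs = {"q": 0.75}
    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.2, 1.8, 3.3])
    return metric_func(y_true, y_pred, **kwargs)
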