"""Bunch of metrics with unified interface."""
from functools import partial
from typing import Callable
from typing import Optional
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import r2_score
from sklearn.metrics import roc_auc_score
def mean_quantile_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    q: float = 0.9,
) -> float:
    """Computes Mean Quantile Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.
        q: Metric coefficient.

    Returns:
        Metric value.

    """
    err = y_pred - y_true
    s = np.sign(err)
    err = np.abs(err)
    err = np.where(s > 0, q, 1 - q) * err

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
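
# Illustrative sketch (not part of the original module): a quick check of the
# quantile (pinball) loss weighting above. Note the convention used here:
# over-predictions (y_pred > y_true) are weighted by ``q`` and
# under-predictions by ``1 - q``. The helper name is hypothetical.
def _quantile_error_example() -> float:
    """Tiny usage example for :func:`mean_quantile_error`."""
    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.5, 1.5, 3.5])
    # errors are [+0.5, -0.5, +0.5]; with q=0.9 the weights are [0.9, 0.1, 0.9],
    # giving (0.45 + 0.05 + 0.45) / 3 ≈ 0.317
    return mean_quantile_error(y_true, y_pred, q=0.9)
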
def mean_huber_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    a: float = 0.9,
) -> float:
    """Computes Mean Huber Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.
        a: Metric coefficient.

    Returns:
        Metric value.

    """
    err = y_pred - y_true
    s = np.abs(err) < a
    err = np.where(s, 0.5 * (err ** 2), a * np.abs(err) - 0.5 * (a ** 2))

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
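
# Illustrative sketch (not part of the original module): the Huber loss above is
# quadratic for residuals with |err| < a and linear beyond that threshold, which
# is what makes it robust to outliers. The helper name is hypothetical.
def _huber_error_example() -> float:
    """Tiny usage example for :func:`mean_huber_error`."""
    y_true = np.array([0.0, 0.0])
    y_pred = np.array([0.5, 5.0])
    # with a=0.9: the first residual (0.5 < 0.9) contributes 0.5 * 0.5 ** 2 = 0.125,
    # the second (5.0 >= 0.9) contributes 0.9 * 5.0 - 0.5 * 0.9 ** 2 = 4.095
    return mean_huber_error(y_true, y_pred, a=0.9)
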
def mean_fair_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    c: float = 0.9,
) -> float:
    """Computes Mean Fair Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.
        c: Metric coefficient.

    Returns:
        Metric value.

    """
    x = np.abs(y_pred - y_true) / c
    err = c ** 2 * (x - np.log(x + 1))

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
def mean_absolute_percentage_error(
    y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None
) -> float:
    """Computes Mean Absolute Percentage Error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Specify weighted mean.

    Returns:
        Metric value.

    """
    err = (y_true - y_pred) / y_true
    err = np.abs(err)

    if sample_weight is not None:
        return (err * sample_weight).mean() / sample_weight.mean()

    return err.mean()
def roc_auc_ovr(y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None):
    """ROC-AUC One-Versus-Rest.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Weights of samples.

    Returns:
        Metric value.

    """
    return roc_auc_score(y_true, y_pred, sample_weight=sample_weight, multi_class="ovr")
def rmsle(y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None):
    """Root mean squared log error.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Weights of samples.

    Returns:
        Metric value.

    """
    return np.sqrt(mean_squared_log_error(y_true, y_pred, sample_weight=sample_weight))
def auc_mu(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sample_weight: Optional[np.ndarray] = None,
    class_weights: Optional[np.ndarray] = None,
) -> float:
    """Compute multi-class metric AUC-Mu.

    We assume that the confusion matrix is full of ones, except for
    the diagonal elements, which are all zeroes.

    By default, a simple mean is used to average the between-class scores.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        sample_weight: Not used.
        class_weights: The between-classes weight matrix. If ``None``,
            the standard mean will be used. It is expected to be a lower
            triangular matrix (the diagonal is also full of zeroes).
            In position (i, j), i > j, there is a partial positive score
            between the i-th and j-th classes. All elements must sum up to 1.

    Returns:
        Metric value.

    Note:
        Code was refactored from https://github.com/kleimanr/auc_mu/blob/master/auc_mu.py

    """
    if not isinstance(y_pred, np.ndarray):
        raise TypeError("Expected y_pred to be np.ndarray, got: {}".format(type(y_pred)))
    if not y_pred.ndim == 2:
        raise ValueError("Expected array with predictions to be a 2-dimensional array")
    if not isinstance(y_true, np.ndarray):
        raise TypeError("Expected y_true to be np.ndarray, got: {}".format(type(y_true)))
    if not y_true.ndim == 1:
        raise ValueError("Expected array with ground truths to be a 1-dimensional array")
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError(
            "Expected number of samples in y_true and y_pred to be the same,"
            " got {} and {}, respectively".format(y_true.shape[0], y_pred.shape[0])
        )

    uniq_labels = np.unique(y_true)
    n_samples, n_classes = y_pred.shape

    if not np.all(uniq_labels == np.arange(n_classes)):
        raise ValueError("Expected class labels to be encoded as 0, ..., n_classes - 1")

    if class_weights is None:
        class_weights = np.tri(n_classes, k=-1)
        class_weights /= class_weights.sum()

    if not isinstance(class_weights, np.ndarray):
        raise TypeError("Expected class_weights to be np.ndarray, got: {}".format(type(class_weights)))
    if not class_weights.ndim == 2:
        raise ValueError("Expected class_weights to be a 2-dimensional array")
    if not class_weights.shape == (n_classes, n_classes):
        raise ValueError("Expected class_weights size: {}, got: {}".format((n_classes, n_classes), class_weights.shape))

    # check sum?
    confusion_matrix = np.ones((n_classes, n_classes)) - np.eye(n_classes)
    auc_full = 0.0

    for class_i in range(n_classes):
        preds_i = y_pred[y_true == class_i]
        n_i = preds_i.shape[0]

        for class_j in range(class_i):
            preds_j = y_pred[y_true == class_j]
            n_j = preds_j.shape[0]
            n = n_i + n_j

            tmp_labels = np.zeros((n,), dtype=np.int32)
            tmp_labels[n_i:] = 1
            tmp_preds = np.vstack((preds_i, preds_j))
            v = confusion_matrix[class_i, :] - confusion_matrix[class_j, :]
            scores = np.dot(tmp_preds, v)
            score_ij = roc_auc_score(tmp_labels, scores)
            auc_full += class_weights[class_i, class_j] * score_ij

    return auc_full
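
# Illustrative sketch (not part of the original module): calling ``auc_mu`` on a
# tiny 3-class problem. Labels must be encoded as 0, ..., n_classes - 1 and the
# predictions must be an (n_samples, n_classes) probability matrix.
# The helper name is hypothetical.
def _auc_mu_example() -> float:
    """Tiny usage example for :func:`auc_mu`."""
    y_true = np.array([0, 1, 2, 1, 0, 2])
    y_pred = np.array(
        [
            [0.7, 0.2, 0.1],
            [0.1, 0.8, 0.1],
            [0.2, 0.2, 0.6],
            [0.3, 0.5, 0.2],
            [0.6, 0.3, 0.1],
            [0.1, 0.3, 0.6],
        ]
    )
    # with class_weights=None every unordered class pair gets an equal weight
    return auc_mu(y_true, y_pred)
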
class F1Factory:
    """Wrapper for :func:`~sklearn.metrics.f1_score` function.

    Args:
        average: Averaging type ('micro', 'macro', 'weighted').

    """

    def __init__(self, average: str = "micro"):
        self.average = average

    def __call__(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """Compute metric.

        Args:
            y_true: Ground truth target values.
            y_pred: Estimated target values.
            sample_weight: Sample weights.

        Returns:
            F1 score of the positive class in binary classification
            or weighted average of the F1 scores of each class
            for the multiclass task.

        """
        return f1_score(y_true, y_pred, sample_weight=sample_weight, average=self.average)
class BestClassBinaryWrapper:
    r"""Metric wrapper to get the best class prediction instead of probabilities.

    Predictions are cut off at ``0.5``.

    Args:
        func: Metric function. Function format:
            func(y_true, y_pred, sample_weight, \*\*kwargs).

    """

    def __init__(self, func: Callable):
        self.func = func

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None, **kwargs):
        """Calculate metric."""
        y_pred = (y_pred > 0.5).astype(np.float32)

        return self.func(y_true, y_pred, sample_weight=sample_weight, **kwargs)
class BestClassMulticlassWrapper:
    r"""Metric wrapper to get the best class prediction instead of probabilities for multiclass.

    The predicted class is obtained by argmax.

    Args:
        func: Metric function. Function format:
            func(y_true, y_pred, sample_weight, \*\*kwargs).

    """

    def __init__(self, func):
        self.func = func

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None, **kwargs):
        """Calculate metric."""
        y_pred = (y_pred.argmax(axis=1)).astype(np.float32)

        return self.func(y_true, y_pred, sample_weight=sample_weight, **kwargs)
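
# Illustrative sketch (not part of the original module): label-based metrics such as
# ``accuracy_score`` expect hard class labels, so the wrappers above convert
# probabilities first (argmax for multiclass, a 0.5 cut-off for binary).
# The helper name is hypothetical.
def _wrapped_accuracy_example() -> float:
    """Tiny usage example for :class:`BestClassMulticlassWrapper`."""
    metric = BestClassMulticlassWrapper(accuracy_score)
    y_true = np.array([0, 1, 2])
    y_pred = np.array([[0.9, 0.05, 0.05], [0.2, 0.7, 0.1], [0.3, 0.3, 0.4]])
    # argmax picks classes [0, 1, 2], so accuracy is 1.0 here
    return metric(y_true, y_pred)
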
# TODO: Add custom metrics - precision/recall/fscore at K. Fscore at best co
# TODO: Move to other module
_valid_str_binary_metric_names = {
    "auc": roc_auc_score,
    "logloss": partial(log_loss),
    "accuracy": BestClassBinaryWrapper(accuracy_score),
}

_valid_str_reg_metric_names = {
    "r2": r2_score,
    "mse": mean_squared_error,
    "mae": mean_absolute_error,
    "rmsle": rmsle,
    "fair": mean_fair_error,
    "huber": mean_huber_error,
    "quantile": mean_quantile_error,
    "mape": mean_absolute_percentage_error,
}

_valid_str_multiclass_metric_names = {
    "auc_mu": auc_mu,
    "auc": roc_auc_ovr,
    "crossentropy": partial(log_loss),
    "accuracy": BestClassMulticlassWrapper(accuracy_score),
    "f1_macro": BestClassMulticlassWrapper(F1Factory("macro")),
    "f1_micro": BestClassMulticlassWrapper(F1Factory("micro")),
    "f1_weighted": BestClassMulticlassWrapper(F1Factory("weighted")),
}

_valid_str_multireg_metric_names = {"mse": mean_squared_error, "mae": mean_absolute_error}

_valid_str_multilabel_metric_names = {"logloss": partial(log_loss)}

_valid_str_metric_names = {
    "binary": _valid_str_binary_metric_names,
    "reg": _valid_str_reg_metric_names,
    "multiclass": _valid_str_multiclass_metric_names,
    "multi:reg": _valid_str_multireg_metric_names,
    "multilabel": _valid_str_multilabel_metric_names,
}

_valid_metric_args = {"quantile": ["q"], "huber": ["a"], "fair": ["c"]}
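
# Illustrative sketch (not part of the original module): the dictionaries above act
# as a registry mapping (task name, metric name) to a callable, and
# ``_valid_metric_args`` lists the extra keyword arguments a metric accepts.
# The helper name and the chosen values are hypothetical.
def _metric_by_name_example(task: str = "reg", name: str = "quantile") -> float:
    """Look up a metric function by task and name and evaluate it on toy data."""
    metric_func = _valid_str_metric_names[task][name]
    kwargs = {}
    if name in _valid_metric_args and "q" in _valid_metric_args[name]:
        # e.g. "quantile" accepts ``q``; pass it only when the metric supports it
        kwargs = {"q": 0.75}
    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.2, 1.8, 3.3])
    return metric_func(y_true, y_pred, **kwargs)
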