Modeling Limit Distribution

Modeling real data

This part provides the optimal method for tests in which you need to determine a statistically significantly better sample

[1]:

import plotly.graph_objects as go
from scipy.stats import bernoulli, norm
from statsmodels.stats.proportion import proportions_ztest

import numpy as np

from lightautoml.addons.hypex.abn_test import min_sample_size
from lightautoml.addons.hypex.abn_test import quantile_of_marginal_distribution
from lightautoml.addons.hypex.abn_test import test_on_marginal_distribution

Help functions for easy solution and construction of confidence intervals

[2]:

def min_sample_size_2(d, alpha=0.05, beta=0.2, p=1 / 2):
    l = 2 * p * (1 - p) * (((norm.ppf(1 - alpha) - norm.ppf(beta)) / d) ** 2)
    return int(l)


def confidence_interval_bern(p, cnt, c=0.95):
    se = np.sqrt(p * (1 - p) / cnt)
    q = norm.ppf(1 - (1 - c) / 2)
    return p - q * se, p + q * se

Comparison of simple and optimal solutions

Consider the size of a single sample depending on the number of samples.

[3]:

k_list = [2, 3, 4, 5, 6]  # Number of samples
alpha = 0.05  # Significance level
beta = 0.2  # 1 - power
p_true = 0.3  # Bernoulli distribution parameter
d_relative_pred = 0.1  # I assume that the conversion rate will increase by d_relative_pred * p

n_1 = []  # limit method
n_2 = []  # method 2

[4]:

for k in k_list:
    alpha_ = alpha / k
    beta_ = beta / (k - 1)

    c_1 = quantile_of_marginal_distribution(num_samples=k, quantile_level=1 - alpha_)
    c_2 = quantile_of_marginal_distribution(num_samples=k, quantile_level=beta)
    n_1 += [min_sample_size(number_of_samples=k,
                            minimum_detectable_effect=p_true * d_relative_pred,
                            variances=p_true * (1 - p_true),
                            significance_level=alpha,
                            power_level=beta,
                            quantile_1=c_1,
                            quantile_2=c_2)]
    n_2 += [min_sample_size_2(d=p_true * d_relative_pred, p=p_true,
                              alpha=alpha_, beta=beta_)]

[5]:

fig = go.Figure()
fig.add_trace(go.Scatter(x=k_list, y=n_1,
                         mode="lines", name=f"Optimal solution"))
fig.add_trace(go.Scatter(x=k_list, y=n_2,
                         mode="lines", name=f"Simple solution"))

fig.update_layout(title={
    "text": "Modeling the minimum sample size",
    "font": {
        "size": 18
    }
},
    xaxis={
        "title_text": "Number of samples"
    },
    yaxis={
        "title_text": "The size of a single sample",
        "range": [0, 10500]
    },
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.005
    },
    width=725,
    height=500,
    autosize=True,
    font={
        "size": 16
    })
fig.show()

Consider the probability of an error of the first kind - this is the probability of rejecting a hypothesis if it is valid. In order not to make mistakes too often, we want to control this value.

The value is theoretical, so we will model the frequency of incorrect rejection of the hypothesis, close to the true probability of error of the first kind with a sufficient number of simulations.

[6]:

d_relative_pred = 0.05  # Assume that the conversion rate will increase by d_relative_pred * p
iter_size = 1000  # Number of test iterations

reject_stat_1 = []  # limit method
reject_stat_2 = []  # pairwise comparisons

[7]:

for k in k_list:
    alpha_ = alpha / k
    beta_ = beta / (k - 1)

    c_1 = quantile_of_marginal_distribution(num_samples=k, quantile_level=1 - alpha_)
    c_2 = quantile_of_marginal_distribution(num_samples=k, quantile_level=beta)
    n_1 = min_sample_size(number_of_samples=k,
                          minimum_detectable_effect=p_true * d_relative_pred,
                          variances=p_true * (1 - p_true),
                          significance_level=alpha,
                          power_level=beta,
                          quantile_1=c_1,
                          quantile_2=c_2)
    n_2 = min_sample_size_2(d=p_true * d_relative_pred, p=p_true,
                            alpha=alpha_, beta=beta_)

    success_cnt_1 = bernoulli.rvs(p_true, size=[iter_size, k, n_1])
    success_cnt_2 = bernoulli.rvs(p_true, size=[iter_size, k, n_2])
    # limit method
    reject_cnt_0 = 0
    for i in range(iter_size):
        hyp = test_on_marginal_distribution(success_cnt_1[i],
                                            significance_level=alpha,
                                            quantiles=c_1)
        if hyp == 0:
            reject_cnt_0 += 1
    reject_stat_1.append(1 - reject_cnt_0 / iter_size)

    # pairwise comparisons
    reject_cnt_0 = 0
    for i in range(iter_size):
        hyp = 0  # if nothing is rejected, then H(0) is valid
        for s in range(k):
            all_rejected_flg = True
            for t in range(k):
                if s != t:
                    if proportions_ztest([np.sum(success_cnt_2[i][s]), np.sum(success_cnt_2[i][t])], [n_2, n_2],
                                         alternative="larger")[1] > alpha_:
                        all_rejected_flg = False
                        break
            if all_rejected_flg:
                hyp = s + 1
                break
        if hyp == 0:
            reject_cnt_0 += 1
    reject_stat_2.append(1 - reject_cnt_0 / iter_size)
reject_stat_1 = np.array(reject_stat_1)
left_side_1, right_side_1 = confidence_interval_bern(reject_stat_1, iter_size, c=1 - alpha)

reject_stat_2 = np.array(reject_stat_2)
left_side_2, right_side_2 = confidence_interval_bern(reject_stat_2, iter_size, c=1 - alpha)

[8]:

fig = go.Figure()
fig.add_trace(go.Scatter(x=k_list, y=len(k_list) * [alpha],
                         mode="lines", name="Significance level"))
fig.add_trace(go.Scatter(x=k_list,
                         y=reject_stat_1,
                         mode="lines",
                         name=f"Optimal solution",
                         error_y={
                             "type": "data",
                             "symmetric": False,
                             "array": reject_stat_1 - left_side_1,
                             "arrayminus": right_side_1 - reject_stat_1,
                             "visible": True
                         }))

fig.add_trace(go.Scatter(x=k_list,
                         y=reject_stat_2,
                         mode="lines",
                         name=f"Simple solution",
                         error_y={
                             "type": "data",
                             "symmetric": False,
                             "array": reject_stat_2 - left_side_2,
                             "arrayminus": right_side_2 - reject_stat_2,
                             "visible": True
                         }))

fig.update_layout(title="Modeling the probability of a first-kind error",
                  xaxis={
                      "title_text": "Number of samples"
                  },
                  yaxis={
                      "tickformat": ".0%",
                      "title_text": "The frequency of incorrect rejection of the hypothesis",
                      "range": [-0.005, 0.07]
                  },
                  legend={
                      "yanchor": "top",
                      "y": 0.99,
                      "xanchor": "right",
                      "x": 0.995
                  },
                  width=725,
                  height=500)
fig.show()

Consider the probability of a second kind of error - this is the probability of accepting a hypothesis if it is unfair.

We will model the frequency of incorrect hypothesis acceptance, close to the true probability of a second-kind error with a sufficient number of simulations.

[9]:

d_relative_pred = 0.1  # Assume that the conversion rate will increase by d_relative_pred * p
iter_size = 1000  # Number of test iterations

reject_stat_1 = []
reject_stat_2 = []

[10]:

for k in k_list:
    alpha_ = alpha / k
    beta_ = beta / (k - 1)

    c_1 = quantile_of_marginal_distribution(num_samples=k, quantile_level=1 - alpha_)
    c_2 = quantile_of_marginal_distribution(num_samples=k, quantile_level=beta)
    n_1 = min_sample_size(number_of_samples=k,
                          minimum_detectable_effect=p_true * d_relative_pred,
                          variances=p_true * (1 - p_true),
                          significance_level=alpha,
                          power_level=beta,
                          quantile_1=c_1,
                          quantile_2=c_2)
    n_2 = min_sample_size_2(d=p_true * d_relative_pred, p=p_true,
                            alpha=alpha_, beta=beta_)

    success_cnt_2 = []
    for i in range(k - 1):
        success_cnt_2 += [bernoulli.rvs(p_true, size=[iter_size, n_2])]
    success_cnt_2 += [bernoulli.rvs(p_true * (1 + d_relative_pred), size=[iter_size, n_2])]

    # limit method
    reject_cnt_0 = 0
    for i in range(iter_size):
        success_cnt_1 = []
        for _ in range(k - 1):
            success_cnt_1 += [bernoulli.rvs(p_true, size=n_1)]
        success_cnt_1 += [bernoulli.rvs(p_true * (1 + d_relative_pred), size=n_1)]
        hyp = test_on_marginal_distribution(success_cnt_1,
                                            significance_level=alpha,
                                            quantiles=c_1)
        if hyp == k:
            reject_cnt_0 += 1
    reject_stat_1.append(1 - reject_cnt_0 / iter_size)

    # pairwise comparisons
    reject_cnt_0 = 0
    for i in range(iter_size):
        hyp = 0
        for s in range(k):
            all_rejected_flg = True
            for t in range(k):
                if s != t:
                    if proportions_ztest([np.sum(success_cnt_2[s][i]), np.sum(success_cnt_2[t][i])], [n_2, n_2],
                                         alternative="larger")[1] > alpha_:
                        all_rejected_flg = False
                        break
            if all_rejected_flg:
                hyp = s + 1
                break
        if hyp == k:
            reject_cnt_0 += 1
    reject_stat_2.append(1 - reject_cnt_0 / iter_size)

reject_stat_1 = np.array(reject_stat_1)
left_side_1, right_side_1 = confidence_interval_bern(reject_stat_1, iter_size, c=1 - alpha)
reject_stat_2 = np.array(reject_stat_2)
left_side_2, right_side_2 = confidence_interval_bern(reject_stat_2, iter_size, c=1 - alpha)

[11]:

fig = go.Figure()
fig.add_trace(go.Scatter(x=k_list, y=len(k_list) * [beta],
                         mode="lines", name="The probability of an error of the 2nd kind"))
fig.add_trace(go.Scatter(x=k_list,
                         y=reject_stat_1,
                         mode="lines",
                         name=f"Optimal solution",
                         error_y={
                             "type": "data",
                             "symmetric": False,
                             "array": reject_stat_1 - left_side_1,
                             "arrayminus": right_side_1 - reject_stat_1,
                             "visible": True
                         }))

fig.add_trace(go.Scatter(x=k_list,
                         y=reject_stat_2,
                         mode="lines",
                         name=f"Simple solution",
                         error_y={
                             "type": "data",
                             "symmetric": False,
                             "array": reject_stat_2 - left_side_2,
                             "arrayminus": right_side_2 - reject_stat_2,
                             "visible": True
                         }))

fig.update_layout(title="Modeling the probability of a second kind of error",
                  xaxis={
                      "title_text": "Number of samples"
                  },
                  yaxis={
                      "tickformat": ".0%",
                      "title_text": "The frequency of hypothesis acceptance",
                      "range": [0, 0.25]
                  },
                  legend={
                      "yanchor": "top",
                      "y": 0.99,
                      "xanchor": "right",
                      "x": 0.995
                  },
                  width=725,
                  height=500)
fig.show()

[ ]: