Insurance A/B Test Analysis

Author

Your Name

Show technical details
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import levene, ttest_ind, chi2_contingency
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from statsmodels.stats.proportion import proportions_ztest, confint_proportions_2indep
from statsmodels.stats.multitest import multipletests
Show technical details
converted = pd.read_csv('converted.csv')
Show technical details
converted.head(5)
Unnamed: 0 age joined_getsafe_at language channel first_product zip_bucket area_classification most_used_os buying_platform converted variant
0 0 51 2024-04-21 18:19:07 de paid_social legal 539xx rural Android web 1 B
1 1 18 2024-08-09 20:14:37 de direct liability 305xx urban Android app 1 A
2 2 20 2024-08-03 00:51:27 de aggregator liability 882xx rural Android app 1 A
3 3 18 2024-10-24 13:07:40 de aggregator liability 986xx rural NaN NaN 1 B
4 4 28 2024-05-31 22:54:47 de direct liability 104xx Big 7 Android app 1 B
Show technical details
converted.shape
(6000, 12)
Show technical details
# Tenure is measured against a fixed reference date so that re-running the
# notebook reproduces the same numbers; pd.Timestamp.today() would shift every
# tenure value (and every statistic derived from it) with the day of execution.
# NOTE(review): 2025-11-09 is the date stamp of the recorded model summary —
# confirm it is the intended cut-off. Day counts may differ by at most one from
# a run that used the time of day as well.
REFERENCE_DATE = pd.Timestamp('2025-11-09')

converted['joined_getsafe_at'] = pd.to_datetime(converted['joined_getsafe_at'])
# Whole days between joining and the reference date.
converted['user_days'] = (REFERENCE_DATE - converted['joined_getsafe_at']).dt.days
# 30.44 is the average month length in days (365.25 / 12).
converted['user_months'] = (converted['user_days'] / 30.44).round(1)
Show technical details
print(converted['age'].describe())
count    6000.000000
mean       32.721833
std        27.635606
min        18.000000
25%        25.000000
50%        30.000000
75%        37.000000
max      2013.000000
Name: age, dtype: float64
Show technical details
# Keep only plausible ages instead of matching the single bad value: 2013 looks
# like a birth year entered as an age, and a range check also catches any
# similar record. `between` is inclusive and excludes missing ages.
# .copy() yields an independent frame, so later column assignments do not write
# into a slice of the originally loaded data.
MIN_AGE, MAX_AGE = 18, 120
converted = converted[converted['age'].between(MIN_AGE, MAX_AGE)].copy()
Show technical details
def t_test_analysis(df, variable, chart_name):
    """Compare a numeric variable between converted and non-converted users.

    Runs Levene's test to decide between Student's and Welch's t-test, prints
    the t statistic, p-value, group means and Cohen's d, and shows a boxplot
    of the variable split by the ``converted`` flag.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``variable`` and a ``converted`` column coded 1 / 0.
    variable : str
        Name of the numeric column to compare.
    chart_name : str
        Title of the boxplot.

    Returns
    -------
    None
        Results are printed and plotted.
    """
    # Missing values are dropped per group: with the default nan_policy both
    # Levene's test and the t-test would otherwise return NaN.
    conv_values = df.loc[df['converted'] == 1, variable].dropna()
    non_conv_values = df.loc[df['converted'] == 0, variable].dropna()

    # Welch's t-test is used when Levene's test rejects equal variances.
    _, p_levene = levene(conv_values, non_conv_values)
    equal_var = p_levene >= 0.05
    t_stat, p_val = ttest_ind(conv_values, non_conv_values, equal_var=equal_var)

    mean1, mean2 = conv_values.mean(), non_conv_values.mean()
    std1, std2 = conv_values.std(), non_conv_values.std()
    n1, n2 = conv_values.shape[0], non_conv_values.shape[0]

    # Pooled standard deviation, weighted by each group's degrees of freedom.
    s_pooled = np.sqrt(((n1 - 1)*std1**2 + (n2 - 1)*std2**2) / (n1 + n2 - 2))

    # Cohen's d: standardized mean difference (converted minus non-converted).
    d = (mean1 - mean2) / s_pooled

    print(f"T-test: t={t_stat:.2f}, p={p_val:.4f}")
    print(f"Means: converted users={mean1}, non-converted users={mean2}")
    print(f"Cohen's d = {d:.3f}")

    sns.boxplot(x='converted', y=variable, data=df)
    plt.title(chart_name)
    plt.show()
Show technical details
t_test_analysis(converted, 'age', 'Age vs. Conversion')
T-test: t=-4.81, p=0.0000
Means: converted users=31.712680014049877, non-converted users=33.005076142131976
Cohen's d = -0.123

Months since joining vs. Conversion

Show technical details
print(converted['user_months'].describe())
count    5999.000000
mean       16.503484
std         2.929747
min        12.300000
25%        13.900000
50%        16.000000
75%        19.200000
max        22.300000
Name: user_months, dtype: float64
Show technical details
t_test_analysis(converted, 'user_months', 'Usage days vs. Conversion')
T-test: t=-15.05, p=0.0000
Means: converted users=15.917386722866176, non-converted users=17.03286802030457
Cohen's d = -0.388

Show technical details
corr = converted['user_months'].corr(converted['converted'])
print(f"Correlation (months of usage vs. conversion): {corr:.3f}")
Correlation (months of usage vs. conversion): -0.190

Language vs. Conversion

Show technical details
converted['language'].value_counts()
language
de    5138
en     861
Name: count, dtype: int64
Show technical details
def chi2_residuals_heatmap(df, row, col, title=None, use_yates=False):
    """Run a chi-squared independence test and plot the Pearson residuals.

    Prints the chi-squared statistic, its p-value and Cramér's V, then draws
    a heatmap of the Pearson residuals ``(observed - expected) / sqrt(expected)``
    for the ``row`` × ``col`` contingency table.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both categorical columns.
    row, col : str
        Column names used for the rows and columns of the contingency table.
    title : str, optional
        Heatmap title. When omitted or empty, a title is built from ``row``
        and ``col``.
    use_yates : bool, default False
        Passed to ``chi2_contingency`` as ``correction``.

    Returns
    -------
    None
        Results are printed and plotted.
    """
    observed = pd.crosstab(df[row], df[col])

    chi2, p, dof, expected = chi2_contingency(observed.values, correction=use_yates)
    expected = pd.DataFrame(expected, index=observed.index, columns=observed.columns)

    residuals = (observed - expected) / np.sqrt(expected)

    n = observed.values.sum()
    r, c = observed.shape
    # Cramér's V is undefined for a table with a single row or column.
    k = min(r - 1, c - 1)
    cramers_v = np.sqrt((chi2 / n) / k) if k > 0 else np.nan

    # The value labelled "t=" below is the chi-squared statistic.
    print(f"Chi-squared test: t={chi2:.2f}, p={p:.4f}")
    print(f"Cramer's V = {cramers_v:.3f}")

    plt.figure(figsize=(8, 5))
    ax = sns.heatmap(
        residuals,
        cmap="coolwarm_r",
        center=0,
        annot=True,
        fmt=".2f",
        linewidths=0.5,
        cbar_kws={"label": "Pearson residuals"}
    )
    # Use the fallback title when none is supplied (it was computed but never
    # applied before).
    ttl = title or f"Pearson Residuals: {row} × {col}"
    ax.set_title(ttl)
    ax.set_xlabel(col)
    ax.set_ylabel(row)
    plt.tight_layout()
    plt.show()
Show technical details
chi2_residuals_heatmap(converted, 'language', 'converted', 'Language vs. Conversion', use_yates=True)
Chi-squared test: t=10.00, p=0.0016
Cramer's V = 0.041

Channel vs. Conversion

Show technical details
converted['channel'].value_counts()
channel
aggregator        2974
affiliate_deal     856
direct             743
affiliate          460
referral           327
paid_social        314
paid_search        214
partner            100
podcast              8
offline              2
other                1
Name: count, dtype: int64
Show technical details
chi2_residuals_heatmap(converted, 'channel', 'converted', 'Channel vs. Conversion', use_yates=True)
Chi-squared test: t=399.38, p=0.0000
Cramer's V = 0.258

First product vs. Conversion

Show technical details
converted['first_product'].value_counts()
first_product
liability    3322
contents     1302
travel        539
legal         472
bike          161
dog           137
dental         61
accident        5
Name: count, dtype: int64
Show technical details
chi2_residuals_heatmap(converted, 'first_product', 'converted', 'First product vs. Conversion')
Chi-squared test: t=186.17, p=0.0000
Cramer's V = 0.176

Area classification vs. Conversion

Show technical details
converted['area_classification'].value_counts()
area_classification
rural         2226
Big 7         1656
large town    1028
town           605
urban          465
Name: count, dtype: int64
Show technical details
chi2_residuals_heatmap(converted, 'area_classification', 'converted', 'Area classification vs. Conversion')
Chi-squared test: t=5.36, p=0.2525
Cramer's V = 0.030

Show technical details
converted['most_used_os'].value_counts()
most_used_os
Android      3175
iOS          2237
iPhone OS       1
Name: count, dtype: int64
Show technical details
# The single 'iPhone OS' record is excluded so the test compares Android vs. iOS.
# The title now names the variable actually tested (it previously repeated the
# area-classification title).
chi2_residuals_heatmap(converted[converted['most_used_os'] != 'iPhone OS'],
                       'most_used_os', 'converted', 'Most used OS vs. Conversion',
                       use_yates=True)
Chi-squared test: t=1.46, p=0.2268
Cramer's V = 0.016

Show technical details
converted['buying_platform'].value_counts()
buying_platform
web    1752
app    1376
Name: count, dtype: int64
Show technical details
chi2_residuals_heatmap(converted, 'buying_platform', 'converted', 'Buying platform vs. Conversion', use_yates=True)
Chi-squared test: t=422.96, p=0.0000
Cramer's V = 0.368

Show technical details
numeric = ['age', 'user_months']
categorical = [
    'language', 'channel', 'first_product',
    'area_classification', 'most_used_os', 'buying_platform'
]
Show technical details
# Modelling sample: rows without any missing value, minus the categories that
# have only a handful of records ('iPhone OS', three small channels, 'accident').
complete_rows = converted.dropna()
keep = (
    (complete_rows['most_used_os'] != 'iPhone OS')
    & ~complete_rows['channel'].isin(['other', 'offline', 'podcast'])
    & (complete_rows['first_product'] != 'accident')
)
converted_copy = complete_rows[keep]
Show technical details
converted_features = converted_copy.drop(columns = ['Unnamed: 0', 'converted', 'variant', 'user_days', 'joined_getsafe_at', 'zip_bucket'])
Show technical details
X = pd.get_dummies(converted_features, drop_first=True, dtype=float)
Show technical details
X = sm.add_constant(X)
Show technical details
# Design matrix for the VIF check: any existing intercept column is removed
# and a fresh constant is added, since VIFs are computed with an intercept.
X_vif = add_constant(X.drop(columns=['const'], errors='ignore'))

# One VIF per column of the design matrix (the constant included).
design = X_vif.values
vif_values = [
    variance_inflation_factor(design, idx) for idx in range(design.shape[1])
]

# Largest VIFs first; show the top 20.
vif_df = (
    pd.DataFrame({'feature': X_vif.columns, 'VIF': vif_values})
    .sort_values('VIF', ascending=False)
)
print(vif_df.head(20))
                           feature         VIF
0                            const  130.040462
15         first_product_liability    7.221505
11          first_product_contents    5.390722
5               channel_aggregator    3.829026
6                   channel_direct    3.552739
16            first_product_travel    3.131186
14             first_product_legal    3.079703
4           channel_affiliate_deal    2.844060
22             buying_platform_web    2.697693
8              channel_paid_social    1.877376
10                channel_referral    1.766673
18       area_classification_rural    1.759984
13               first_product_dog    1.657467
7              channel_paid_search    1.617012
12            first_product_dental    1.500211
17  area_classification_large town    1.428000
19        area_classification_town    1.306626
3                      language_en    1.296014
2                      user_months    1.278958
9                  channel_partner    1.262322
Show technical details
# first_product_liability has the highest VIF among the predictors (7.22 in the
# table above the intercept excluded), so it is removed before fitting.
# NOTE: get_dummies(drop_first=True) already left out one first_product level,
# so after this drop the reference category for the remaining first_product
# dummies is that omitted level together with 'liability'.
X = X.drop(columns = ['first_product_liability'])
# Outcome vector aligned with X: the 'converted' flag as integers.
y = converted_copy['converted'].astype(int)
Show technical details
model = sm.Logit(y, X)
result = model.fit(disp=False)
print(result.summary())
                           Logit Regression Results                           
==============================================================================
Dep. Variable:              converted   No. Observations:                 2966
Model:                          Logit   Df Residuals:                     2944
Method:                           MLE   Df Model:                           21
Date:                Sun, 09 Nov 2025   Pseudo R-squ.:                  0.1537
Time:                        10:44:26   Log-Likelihood:                -1613.3
converged:                       True   LL-Null:                       -1906.3
Covariance Type:            nonrobust   LLR p-value:                1.442e-110
==================================================================================================
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const                              2.7049      0.417      6.488      0.000       1.888       3.522
age                               -0.0180      0.005     -3.950      0.000      -0.027      -0.009
user_months                       -0.0793      0.018     -4.401      0.000      -0.115      -0.044
language_en                        0.2317      0.122      1.906      0.057      -0.007       0.470
channel_affiliate_deal             0.8454      0.169      4.997      0.000       0.514       1.177
channel_aggregator                 1.0525      0.222      4.742      0.000       0.617       1.487
channel_direct                     0.5108      0.186      2.742      0.006       0.146       0.876
channel_paid_search                1.0429      0.206      5.063      0.000       0.639       1.447
channel_paid_social                1.9445      0.197      9.894      0.000       1.559       2.330
channel_partner                    1.0289      0.276      3.727      0.000       0.488       1.570
channel_referral                   0.8496      0.183      4.633      0.000       0.490       1.209
first_product_contents             0.5808      0.127      4.583      0.000       0.332       0.829
first_product_dental               0.3540      0.326      1.085      0.278      -0.285       0.993
first_product_dog                  0.0902      0.299      0.302      0.763      -0.495       0.676
first_product_legal               -0.1101      0.162     -0.680      0.497      -0.428       0.207
first_product_travel              -0.0238      0.154     -0.155      0.877      -0.326       0.278
area_classification_large town    -0.0914      0.138     -0.660      0.509      -0.363       0.180
area_classification_rural         -0.0825      0.117     -0.706      0.480      -0.312       0.147
area_classification_town           0.1208      0.167      0.725      0.468      -0.206       0.447
area_classification_urban         -0.2656      0.180     -1.479      0.139      -0.617       0.086
most_used_os_iOS                  -0.1591      0.091     -1.756      0.079      -0.337       0.019
buying_platform_web               -1.6312      0.145    -11.285      0.000      -1.914      -1.348
==================================================================================================
Show technical details
# Slope estimates only: the intercept, when present, is left out of the chart.
coef = result.params.drop('const', errors='ignore')
pval = result.pvalues.drop('const', errors='ignore')
or_ = np.exp(coef)

# Keep the terms whose p-value is below 0.005.
sig_mask = pval < 0.005

df_sig = pd.DataFrame({
    'coef': coef[sig_mask],
    'pval': pval[sig_mask],
    'odds_ratio': or_[sig_mask]
}).sort_values('coef')

# Blue bars for positive log-odds, red bars for negative ones.
colors = np.where(df_sig['coef'] >= 0, '#2B6CB0', '#C53030')
y_pos = np.arange(len(df_sig))

plt.figure(figsize=(9, 6))
plt.barh(y_pos, df_sig['coef'].values, color=colors, alpha=0.9)
plt.axvline(0, color='black', linewidth=1)
plt.yticks(y_pos, df_sig.index)
plt.xlabel("Coefficient (log-odds)")
plt.title("Significant Logistic Regression Coefficients")
plt.tight_layout()

plt.show()

Show technical details
def ab_balance_check(df, variant_col, numeric, categorical, alpha=0.05):
    """Check whether covariates are balanced between the two experiment arms.

    Numeric columns are compared with Welch's t-test; categorical columns with
    a chi-squared test on the column × variant contingency table.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the variant column and all listed covariates.
    variant_col : str
        Column holding the experiment arm; must contain exactly two distinct
        non-missing values.
    numeric : list of str or None
        Numeric covariates to compare.
    categorical : list of str or None
        Categorical covariates to compare.
    alpha : float, default 0.05
        Threshold below which a p-value is flagged as significant.

    Returns
    -------
    pandas.DataFrame
        One row per covariate with ``variable``, ``groupA_mean``,
        ``groupB_mean`` (NaN for categorical covariates), ``p_value`` and
        ``significant``, sorted by ascending p-value.

    Raises
    ------
    ValueError
        If ``variant_col`` does not contain exactly two groups.
    """
    results = []

    # Sort the labels so "groupA"/"groupB" do not depend on which variant
    # happens to appear first in the data (unique() keeps appearance order).
    groups = sorted(df[variant_col].dropna().unique())
    if len(groups) != 2:
        raise ValueError(f"{variant_col} must have exactly 2 groups (found {groups}).")

    groupA, groupB = groups
    dfA = df[df[variant_col] == groupA]
    dfB = df[df[variant_col] == groupB]

    # Numeric columns: Welch's t-test (no equal-variance assumption).
    for col in numeric or []:
        a = pd.to_numeric(dfA[col], errors="coerce").dropna()
        b = pd.to_numeric(dfB[col], errors="coerce").dropna()

        stat, p = ttest_ind(a, b, equal_var=False)
        results.append({
            "variable": col,
            "groupA_mean": a.mean(),
            "groupB_mean": b.mean(),
            "p_value": p
        })

    # Categorical columns: chi-squared test of independence from the variant.
    for col in categorical or []:
        contingency = pd.crosstab(df[col], df[variant_col])
        chi2, p, dof, expected = chi2_contingency(contingency)
        results.append({
            "variable": col,
            "p_value": p
        })

    # Fixed column list so the frame is well-formed even with no covariates.
    res_df = pd.DataFrame(
        results, columns=["variable", "groupA_mean", "groupB_mean", "p_value"]
    )
    res_df["significant"] = res_df["p_value"].astype(float) < alpha
    return res_df.sort_values("p_value")
Show technical details
balance = ab_balance_check(
    converted,
    variant_col='variant',
    numeric=['age', 'user_months'],
    categorical=['language','channel','first_product','area_classification','most_used_os','buying_platform']
)
Show technical details
balance
variable groupA_mean groupB_mean p_value significant
6 most_used_os NaN NaN 0.193801 False
1 user_months 16.543168 16.464155 0.296358 False
0 age 32.512056 32.272486 0.376271 False
3 channel NaN NaN 0.383246 False
2 language NaN NaN 0.420498 False
7 buying_platform NaN NaN 0.440814 False
4 first_product NaN NaN 0.736315 False
5 area_classification NaN NaN 0.988582 False
Show technical details
def ab_conversion_test(df, variant_col='ab_group', target_col='converted'):
    """Two-proportion z-test of the conversion rate between two variants.

    Prints each group's conversion rate, the absolute difference and relative
    lift of B over A, and the z-test result.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the variant and target columns. It is not modified.
    variant_col : str, default 'ab_group'
        Column holding the experiment arm; must contain exactly two distinct
        non-missing values. The labels are sorted, so "A" is the first label
        in sort order and "B" the second.
    target_col : str, default 'converted'
        Column holding the conversion flag; values that cannot be parsed as
        numbers are dropped.

    Returns
    -------
    dict
        ``pA``, ``pB``, ``z``, ``pval``, ``diff`` (pB − pA) and ``lift``
        ((pB − pA) / pA, NaN when pA is 0). ``z`` has the same sign as
        ``diff``.

    Raises
    ------
    ValueError
        If ``variant_col`` does not contain exactly two groups.
    """
    # Work on a copy of the two needed columns so the caller's frame is not
    # mutated by the numeric coercion.
    data = df[[variant_col, target_col]].copy()
    data[target_col] = pd.to_numeric(data[target_col], errors='coerce')
    data = data.dropna(subset=[variant_col, target_col])

    # unique() returns labels in order of appearance; sorting keeps "Group A"
    # tied to the first label regardless of row order.
    groups = sorted(data[variant_col].unique())
    if len(groups) != 2:
        raise ValueError(f"{variant_col} must have exactly 2 groups (found {groups}).")

    A, B = groups

    outcome_A = data.loc[data[variant_col] == A, target_col]
    outcome_B = data.loc[data[variant_col] == B, target_col]

    conv_A, n_A = outcome_A.sum(), outcome_A.count()
    conv_B, n_B = outcome_B.sum(), outcome_B.count()

    # Proportions
    pA, pB = conv_A / n_A, conv_B / n_B

    # Two-proportion z-test, ordered (B, A) so z has the sign of pB - pA.
    count = np.array([conv_B, conv_A])
    nobs  = np.array([n_B, n_A])
    stat, pval = proportions_ztest(count, nobs)

    lift = (pB - pA) / pA if pA != 0 else np.nan

    print(f"Group A: {conv_A}/{n_A} = {pA:.3%}")
    print(f"Group B: {conv_B}/{n_B} = {pB:.3%}")
    print(f"Absolute diff: {pB - pA:.3%}")
    print(f"Relative lift: {lift:.2%}")
    print(f"z = {stat:.3f}, p = {pval:.4f}")

    if pval < 0.05:
        print("🚀 Significant difference between A and B (p < 0.05)")
    else:
        print("✅ No statistically significant difference detected")

    return {
        "pA": pA, "pB": pB, "z": stat, "pval": pval,
        "diff": pB - pA, "lift": lift
    }
Show technical details
ab_conversion_test(converted, variant_col='variant', target_col='converted')
Group A: 1473/2986 = 49.330%
Group B: 1374/3013 = 45.602%
Absolute diff: -3.728%
Relative lift: -7.56%
z = 2.891, p = 0.0038
🚀 Significant difference between A and B (p < 0.05)
{'pA': 0.49330207635632956,
 'pB': 0.4560238964487222,
 'z': 2.8910244224088557,
 'pval': 0.003839882787472746,
 'diff': -0.037278179907607345,
 'lift': -0.07556866612635134}
Show technical details
# Contingency table: conversion outcome (rows) by experiment variant (columns).
observed = pd.crosstab(converted['converted'], converted['variant'])

# Chi-squared test of independence, with the continuity correction enabled.
chi2, p, dof, expected = chi2_contingency(observed.values, correction=True)
# Re-label the expected counts so they align with the observed table.
expected = pd.DataFrame(expected, index=observed.index, columns=observed.columns)

# Pearson residuals: per-cell deviation from independence.
residuals = (observed - expected) / np.sqrt(expected)

# Cramér's V effect size computed from the (corrected) chi-squared statistic.
n = observed.values.sum()
r, c = observed.shape
cramers_v = np.sqrt((chi2 / n) / (min(r-1, c-1)))

# NOTE: the value printed after "t=" is the chi-squared statistic.
print(f"Chi-squared test: t={chi2:.2f}, p={p:.4f}")
print(f"Cramer's V = {cramers_v:.3f}")
Chi-squared test: t=8.21, p=0.0042
Cramer's V = 0.037
Show technical details
def ab_by_segment(df, group_col='ab_group', target_col='converted', segment_col='channel',
                  alpha=0.05, min_obs_per_group=20):
    """
    Compare A vs B conversion by segment (e.g., channel).

    For every segment with at least ``min_obs_per_group`` observations in each
    arm, runs a two-sided two-proportion z-test (B vs A), computes a Wald
    confidence interval for ``pB - pA`` and the relative lift of B over A,
    then adjusts the p-values across segments with Benjamini-Hochberg FDR.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied and not modified.
    group_col : str, default 'ab_group'
        Column holding the experiment arm; must contain exactly two groups.
        The labels are sorted, so "A" is the first label in sort order.
    target_col : str, default 'converted'
        Conversion flag; values that cannot be parsed as numbers are dropped.
    segment_col : str, default 'channel'
        Column defining the segments.
    alpha : float, default 0.05
        Significance level for the FDR-adjusted p-values.
    min_obs_per_group : int, default 20
        Segments with fewer observations in either arm are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per retained segment, sorted by raw p-value, with counts,
        rates, difference, CI bounds and lift (all rates in percent, rounded
        to 2 decimals), ``z``, ``p_value``, ``p_adj_fdr`` and ``sig_fdr``.
        An empty frame with the same columns is returned when no segment has
        enough observations.

    Raises
    ------
    ValueError
        If ``group_col`` does not contain exactly two groups.
    """
    d = df.copy()
    d[target_col] = pd.to_numeric(d[target_col], errors='coerce')
    d = d.dropna(subset=[group_col, target_col, segment_col])

    groups = d[group_col].unique()
    if len(groups) != 2:
        raise ValueError(f"{group_col} must have exactly 2 groups (found {groups}).")

    A, B = sorted(groups)  # just to have a stable order

    rows = []
    for seg, g in d.groupby(segment_col):
        # counts
        n_A = g.loc[g[group_col] == A, target_col].count()
        n_B = g.loc[g[group_col] == B, target_col].count()
        if n_A < min_obs_per_group or n_B < min_obs_per_group:
            continue

        conv_A = g.loc[g[group_col] == A, target_col].sum()
        conv_B = g.loc[g[group_col] == B, target_col].sum()

        pA = conv_A / n_A if n_A else np.nan
        pB = conv_B / n_B if n_B else np.nan

        # z-test for proportions (two-sided)
        count = np.array([conv_B, conv_A])   # (B, A)
        nobs  = np.array([n_B, n_A])
        z, p  = proportions_ztest(count, nobs)

        # CI for (pB - pA)
        low, high = confint_proportions_2indep(conv_B, n_B, conv_A, n_A, method='wald')

        diff = pB - pA
        # Lift is undefined when the baseline rate is zero or missing.
        # (Membership tests against np.nan only match by identity.)
        lift = diff / pA if pd.notna(pA) and pA != 0 else np.nan

        rows.append({
            segment_col: seg,
            'n_A': n_A, 'conv_A': conv_A, 'pA': pA,
            'n_B': n_B, 'conv_B': conv_B, 'pB': pB,
            'diff_B_minus_A': diff,
            'ci_low': low, 'ci_high': high,
            'rel_lift_vs_A': lift,
            'z': z, 'p_value': p
        })

    # No segment met the minimum size: return an empty, correctly shaped frame.
    # Sorting an empty DataFrame by 'p_value' would raise KeyError.
    if not rows:
        return pd.DataFrame(columns=[
            segment_col,
            'n_A', 'conv_A', 'pA',
            'n_B', 'conv_B', 'pB',
            'diff_B_minus_A', 'ci_low', 'ci_high', 'rel_lift_vs_A',
            'z', 'p_value', 'p_adj_fdr', 'sig_fdr',
        ])

    out = pd.DataFrame(rows).sort_values('p_value')

    # FDR correction across segments
    out['p_adj_fdr'] = multipletests(out['p_value'], alpha=alpha, method='fdr_bh')[1]
    out['sig_fdr'] = out['p_adj_fdr'] < alpha

    # nice percentages
    for col in ['pA','pB','diff_B_minus_A','rel_lift_vs_A','ci_low','ci_high']:
        out[col] = (out[col] * 100).round(2)

    return out.reset_index(drop=True)
Show technical details
results = ab_by_segment(converted, 'variant', 'converted', 'channel', alpha=0.05, min_obs_per_group=20)
print(results)
          channel   n_A  conv_A     pA   n_B  conv_B     pB  diff_B_minus_A  \
0          direct   398     261  65.58   345     264  76.52           10.94   
1      aggregator  1479     641  43.34  1495     702  46.96            3.62   
2         partner    54      22  40.74    46      24  52.17           11.43   
3  affiliate_deal   432     146  33.80   424     154  36.32            2.52   
4        referral   161      79  49.07   166      87  52.41            3.34   
5     paid_search   101      54  53.47   113      65  57.52            4.06   
6       affiliate   229      55  24.02   231      60  25.97            1.96   
7     paid_social   151     109  72.19   163     117  71.78           -0.41   

   ci_low  ci_high  rel_lift_vs_A         z   p_value  p_adj_fdr  sig_fdr  
0    4.48    17.41          16.69  3.267441  0.001085   0.008682     True  
1    0.04     7.19           8.34  1.981481  0.047537   0.190149    False  
2   -8.06    30.93          28.06  1.143317  0.252907   0.674418    False  
3   -3.87     8.92           7.47  0.773984  0.438940   0.717711    False  
4   -7.49    14.17           6.81  0.604215  0.545701   0.717711    False  
5   -9.27    17.39           7.59  0.596283  0.550986   0.717711    False  
6   -5.95     9.87           8.15  0.484548  0.627997   0.717711    False  
7  -10.35     9.53          -0.56 -0.080092  0.936164   0.936164    False  
Show technical details
results = ab_by_segment(converted, 'variant', 'converted', 'buying_platform', alpha=0.05, min_obs_per_group=20)
print(results)
  buying_platform  n_A  conv_A     pA  n_B  conv_B     pB  diff_B_minus_A  \
0             app  679     539  79.38  697     603  86.51            7.13   
1             web  890     398  44.72  862     428  49.65            4.93   

   ci_low  ci_high  rel_lift_vs_A         z   p_value  p_adj_fdr  sig_fdr  
0    3.17    11.09           8.98  3.520807  0.000430   0.000860     True  
1    0.26     9.60          11.03  2.067854  0.038654   0.038654     True