Insurance A/B Test Analysis

Author

Your Name

Show technical details

import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import levene, ttest_ind, chi2_contingency
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from statsmodels.stats.proportion import proportions_ztest, confint_proportions_2indep
from statsmodels.stats.multitest import multipletests

Show technical details

# Load the raw export. The 'Unnamed: 0' column visible in the preview below
# looks like a row index that was saved into the CSV (TODO confirm); it is
# dropped later before modelling.
converted = pd.read_csv('converted.csv')

Show technical details

converted.head(5)

	Unnamed: 0	age	joined_getsafe_at	language	channel	first_product	zip_bucket	area_classification	most_used_os	buying_platform	converted	variant
0	0	51	2024-04-21 18:19:07	de	paid_social	legal	539xx	rural	Android	web	1	B
1	1	18	2024-08-09 20:14:37	de	direct	liability	305xx	urban	Android	app	1	A
2	2	20	2024-08-03 00:51:27	de	aggregator	liability	882xx	rural	Android	app	1	A
3	3	18	2024-10-24 13:07:40	de	aggregator	liability	986xx	rural	NaN	NaN	1	B
4	4	28	2024-05-31 22:54:47	de	direct	liability	104xx	Big 7	Android	app	1	B

Show technical details

converted.shape

(6000, 12)

Show technical details

# Derive customer tenure (in days and in months) from the sign-up timestamp.
# NOTE(review): tenure is measured against the wall-clock time of the run, so
# both columns -- and every statistic built on them -- change from day to day.
converted['joined_getsafe_at'] = pd.to_datetime(converted['joined_getsafe_at'])
tenure = pd.Timestamp.today() - converted['joined_getsafe_at']
converted['user_days'] = tenure.dt.days
# 30.44 is approximately the average month length in days (365.25 / 12).
converted['user_months'] = converted['user_days'].div(30.44).round(1)

Show technical details

print(converted['age'].describe())

count    6000.000000
mean       32.721833
std        27.635606
min        18.000000
25%        25.000000
50%        30.000000
75%        37.000000
max      2013.000000
Name: age, dtype: float64

Show technical details

# Drop implausible ages. The describe() output above shows a maximum of 2013,
# presumably a birth year typed into the age field. Filtering on a plausibility
# bound rather than the literal value also catches any similar entries, and
# .copy() makes the result an independent frame so that later column
# assignments do not raise SettingWithCopyWarning.
MAX_PLAUSIBLE_AGE = 120
converted = converted[converted['age'] <= MAX_PLAUSIBLE_AGE].copy()

Show technical details

def t_test_analysis(df, variable, chart_name):
    """Compare a numeric variable between converted and non-converted users.

    Prints the t statistic, p-value, group means and Cohen's d, then draws a
    box plot of ``variable`` split by the ``converted`` flag.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``converted`` column (1 = converted, 0 = not) and the
        column named by ``variable``.
    variable : str
        Name of the numeric column to compare.
    chart_name : str
        Title of the box plot.

    Returns
    -------
    None
    """
    # Missing values would turn every statistic below into NaN, so drop them
    # per group before testing.
    conv_values = df.loc[df['converted'] == 1, variable].dropna()
    non_conv_values = df.loc[df['converted'] == 0, variable].dropna()

    # Levene's test decides the t-test flavour: the pooled-variance test is
    # used only when equal variances are not rejected at the 5% level,
    # otherwise Welch's test is used.
    _, p_levene = levene(conv_values, non_conv_values)
    equal_var = p_levene >= 0.05
    t_stat, p_val = ttest_ind(conv_values, non_conv_values, equal_var=equal_var)

    mean1, mean2 = conv_values.mean(), non_conv_values.mean()
    std1, std2 = conv_values.std(), non_conv_values.std()
    n1, n2 = len(conv_values), len(non_conv_values)

    s_pooled = np.sqrt(((n1 - 1)*std1**2 + (n2 - 1)*std2**2) / (n1 + n2 - 2))

    # Cohen's d; undefined when both groups have zero variance.
    d = (mean1 - mean2) / s_pooled if s_pooled > 0 else np.nan

    print(f"T-test: t={t_stat:.2f}, p={p_val:.4f}")
    print(f"Means: converted users={mean1}, non-converted users={mean2}")
    print(f"Cohen's d = {d:.3f}")

    sns.boxplot(x='converted', y=variable, data=df)
    plt.title(chart_name)
    plt.show()

Show technical details

t_test_analysis(converted, 'age', 'Age vs. Conversion')

T-test: t=-4.81, p=0.0000
Means: converted users=31.712680014049877, non-converted users=33.005076142131976
Cohen's d = -0.123

Usage months vs. Conversion

Show technical details

print(converted['user_months'].describe())

count    5999.000000
mean       16.503484
std         2.929747
min        12.300000
25%        13.900000
50%        16.000000
75%        19.200000
max        22.300000
Name: user_months, dtype: float64

Show technical details

# The variable analysed is tenure in months, so the chart title says so.
t_test_analysis(converted, 'user_months', 'Usage months vs. Conversion')

T-test: t=-15.05, p=0.0000
Means: converted users=15.917386722866176, non-converted users=17.03286802030457
Cohen's d = -0.388

Show technical details

# Pearson correlation between tenure (months) and the binary conversion flag.
months = converted['user_months']
flag = converted['converted']
corr = months.corr(flag, method='pearson')
print(f"Correlation (months of usage vs. conversion): {corr:.3f}")

Correlation (months of usage vs. conversion): -0.190

Language vs. Conversion

Show technical details

converted['language'].value_counts()

language
de    5138
en     861
Name: count, dtype: int64

Show technical details

def chi2_residuals_heatmap(df, row, col, title, use_yates=False):
    """Run a chi-squared independence test and plot the Pearson residuals.

    Builds the ``row`` x ``col`` contingency table, prints the chi-squared
    statistic, its p-value and Cramer's V, and draws a heatmap of the Pearson
    residuals ``(observed - expected) / sqrt(expected)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    row, col : str
        Column names used for the rows / columns of the contingency table.
    title : str or None
        Plot title. When empty or None, a default title built from ``row``
        and ``col`` is used.
    use_yates : bool, default False
        Passed to ``chi2_contingency`` as ``correction``. Per the SciPy
        documentation the correction is only applied when the table has one
        degree of freedom (2x2).

    Returns
    -------
    None
    """
    observed = pd.crosstab(df[row], df[col])

    chi2, p, dof, expected = chi2_contingency(observed.values, correction=use_yates)
    expected = pd.DataFrame(expected, index=observed.index, columns=observed.columns)

    residuals = (observed - expected) / np.sqrt(expected)

    n = observed.values.sum()
    r, c = observed.shape
    # Cramer's V is undefined for a table with a single row or column.
    min_dim = min(r - 1, c - 1)
    cramers_v = np.sqrt((chi2 / n) / min_dim) if min_dim > 0 else np.nan

    # The "t=" label is kept as-is so the printed lines match earlier runs;
    # the value is the chi-squared statistic.
    print(f"Chi-squared test: t={chi2:.2f}, p={p:.4f}")
    print(f"Cramer's V = {cramers_v:.3f}")

    plt.figure(figsize=(8, 5))
    ax = sns.heatmap(
        residuals,
        cmap="coolwarm_r",
        center=0,
        annot=True,
        fmt=".2f",
        linewidths=0.5,
        cbar_kws={"label": "Pearson residuals"}
    )
    # Use the fallback title when none is supplied (previously computed but
    # never applied).
    ttl = title or f"Pearson Residuals: {row} × {col}"
    ax.set_title(ttl)
    ax.set_xlabel(col)
    ax.set_ylabel(row)
    plt.tight_layout()
    plt.show()

Show technical details

chi2_residuals_heatmap(converted, 'language', 'converted', 'Language vs. Conversion', use_yates=True)

Chi-squared test: t=10.00, p=0.0016
Cramer's V = 0.041

Channel vs. Conversion

Show technical details

converted['channel'].value_counts()

channel
aggregator        2974
affiliate_deal     856
direct             743
affiliate          460
referral           327
paid_social        314
paid_search        214
partner            100
podcast              8
offline              2
other                1
Name: count, dtype: int64

Show technical details

chi2_residuals_heatmap(converted, 'channel', 'converted', 'Channel vs. Conversion', use_yates=True)

Chi-squared test: t=399.38, p=0.0000
Cramer's V = 0.258

First product vs. Conversion

Show technical details

converted['first_product'].value_counts()

first_product
liability    3322
contents     1302
travel        539
legal         472
bike          161
dog           137
dental         61
accident        5
Name: count, dtype: int64

Show technical details

chi2_residuals_heatmap(converted, 'first_product', 'converted', 'First product vs. Conversion')

Chi-squared test: t=186.17, p=0.0000
Cramer's V = 0.176

Area classification vs. Conversion

Show technical details

converted['area_classification'].value_counts()

area_classification
rural         2226
Big 7         1656
large town    1028
town           605
urban          465
Name: count, dtype: int64

Show technical details

chi2_residuals_heatmap(converted, 'area_classification', 'converted', 'Area classification vs. Conversion')

Chi-squared test: t=5.36, p=0.2525
Cramer's V = 0.030

Show technical details

converted['most_used_os'].value_counts()

most_used_os
Android      3175
iOS          2237
iPhone OS       1
Name: count, dtype: int64

Show technical details

# The single 'iPhone OS' row is excluded so the test compares Android vs. iOS.
# The title now names the variable actually being tested.
chi2_residuals_heatmap(converted[converted['most_used_os'] != 'iPhone OS'],
                       'most_used_os', 'converted', 'Most used OS vs. Conversion',
                       use_yates=True)

Chi-squared test: t=1.46, p=0.2268
Cramer's V = 0.016

Show technical details

converted['buying_platform'].value_counts()

buying_platform
web    1752
app    1376
Name: count, dtype: int64

Show technical details

chi2_residuals_heatmap(converted, 'buying_platform', 'converted', 'Buying platform vs. Conversion', use_yates=True)

Chi-squared test: t=422.96, p=0.0000
Cramer's V = 0.368

Show technical details

# Feature columns grouped by type.
numeric = ['age', 'user_months']
categorical = [
    'language',
    'channel',
    'first_product',
    'area_classification',
    'most_used_os',
    'buying_platform',
]

Show technical details

# Modelling sample: rows with no missing values, minus the categories that
# have only a handful of observations in the value counts above.
rare_channels = ['other', 'offline', 'podcast']
complete_rows = converted.dropna()
keep = (
    (complete_rows['most_used_os'] != 'iPhone OS')
    & ~complete_rows['channel'].isin(rare_channels)
    & (complete_rows['first_product'] != 'accident')
)
converted_copy = complete_rows[keep]

Show technical details

# Keep only predictor columns: remove the row id, the target, the experiment
# arm, and the raw/duplicate tenure and location fields.
non_feature_cols = [
    'Unnamed: 0',
    'converted',
    'variant',
    'user_days',
    'joined_getsafe_at',
    'zip_bucket',
]
converted_features = converted_copy.drop(columns=non_feature_cols)

Show technical details

# One-hot encode the categorical features as floats; drop_first removes one
# level per feature so that level serves as the reference category.
X = pd.get_dummies(converted_features, drop_first=True, dtype=float)

Show technical details

# Add the intercept column ('const') to the design matrix.
X = sm.add_constant(X)

Show technical details

# Variance inflation factors for the design matrix.
# Strip any existing intercept and add a fresh one so exactly one constant
# column is present when the VIFs are computed.
X_vif = add_constant(X.drop(columns=['const'], errors='ignore'))

# One VIF per column of X_vif (the intercept's own row is included).
vif_values = [
    variance_inflation_factor(X_vif.values, idx)
    for idx, _ in enumerate(X_vif.columns)
]

# Highest VIF first; show the top 20.
vif_df = (
    pd.DataFrame({'feature': X_vif.columns, 'VIF': vif_values})
    .sort_values('VIF', ascending=False)
)
print(vif_df.head(20))

                           feature         VIF
0                            const  130.040462
15         first_product_liability    7.221505
11          first_product_contents    5.390722
5               channel_aggregator    3.829026
6                   channel_direct    3.552739
16            first_product_travel    3.131186
14             first_product_legal    3.079703
4           channel_affiliate_deal    2.844060
22             buying_platform_web    2.697693
8              channel_paid_social    1.877376
10                channel_referral    1.766673
18       area_classification_rural    1.759984
13               first_product_dog    1.657467
7              channel_paid_search    1.617012
12            first_product_dental    1.500211
17  area_classification_large town    1.428000
19        area_classification_town    1.306626
3                      language_en    1.296014
2                      user_months    1.278958
9                  channel_partner    1.262322

Show technical details

# Target vector, taken from the same filtered frame the features came from.
y = converted_copy['converted'].astype(int)

# Remove the dummy with the highest VIF among the features.
# NOTE(review): dropping a single level folds it into the reference category
# of first_product, which changes how the remaining first_product
# coefficients are interpreted.
X = X.drop(columns=['first_product_liability'])

Show technical details

# Fit a logistic regression of conversion on the design matrix and print the
# full coefficient table; disp=False silences the optimiser's progress output.
model = sm.Logit(endog=y, exog=X)
result = model.fit(disp=False)
print(result.summary())

                           Logit Regression Results                           
==============================================================================
Dep. Variable:              converted   No. Observations:                 2966
Model:                          Logit   Df Residuals:                     2944
Method:                           MLE   Df Model:                           21
Date:                Sun, 09 Nov 2025   Pseudo R-squ.:                  0.1537
Time:                        10:44:26   Log-Likelihood:                -1613.3
converged:                       True   LL-Null:                       -1906.3
Covariance Type:            nonrobust   LLR p-value:                1.442e-110
==================================================================================================
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const                              2.7049      0.417      6.488      0.000       1.888       3.522
age                               -0.0180      0.005     -3.950      0.000      -0.027      -0.009
user_months                       -0.0793      0.018     -4.401      0.000      -0.115      -0.044
language_en                        0.2317      0.122      1.906      0.057      -0.007       0.470
channel_affiliate_deal             0.8454      0.169      4.997      0.000       0.514       1.177
channel_aggregator                 1.0525      0.222      4.742      0.000       0.617       1.487
channel_direct                     0.5108      0.186      2.742      0.006       0.146       0.876
channel_paid_search                1.0429      0.206      5.063      0.000       0.639       1.447
channel_paid_social                1.9445      0.197      9.894      0.000       1.559       2.330
channel_partner                    1.0289      0.276      3.727      0.000       0.488       1.570
channel_referral                   0.8496      0.183      4.633      0.000       0.490       1.209
first_product_contents             0.5808      0.127      4.583      0.000       0.332       0.829
first_product_dental               0.3540      0.326      1.085      0.278      -0.285       0.993
first_product_dog                  0.0902      0.299      0.302      0.763      -0.495       0.676
first_product_legal               -0.1101      0.162     -0.680      0.497      -0.428       0.207
first_product_travel              -0.0238      0.154     -0.155      0.877      -0.326       0.278
area_classification_large town    -0.0914      0.138     -0.660      0.509      -0.363       0.180
area_classification_rural         -0.0825      0.117     -0.706      0.480      -0.312       0.147
area_classification_town           0.1208      0.167      0.725      0.468      -0.206       0.447
area_classification_urban         -0.2656      0.180     -1.479      0.139      -0.617       0.086
most_used_os_iOS                  -0.1591      0.091     -1.756      0.079      -0.337       0.019
buying_platform_web               -1.6312      0.145    -11.285      0.000      -1.914      -1.348
==================================================================================================

Show technical details

# Bar chart of the logistic-regression coefficients (log-odds) that clear the
# significance threshold, intercept excluded.
# drop(..., errors='ignore') returns a new Series and is a no-op when the
# model has no intercept, so no explicit copy or membership check is needed.
coef = result.params.drop('const', errors='ignore')
pval = result.pvalues.drop('const', errors='ignore')
or_ = np.exp(coef)

# NOTE(review): 0.005 is ten times stricter than the 0.05 used by the other
# tests in this analysis -- confirm this threshold is intentional.
sig_mask = pval < 0.005

df_sig = pd.DataFrame({
    'coef': coef[sig_mask],
    'pval': pval[sig_mask],
    'odds_ratio': or_[sig_mask]
})

df_sig = df_sig.sort_values('coef')

# Blue for positive effects, red for negative ones.
colors = np.where(df_sig['coef'] >= 0, '#2B6CB0', '#C53030')

plt.figure(figsize=(9, 6))
y_pos = np.arange(len(df_sig))
plt.barh(y_pos, df_sig['coef'].values, color=colors, alpha=0.9)

plt.yticks(y_pos, df_sig.index)
plt.axvline(0, color='black', linewidth=1)
plt.title("Significant Logistic Regression Coefficients")
plt.xlabel("Coefficient (log-odds)")
plt.tight_layout()

plt.show()

Show technical details

def ab_balance_check(df, variant_col, numeric, categorical, alpha=0.05):
    """Check whether covariates are balanced between the two experiment arms.

    Numeric columns are compared with Welch's t-test; categorical columns
    with a chi-squared test of independence against the variant column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the variant column and all listed covariates.
    variant_col : str
        Column holding the experiment arm; must have exactly two distinct
        non-null values.
    numeric : list of str or None
        Numeric covariates to compare.
    categorical : list of str or None
        Categorical covariates to compare.
    alpha : float, default 0.05
        Threshold used for the ``significant`` flag.

    Returns
    -------
    pandas.DataFrame
        One row per covariate, sorted by ``p_value``. ``groupA_mean`` and
        ``groupB_mean`` refer to the first and second variant label in sorted
        order and are only filled for numeric covariates.

    Raises
    ------
    ValueError
        If ``variant_col`` does not contain exactly two groups.
    """
    results = []

    # Sort the labels: unique() returns them in order of appearance, which
    # would make "groupA" whichever variant happens to occur first in the data.
    groups = sorted(df[variant_col].dropna().unique())
    if len(groups) != 2:
        raise ValueError(f"{variant_col} must have exactly 2 groups (found {groups}).")

    group_a, group_b = groups
    df_a = df[df[variant_col] == group_a]
    df_b = df[df[variant_col] == group_b]

    # Numeric columns: Welch's t-test (no equal-variance assumption).
    for col in numeric or []:
        a = pd.to_numeric(df_a[col], errors="coerce").dropna()
        b = pd.to_numeric(df_b[col], errors="coerce").dropna()

        _, p = ttest_ind(a, b, equal_var=False)
        results.append({
            "variable": col,
            "groupA_mean": a.mean(),
            "groupB_mean": b.mean(),
            "p_value": p
        })

    # Categorical columns: chi-squared test on the covariate x variant table.
    for col in categorical or []:
        contingency = pd.crosstab(df[col], df[variant_col])
        _, p, _, _ = chi2_contingency(contingency)
        results.append({
            "variable": col,
            "p_value": p
        })

    # Nothing requested: return an empty frame instead of failing on the
    # missing 'p_value' column.
    if not results:
        return pd.DataFrame(
            columns=["variable", "groupA_mean", "groupB_mean", "p_value", "significant"]
        )

    res_df = pd.DataFrame(results)
    res_df["significant"] = res_df["p_value"] < alpha
    return res_df.sort_values("p_value")

Show technical details

# Reuse the column lists defined earlier in the analysis rather than repeating
# them, so the balance check always covers the same covariates.
balance = ab_balance_check(
    converted,
    variant_col='variant',
    numeric=numeric,
    categorical=categorical,
)

Show technical details

balance

	variable	groupA_mean	groupB_mean	p_value	significant
6	most_used_os	NaN	NaN	0.193801	False
1	user_months	16.543168	16.464155	0.296358	False
0	age	32.512056	32.272486	0.376271	False
3	channel	NaN	NaN	0.383246	False
2	language	NaN	NaN	0.420498	False
7	buying_platform	NaN	NaN	0.440814	False
4	first_product	NaN	NaN	0.736315	False
5	area_classification	NaN	NaN	0.988582	False

Show technical details

def ab_conversion_test(df, variant_col='ab_group', target_col='converted'):
    """Two-proportion z-test of conversion between the two experiment arms.

    Prints the conversion rate of each arm, the absolute and relative
    difference (second arm minus first arm, labels in sorted order), and the
    test result.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the variant and target columns. It is not modified.
    variant_col : str, default 'ab_group'
        Column holding the experiment arm; must have exactly two distinct
        values after rows with missing variant/target are dropped.
    target_col : str, default 'converted'
        Conversion flag; coerced to numeric, non-numeric entries are dropped.

    Returns
    -------
    dict
        ``pA``, ``pB`` (conversion rates of the first / second arm in sorted
        label order), ``z``, ``pval``, ``diff`` (``pB - pA``) and ``lift``
        (``diff / pA``, NaN when ``pA`` is 0). ``z`` has the same sign as
        ``diff``.

    Raises
    ------
    ValueError
        If ``variant_col`` does not contain exactly two groups.
    """
    # Work on a copy so the caller's frame is not modified by the coercion.
    data = df.copy()
    data[target_col] = pd.to_numeric(data[target_col], errors='coerce')
    data = data.dropna(subset=[variant_col, target_col])

    # Sort the labels: unique() returns them in order of appearance, so without
    # sorting "A" would be whichever variant occurs first in the data and the
    # reported lift could carry the wrong sign.
    groups = sorted(data[variant_col].unique())
    if len(groups) != 2:
        raise ValueError(f"{variant_col} must have exactly 2 groups (found {groups}).")

    grp_a, grp_b = groups

    target_a = data.loc[data[variant_col] == grp_a, target_col]
    target_b = data.loc[data[variant_col] == grp_b, target_col]

    conv_A, n_A = target_a.sum(), target_a.count()
    conv_B, n_B = target_b.sum(), target_b.count()

    # Proportions
    pA, pB = conv_A / n_A, conv_B / n_B

    # Two-proportion z-test, ordered (B, A) so the sign of z matches pB - pA
    # (the same ordering ab_by_segment uses).
    count = np.array([conv_B, conv_A])
    nobs  = np.array([n_B, n_A])
    stat, pval = proportions_ztest(count, nobs)

    lift = (pB - pA) / pA if pA != 0 else np.nan

    print(f"Group {grp_a}: {conv_A}/{n_A} = {pA:.3%}")
    print(f"Group {grp_b}: {conv_B}/{n_B} = {pB:.3%}")
    print(f"Absolute diff: {pB - pA:.3%}")
    print(f"Relative lift: {lift:.2%}")
    print(f"z = {stat:.3f}, p = {pval:.4f}")

    if pval < 0.05:
        print("🚀 Significant difference between A and B (p < 0.05)")
    else:
        print("✅ No statistically significant difference detected")

    return {
        "pA": pA, "pB": pB, "z": stat, "pval": pval,
        "diff": pB - pA, "lift": lift
    }

Show technical details

ab_conversion_test(converted, variant_col='variant', target_col='converted')

Group A: 1473/2986 = 49.330%
Group B: 1374/3013 = 45.602%
Absolute diff: -3.728%
Relative lift: -7.56%
z = 2.891, p = 0.0038
🚀 Significant difference between A and B (p < 0.05)

{'pA': 0.49330207635632956,
 'pB': 0.4560238964487222,
 'z': 2.8910244224088557,
 'pval': 0.003839882787472746,
 'diff': -0.037278179907607345,
 'lift': -0.07556866612635134}

Show technical details

# Cross-check of the A/B result: chi-squared test of independence between the
# conversion flag and the variant, with Yates' continuity correction.
observed = pd.crosstab(index=converted['converted'], columns=converted['variant'])

chi2, p, dof, expected = chi2_contingency(observed.to_numpy(), correction=True)
expected = pd.DataFrame(expected, index=observed.index, columns=observed.columns)

# Pearson residuals per cell (computed for inspection; not printed here).
residuals = (observed - expected).div(np.sqrt(expected))

# Effect size: Cramer's V from the chi-squared statistic and the table size.
n = observed.to_numpy().sum()
r, c = observed.shape
smaller_dim = min(r-1, c-1)
cramers_v = np.sqrt((chi2 / n) / smaller_dim)

print(f"Chi-squared test: t={chi2:.2f}, p={p:.4f}")
print(f"Cramer's V = {cramers_v:.3f}")

Chi-squared test: t=8.21, p=0.0042
Cramer's V = 0.037

Show technical details

def ab_by_segment(df, group_col='ab_group', target_col='converted', segment_col='channel',
                  alpha=0.05, min_obs_per_group=20):
    """
    Compare A vs B conversion by segment (e.g., channel).

    For every value of ``segment_col`` a two-sided two-proportion z-test and a
    Wald confidence interval for ``pB - pA`` are computed, where A and B are
    the two group labels in sorted order. Segments with fewer than
    ``min_obs_per_group`` observations in either group are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied, not modified.
    group_col : str, default 'ab_group'
        Column holding the experiment arm (exactly two distinct values).
    target_col : str, default 'converted'
        Conversion flag; coerced to numeric, non-numeric entries are dropped.
    segment_col : str, default 'channel'
        Column defining the segments.
    alpha : float, default 0.05
        Level used for the BH-FDR correction and the ``sig_fdr`` flag.
    min_obs_per_group : int, default 20
        Minimum observations per arm for a segment to be tested.

    Returns
    -------
    pandas.DataFrame
        Per-segment stats sorted by ``p_value``, with BH-FDR adjusted
        p-values (``p_adj_fdr``) and ``sig_fdr``. Rate, difference, lift and
        CI columns are expressed in percent, rounded to 2 decimals. An empty
        frame with the same columns is returned when no segment qualifies.

    Raises
    ------
    ValueError
        If ``group_col`` does not contain exactly two groups.
    """
    d = df.copy()
    d[target_col] = pd.to_numeric(d[target_col], errors='coerce')
    d = d.dropna(subset=[group_col, target_col, segment_col])

    groups = d[group_col].unique()
    if len(groups) != 2:
        raise ValueError(f"{group_col} must have exactly 2 groups (found {groups}).")

    A, B = sorted(groups)  # just to have a stable order

    pct_cols = ['pA', 'pB', 'diff_B_minus_A', 'rel_lift_vs_A', 'ci_low', 'ci_high']

    rows = []
    for seg, g in d.groupby(segment_col):
        target_a = g.loc[g[group_col] == A, target_col]
        target_b = g.loc[g[group_col] == B, target_col]

        # counts
        n_A = target_a.count()
        n_B = target_b.count()
        if n_A < min_obs_per_group or n_B < min_obs_per_group:
            continue

        conv_A = target_a.sum()
        conv_B = target_b.sum()

        pA = conv_A / n_A if n_A else np.nan
        pB = conv_B / n_B if n_B else np.nan

        # z-test for proportions (two-sided)
        count = np.array([conv_B, conv_A])   # (B, A)
        nobs  = np.array([n_B, n_A])
        z, p  = proportions_ztest(count, nobs)

        # CI for (pB - pA)
        low, high = confint_proportions_2indep(conv_B, n_B, conv_A, n_A, method='wald')

        diff = pB - pA
        # Relative lift is undefined when A's rate is 0 or NaN; the comparison
        # is False for NaN, so both cases fall through to NaN.
        lift = diff / pA if pA > 0 else np.nan

        rows.append({
            segment_col: seg,
            'n_A': n_A, 'conv_A': conv_A, 'pA': pA,
            'n_B': n_B, 'conv_B': conv_B, 'pB': pB,
            'diff_B_minus_A': diff,
            'ci_low': low, 'ci_high': high,
            'rel_lift_vs_A': lift,
            'z': z, 'p_value': p
        })

    # No segment met the minimum size: return an empty, correctly-shaped frame.
    # This check must come before sort_values, which raises KeyError on a
    # frame that has no 'p_value' column.
    if not rows:
        return pd.DataFrame(columns=[
            segment_col,
            'n_A', 'conv_A', 'pA',
            'n_B', 'conv_B', 'pB',
            'diff_B_minus_A', 'ci_low', 'ci_high', 'rel_lift_vs_A',
            'z', 'p_value', 'p_adj_fdr', 'sig_fdr'
        ])

    out = pd.DataFrame(rows).sort_values('p_value')

    # FDR correction across segments
    out['p_adj_fdr'] = multipletests(out['p_value'], alpha=alpha, method='fdr_bh')[1]
    out['sig_fdr'] = out['p_adj_fdr'] < alpha

    # nice percentages
    for col in pct_cols:
        out[col] = (out[col] * 100).round(2)

    return out.reset_index(drop=True)

Show technical details

results = ab_by_segment(converted, 'variant', 'converted', 'channel', alpha=0.05, min_obs_per_group=20)
print(results)

          channel   n_A  conv_A     pA   n_B  conv_B     pB  diff_B_minus_A  \
0          direct   398     261  65.58   345     264  76.52           10.94   
1      aggregator  1479     641  43.34  1495     702  46.96            3.62   
2         partner    54      22  40.74    46      24  52.17           11.43   
3  affiliate_deal   432     146  33.80   424     154  36.32            2.52   
4        referral   161      79  49.07   166      87  52.41            3.34   
5     paid_search   101      54  53.47   113      65  57.52            4.06   
6       affiliate   229      55  24.02   231      60  25.97            1.96   
7     paid_social   151     109  72.19   163     117  71.78           -0.41   

   ci_low  ci_high  rel_lift_vs_A         z   p_value  p_adj_fdr  sig_fdr  
0    4.48    17.41          16.69  3.267441  0.001085   0.008682     True  
1    0.04     7.19           8.34  1.981481  0.047537   0.190149    False  
2   -8.06    30.93          28.06  1.143317  0.252907   0.674418    False  
3   -3.87     8.92           7.47  0.773984  0.438940   0.717711    False  
4   -7.49    14.17           6.81  0.604215  0.545701   0.717711    False  
5   -9.27    17.39           7.59  0.596283  0.550986   0.717711    False  
6   -5.95     9.87           8.15  0.484548  0.627997   0.717711    False  
7  -10.35     9.53          -0.56 -0.080092  0.936164   0.936164    False

Show technical details

results = ab_by_segment(converted, 'variant', 'converted', 'buying_platform', alpha=0.05, min_obs_per_group=20)
print(results)

  buying_platform  n_A  conv_A     pA  n_B  conv_B     pB  diff_B_minus_A  \
0             app  679     539  79.38  697     603  86.51            7.13   
1             web  890     398  44.72  862     428  49.65            4.93   

   ci_low  ci_high  rel_lift_vs_A         z   p_value  p_adj_fdr  sig_fdr  
0    3.17    11.09           8.98  3.520807  0.000430   0.000860     True  
1    0.26     9.60          11.03  2.067854  0.038654   0.038654     True