def ab_by_segment(df: "pd.DataFrame", group_col: str = 'ab_group', target_col: str = 'converted',
                  segment_col: str = 'channel', alpha: float = 0.05,
                  min_obs_per_group: int = 20) -> "pd.DataFrame":
    """Compare A vs B conversion within each segment (e.g., channel).

    For every value of ``segment_col`` the two arms found in ``group_col`` are
    compared with a two-sided z-test for proportions and a Wald confidence
    interval for the difference (B minus A). P-values are then adjusted across
    segments with the Benjamini-Hochberg FDR procedure.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied, never modified.
    group_col : str
        Column holding the arm label. After dropping rows with missing
        values it must contain exactly two distinct values; the smaller one
        (by ``sorted``) is treated as "A", the other as "B".
    target_col : str
        Outcome column. Values are coerced with ``pd.to_numeric``; anything
        that cannot be parsed becomes NaN and the row is dropped.
        NOTE(review): the statistics assume a 0/1 indicator, but this is not
        validated here.
    segment_col : str
        Column whose values define the segments.
    alpha : float
        FDR level passed to ``multipletests`` and used as the threshold for
        ``sig_fdr``.
    min_obs_per_group : int
        Segments where either arm has fewer observations are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per analysed segment, sorted by ascending ``p_value`` with a
        fresh integer index. Columns: ``segment_col``, ``n_A``, ``conv_A``,
        ``pA``, ``n_B``, ``conv_B``, ``pB``, ``diff_B_minus_A``, ``ci_low``,
        ``ci_high``, ``rel_lift_vs_A``, ``z``, ``p_value``, ``p_adj_fdr``,
        ``sig_fdr``. The rate, difference, lift and CI columns are expressed
        in percent, rounded to 2 decimals. If no segment has enough
        observations, an empty frame with these columns is returned.

    Raises
    ------
    ValueError
        If ``group_col`` does not contain exactly two groups.
    """
    result_cols = [
        segment_col,
        'n_A', 'conv_A', 'pA',
        'n_B', 'conv_B', 'pB',
        'diff_B_minus_A', 'ci_low', 'ci_high',
        'rel_lift_vs_A', 'z', 'p_value',
        'p_adj_fdr', 'sig_fdr',
    ]

    d = df.copy()
    d[target_col] = pd.to_numeric(d[target_col], errors='coerce')
    d = d.dropna(subset=[group_col, target_col, segment_col])

    groups = d[group_col].unique()
    if len(groups) != 2:
        raise ValueError(f"{group_col} must have exactly 2 groups (found {groups}).")
    A, B = sorted(groups)  # sorted only to get a stable A/B assignment

    rows = []
    for seg, g in d.groupby(segment_col):
        # Select each arm's outcomes once and reuse them for counts and sums.
        y_A = g.loc[g[group_col] == A, target_col]
        y_B = g.loc[g[group_col] == B, target_col]

        n_A, n_B = y_A.count(), y_B.count()
        if n_A < min_obs_per_group or n_B < min_obs_per_group:
            continue

        conv_A, conv_B = y_A.sum(), y_B.sum()
        # The zero guards only matter when min_obs_per_group <= 0.
        pA = conv_A / n_A if n_A else np.nan
        pB = conv_B / n_B if n_B else np.nan

        # z-test for proportions (two-sided); arrays are ordered (B, A) so the
        # sign of z matches diff_B_minus_A.
        z, p = proportions_ztest(np.array([conv_B, conv_A]), np.array([n_B, n_A]))

        # CI for (pB - pA)
        low, high = confint_proportions_2indep(conv_B, n_B, conv_A, n_A, method='wald')

        diff = pB - pA
        # Relative lift is undefined when the baseline rate is zero or missing.
        # NaN is tested explicitly: membership in a tuple only matches NaN by
        # object identity.
        lift = diff / pA if (pA != 0 and not np.isnan(pA)) else np.nan

        rows.append({
            segment_col: seg,
            'n_A': n_A, 'conv_A': conv_A, 'pA': pA,
            'n_B': n_B, 'conv_B': conv_B, 'pB': pB,
            'diff_B_minus_A': diff,
            'ci_low': low, 'ci_high': high,
            'rel_lift_vs_A': lift,
            'z': z, 'p_value': p
        })

    # Bail out before sorting: a frame built from no rows has no 'p_value'
    # column, so sort_values would raise KeyError.
    if not rows:
        return pd.DataFrame(columns=result_cols)

    out = pd.DataFrame(rows).sort_values('p_value')

    # FDR correction across segments. Segments whose test is degenerate
    # (NaN p-value) are excluded from the correction and keep a NaN adjusted
    # p-value, so they cannot distort the adjustment of the valid ones.
    p_raw = out['p_value'].to_numpy(dtype=float)
    p_adj = np.full(p_raw.shape, np.nan)
    testable = ~np.isnan(p_raw)
    if testable.any():
        p_adj[testable] = multipletests(p_raw[testable], alpha=alpha, method='fdr_bh')[1]
    out['p_adj_fdr'] = p_adj
    out['sig_fdr'] = out['p_adj_fdr'] < alpha  # NaN compares False

    # nice percentages
    for col in ['pA', 'pB', 'diff_B_minus_A', 'rel_lift_vs_A', 'ci_low', 'ci_high']:
        out[col] = (out[col] * 100).round(2)

    return out.reset_index(drop=True)