cuped

exposure	conversion	clicks_in_week_prior
control	0	0
control	0	0
control	0	0
treatment	0	0
control	0	1
treatment	0	0

test assumptions

df %>%
  ggplot(aes(x=as.factor(conversion),y=clicks_in_week_prior)) +   geom_violin() +
  labs(title='plot showing variable correlated with outcome')

df %>%
  ggplot(aes(x=as.factor(exposure),y=clicks_in_week_prior)) +   geom_violin() +
  labs(title='plot showing covariate correlated by treatment')

df %>% 
  group_by(exposure) %>% 
  summarise(
    users = n(),
    converters = sum(conversion)
  ) %>% 
  mutate(cvr = converters / users) %>% 
  knitr::kable()

## `summarise()` ungrouping output (override with `.groups` argument)

exposure	users	converters	cvr
control	7551	227	0.0300622
treatment	7449	267	0.0358437

show lm OLS == t.test

fit linear model and t-test to show results are the same without incorporating covariates:

t_test <- t.test(conversion ~ exposure, data = df) %>% tidy()
lm_model <- lm(conversion ~ exposure, data = df) 

tidy(lm_model) %>% 
  filter(term=='exposuretreatment') %>% 
  knitr::kable(caption = 'linear model coefficients')

linear model coefficients
term	estimate	std.error	statistic	p.value
exposuretreatment	0.0057815	0.0029142	1.983936	0.0472812

t_test %>% 
  knitr::kable(caption = 't-test model coefficient')

t-test model coefficient
estimate	estimate1	estimate2	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-0.0057815	0.0300622	0.0358437	-1.982793	0.047409	14854.44	-0.0114969	-6.61e-05	Welch Two Sample t-test	two.sided

CUPED

incorporate covariates – note that variance reduction slightly affects p-values here. you will see larger reduction if you choose covariate that is more correlated with conversion

stats_base_model <- lm_model %>% 
  tidy() %>% 
  filter(term == 'exposuretreatment')


# incorporate covariates from the pre-experiment period
lm_model_with_covariates <- lm(conversion ~ exposure + clicks_in_week_prior , data = df) 

stats_model_with_covariates <- lm_model_with_covariates %>% 
  tidy() %>% 
  filter(term == 'exposuretreatment')

stats <- bind_rows(
  stats_base_model %>% mutate(cuped=FALSE),
  stats_model_with_covariates %>% mutate(cuped=TRUE)
) 

stats %>% 
  mutate(var_reduction  = (std.error - lag(std.error)) / lag(std.error))  %>%   
  knitr::kable(caption = 'data after var reduction', digits = 6)

data after var reduction
term	estimate	std.error	statistic	p.value	cuped	var_reduction
exposuretreatment	0.005781	0.002914	1.983936	0.047281	FALSE	NA
exposuretreatment	0.005309	0.002408	2.205168	0.027457	TRUE	-0.173793

cuped

Ben Weiher

1/8/2021

show data

test assumptions

show lm OLS == t.test

CUPED