imps <- read_csv("dsc_imps.csv")
## Parsed with column specification:
## cols(
## tdb_session_id = col_character(),
## dt = col_date(format = ""),
## device = col_character(),
## ct_vars = col_character()
## )
exp <- read_csv("dsc_exp_ids.csv")
## Parsed with column specification:
## cols(
## tdb_session_id = col_character(),
## dt = col_date(format = ""),
## device = col_character(),
## exp_id = col_character(),
## exp_var = col_double()
## )
exp %>%
filter(exp_id == '4RnrQSacRKKVkTWbGlvLgg',
exp_var < 2) %>%
count(dt, exp_var) %>%
group_by(exp_var) %>%
dplyr::mutate(cs = cumsum(n)) %>%
ggplot(aes(dt, cs, color = as.factor(exp_var))) +
geom_line() +
geom_point() +
labs(x = "",
y = "",
title = "DAEM Removal Experiment Cumulative Exposure Curve",
subtitle = "Sessions Exposed") +
scale_color_discrete(labels=c("Control", "Treatment")) +
theme_minimal() +
scale_y_continuous(labels = scales::comma) +
theme(legend.position = "bottom",
legend.title = element_blank(),
plot.title.position = "plot")
Doesn’t appear to be SS exposure imbalance (given data set used for plotting)
binom.test(c(19519,19559), 39118, 0.5)
##
## Exact binomial test
##
## data: c(19519, 19559)
## number of successes = 19519, number of trials = 39078, p-value = 0.8436
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
## 0.4945182 0.5044583
## sample estimates:
## probability of success
## 0.4994882
joined_df <- exp %>%
filter(exp_id == '4RnrQSacRKKVkTWbGlvLgg',
exp_var < 2) %>%
mutate(device = recode(device, "desktop" = "Desktop")) %>%
inner_join(imps, by=c("tdb_session_id", "dt", "device"))
exp %>%
filter(exp_id == '4RnrQSacRKKVkTWbGlvLgg',
exp_var < 2) %>%
mutate(device = recode(device, "desktop" = "Desktop")) %>%
inner_join(imps, by=c("tdb_session_id", "dt", "device")) %>%
count(tdb_session_id, exp_var, sort = TRUE) %>%
ggplot() +
geom_density(aes(n, color = as.factor(exp_var))) +
labs(x = "",
y = "",
title = "Probability Density: Ads Seen per Session") +
scale_color_discrete(labels=c("Control", "Treatment")) +
theme_minimal() +
theme(legend.position = "bottom",
legend.title = element_blank(),
plot.title.position = "plot")
DAEM impressions aren’t logged directly.
You should assume that 1 DAEM impression is delivered and monetized per user session with the fourth non-DAEM impression on an article page type. That is, if a session featured four or more article page impressions, it also featured a single DAEM impression. Further assume that a DAEM impression generates identical revenue to a non-DAEM ad impression.
joined_df %>%
count(exp_var, tdb_session_id)
## # A tibble: 27,394 x 3
## exp_var tdb_session_id n
## <dbl> <chr> <int>
## 1 0 00014924-bb18-4715-b45e-be80f2c805e0 11
## 2 0 00073585-a02f-4390-9c33-05b6b9140179 24
## 3 0 00101c9f-36fa-4ce4-917b-efe3c32610f1 10
## 4 0 0010a121-e620-4e5f-be70-ba9694763300 5
## 5 0 00152cd3-ebd5-4969-9220-3cfba2121c61 14
## 6 0 001db346-9b5d-41dd-b69a-8ff8e98e0f66 21
## 7 0 001f9e38-e628-41ff-92f1-55b68651fca6 3
## 8 0 00277cfb-b00a-4e9a-a15a-91b5a140fc8d 3
## 9 0 00282ede-d548-4c8c-b919-d9533be11a3b 4
## 10 0 0028b967-292a-4984-ab02-1fb8c7c2c907 17
## # … with 27,384 more rows
joined_df %>%
group_by(exp_var) %>%
summarize(num_sessions = n_distinct(tdb_session_id))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## exp_var num_sessions
## <dbl> <int>
## 1 0 13718
## 2 1 13676
joined_df %>%
mutate(
ct_vars = str_to_lower(ct_vars),
imp_type = case_when(
grepl("article", ct_vars) ~ 'article',
grepl("author", ct_vars) ~ 'author',
grepl("home", ct_vars) ~ 'home'
)) %>%
count(tdb_session_id, imp_type, exp_var) %>%
filter(n >= 4,
imp_type == 'article') %>%
group_by(exp_var) %>%
summarize(nd = n_distinct(tdb_session_id))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## exp_var nd
## <dbl> <int>
## 1 0 10305
## 2 1 10417
# mutate(daems_seen = round(n / 4))
full_ts <- joined_df %>%
group_by(dt, tdb_session_id, exp_var) %>%
summarize(ad_impression = n(), .groups="drop") %>%
group_by(dt, exp_var) %>%
summarize(mean = mean(ad_impression),
n_sessions = n_distinct(tdb_session_id),
sd = sd(ad_impression),
me = 1.96 * (sd / sqrt(n_sessions)),
.groups="drop")
just_me <- joined_df %>%
group_by(dt, tdb_session_id) %>%
summarize(ad_impression = n(), .groups = "drop") %>%
group_by(dt) %>%
summarize(mean = mean(ad_impression),
n_sessions = n_distinct(tdb_session_id),
sd = sd(ad_impression),
me = 1.96 * (sd / sqrt(n_sessions)),
.groups="drop")
full_ts %>%
select(dt, exp_var, mean) %>%
pivot_wider(names_from = exp_var, values_from = mean) %>%
mutate(delta_pct = (`1` - `0`)*100. / `0`) %>%
inner_join(just_me , by = c("dt")) %>%
ggplot() +
geom_errorbar(aes(dt, ymin = delta_pct - me, ymax = delta_pct + me)) +
geom_point(aes(dt, delta_pct)) +
theme_minimal() +
labs(x = "",
y = "Pct. Delta")