rm(list = ls())
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(patchwork)
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(correlation)
## Warning: package 'correlation' was built under R version 4.1.2
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
theme_set(theme_minimal())
d0 <- fread("../data_ukraine/user_activity.csv")
d0 <- mutate_if(d0, is.integer, as.numeric)
setDT(d0)
d0[, lineitem := paste0("adgroup", group-1)]
glimpse(d0)
## Rows: 17,049
## Columns: 16
## $ id_str <int64> 1470103877282320391, 1424873675795308555, 92507535,…
## $ subcamp01_tweets <dbl> 58, 19, 0, 22, 0, 66, 0, 0, 30, 2, 0, 15, 30, 20, 0, …
## $ subcamp02_tweets <dbl> 86, 0, 0, 7, 0, 112, 0, 0, 25, 2, 0, 131, 3, 17, 2, 2…
## $ subcamp03_tweets <dbl> 54, 0, 0, 0, 0, 201, 0, 5, 30, 1, 5, 71, 9, 31, 1, 8,…
## $ subcamp04_tweets <dbl> 79, 0, 0, 14, 0, 149, 0, 7, 18, 4, 0, 79, 10, 41, 3, …
## $ subcamp05_tweets <dbl> 40, 0, 7, 10, 0, 68, 0, 0, 31, 3, 0, 76, 5, 30, 2, 8,…
## $ subcamp06_tweets <dbl> 20, 0, 1, 3, 0, 108, 0, 1, 14, 0, 0, 16, 5, 21, 0, 7,…
## $ subcamp07_tweets <dbl> 29, 0, 0, 2, 0, 57, 0, 0, 19, 0, 0, 1, 1, 9, 0, 3, 23…
## $ subcamp08_tweets <dbl> 44, 0, 6, 1, 0, 91, 0, 1, 2, 0, 0, 14, 16, 13, 1, 12,…
## $ subcamp09_tweets <dbl> 46, 0, 1, 0, 0, 63, 0, 1, 0, 1, 0, 34, 6, 16, 1, 0, 1…
## $ subcamp10_tweets <dbl> 72, 0, 4, 2, 0, 57, 0, 0, 15, 2, 3, 10, 3, 15, 2, 8, …
## $ subcamp11_tweets <dbl> 53, 0, 6, 6, 0, 125, 0, 0, 30, 4, 1, 28, 3, 10, 2, 6,…
## $ group <dbl> 8, 8, 12, 11, 12, 24, 8, 8, 8, 8, 10, 8, 8, 8, 12, 7,…
## $ condition <chr> "t", "t", "c", "t", "t", "t", "t", "c", "c", "c", "c"…
## $ block <chr> "08_108", "08_128", "12_55", "11_25", "12_26", "24_82…
## $ lineitem <chr> "adgroup7", "adgroup7", "adgroup11", "adgroup10", "ad…
d0[, .(group, lineitem)][order(group)] |> distinct()
## group lineitem
## 1: 1 adgroup0
## 2: 2 adgroup1
## 3: 3 adgroup2
## 4: 4 adgroup3
## 5: 5 adgroup4
## 6: 6 adgroup5
## 7: 7 adgroup6
## 8: 8 adgroup7
## 9: 9 adgroup8
## 10: 10 adgroup9
## 11: 11 adgroup10
## 12: 12 adgroup11
## 13: 13 adgroup12
## 14: 14 adgroup13
## 15: 15 adgroup14
## 16: 16 adgroup15
## 17: 17 adgroup16
## 18: 18 adgroup17
## 19: 19 adgroup18
## 20: 20 adgroup19
## 21: 21 adgroup20
## 22: 22 adgroup21
## 23: 23 adgroup22
## 24: 24 adgroup23
## 25: 25 adgroup24
## 26: 26 adgroup25
## 27: 27 adgroup26
## group lineitem
d0[, .N, keyby = .(condition)]
## condition N
## 1: c 8561
## 2: t 8488
d_groupn <- d0[, .N, keyby = .(group, lineitem, condition)]
d_groupn
## group lineitem condition N
## 1: 1 adgroup0 c 290
## 2: 1 adgroup0 t 292
## 3: 2 adgroup1 c 363
## 4: 2 adgroup1 t 356
## 5: 3 adgroup2 c 387
## 6: 3 adgroup2 t 383
## 7: 4 adgroup3 c 371
## 8: 4 adgroup3 t 363
## 9: 5 adgroup4 c 227
## 10: 5 adgroup4 t 224
## 11: 6 adgroup5 c 305
## 12: 6 adgroup5 t 301
## 13: 7 adgroup6 c 251
## 14: 7 adgroup6 t 243
## 15: 8 adgroup7 c 412
## 16: 8 adgroup7 t 409
## 17: 9 adgroup8 c 377
## 18: 9 adgroup8 t 370
## 19: 10 adgroup9 c 381
## 20: 10 adgroup9 t 375
## 21: 11 adgroup10 c 227
## 22: 11 adgroup10 t 229
## 23: 12 adgroup11 c 274
## 24: 12 adgroup11 t 269
## 25: 13 adgroup12 c 361
## 26: 13 adgroup12 t 360
## 27: 14 adgroup13 c 348
## 28: 14 adgroup13 t 349
## 29: 15 adgroup14 c 271
## 30: 15 adgroup14 t 275
## 31: 16 adgroup15 c 398
## 32: 16 adgroup15 t 401
## 33: 17 adgroup16 c 305
## 34: 17 adgroup16 t 300
## 35: 18 adgroup17 c 336
## 36: 18 adgroup17 t 338
## 37: 19 adgroup18 c 386
## 38: 19 adgroup18 t 387
## 39: 20 adgroup19 c 400
## 40: 20 adgroup19 t 394
## 41: 21 adgroup20 c 220
## 42: 21 adgroup20 t 227
## 43: 22 adgroup21 c 386
## 44: 22 adgroup21 t 366
## 45: 23 adgroup22 c 225
## 46: 23 adgroup22 t 232
## 47: 24 adgroup23 c 352
## 48: 24 adgroup23 t 345
## 49: 25 adgroup24 c 290
## 50: 25 adgroup24 t 294
## 51: 26 adgroup25 c 204
## 52: 26 adgroup25 t 199
## 53: 27 adgroup26 c 214
## 54: 27 adgroup26 t 207
## group lineitem condition N
d1 <- melt(d0, id.vars = c("id_str", "group", "lineitem", "condition", "block"), variable.name = "campaign", value.name = "active")
d2 <- d1 %>% filter(active > 0) %>% group_by(group, lineitem, condition, campaign) %>% summarise(active_user_count = n(),
avg_active_tweets = mean(active),
super_user_count = sum(active >= 500),
idle_user_count = sum(active <= 2))
## `summarise()` has grouped output by 'group', 'lineitem', 'condition'. You can override using the `.groups` argument.
# d2 <- d1[active > 0, .(active_user_count = .N), keyby = .(group, lineitem, condition, campaign)]
d3 <- left_join(d2, d_groupn)
## Joining, by = c("group", "lineitem", "condition")
setDT(d3)
d3[, active_prop := active_user_count / N]
d3[, super_user_prop := super_user_count / N]
d3[, idle_user_prop := idle_user_count / N]
d1[d1$active > 500, ][order(-d1$active), ] %>%
ggplot(aes(active)) +
geom_histogram(bins = 100) +
labs(x = "tweets per day (only > 500 tweets per day)", y = "no. of users")
## Warning: Removed 183948 rows containing non-finite values (stat_bin).
ggplot(d1, aes(active, col = campaign)) +
stat_ecdf(size = 1) +
labs(x = "no. of tweets", y = "proportion of users")
reach <- list.files("../campaign-adgroup/ukraine/export_data", pattern = "*-total.csv", full.names = T)
reach <- reach[!str_detect(reach, "06-28|06-30|07-01")]
campaigns <- d3[, unique(campaign)]
change_ad_group_name <- function(x){
if (substring(x, 8, 8) == "0"){
return (paste0(substring(x, 1, 7), substring(x, 9, 9)))
}
return (x)
}
plots <- list()
c <- 1
for (c in 1:length(campaigns)) {
camp <- campaigns[c]
tempdat <- d3[campaign == camp & condition == "t"]
temp_reach <- fread(reach[c])
if (c >= 6){
temp_reach <- temp_reach %>% filter(Objective == "Video views") %>% filter(`Ad Group status` == "Running")
temp_reach$`Ad Group name` <- sapply(temp_reach$`Ad Group name`, function(x) change_ad_group_name(x))
}
temp_reach <- temp_reach[, .(`Ad Group name`, `Total audience reach`)]
setnames(temp_reach, c("lineitem", "reach"))
tempdat <- left_join(tempdat, temp_reach)
setDT(tempdat)
tempdat[, reach := as.numeric(reach)]
tempdat$reach[is.na(tempdat$reach)] <- 0
tempdat[, reach_prop := reach / N]
ord <- tempdat[order(reach_prop), lineitem]
tempdat[, lineitem := factor(lineitem, levels = ord)]
p1 <- ggplot(tempdat, aes(active_prop, lineitem)) +
geom_bar(stat = "identity", alpha = 0.3) +
geom_point(aes(x = reach_prop)) +
geom_text(aes(label = paste0(active_user_count, "/", N)), size = 3) +
scale_x_continuous(limits = c(0, 1)) +
labs(x = "proportion active users (treatment)", y = "", title = glue("day {c}"))
p1
plots[[c]] <- p1
}
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
plt <- plots[[1]]
for (pi in 2:length(plots)) {
plt <- plt + plots[[pi]]
}
plt <- plt + plot_layout(nrow = 4, ncol = 3)
plt
# ggsave("../campaign-adgroup/ukraine/figures/adgroup_reach_activity.png", plt, dpi = 300, bg = 'white', width = 13, height = 8)
tempdt <- select(d0, starts_with("subcamp"))
c <- correlation(tempdt, p_adjust = "none")
c
## # Correlation Matrix (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(17047) | p
## --------------------------------------------------------------------------------
## subcamp01_tweets | subcamp02_tweets | 0.83 | [0.82, 0.83] | 192.55 | < .001***
## subcamp01_tweets | subcamp03_tweets | 0.57 | [0.56, 0.58] | 89.82 | < .001***
## subcamp01_tweets | subcamp04_tweets | 0.69 | [0.68, 0.69] | 123.29 | < .001***
## subcamp01_tweets | subcamp05_tweets | 0.76 | [0.75, 0.76] | 151.02 | < .001***
## subcamp02_tweets | subcamp03_tweets | 0.61 | [0.60, 0.62] | 99.38 | < .001***
## subcamp02_tweets | subcamp04_tweets | 0.72 | [0.71, 0.73] | 134.76 | < .001***
## subcamp02_tweets | subcamp05_tweets | 0.80 | [0.79, 0.80] | 172.31 | < .001***
## subcamp03_tweets | subcamp04_tweets | 0.73 | [0.72, 0.73] | 138.52 | < .001***
## subcamp03_tweets | subcamp05_tweets | 0.58 | [0.57, 0.59] | 92.02 | < .001***
## subcamp04_tweets | subcamp05_tweets | 0.71 | [0.70, 0.71] | 130.61 | < .001***
##
## p-value adjustment method: none
## Observations: 17049
c2 <- correlation(tempdt, p_adjust = "none", method = "kendall")
c2
## # Correlation Matrix (kendall-method)
##
## Parameter1 | Parameter2 | tau | 95% CI | z | p
## ------------------------------------------------------------------------------
## subcamp01_tweets | subcamp02_tweets | 0.67 | [0.67, 0.68] | 126.84 | < .001***
## subcamp01_tweets | subcamp03_tweets | 0.62 | [0.61, 0.63] | 116.79 | < .001***
## subcamp01_tweets | subcamp04_tweets | 0.66 | [0.65, 0.66] | 124.07 | < .001***
## subcamp01_tweets | subcamp05_tweets | 0.63 | [0.62, 0.63] | 118.08 | < .001***
## subcamp02_tweets | subcamp03_tweets | 0.68 | [0.67, 0.68] | 128.13 | < .001***
## subcamp02_tweets | subcamp04_tweets | 0.69 | [0.69, 0.70] | 131.32 | < .001***
## subcamp02_tweets | subcamp05_tweets | 0.68 | [0.67, 0.68] | 127.76 | < .001***
## subcamp03_tweets | subcamp04_tweets | 0.71 | [0.71, 0.72] | 135.65 | < .001***
## subcamp03_tweets | subcamp05_tweets | 0.67 | [0.67, 0.68] | 126.98 | < .001***
## subcamp04_tweets | subcamp05_tweets | 0.71 | [0.71, 0.72] | 135.80 | < .001***
##
## p-value adjustment method: none
## Observations: 17049
tempplt <- ggpairs(tempdt)
# ggsave("../campaign-adgroup/ukraine/figures/adgroup_activity_cor_user.png", tempplt, dpi = 300, bg = 'white', width = 13, height = 8)
tempdt <- select(d0, starts_with("subcamp"), lineitem)
tempdt <- tempdt[, lapply(.SD, mean), lineitem]
tempdt
## lineitem subcamp01_tweets subcamp02_tweets subcamp03_tweets
## 1: adgroup7 44.31912 59.88551 56.09622
## 2: adgroup11 14.17311 16.94843 16.76427
## 3: adgroup10 53.46053 64.48684 54.37719
## 4: adgroup23 79.72023 97.51506 89.21951
## 5: adgroup9 35.30556 46.09259 45.10053
## 6: adgroup6 42.98381 56.07490 56.24291
## 7: adgroup19 46.64358 58.58438 64.81864
## 8: adgroup17 82.59644 98.67507 93.86944
## 9: adgroup8 77.14056 101.97323 97.62115
## 10: adgroup18 51.90168 62.42432 60.11125
## 11: adgroup1 46.91933 55.70932 62.47844
## 12: adgroup3 60.84332 71.23569 74.49591
## 13: adgroup2 50.78182 65.55844 65.47273
## 14: adgroup25 32.60794 40.88337 39.53598
## 15: adgroup21 52.71543 66.34707 72.10904
## 16: adgroup4 72.12639 87.75166 93.70953
## 17: adgroup13 104.14060 130.43042 122.40316
## 18: adgroup15 84.64831 105.51189 85.68335
## 19: adgroup16 75.72397 101.73884 95.90744
## 20: adgroup0 49.34364 59.76804 57.11512
## 21: adgroup5 36.86469 50.09076 47.52310
## 22: adgroup26 118.85748 142.76960 125.98812
## 23: adgroup22 65.44420 77.27133 76.88403
## 24: adgroup12 85.14147 104.82108 102.60055
## 25: adgroup24 62.17808 74.03253 70.80479
## 26: adgroup20 166.69575 205.60403 199.55928
## 27: adgroup14 82.54945 101.69963 101.82234
## lineitem subcamp01_tweets subcamp02_tweets subcamp03_tweets
## subcamp04_tweets subcamp05_tweets
## 1: 98.25213 63.91474
## 2: 28.32781 17.53407
## 3: 97.10526 59.48026
## 4: 164.60115 106.96987
## 5: 73.18122 51.28439
## 6: 99.77126 57.49393
## 7: 100.81864 64.75063
## 8: 161.00148 119.58012
## 9: 157.20750 101.11647
## 10: 98.86805 69.05433
## 11: 93.39360 62.09040
## 12: 111.65804 82.52452
## 13: 110.48571 68.01688
## 14: 65.97767 37.67494
## 15: 112.72074 68.31516
## 16: 135.57871 100.47450
## 17: 188.55524 140.91535
## 18: 168.11264 110.84481
## 19: 175.98843 106.88760
## 20: 104.09450 60.93471
## 21: 83.59736 52.86469
## 22: 222.34917 152.71021
## 23: 149.92341 82.74617
## 24: 175.38696 112.74619
## 25: 124.74315 76.18664
## 26: 354.80761 222.20134
## 27: 163.17399 111.72344
## subcamp04_tweets subcamp05_tweets
tempdt[, lineitem := NULL]
c <- correlation(tempdt, p_adjust = "none")
c
## # Correlation Matrix (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(25) | p
## -----------------------------------------------------------------------------
## subcamp01_tweets | subcamp02_tweets | 1.00 | [0.99, 1.00] | 59.53 | < .001***
## subcamp01_tweets | subcamp03_tweets | 0.98 | [0.97, 0.99] | 28.45 | < .001***
## subcamp01_tweets | subcamp04_tweets | 0.99 | [0.97, 0.99] | 29.10 | < .001***
## subcamp01_tweets | subcamp05_tweets | 0.99 | [0.99, 1.00] | 45.92 | < .001***
## subcamp02_tweets | subcamp03_tweets | 0.99 | [0.97, 0.99] | 31.32 | < .001***
## subcamp02_tweets | subcamp04_tweets | 0.99 | [0.98, 0.99] | 33.03 | < .001***
## subcamp02_tweets | subcamp05_tweets | 0.99 | [0.99, 1.00] | 45.26 | < .001***
## subcamp03_tweets | subcamp04_tweets | 0.98 | [0.96, 0.99] | 27.02 | < .001***
## subcamp03_tweets | subcamp05_tweets | 0.99 | [0.97, 0.99] | 32.04 | < .001***
## subcamp04_tweets | subcamp05_tweets | 0.98 | [0.96, 0.99] | 26.76 | < .001***
##
## p-value adjustment method: none
## Observations: 27
c2 <- correlation(tempdt, p_adjust = "none", method = "kendall")
c2
## # Correlation Matrix (kendall-method)
##
## Parameter1 | Parameter2 | tau | 95% CI | z | p
## ----------------------------------------------------------------------------
## subcamp01_tweets | subcamp02_tweets | 0.91 | [0.85, 0.95] | 6.65 | < .001***
## subcamp01_tweets | subcamp03_tweets | 0.83 | [0.73, 0.90] | 6.11 | < .001***
## subcamp01_tweets | subcamp04_tweets | 0.85 | [0.76, 0.91] | 6.23 | < .001***
## subcamp01_tweets | subcamp05_tweets | 0.90 | [0.83, 0.94] | 6.57 | < .001***
## subcamp02_tweets | subcamp03_tweets | 0.85 | [0.75, 0.91] | 6.19 | < .001***
## subcamp02_tweets | subcamp04_tweets | 0.87 | [0.79, 0.93] | 6.40 | < .001***
## subcamp02_tweets | subcamp05_tweets | 0.86 | [0.78, 0.92] | 6.32 | < .001***
## subcamp03_tweets | subcamp04_tweets | 0.86 | [0.77, 0.91] | 6.27 | < .001***
## subcamp03_tweets | subcamp05_tweets | 0.88 | [0.80, 0.93] | 6.44 | < .001***
## subcamp04_tweets | subcamp05_tweets | 0.85 | [0.76, 0.91] | 6.23 | < .001***
##
## p-value adjustment method: none
## Observations: 27
tempplt <- ggpairs(tempdt)
# ggsave("../campaign-adgroup/ukraine/figures/adgroup_activity_cor_adgroup.png", tempplt, dpi = 300, bg = 'white', width = 13, height = 8)
tempdt <- data.table()
c <- 1
for (c in 1:length(campaigns)) {
camp <- campaigns[c]
tempdat <- d3[campaign == camp & condition == "t"]
temp_reach <- fread(reach[c])
if (c >= 6){
temp_reach <- temp_reach %>% filter(Objective == "Video views") %>% filter(`Ad Group status` == "Running")
temp_reach$`Ad Group name` <- sapply(temp_reach$`Ad Group name`, function(x) change_ad_group_name(x))
}
temp_reach <- temp_reach[, .(`Ad Group name`, `Total audience reach`)]
setnames(temp_reach, c("lineitem", "reach"))
tempdat <- left_join(tempdat, temp_reach)
setDT(tempdat)
tempdat[, reach := as.numeric(reach)]
tempdat$reach[is.na(tempdat$reach)] <- 0
tempdat[, reach_prop := reach / N]
tempdat <- tempdat[, .(lineitem, campaign, N, reach, reach_prop)]
temp_name <- as.vector(tempdat[, unique(campaign)])
tempdat <- tempdat[, .(lineitem, reach_prop)]
setnames(tempdat, "reach_prop", temp_name)
if (nrow(tempdt) == 0) {
tempdt <- tempdat
}
tempdt <- left_join(tempdt, tempdat) |> data.table()
tempdt
}
## Joining, by = "lineitem"
## Joining, by = c("lineitem", "subcamp01_tweets")
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
tempdt
## lineitem subcamp01_tweets subcamp02_tweets subcamp03_tweets
## 1: adgroup0 0.10616438 0.22945205 0.18835616
## 2: adgroup1 0.07022472 0.20505618 0.15730337
## 3: adgroup2 0.10966057 0.20626632 0.17754569
## 4: adgroup3 0.01928375 0.05785124 0.07713499
## 5: adgroup4 0.01339286 0.06696429 0.07589286
## 6: adgroup5 0.15946844 0.34219269 0.31229236
## 7: adgroup6 0.09465021 0.22633745 0.22222222
## 8: adgroup7 0.04645477 0.13447433 0.12224939
## 9: adgroup8 0.11081081 0.22162162 0.19729730
## 10: adgroup9 0.08266667 0.17333333 0.16800000
## 11: adgroup10 0.09170306 0.21397380 0.20087336
## 12: adgroup11 0.04832714 0.13754647 0.11895911
## 13: adgroup12 0.07777778 0.16111111 0.14166667
## 14: adgroup13 0.01719198 0.02865330 0.04871060
## 15: adgroup14 0.02545455 0.06909091 0.08000000
## 16: adgroup15 0.03990025 0.16957606 0.13216958
## 17: adgroup16 0.04333333 0.17000000 0.18333333
## 18: adgroup17 0.05029586 0.12426036 0.14792899
## 19: adgroup18 0.04392765 0.20930233 0.18087855
## 20: adgroup19 0.08883249 0.25380711 0.21065990
## 21: adgroup20 0.04845815 0.12775330 0.13656388
## 22: adgroup21 0.10928962 0.23770492 0.20218579
## 23: adgroup22 0.15948276 0.32758621 0.25862069
## 24: adgroup23 0.06956522 0.19130435 0.15072464
## 25: adgroup24 0.07823129 0.22448980 0.20068027
## 26: adgroup25 0.07035176 0.15075377 0.16582915
## 27: adgroup26 0.07729469 0.14975845 0.12077295
## lineitem subcamp01_tweets subcamp02_tweets subcamp03_tweets
## subcamp04_tweets subcamp05_tweets subcamp06_tweets subcamp07_tweets
## 1: 0.19863014 0.19178082 0.17465753 0.17123288
## 2: 0.19101124 0.16853933 0.12078652 0.12921348
## 3: 0.19321149 0.19582245 0.15665796 0.16449086
## 4: 0.06336088 0.07713499 0.04407713 0.03856749
## 5: 0.07589286 0.04910714 0.04464286 0.03125000
## 6: 0.38538206 0.33887043 0.33887043 0.26245847
## 7: 0.24279835 0.29218107 0.21810700 0.22633745
## 8: 0.13447433 0.11735941 0.10757946 0.11735941
## 9: 0.23783784 0.21351351 0.19729730 0.16216216
## 10: 0.18400000 0.18133333 0.12800000 0.13866667
## 11: 0.23144105 0.19650655 0.19213974 0.18340611
## 12: 0.13754647 0.12267658 0.08921933 0.10037175
## 13: 0.15277778 0.16666667 0.15833333 0.12777778
## 14: 0.04011461 0.05444126 0.02578797 0.02865330
## 15: 0.08363636 0.05818182 0.04363636 0.04363636
## 16: 0.16708229 0.15710723 0.14962594 0.15461347
## 17: 0.20333333 0.15333333 0.13666667 0.17333333
## 18: 0.13609467 0.16272189 0.11538462 0.13905325
## 19: 0.18087855 0.18087855 0.14728682 0.15503876
## 20: 0.25634518 0.21573604 0.20558376 0.19543147
## 21: 0.15859031 0.14977974 0.13215859 0.08810573
## 22: 0.20765027 0.18306011 0.19125683 0.18032787
## 23: 0.31465517 0.24137931 0.22844828 0.25431034
## 24: 0.15942029 0.19420290 0.14492754 0.15942029
## 25: 0.17006803 0.16666667 0.18367347 0.14285714
## 26: 0.16080402 0.13065327 0.11055276 0.13065327
## 27: 0.16908213 0.14492754 0.13526570 0.12077295
## subcamp04_tweets subcamp05_tweets subcamp06_tweets subcamp07_tweets
## subcamp08_tweets subcamp09_tweets subcamp10_tweets subcamp11_tweets
## 1: 0.16095890 0.14383562 0.15410959 0.17465753
## 2: 0.18820225 0.14044944 0.11235955 0.14887640
## 3: 0.14882507 0.13838120 0.13054830 0.14621410
## 4: 0.03856749 0.04683196 0.03856749 0.04958678
## 5: 0.06250000 0.05357143 0.07589286 0.03571429
## 6: 0.22259136 0.26245847 0.20598007 0.28239203
## 7: 0.22633745 0.21810700 0.17283951 0.23045267
## 8: 0.10024450 0.09046455 0.08068460 0.10024450
## 9: 0.15675676 0.18648649 0.12702703 0.14324324
## 10: 0.13066667 0.14933333 0.12533333 0.15200000
## 11: 0.18340611 0.20087336 0.15720524 0.17030568
## 12: 0.11152416 0.09665428 0.07063197 0.09293680
## 13: 0.11111111 0.12500000 0.09722222 0.13055556
## 14: 0.04297994 0.03438395 0.03151862 0.02292264
## 15: 0.05090909 0.06545455 0.03272727 0.04000000
## 16: 0.13965087 0.13216958 0.09226933 0.14713217
## 17: 0.17000000 0.18333333 0.13666667 0.13000000
## 18: 0.12426036 0.12130178 0.08579882 0.07988166
## 19: 0.16795866 0.14211886 0.14728682 0.14987080
## 20: 0.15228426 0.16497462 0.12690355 0.13959391
## 21: 0.09691630 0.11013216 0.08370044 0.07488987
## 22: 0.15300546 0.15846995 0.16120219 0.18032787
## 23: 0.20689655 0.18103448 0.20258621 0.23706897
## 24: 0.12463768 0.12173913 0.10724638 0.14202899
## 25: 0.13945578 0.13945578 0.12585034 0.16666667
## 26: 0.12060302 0.11557789 0.10552764 0.11557789
## 27: 0.08212560 0.13526570 0.07246377 0.11594203
## subcamp08_tweets subcamp09_tweets subcamp10_tweets subcamp11_tweets
tempdt[, lineitem := NULL]
c <- correlation(tempdt, p_adjust = "none")
c
## # Correlation Matrix (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(25) | p
## -----------------------------------------------------------------------------
## subcamp01_tweets | subcamp02_tweets | 0.91 | [0.80, 0.96] | 10.75 | < .001***
## subcamp01_tweets | subcamp03_tweets | 0.88 | [0.76, 0.95] | 9.46 | < .001***
## subcamp01_tweets | subcamp04_tweets | 0.89 | [0.78, 0.95] | 9.96 | < .001***
## subcamp01_tweets | subcamp05_tweets | 0.85 | [0.70, 0.93] | 8.22 | < .001***
## subcamp02_tweets | subcamp03_tweets | 0.96 | [0.92, 0.98] | 17.46 | < .001***
## subcamp02_tweets | subcamp04_tweets | 0.95 | [0.90, 0.98] | 15.82 | < .001***
## subcamp02_tweets | subcamp05_tweets | 0.91 | [0.81, 0.96] | 10.90 | < .001***
## subcamp03_tweets | subcamp04_tweets | 0.96 | [0.92, 0.98] | 18.07 | < .001***
## subcamp03_tweets | subcamp05_tweets | 0.93 | [0.85, 0.97] | 12.72 | < .001***
## subcamp04_tweets | subcamp05_tweets | 0.93 | [0.85, 0.97] | 12.61 | < .001***
##
## p-value adjustment method: none
## Observations: 27
c2 <- correlation(tempdt, p_adjust = "none", method = "kendall")
c2
## # Correlation Matrix (kendall-method)
##
## Parameter1 | Parameter2 | tau | 95% CI | z | p
## ----------------------------------------------------------------------------
## subcamp01_tweets | subcamp02_tweets | 0.66 | [0.48, 0.79] | 4.86 | < .001***
## subcamp01_tweets | subcamp03_tweets | 0.67 | [0.49, 0.79] | 4.90 | < .001***
## subcamp01_tweets | subcamp04_tweets | 0.66 | [0.48, 0.79] | 4.86 | < .001***
## subcamp01_tweets | subcamp05_tweets | 0.70 | [0.54, 0.81] | 5.13 | < .001***
## subcamp02_tweets | subcamp03_tweets | 0.81 | [0.70, 0.89] | 5.94 | < .001***
## subcamp02_tweets | subcamp04_tweets | 0.81 | [0.69, 0.88] | 5.90 | < .001***
## subcamp02_tweets | subcamp05_tweets | 0.75 | [0.61, 0.85] | 5.50 | < .001***
## subcamp03_tweets | subcamp04_tweets | 0.80 | [0.68, 0.88] | 5.86 | < .001***
## subcamp03_tweets | subcamp05_tweets | 0.76 | [0.62, 0.85] | 5.55 | < .001***
## subcamp04_tweets | subcamp05_tweets | 0.76 | [0.63, 0.86] | 5.59 | < .001***
##
## p-value adjustment method: none
## Observations: 27
tempplt <- ggpairs(tempdt)
# ggsave("../campaign-adgroup/ukraine/figures/adgroup_reach_cor_adgroup.png", tempplt, dpi = 300, bg = 'white', width = 13, height = 8)
Reach Prop vs Mean Number of Tweets
plots <- list()
full_dat <- c()
for (c in 1:length(campaigns)) {
camp <- campaigns[c]
tempdat <- d3[campaign == camp & condition == "t"]
temp_reach <- fread(reach[c])
if (c >= 6){
temp_reach <- temp_reach %>% filter(Objective == "Video views") %>% filter(`Ad Group status` == "Running")
temp_reach$`Ad Group name` <- sapply(temp_reach$`Ad Group name`, function(x) change_ad_group_name(x))
}
temp_reach <- temp_reach[, .(`Ad Group name`, `Total audience reach`)]
setnames(temp_reach, c("lineitem", "reach"))
tempdat <- left_join(tempdat, temp_reach)
setDT(tempdat)
tempdat[, reach := as.numeric(reach)]
tempdat$reach[is.na(tempdat$reach)] <- 0
tempdat[, reach_prop := reach / N]
ord <- tempdat[order(reach_prop), lineitem]
tempdat[, lineitem := factor(lineitem, levels = ord)]
full_dat <- rbind(full_dat, tempdat)
p1 <- ggplot(tempdat, aes(x = reach_prop, y = avg_active_tweets)) +
geom_point(aes(color = lineitem)) +
# geom_text(aes(label = paste0(active_user_count, "/", N)), size = 3) +
scale_x_continuous(limits = c(0, 0.5)) +
labs(x = "proportion reach (treatment)", y = "mean number of tweets", title = glue("day {c}"))
p1
plots[[c]] <- p1
}
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
full_dat %>% group_by(campaign) %>% summarize(cor = cor(reach_prop, avg_active_tweets))
## # A tibble: 11 × 2
## campaign cor
## <fct> <dbl>
## 1 subcamp01_tweets -0.361
## 2 subcamp02_tweets -0.393
## 3 subcamp03_tweets -0.390
## 4 subcamp04_tweets -0.237
## 5 subcamp05_tweets -0.350
## 6 subcamp06_tweets -0.240
## 7 subcamp07_tweets -0.261
## 8 subcamp08_tweets -0.321
## 9 subcamp09_tweets -0.307
## 10 subcamp10_tweets -0.325
## 11 subcamp11_tweets -0.327
Reach Prop vs Super User Prop
plots <- list()
full_dat <- c()
for (c in 1:length(campaigns)) {
camp <- campaigns[c]
tempdat <- d3[campaign == camp & condition == "t"]
temp_reach <- fread(reach[c])
if (c >= 6){
temp_reach <- temp_reach %>% filter(Objective == "Video views") %>% filter(`Ad Group status` == "Running")
temp_reach$`Ad Group name` <- sapply(temp_reach$`Ad Group name`, function(x) change_ad_group_name(x))
}
temp_reach <- temp_reach[, .(`Ad Group name`, `Total audience reach`)]
setnames(temp_reach, c("lineitem", "reach"))
tempdat <- left_join(tempdat, temp_reach)
setDT(tempdat)
tempdat[, reach := as.numeric(reach)]
tempdat$reach[is.na(tempdat$reach)] <- 0
tempdat[, reach_prop := reach / N]
ord <- tempdat[order(reach_prop), lineitem]
tempdat[, lineitem := factor(lineitem, levels = ord)]
full_dat <- rbind(full_dat, tempdat)
p1 <- ggplot(tempdat, aes(x = reach_prop, y = super_user_prop)) +
geom_point(aes(color = lineitem)) +
# geom_text(aes(label = paste0(active_user_count, "/", N)), size = 3) +
scale_x_continuous(limits = c(0, 0.5)) +
labs(x = "proportion reach (treatment)", y = "proportion of super users", title = glue("day {c}"))
p1
plots[[c]] <- p1
}
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
full_dat %>% group_by(campaign) %>% summarise(cor = cor(reach_prop, super_user_prop))
## # A tibble: 11 × 2
## campaign cor
## <fct> <dbl>
## 1 subcamp01_tweets -0.391
## 2 subcamp02_tweets -0.419
## 3 subcamp03_tweets -0.446
## 4 subcamp04_tweets -0.201
## 5 subcamp05_tweets -0.427
## 6 subcamp06_tweets -0.124
## 7 subcamp07_tweets -0.0294
## 8 subcamp08_tweets -0.172
## 9 subcamp09_tweets -0.344
## 10 subcamp10_tweets -0.262
## 11 subcamp11_tweets -0.286
Reach Prop vs Idle User Prop
plots <- list()
full_dat <- c()
for (c in 1:length(campaigns)) {
camp <- campaigns[c]
tempdat <- d3[campaign == camp & condition == "t"]
temp_reach <- fread(reach[c])
if (c >= 6){
temp_reach <- temp_reach %>% filter(Objective == "Video views") %>% filter(`Ad Group status` == "Running")
temp_reach$`Ad Group name` <- sapply(temp_reach$`Ad Group name`, function(x) change_ad_group_name(x))
}
temp_reach <- temp_reach[, .(`Ad Group name`, `Total audience reach`)]
setnames(temp_reach, c("lineitem", "reach"))
tempdat <- left_join(tempdat, temp_reach)
setDT(tempdat)
tempdat[, reach := as.numeric(reach)]
tempdat$reach[is.na(tempdat$reach)] <- 0
tempdat[, reach_prop := reach / N]
ord <- tempdat[order(reach_prop), lineitem]
tempdat[, lineitem := factor(lineitem, levels = ord)]
full_dat <- rbind(full_dat, tempdat)
p1 <- ggplot(tempdat, aes(x = reach_prop, y = idle_user_prop)) +
geom_point(aes(color = lineitem)) +
# geom_text(aes(label = paste0(active_user_count, "/", N)), size = 3) +
scale_x_continuous(limits = c(0, 0.5)) +
labs(x = "proportion reach (treatment)", y = "proportion of idle users", title = glue("day {c}"))
p1
plots[[c]] <- p1
}
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
## Joining, by = "lineitem"
plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
full_dat %>% group_by(campaign) %>% summarise(cor = cor(reach_prop, idle_user_prop))
## # A tibble: 11 × 2
## campaign cor
## <fct> <dbl>
## 1 subcamp01_tweets 0.155
## 2 subcamp02_tweets 0.346
## 3 subcamp03_tweets 0.266
## 4 subcamp04_tweets 0.146
## 5 subcamp05_tweets 0.259
## 6 subcamp06_tweets 0.210
## 7 subcamp07_tweets 0.283
## 8 subcamp08_tweets 0.163
## 9 subcamp09_tweets 0.495
## 10 subcamp10_tweets 0.407
## 11 subcamp11_tweets 0.350