df <- read_csv("GOTV_Experiment.csv")
## Rows: 50000 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): female, age, white, black, employed, urban, treatmentattempt, succe...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(df)
## Rows: 50,000
## Columns: 9
## $ female <dbl> 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ age <dbl> 34, 37, 43, 45, 47, 45, 57, 20, 30, 25, 36, 27, 25…
## $ white <dbl> 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,…
## $ black <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,…
## $ employed <dbl> 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,…
## $ urban <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,…
## $ treatmentattempt <dbl> 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,…
## $ successfultreatment <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,…
## $ turnout <dbl> 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,…
df_valid <- df %>% filter(!is.na(turnout))
# Mean turnout by treatment assignment
turnout_by_assignment <- df_valid %>%
group_by(treatmentattempt) %>%
summarise(mean_turnout = mean(turnout))
# Mean turnout by successful treatment
turnout_by_success <- df_valid %>%
group_by(successfultreatment) %>%
summarise(mean_turnout = mean(turnout))
turnout_by_assignment
## # A tibble: 2 × 2
## treatmentattempt mean_turnout
## <dbl> <dbl>
## 1 0 0.485
## 2 1 0.556
turnout_by_success
## # A tibble: 2 × 2
## successfultreatment mean_turnout
## <dbl> <dbl>
## 1 0 0.474
## 2 1 0.594
Interpretation:
- Assigned to treatment (ITT): 55.57% vs 48.47% → Difference:
+7.1 pp
- Actually received treatment: 59.35% vs 47.43% → Difference:
+11.92 pp
df_valid %>%
group_by(urban, treatmentattempt) %>%
summarise(mean_turnout = mean(turnout), n = n())
## `summarise()` has grouped output by 'urban'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 4
## # Groups: urban [2]
## urban treatmentattempt mean_turnout n
## <dbl> <dbl> <dbl> <int>
## 1 0 0 0.533 5007
## 2 0 1 0.566 20002
## 3 1 0 0.472 19754
## 4 1 1 0.513 4979
Interpretation:
- Urban: Control = 47.23%, Treated = 51.34% → Difference = 4.11
pp
- Non-Urban: Control = 53.35%, Treated = 56.62% → Difference =
3.27 pp
mean_treated <- mean(df_valid$turnout[df_valid$treatmentattempt == 1])
mean_control <- mean(df_valid$turnout[df_valid$treatmentattempt == 0])
ITT <- mean_treated - mean_control
compliance_rate <- mean(df$successfultreatment[df$treatmentattempt == 1])
CACE <- ITT / compliance_rate
ITT
## [1] 0.07098882
compliance_rate
## [1] 0.7688753
CACE
## [1] 0.09232814
Interpretation:
- ITT = 7.1 pp
- Compliance = 76.89%
- CACE ≈ 9.2 pp
This reflects the true causal effect among compliers.
# Overall missing turnout
sum(is.na(df$turnout))
## [1] 258
# Missing by group
df %>%
group_by(treatmentattempt) %>%
summarise(missing = sum(is.na(turnout)),
total = n(),
prop_missing = missing / total)
## # A tibble: 2 × 4
## treatmentattempt missing total prop_missing
## <dbl> <int> <int> <dbl>
## 1 0 140 24901 0.00562
## 2 1 118 25099 0.00470
df %>%
group_by(successfultreatment) %>%
summarise(missing = sum(is.na(turnout)),
total = n(),
prop_missing = missing / total)
## # A tibble: 2 × 4
## successfultreatment missing total prop_missing
## <dbl> <int> <int> <dbl>
## 1 0 171 30702 0.00557
## 2 1 87 19298 0.00451
# Lower bound: assume all missing did NOT vote
df_lower <- df %>%
mutate(turnout_adj = if_else(is.na(turnout), 0, turnout))
# Upper bound: assume all missing DID vote
df_upper <- df %>%
mutate(turnout_adj = if_else(is.na(turnout), 1, turnout))
# Re-estimate CACE for both scenarios
ITT_lower <- with(df_lower, mean(turnout_adj[treatmentattempt == 1]) - mean(turnout_adj[treatmentattempt == 0]))
ITT_upper <- with(df_upper, mean(turnout_adj[treatmentattempt == 1]) - mean(turnout_adj[treatmentattempt == 0]))
CACE_lower <- ITT_lower / compliance_rate
CACE_upper <- ITT_upper / compliance_rate
CACE_lower
## [1] 0.09247457
CACE_upper
## [1] 0.09127687
cat("Best estimate: CACE ≈ 9.2 percentage points (range depending on attrition: conservative to optimistic).
",
"The treatment is effective, but its estimated impact depends on how we handle noncompliance and missing data.")
## Best estimate: CACE ≈ 9.2 percentage points (range depending on attrition: conservative to optimistic).
## The treatment is effective, but its estimated impact depends on how we handle noncompliance and missing data.