SBCC Coach Power Analysis

Reading in data:

library(googlesheets4)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

internetaccess_url <- "https://docs.google.com/spreadsheets/d/1VDtL9oIoo4MU5j1q4Ys9pyP5zzvxwVGO2wYvwEcMNng/edit?gid=904275613#gid=904275613"
compliance_url <- "https://docs.google.com/spreadsheets/d/1ovzfiwbwy9iDPW_wthtn5WXF9MVwxzaF80DTOyHK5yY/edit?gid=674835239#gid=674835239"

ia_meta <- gs4_get(internetaccess_url)

! Using an auto-discovered, cached token.
  To suppress this message, modify your code or options to clearly consent to
  the use of a cached token.
  See gargle's "Non-interactive auth" vignette for more details:
  <https://gargle.r-lib.org/articles/non-interactive-auth.html>
ℹ The googlesheets4 package is using a cached token for 'robert@agency.fund'.

ia_sheet_name <- ia_meta$sheets$name[ia_meta$sheets$id == str_remove(internetaccess_url, "^.*gid=")]
cs_meta <- gs4_get(compliance_url)
cs_sheet_name <- cs_meta$sheets$name[cs_meta$sheets$id == str_remove(compliance_url, "^.*gid=")]

# Read the target sheet
internet <- read_sheet(internetaccess_url, sheet = ia_sheet_name)

✔ Reading from "Internet Access status (1)".
✔ Range ''2026A''.

compliance <- read_sheet(compliance_url, sheet = cs_sheet_name)

✔ Reading from "BHS_2026A_Compliance_scores".
✔ Range ''Sheet1''.

Looking for duplicates:

internet %>% 
  group_by(across(c(-`Reliable internet access`, -`Number_HHs`))) %>%
  filter(n() > 1)

# A tibble: 2 × 8
# Groups:   District, Region, Sub_County, Parish, Cluster, Village [1]
  District Region   Sub_County  Parish  Cluster Village  Reliable internet acc…¹
  <chr>    <chr>    <chr>       <chr>   <chr>   <chr>    <chr>                  
1 Kagadi   Mid_West Kyaterekera Wangeyo Wangeyo Tweyanze Yes                    
2 Kagadi   Mid_West Kyaterekera Wangeyo Wangeyo Tweyanze Yes                    
# ℹ abbreviated name: ¹`Reliable internet access`
# ℹ 1 more variable: Number_HHs <dbl>

Twenyanze village has duplicates with Number_HHs missing for this row, it looks safe to drop.

compliance %>%
  group_by(pre_hhid) %>%
  filter(n() > 1) %>%
  arrange(pre_hhid) %>%
  select(pre_hhid, `Household Compliance (%)`, everything())

# A tibble: 50 × 9
# Groups:   pre_hhid [25]
   pre_hhid         Household Compliance…¹ pre_district pre_subcounty pre_parish
   <chr>                             <dbl> <chr>        <chr>         <chr>     
 1 BUH-NYA-AMO-M-1…                 0.0958 Buhweju      Burere        Rushambya 
 2 BUH-NYA-AMO-M-1…                 0.179  Buhweju      Burere        Rushambya 
 3 BUH-NYA-DID-M-0…                 0.112  Buhweju      Burere        Rushambya 
 4 BUH-NYA-DID-M-0…                 0.251  Buhweju      Burere        Rushambya 
 5 BUH-NYA-EDI-F-1…                 0.170  Buhweju      Burere        Rushambya 
 6 BUH-NYA-EDI-F-1…                 0.112  Buhweju      Burere        Rushambya 
 7 BUH-NYA-JOV-F-1…                 0.0542 Buhweju      Burere        Rushambya 
 8 BUH-NYA-JOV-F-1…                 0.205  Buhweju      Burere        Rushambya 
 9 BUH-NYA-KAS-M-1…                 0.112  Buhweju      Burere        Rushambya 
10 BUH-NYA-KAS-M-1…                 0.149  Buhweju      Burere        Rushambya 
# ℹ 40 more rows
# ℹ abbreviated name: ¹`Household Compliance (%)`
# ℹ 4 more variables: pre_cluster <chr>, pre_village <chr>, `2026` <dbl>,
#   A <chr>

There are 25 duplicates of pre_hhid rows with differing Household Compliance (%) values - lets drop the latter for now upon clarification from the RTV team.

Joining data

normalize_geo <- function(x) tolower(trimws(gsub(" ", "_", x)))

compliance_norm <- compliance %>%
  group_by(pre_hhid) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  mutate(across(c(pre_district, pre_subcounty, pre_parish, pre_cluster, pre_village), normalize_geo))

internet_norm <- internet %>% 
  group_by(across(c(-`Reliable internet access`, -`Number_HHs`))) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  mutate(across(c(District, Region, Sub_County, Parish, Cluster, Village), normalize_geo))

joined <- internet_norm |>
  left_join(compliance_norm, by = c(
    "District"   = "pre_district",
    "Sub_County" = "pre_subcounty",
    "Parish"     = "pre_parish",
    "Cluster"    = "pre_cluster",
    "Village"    = "pre_village"
  )) %>%
  unite(village_id, District, Sub_County, Parish, Cluster, Village) %>%
  rename(hh_compliance=`Household Compliance (%)`,
         num_hhs = `Number_HHs`,
         internet_access = `Reliable internet access`,
         hh_id = `pre_hhid`) %>%
  filter(!is.na(hh_id))

Villages without matches in the compliance data:

joined %>%
  filter(is.na(hh_compliance))

# A tibble: 0 × 8
# ℹ 8 variables: village_id <chr>, Region <chr>, internet_access <chr>,
#   num_hhs <dbl>, hh_id <chr>, 2026 <dbl>, A <chr>, hh_compliance <dbl>

There are no more missing matches in the data but there were manual edits made to the spreadsheets to repair some of the fixes (mispellings, shifted columns, etc).

library(lme4)
library(performance)
library(pwr)

# --- Parameters (adjust as needed) ---
compliance_col  <- "hh_compliance"  # column name for household compliance (0/1 or proportion)
alpha           <- 0.05             # two-sided significance level
power_target    <- 0.80             # desired power
effect_rel      <- 1.20             # desired relative effect size (baseline * effect_rel)

# --- Cluster-level summaries ---
cluster_stats <- joined |>
  group_by(village_id) |>
  summarise(
    n_hh           = n(),
    village_compliance = mean(.data[[compliance_col]], na.rm = TRUE),
    .groups = "drop"
  )

K     <- nrow(cluster_stats)          # total number of villages
m_bar <- mean(cluster_stats$n_hh)     # average households per village
p0    <- mean(cluster_stats$village_compliance, na.rm = TRUE)  # baseline compliance
p1    <- p0 * effect_rel              # treatment-arm compliance

# --- ICC via lmer ---
model <- lmer(hh_compliance ~ 1 + (1 | village_id), data = joined)
ICC   <- icc(model)$ICC_adjusted

# --- Design effect and effective sample size (equal allocation, K/2 villages per arm) ---
DEFF           <- 1 + (m_bar - 1) * ICC
K_per_arm      <- K / 2
n_eff_per_arm  <- K_per_arm * m_bar / DEFF

# --- Power (Cohen's h arcsine transformation for proportions) ---
h      <- ES.h(p1, p0)
result <- pwr.2p.test(h = h, n = n_eff_per_arm, sig.level = alpha, alternative = "two.sided")

# --- Sensitivity: power across a range of cluster counts ---
k_seq   <- seq(10, max(K * 2, 100), by = 5)
pwr_seq <- sapply(k_seq, function(k) {
  pwr.2p.test(h = h, n = (k / 2) * m_bar / DEFF, sig.level = alpha, alternative = "two.sided")$power
})

sensitivity <- data.frame(
  villages_total   = k_seq,
  villages_per_arm = k_seq / 2,
  power_pct        = round(pwr_seq * 100, 1)
)

=== Cluster RCT Power Analysis: Village Randomisation ===

  Villages (clusters)      : 477 total  |  238 per arm

  Avg households / village : 28.7

  Baseline compliance      : 22.9%

  Treatment compliance     : 27.4%  (*1.2)

  Estimated ICC            : 0.2327

  Design effect (DEFF)     : 7.44

  Effective n per arm      : 919.2 households

  Cohen's h                : 0.1055


  --> Estimated power      : 61.8%


--- Power by number of villages (holding m_bar, ICC, effect fixed) ---

 villages_total villages_per_arm power_pct
            460            230.0      60.3
            465            232.5      60.7
            470            235.0      61.2
            475            237.5      61.7
            480            240.0      62.1
            485            242.5      62.6
            490            245.0      63.0
            495            247.5      63.4
            500            250.0      63.9
            505            252.5      64.3
            510            255.0      64.7
            515            257.5      65.2
            520            260.0      65.6
            525            262.5      66.0
            530            265.0      66.4
            535            267.5      66.8
            540            270.0      67.2
            545            272.5      67.6
            550            275.0      68.0
            555            277.5      68.4
            560            280.0      68.8
            565            282.5      69.2
            570            285.0      69.6
            575            287.5      69.9
            580            290.0      70.3
            585            292.5      70.7
            590            295.0      71.1
            595            297.5      71.4
            600            300.0      71.8
            605            302.5      72.1
            610            305.0      72.5
            615            307.5      72.8
            620            310.0      73.2
            625            312.5      73.5
            630            315.0      73.9
            635            317.5      74.2
            640            320.0      74.5
            645            322.5      74.8
            650            325.0      75.2
            655            327.5      75.5
            660            330.0      75.8
            665            332.5      76.1
            670            335.0      76.4
            675            337.5      76.7
            680            340.0      77.0
            685            342.5      77.3
            690            345.0      77.6
            695            347.5      77.9
            700            350.0      78.2
            705            352.5      78.5
            710            355.0      78.8
            715            357.5      79.1
            720            360.0      79.3
            725            362.5      79.6
            730            365.0      79.9
            735            367.5      80.2
            740            370.0      80.4
            745            372.5      80.7
            750            375.0      80.9
            755            377.5      81.2
            760            380.0      81.4
            765            382.5      81.7
            770            385.0      81.9
            775            387.5      82.2
            780            390.0      82.4
            785            392.5      82.7
            790            395.0      82.9
            795            397.5      83.1
            800            400.0      83.4
            805            402.5      83.6
            810            405.0      83.8
            815            407.5      84.0
            820            410.0      84.3
            825            412.5      84.5
            830            415.0      84.7
            835            417.5      84.9
            840            420.0      85.1
            845            422.5      85.3
            850            425.0      85.5
            855            427.5      85.7
            860            430.0      85.9
            865            432.5      86.1
            870            435.0      86.3
            875            437.5      86.5
            880            440.0      86.7
            885            442.5      86.9
            890            445.0      87.1
            895            447.5      87.2
            900            450.0      87.4
            905            452.5      87.6
            910            455.0      87.8
            915            457.5      87.9
            920            460.0      88.1
            925            462.5      88.3
            930            465.0      88.4
            935            467.5      88.6
            940            470.0      88.8
            945            472.5      88.9
            950            475.0      89.1