# =================================================================
# FINAL DESIGN — 3-stage with PPS BG selection
#
# Both goals achieved EXACTLY [Lecture Slides 34-35]:
#
# Stage 1: Select m=40 PSUs (tracts) by PPS systematic
# pi_1 = m * S_i / S+
# S_i = sum_d f_d * Q_i(d) [PSU composite MOS, HH residents]
#
# Stage 2: Select 1 BG per PSU by PPS systematic
# pi_{2|i} = S_ij / S_i
# S_ij = sum_d f_d * Q_ij(d) [BG composite MOS, HH residents]
# Small/zero BGs linked before selection (same feasibility logic)
#
# Stage 3: Person sampling within selected BG
# pi_{3|ij}(d) = n_bar * f_d / S_ij
# 1 adult per HH (last-birthday); HH rate q_ij = n_bar / TotHH_ij
#
# SELF-WEIGHTING [EXACT, Slide 35]:
# pi_1 * pi_{2|i} * pi_{3|ij}(d)
# = (m*S_i/S+) * (S_ij/S_i) * (n_bar*f_d/S_ij)
# = m*n_bar*f_d / S+ = f_d [S_i and S_ij both cancel]
#
# EQUAL WORKLOAD [EXACT per selected BG, Slide 34]:
# sum_d n*_ij(d) = sum_d Q_ij(d) * (n_bar*f_d/S_ij)
# = n_bar * S_ij / S_ij = n_bar
# Since 1 BG selected per PSU, BG workload = PSU workload = n_bar
#
# GQ HANDLING:
# N_i(d) and N_ij(d) count only HH residents (TotHH>0 BGs).
# GQ BGs excluded from sampling frame entirely.
# =================================================================
library(readxl); library(readr); library(dplyr)
library(stringr); library(tidyr); library(sampling); library(tibble)
xlsx_url <- paste0(
"https://raw.githubusercontent.com/LinneaLiny/",
"-Multi-Stage-Sample-for-Detroit-Michigan-U.S.-Project/main/",
"Detroit%20MI_Updated.xlsx")
fips_url <- paste0(
"https://raw.githubusercontent.com/LinneaLiny/-Multi-Stage-Sample-",
"for-Detroit-Michigan-U.S.-Project/main/",
"Census%20Tract%20FIPS%20Code%20Detroit%20MI_Updated.csv")
tmp_xlsx <- tempfile(fileext = ".xlsx")
tmp_csv <- tempfile(fileext = ".csv")
download.file(xlsx_url, tmp_xlsx, mode = "wb", quiet = TRUE)
download.file(fips_url, tmp_csv, mode = "wb", quiet = TRUE)
tract_raw <- read_excel(tmp_xlsx, sheet = "Tract")
bg_raw <- read_excel(tmp_xlsx, sheet = "BlockGroup")
tract_fips <- read_csv(tmp_csv, show_col_types = FALSE) %>%
rename(GEOID = 1) %>% mutate(GEOID = as.character(GEOID))
cat("Detroit tracts:", nrow(tract_fips), "\n")
## Detroit tracts: 276
tract <- tract_raw %>%
mutate(Tract = as.character(Tract), tract_id = Tract) %>%
filter(tract_id %in% tract_fips$GEOID)
bg <- bg_raw %>%
mutate(BlockGroup = as.character(BlockGroup),
tract_id = str_sub(BlockGroup, 1, 11),
bg_id = BlockGroup) %>%
filter(tract_id %in% tract_fips$GEOID)
make_age_groups <- function(df) {
df %>% mutate(
age18_34 =
Male18to19Yrs+Male20Yrs+Male21Yrs+Male22to24Yrs+
Male25to29Yrs+Male30to34Yrs+Female18to19Yrs+Female20Yrs+
Female21Yrs+Female22to24Yrs+Female25to29Yrs+Female30to34Yrs,
age35_49 =
Male35to39Yrs+Male40to44Yrs+Male45to49Yrs+
Female35to39Yrs+Female40to44Yrs+Female45to49Yrs,
age50_64 =
Male50to54Yrs+Male55to59Yrs+Male60to61Yrs+Male62to64Yrs+
Female50to54Yrs+Female55to59Yrs+Female60to61Yrs+Female62to64Yrs,
age65plus =
Male65to66Yrs+Male67to69Yrs+Male70to74Yrs+Male75to79Yrs+
Male80to84Yrs+MaleGE85Yrs+Female65to66Yrs+Female67to69Yrs+
Female70to74Yrs+Female75to79Yrs+Female80to84Yrs+FemaleGE85Yrs,
age18plus = age18_34+age35_49+age50_64+age65plus)
}
tract <- make_age_groups(tract)
bg <- make_age_groups(bg)
# Separate HH-BGs (TotHH>0) from GQ-BGs (TotHH=0, age18plus>0).
# Only HH-BGs enter the sampling frame and contribute to MOS.
# GQ residents cannot be reached via household listing.
bg_frame <- bg %>%
transmute(bg_id, tract_id, NAME, TotHH, TotPerson,
age18_34, age35_49, age50_64, age65plus, age18plus) %>%
mutate(is_gq = (TotHH == 0 & age18plus > 0),
has_hh = (TotHH > 0))
cat("Total BGs:", nrow(bg_frame), "\n")
## Total BGs: 627
cat("HH BGs (TotHH>0):", sum(bg_frame$has_hh), "\n")
## HH BGs (TotHH>0): 612
cat("GQ BGs (TotHH=0, age18plus>0):", sum(bg_frame$is_gq), "\n\n")
## GQ BGs (TotHH=0, age18plus>0): 2
# HH-BG frame: the sampling frame for both PSU and BG stages
bg_hh <- bg_frame %>% filter(has_hh)
# Aggregate HH-resident counts to tract level for PSU MOS
tract_hh <- bg_hh %>%
group_by(tract_id) %>%
summarise(
n_bg_hh = n(),
TotHH_tract = sum(TotHH),
age18_34_hh = sum(age18_34),
age35_49_hh = sum(age35_49),
age50_64_hh = sum(age50_64),
age65plus_hh = sum(age65plus),
age18plus_hh = sum(age18plus),
.groups = "drop")
tract_frame <- tract %>%
transmute(tract_id, NAME, TotHH, TotPerson,
age18_34, age35_49, age50_64, age65plus, age18plus) %>%
left_join(tract_hh, by = "tract_id") %>%
mutate(across(c(n_bg_hh, TotHH_tract,
age18_34_hh, age35_49_hh, age50_64_hh,
age65plus_hh, age18plus_hh),
~replace_na(., 0))) %>%
arrange(tract_id) # GEOID sort FIRST, before linkage
cat("=== HH residents per tract (age18plus_hh) ===\n")
## === HH residents per tract (age18plus_hh) ===
summary(tract_frame$age18plus_hh)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1156 1702 1740 2310 4417
cat("\n=== HH-BGs per tract ===\n")
##
## === HH-BGs per tract ===
print(table(tract_frame$n_bg_hh))
##
## 0 1 2 3 4 5
## 5 44 139 65 20 3
n_psu <- 40
target_resp <- c(age18_34=250, age35_49=250, age50_64=250, age65plus=250)
resp_rate <- c(age18_34=0.35, age35_49=0.45, age50_64=0.50, age65plus=0.65)
target_table <- tibble(
age_group = names(target_resp),
n_resp = as.numeric(target_resp),
rr = as.numeric(resp_rate)) %>%
mutate(n_selected = ceiling(n_resp / rr))
print(target_table)
## # A tibble: 4 × 4
## age_group n_resp rr n_selected
## <chr> <dbl> <dbl> <dbl>
## 1 age18_34 250 0.35 715
## 2 age35_49 250 0.45 556
## 3 age50_64 250 0.5 500
## 4 age65plus 250 0.65 385
total_n <- sum(target_table$n_selected) # S+ = 2156
m_bar <- total_n / n_psu # n_bar = 53.9
cat("\nS+ =", total_n, "| n_bar =", m_bar, "\n")
##
## S+ = 2156 | n_bar = 53.9
# Outcome variables are household-level or highly correlated within HH:
# (1) HH vehicle ownership -> IDENTICAL for all HH members (rho_HH=1)
# (2) Main transport mode -> highly correlated within HH
# (3) Skipped trips -> correlated within HH
# (4) Asked family for help-> correlated within HH
# Selecting 2+ per HH: DEFF_HH = 1+(k-1)*rho_HH >> 1 for these vars.
# Decision: 1 eligible adult per HH via Last-Birthday Method.
cat("1 adult per HH. Last-Birthday selection rule.\n")
## 1 adult per HH. Last-Birthday selection rule.
N_d <- c(
age18_34 = sum(tract_frame$age18_34_hh),
age35_49 = sum(tract_frame$age35_49_hh),
age50_64 = sum(tract_frame$age50_64_hh),
age65plus = sum(tract_frame$age65plus_hh))
f_18_34 <- target_table$n_selected[target_table$age_group=="age18_34"] / N_d["age18_34"]
f_35_49 <- target_table$n_selected[target_table$age_group=="age35_49"] / N_d["age35_49"]
f_50_64 <- target_table$n_selected[target_table$age_group=="age50_64"] / N_d["age50_64"]
f_65plus <- target_table$n_selected[target_table$age_group=="age65plus"] / N_d["age65plus"]
f_vals <- c(f_18_34, f_35_49, f_50_64, f_65plus)
max_f <- max(f_vals)
f_unif <- total_n / sum(N_d)
tibble(
age_group = names(N_d),
N_d_hh = as.integer(N_d),
n_needed = target_table$n_selected,
rr = as.numeric(resp_rate),
f_d = round(f_vals, 6),
resp_unif = round(f_unif * as.integer(N_d) * as.numeric(resp_rate))) %>%
print()
## # A tibble: 4 × 6
## age_group N_d_hh n_needed rr f_d resp_unif
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 age18_34 161352 715 0.35 0.00443 254
## 2 age35_49 113271 556 0.45 0.00491 229
## 3 age50_64 117759 500 0.5 0.00425 264
## 4 age65plus 87738 385 0.65 0.00439 256
cat("\nA uniform rate cannot achieve 250 respondents per domain.",
"Domain-specific rates f_d are required.\n")
##
## A uniform rate cannot achieve 250 respondents per domain. Domain-specific rates f_d are required.
# PSU MOS: S_i = sum_d f_d * Q_i(d) using HH-resident counts
# BG MOS: S_ij = sum_d f_d * Q_ij(d) using HH-resident counts
#
# Feasibility [Slide 34]:
# PSU: S_i >= n_bar * max(f_d)
# BG: S_ij >= n_bar * max(f_d) (same threshold, same formula)
# If a BG is too small, it is linked with adjacent BGs before selection.
min_feasible_MOS <- m_bar * max_f
cat("Feasibility threshold:", round(min_feasible_MOS, 6), "\n\n")
## Feasibility threshold: 0.264573
# PSU-level MOS (from HH tract aggregates)
tract_frame <- tract_frame %>%
mutate(
MOS_comp = f_18_34*age18_34_hh + f_35_49*age35_49_hh +
f_50_64*age50_64_hh + f_65plus*age65plus_hh,
flag_zero_hh = age18plus_hh == 0,
flag_lt2_bg = n_bg_hh < 2,
flag_infeas = (MOS_comp < min_feasible_MOS) & !flag_zero_hh)
# BG-level MOS (HH residents only)
bg_hh <- bg_hh %>%
mutate(MOS_bg = f_18_34*age18_34 + f_35_49*age35_49 +
f_50_64*age50_64 + f_65plus*age65plus)
cat("=== PSU composite MOS ===\n")
## === PSU composite MOS ===
summary(tract_frame$MOS_comp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 5.205 7.642 7.812 10.415 19.562
cat("\nSum S_i = S+ (should =", total_n, "):",
round(sum(tract_frame$MOS_comp[!tract_frame$flag_zero_hh]), 2), "\n")
##
## Sum S_i = S+ (should = 2156 ): 2156
cat("Tracts with zero HH adults:", sum(tract_frame$flag_zero_hh), "\n")
## Tracts with zero HH adults: 6
cat("Tracts with <2 HH-BGs:", sum(tract_frame$flag_lt2_bg & !tract_frame$flag_zero_hh), "\n")
## Tracts with <2 HH-BGs: 43
cat("Tracts with infeasible MOS:", sum(tract_frame$flag_infeas), "\n\n")
## Tracts with infeasible MOS: 4
cat("=== BG composite MOS ===\n")
## === BG composite MOS ===
summary(bg_hh$MOS_bg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.417 3.328 3.523 4.361 11.509
cat("BGs with MOS < threshold:",
sum(bg_hh$MOS_bg < min_feasible_MOS), "\n")
## BGs with MOS < threshold: 14
# Sort FIRST (Section 4), then link. Linked PSUs inherit GEOID order
# through sort_key = min(tract_id).
tract_frame <- tract_frame %>%
mutate(needs_link = !flag_zero_hh & (flag_infeas | flag_lt2_bg))
cat("Tracts needing PSU linkage:", sum(tract_frame$needs_link), "\n\n")
## Tracts needing PSU linkage: 44
build_psu_map <- function(tract_df, min_MOS) {
regular <- tract_df %>%
filter(age18plus_hh > 0, !needs_link) %>%
transmute(tract_id, psu_id = tract_id,
psu_type = "single_tract", link_group = NA_integer_)
pool <- tract_df %>%
filter(age18plus_hh > 0, needs_link) %>%
arrange(tract_id) %>%
mutate(grp = ceiling(row_number() / 2))
if (nrow(pool) == 0) return(regular)
for (iter in seq_len(30)) {
grp_mos <- pool %>% group_by(grp) %>%
summarise(mos = sum(MOS_comp), .groups = "drop")
bad <- grp_mos$grp[grp_mos$mos < min_MOS]
if (length(bad) == 0) break
for (b in bad) {
all_g <- sort(unique(pool$grp))
idx <- which(all_g == b)
partner <- if (idx < length(all_g)) all_g[idx+1] else all_g[idx-1]
pool$grp[pool$grp == partner] <- b
pool$grp <- as.integer(factor(pool$grp,
levels = sort(unique(pool$grp))))
}
}
linked <- pool %>% group_by(grp) %>%
mutate(
psu_id = paste0("LPSU_", first(tract_id), "_", last(tract_id)),
psu_type = case_when(n()==1~"singleton_link",
n()==2~"paired_link",
TRUE ~paste0(n(),"tract_link"))) %>%
ungroup() %>%
transmute(tract_id, psu_id, psu_type, link_group = grp)
bind_rows(regular, linked)
}
psu_map <- build_psu_map(tract_frame, min_feasible_MOS)
cat("=== PSU types ===\n"); print(psu_map %>% count(psu_type))
## === PSU types ===
## # A tibble: 3 × 2
## psu_type n
## <chr> <int>
## 1 4tract_link 4
## 2 paired_link 40
## 3 single_tract 226
psu_frame <- tract_frame %>%
filter(age18plus_hh > 0) %>%
left_join(psu_map, by = "tract_id") %>%
group_by(psu_id, psu_type) %>%
summarise(
sort_key = min(tract_id),
n_tracts = n(),
n_bg_hh = sum(n_bg_hh, na.rm = TRUE),
TotHH = sum(TotHH_tract, na.rm = TRUE),
age18_34_hh = sum(age18_34_hh, na.rm = TRUE),
age35_49_hh = sum(age35_49_hh, na.rm = TRUE),
age50_64_hh = sum(age50_64_hh, na.rm = TRUE),
age65plus_hh = sum(age65plus_hh, na.rm = TRUE),
age18plus_hh = sum(age18plus_hh, na.rm = TRUE),
age18_34 = sum(age18_34, na.rm = TRUE),
age35_49 = sum(age35_49, na.rm = TRUE),
age50_64 = sum(age50_64, na.rm = TRUE),
age65plus = sum(age65plus, na.rm = TRUE),
MOS_psu = sum(MOS_comp, na.rm = TRUE),
.groups = "drop") %>%
arrange(sort_key) # geographic order via first constituent GEOID
cat("Total PSUs:", nrow(psu_frame), "\n")
## Total PSUs: 247
cat("Sum MOS_psu = S+ (should =", total_n, "):",
round(sum(psu_frame$MOS_psu), 2), "\n")
## Sum MOS_psu = S+ (should = 2156 ): 2156
n_bad <- sum(psu_frame$MOS_psu < min_feasible_MOS)
if (n_bad > 0) {
cat("INFEASIBLE PSUs:\n")
print(psu_frame %>% filter(MOS_psu < min_feasible_MOS) %>%
dplyr::select(psu_id, psu_type, n_bg_hh, age18plus_hh, MOS_psu))
stop("Linkage incomplete.")
}
cat("PSU feasibility check PASSED.\n")
## PSU feasibility check PASSED.
cat("\n=== PSU MOS ===\n"); summary(psu_frame$MOS_psu)
##
## === PSU MOS ===
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3183 6.4253 8.4826 8.7287 10.8684 19.5623
cat("\n=== PSU types ===\n"); print(table(psu_frame$psu_type))
##
## === PSU types ===
##
## 4tract_link paired_link single_tract
## 1 20 226
set.seed(-77)
pik_psu <- inclusionprobabilities(psu_frame$MOS_psu, n = n_psu)
s_psu <- UPsystematic(pik_psu)
psu_frame <- psu_frame %>%
mutate(pik_psu = pik_psu, selected_psu = as.logical(s_psu))
psu_sample <- psu_frame %>% filter(selected_psu)
cat("PSUs selected:", nrow(psu_sample), "\n")
## PSUs selected: 40
cat("sum(pik_psu) =", round(sum(pik_psu),4), "(should =", n_psu, ")\n\n")
## sum(pik_psu) = 40 (should = 40 )
cat("=== MOS of selected PSUs ===\n"); summary(psu_sample$MOS_psu)
## === MOS of selected PSUs ===
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.747 7.809 10.140 10.109 12.127 19.562
cat("\n=== pi_1 of selected PSUs ===\n"); summary(psu_sample$pik_psu)
##
## === pi_1 of selected PSUs ===
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.06951 0.14487 0.18812 0.18756 0.22498 0.36294
cat("\n=== Types ===\n"); print(table(psu_sample$psu_type))
##
## === Types ===
##
## paired_link single_tract
## 6 34
# Within each selected PSU, select 1 BG by PPS systematic using S_ij.
# Before selection, BGs too small (S_ij < threshold) are linked with
# adjacent BGs within the same PSU (GEOID order within PSU).
#
# pi_{2|i} = S_ij / S_i => pi_{2|i} * S_i = S_ij
#
# This gives the key cancellation:
# pi_1 * pi_{2|i} * pi_{3|ij}(d)
# = (m*S_i/S+) * (S_ij/S_i) * (n_bar*f_d/S_ij) = f_d EXACT
selected_tract_ids <- psu_map %>%
filter(psu_id %in% psu_sample$psu_id) %>%
pull(tract_id)
# BG frame for selected PSUs (HH-BGs only, sorted by GEOID within PSU)
bg_in_sample <- bg_hh %>%
filter(tract_id %in% selected_tract_ids) %>%
left_join(psu_map %>% dplyr::select(tract_id, psu_id), by = "tract_id") %>%
left_join(psu_frame %>% dplyr::select(psu_id, MOS_psu), by = "psu_id") %>%
arrange(psu_id, bg_id) # GEOID sort within PSU before linkage
cat("HH-BGs in sampled PSUs:", nrow(bg_in_sample), "\n")
## HH-BGs in sampled PSUs: 97
cat("BGs with MOS_bg < threshold:",
sum(bg_in_sample$MOS_bg < min_feasible_MOS), "\n\n")
## BGs with MOS_bg < threshold: 2
# BG linkage within each PSU (same iterative logic as PSU linkage)
build_bg_map <- function(bg_df, min_MOS) {
bg_df %>%
group_by(psu_id) %>%
group_modify(function(df, key) {
df <- df %>% arrange(bg_id) %>% mutate(grp = row_number())
# Mark BGs needing linkage
df <- df %>% mutate(needs_link_bg = MOS_bg < min_MOS | age18plus == 0)
# Iterative linkage within PSU
pool_idx <- which(df$needs_link_bg)
if (length(pool_idx) == 0) {
return(df %>% mutate(bg_oper_id = bg_id, bg_oper_type = "single_bg"))
}
# Simple approach: pair small BGs with next BG in GEOID order
assigned <- rep(NA_integer_, nrow(df))
grp_id <- 1L
i <- 1L
while (i <= nrow(df)) {
if (df$needs_link_bg[i]) {
# Pair with next row
j <- i + 1L
if (j <= nrow(df)) {
assigned[i] <- grp_id; assigned[j] <- grp_id
i <- j + 1L
} else {
# Last row, pair with previous
assigned[i] <- grp_id - 1L
i <- i + 1L
}
} else {
assigned[i] <- grp_id
i <- i + 1L
}
grp_id <- grp_id + 1L
}
assigned[is.na(assigned)] <- max(assigned, na.rm=TRUE) + 1L
df %>%
mutate(
bg_grp = assigned,
bg_oper_id = ave(bg_id, bg_grp, FUN=function(x)
if(length(x)==1) x[1] else paste0("LBG_",x[1],"_",x[length(x)])),
bg_oper_type = ave(bg_id, bg_grp, FUN=function(x)
if(length(x)==1) "single_bg" else "linked_bg"))
}) %>%
ungroup()
}
bg_in_sample_linked <- build_bg_map(bg_in_sample, min_feasible_MOS)
# Aggregate to operational BG level
bg_oper_frame <- bg_in_sample_linked %>%
group_by(psu_id, MOS_psu, bg_oper_id, bg_oper_type) %>%
summarise(
n_bg_combined = n(),
TotHH = sum(TotHH),
age18_34 = sum(age18_34),
age35_49 = sum(age35_49),
age50_64 = sum(age50_64),
age65plus = sum(age65plus),
age18plus = sum(age18plus),
MOS_bg_oper = sum(MOS_bg), # S_ij for this operational BG
.groups = "drop") %>%
arrange(psu_id, bg_oper_id)
cat("Operational BGs in sampled PSUs:", nrow(bg_oper_frame), "\n")
## Operational BGs in sampled PSUs: 95
cat("BGs still infeasible after linkage:",
sum(bg_oper_frame$MOS_bg_oper < min_feasible_MOS), "\n\n")
## BGs still infeasible after linkage: 0
cat("=== BG MOS after linkage ===\n")
## === BG MOS after linkage ===
summary(bg_oper_frame$MOS_bg_oper)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.362 3.200 3.975 4.257 5.109 11.509
# PPS systematic selection: 1 operational BG per PSU
# pi_{2|i} = S_ij / S_i (inclusion prob proportional to BG MOS)
set.seed(-77 + 1)
bg_sample_list <- bg_oper_frame %>%
group_by(psu_id) %>%
group_map(function(df, key) {
n_here <- nrow(df)
S_i_val <- df$MOS_psu[1]
if (n_here == 1) {
df %>% mutate(pik_bg = df$MOS_bg_oper / S_i_val,
selected_bg = TRUE)
} else {
pik <- inclusionprobabilities(df$MOS_bg_oper, n = 1)
sel <- as.logical(UPsystematic(pik))
df %>% mutate(pik_bg = pik, selected_bg = sel)
}
}, .keep = TRUE) %>%
bind_rows()
bg_sample <- bg_sample_list %>% filter(selected_bg)
cat("\nBGs selected:", nrow(bg_sample), "(should be", n_psu, ")\n")
##
## BGs selected: 40 (should be 40 )
cat("\n=== pi_{2|i} = S_ij/S_i of selected BGs ===\n")
##
## === pi_{2|i} = S_ij/S_i of selected BGs ===
summary(bg_sample$pik_bg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1832 0.3710 0.4973 0.4826 0.5741 1.0000
cat("\n=== MOS_bg of selected BGs (S_ij) ===\n")
##
## === MOS_bg of selected BGs (S_ij) ===
summary(bg_sample$MOS_bg_oper)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.965 3.359 4.436 4.645 5.536 11.509
cat("\n=== TotHH of selected BGs ===\n")
##
## === TotHH of selected BGs ===
summary(bg_sample$TotHH)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 260.0 452.2 590.0 619.9 701.8 1429.0
# pi_{3|ij}(d) = n_bar * f_d / S_ij
# where S_ij = MOS_bg_oper of the selected BG
#
# Field operation:
# List all HHs in selected BG.
# Sample HHs at composite rate q_ij = n_bar / TotHH_selected_BG.
# Select 1 adult per sampled HH (last-birthday method).
sample_design <- bg_sample %>%
mutate(
# Person-stage rates: pi_{3|ij}(d) = n_bar * f_d / S_ij
p3_18_34 = m_bar * f_18_34 / MOS_bg_oper,
p3_35_49 = m_bar * f_35_49 / MOS_bg_oper,
p3_50_64 = m_bar * f_50_64 / MOS_bg_oper,
p3_65plus = m_bar * f_65plus / MOS_bg_oper,
# Cap at 1 (take-all for very small BGs)
p3_18_34_cap = pmin(p3_18_34, 1),
p3_35_49_cap = pmin(p3_35_49, 1),
p3_50_64_cap = pmin(p3_50_64, 1),
p3_65plus_cap = pmin(p3_65plus, 1),
takeall_18_34 = p3_18_34 > 1,
takeall_35_49 = p3_35_49 > 1,
takeall_50_64 = p3_50_64 > 1,
takeall_65plus = p3_65plus > 1,
# HH sampling rate for field use: q_ij = n_bar / TotHH_BG
hh_rate = pmin(m_bar / TotHH, 1),
# Expected workload per selected BG [n_bar * S_ij / S_ij = n_bar]
exp_n_18_34 = age18_34 * p3_18_34_cap,
exp_n_35_49 = age35_49 * p3_35_49_cap,
exp_n_50_64 = age50_64 * p3_50_64_cap,
exp_n_65plus = age65plus * p3_65plus_cap,
exp_n_total = exp_n_18_34 + exp_n_35_49 + exp_n_50_64 + exp_n_65plus)
cat("=== Take-all check ===\n")
## === Take-all check ===
cat("age18_34 :", sum(sample_design$takeall_18_34), "BGs\n")
## age18_34 : 0 BGs
cat("age35_49 :", sum(sample_design$takeall_35_49), "BGs\n")
## age35_49 : 0 BGs
cat("age50_64 :", sum(sample_design$takeall_50_64), "BGs\n")
## age50_64 : 0 BGs
cat("age65plus:", sum(sample_design$takeall_65plus), "BGs\n")
## age65plus: 0 BGs
cat("\n=== HH sampling rates (q_ij = n_bar/TotHH) ===\n")
##
## === HH sampling rates (q_ij = n_bar/TotHH) ===
summary(sample_design$hh_rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.03772 0.07688 0.09146 0.09883 0.11918 0.20731
cat("NA count:", sum(is.na(sample_design$hh_rate)), "(should be 0)\n")
## NA count: 0 (should be 0)
sample_final <- sample_design %>%
left_join(psu_frame %>% dplyr::select(psu_id, pik_psu),
by = "psu_id") %>%
mutate(
# Overall: pi_1 * pi_{2|i} * pi_{3|ij}(d) = f_d [EXACT]
p_overall_18_34 = pik_psu * pik_bg * p3_18_34_cap,
p_overall_35_49 = pik_psu * pik_bg * p3_35_49_cap,
p_overall_50_64 = pik_psu * pik_bg * p3_50_64_cap,
p_overall_65plus = pik_psu * pik_bg * p3_65plus_cap,
w_base_18_34 = 1 / p_overall_18_34,
w_base_35_49 = 1 / p_overall_35_49,
w_base_50_64 = 1 / p_overall_50_64,
w_base_65plus = 1 / p_overall_65plus)
cat("=== GOAL 1: SELF-WEIGHTING ===\n")
## === GOAL 1: SELF-WEIGHTING ===
cat("pi_1 * pi_{2|i} * pi_{3|ij}(d) should equal f_d exactly\n\n")
## pi_1 * pi_{2|i} * pi_{3|ij}(d) should equal f_d exactly
for (ag in c("18_34","35_49","50_64","65plus")) {
col <- paste0("p_overall_", ag)
fd <- switch(ag,"18_34"=f_18_34,"35_49"=f_35_49,
"50_64"=f_50_64,"65plus"=f_65plus)
v <- sample_final[[col]]
ok <- max(abs(v - fd)/fd, na.rm=TRUE) < 0.001
cat(sprintf(" age%-6s: [%.6f, %.6f] f_d=%.6f %s\n",
ag, min(v,na.rm=T), max(v,na.rm=T), fd,
ifelse(ok, "✓ EXACT", "⚠deviation")))
}
## age18_34 : [0.004431, 0.004431] f_d=0.004431 ✓ EXACT
## age35_49 : [0.004909, 0.004909] f_d=0.004909 ✓ EXACT
## age50_64 : [0.004246, 0.004246] f_d=0.004246 ✓ EXACT
## age65plus: [0.004388, 0.004388] f_d=0.004388 ✓ EXACT
cat("\nCV of base weights (0 = perfect self-weighting):\n")
##
## CV of base weights (0 = perfect self-weighting):
sample_final %>%
summarise(
CV_18_34 = sd(w_base_18_34) /mean(w_base_18_34),
CV_35_49 = sd(w_base_35_49) /mean(w_base_35_49),
CV_50_64 = sd(w_base_50_64) /mean(w_base_50_64),
CV_65plus = sd(w_base_65plus)/mean(w_base_65plus)) %>%
print()
## # A tibble: 1 × 4
## CV_18_34 CV_35_49 CV_50_64 CV_65plus
## <dbl> <dbl> <dbl> <dbl>
## 1 1.72e-16 1.20e-16 1.24e-16 1.49e-16
cat("\n=== GOAL 2: EQUAL WORKLOAD PER SELECTED BG/PSU ===\n")
##
## === GOAL 2: EQUAL WORKLOAD PER SELECTED BG/PSU ===
cat("exp_n_total = sum_d Q_ij(d) * pi_{3|ij}(d) = n_bar * S_ij/S_ij = n_bar\n\n")
## exp_n_total = sum_d Q_ij(d) * pi_{3|ij}(d) = n_bar * S_ij/S_ij = n_bar
summary(sample_final$exp_n_total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 53.9 53.9 53.9 53.9 53.9 53.9
cat("Target n_bar:", m_bar, "\n")
## Target n_bar: 53.9
cat("Deviation only from take-all capping (p3>1 => workload slightly < n_bar).\n\n")
## Deviation only from take-all capping (p3>1 => workload slightly < n_bar).
cat("exp_n_total for all 40 PSUs:\n")
## exp_n_total for all 40 PSUs:
sample_final %>%
dplyr::select(psu_id, MOS_psu, MOS_bg_oper, pik_bg, exp_n_total) %>%
mutate(across(where(is.numeric), ~round(., 4))) %>%
print(n = Inf)
## # A tibble: 40 × 5
## psu_id MOS_psu MOS_bg_oper pik_bg exp_n_total
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 26163500200 9.41 6.67 0.709 53.9
## 2 26163500900 11.6 5.19 0.448 53.9
## 3 26163501400 12.1 5.58 0.462 53.9
## 4 26163501900 7.76 3.37 0.435 53.9
## 5 26163505100 10.1 4.17 0.413 53.9
## 6 26163506900 11.5 3.41 0.297 53.9
## 7 26163508000 3.75 3.75 1 53.9
## 8 26163511400 7.48 3.76 0.502 53.9
## 9 26163513900 5.21 2.73 0.524 53.9
## 10 26163515700 19.6 7.42 0.379 53.9
## 11 26163516700 11.6 3.11 0.268 53.9
## 12 26163517500 11.2 5.52 0.492 53.9
## 13 26163520200 15.8 11.5 0.730 53.9
## 14 26163523200 10.2 3.21 0.315 53.9
## 15 26163524200 13.1 4.87 0.372 53.9
## 16 26163525700 13.2 4.86 0.368 53.9
## 17 26163526400 4.35 2.06 0.474 53.9
## 18 26163530800 4.88 2.90 0.593 53.9
## 19 26163531800 5.05 2.59 0.512 53.9
## 20 26163533600 4.66 1.97 0.422 53.9
## 21 26163534700 9.93 2.52 0.254 53.9
## 22 26163535600 13.0 2.37 0.183 53.9
## 23 26163536500 7.18 4.08 0.568 53.9
## 24 26163538100 8.07 4.06 0.503 53.9
## 25 26163538600 16.2 3.69 0.227 53.9
## 26 26163539500 10.7 3.48 0.327 53.9
## 27 26163540400 9.08 4.77 0.526 53.9
## 28 26163540900 11.8 3.32 0.280 53.9
## 29 26163541500 13.0 6.92 0.534 53.9
## 30 26163542300 7.83 4.83 0.617 53.9
## 31 26163543100 8.58 4.65 0.543 53.9
## 32 26163545500 11.7 4.22 0.360 53.9
## 33 26163545900 11.4 5.78 0.507 53.9
## 34 26163546700 9.38 5.68 0.606 53.9
## 35 LPSU_26163503400_26163503600 7.89 4.86 0.616 53.9
## 36 LPSU_26163506100_26163506200 12.3 6.19 0.505 53.9
## 37 LPSU_26163521800_26163522400 8.01 5.26 0.656 53.9
## 38 LPSU_26163537200_26163538800 7.49 5.46 0.729 53.9
## 39 LPSU_26163539100_26163539700 13.6 6.10 0.449 53.9
## 40 LPSU_26163544000_26163546301 14.9 8.91 0.598 53.9
# Anticipated Precision — correct calculation
p0 <- 0.20 # planning proportion: public transit use
rho <- 0.05 # assumed ICC within tracts (typical urban survey)
# Overall response rate (weighted by selections needed)
r_bar <- sum(target_table$n_resp) / sum(target_table$n_selected)
# = 1000 / 2156 = 0.4638
# Expected respondents per PSU
n_resp_per_psu <- m_bar * r_bar # 53.9 * 0.4638 ≈ 25.0
n_resp_total <- sum(target_resp) # 1000
# DEFF using respondents per PSU (correct denominator)
DEFF <- 1 + (n_resp_per_psu - 1) * rho
n_eff <- n_resp_total / DEFF
SE <- sqrt(p0 * (1 - p0) / n_eff)
MOE <- 1.96 * SE
cat("=== Anticipated Precision ===\n")
## === Anticipated Precision ===
cat("Overall response rate (r_bar):", round(r_bar, 4), "\n")
## Overall response rate (r_bar): 0.4638
cat("Expected respondents per PSU:", round(n_resp_per_psu, 2), "\n")
## Expected respondents per PSU: 25
cat("DEFF =", round(DEFF, 3), "\n")
## DEFF = 2.2
cat("n_eff =", round(n_eff, 1), "\n")
## n_eff = 454.5
cat("SE(p_hat) =", round(SE, 4), "\n")
## SE(p_hat) = 0.0188
cat("95% MOE =", round(MOE, 4), "(", round(MOE*100, 2), "%)\n\n")
## 95% MOE = 0.0368 ( 3.68 %)
# Domain-level DEFF (each domain: 250 respondents over 40 PSUs)
n_resp_per_psu_per_domain <- n_resp_per_psu / 4 # = 6.25
DEFF_domain <- 1 + (n_resp_per_psu_per_domain - 1) * rho
cat("Domain-level DEFF (per age group):", round(DEFF_domain, 3), "\n")
## Domain-level DEFF (per age group): 1.262
cat("(For domain-specific estimates, e.g., transit use among 18-34)\n\n")
## (For domain-specific estimates, e.g., transit use among 18-34)
# Sensitivity table
tibble(rho = c(0.01, 0.03, 0.05, 0.10, 0.15)) %>%
mutate(
n_resp_psu = n_resp_per_psu,
DEFF = 1 + (n_resp_psu - 1) * rho,
n_eff = n_resp_total / DEFF,
SE = sqrt(p0 * (1 - p0) / n_eff),
MOE_95pct = round(1.96 * SE, 4)) %>%
print()
## # A tibble: 5 × 6
## rho n_resp_psu DEFF n_eff SE MOE_95pct
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.01 25 1.24 806. 0.0141 0.0276
## 2 0.03 25 1.72 581. 0.0166 0.0325
## 3 0.05 25 2.2 455. 0.0188 0.0368
## 4 0.1 25 3.4 294. 0.0233 0.0457
## 5 0.15 25 4.6 217. 0.0271 0.0532
cat("library(survey)\n")
## library(survey)
cat("design <- svydesign(\n")
## design <- svydesign(
cat(" ids = ~psu_id + bg_oper_id,\n")
## ids = ~psu_id + bg_oper_id,
cat(" weights = ~w_base_d,\n")
## weights = ~w_base_d,
cat(" data = respondent_data\n")
## data = respondent_data
cat(")\n\n")
## )
cat("Both stages used systematic PPS => no unbiased variance estimator.\n")
## Both stages used systematic PPS => no unbiased variance estimator.
cat("Use successive-differences estimator or WR approximation.\n")
## Use successive-differences estimator or WR approximation.
frame1 <- psu_frame %>%
dplyr::select(psu_id, psu_type, sort_key, n_tracts, n_bg_hh,
TotHH, age18plus_hh,
age18_34_hh, age35_49_hh, age50_64_hh, age65plus_hh,
MOS_psu, pik_psu, selected_psu)
write.csv(frame1, "/tmp/FRAME1_PSU.csv", row.names = FALSE)
frame2 <- bg_sample_list %>%
dplyr::select(psu_id, bg_oper_id, n_bg_combined,
TotHH, age18plus, age18_34, age35_49, age50_64, age65plus,
MOS_bg_oper, pik_bg, selected_bg)
write.csv(frame2, "/tmp/FRAME2_BG.csv", row.names = FALSE)
# psu_type lives in psu_frame; bg_oper_type lives in bg_sample.
# Join them in before selecting for output.
sample_out <- sample_final %>%
left_join(psu_frame %>% dplyr::select(psu_id, psu_type),
by = "psu_id") %>%
left_join(bg_sample %>% dplyr::select(psu_id, bg_oper_id, bg_oper_type),
by = c("psu_id", "bg_oper_id")) %>%
dplyr::select(
psu_id, psu_type, bg_oper_id,
TotHH, age18plus, age18_34, age35_49, age50_64, age65plus,
MOS_psu, pik_psu, MOS_bg_oper, pik_bg,
p3_18_34, p3_35_49, p3_50_64, p3_65plus,
p_overall_18_34, p_overall_35_49, p_overall_50_64, p_overall_65plus,
w_base_18_34, w_base_35_49, w_base_50_64, w_base_65plus,
hh_rate, exp_n_18_34, exp_n_35_49, exp_n_50_64, exp_n_65plus, exp_n_total,
takeall_18_34, takeall_35_49, takeall_50_64, takeall_65plus)
write.csv(sample_out, "/tmp/SAMPLE.csv", row.names = FALSE)
cat("FRAME1_PSU.csv:", nrow(frame1), "rows\n")
## FRAME1_PSU.csv: 247 rows
cat("FRAME2_BG.csv :", nrow(frame2), "rows\n")
## FRAME2_BG.csv : 95 rows
cat("SAMPLE.csv :", nrow(sample_out), "rows\n\n")
## SAMPLE.csv : 40 rows
cat("=== Sample preview ===\n")
## === Sample preview ===
sample_out %>%
dplyr::select(psu_id, psu_type, bg_oper_id,
TotHH, pik_psu, pik_bg,
hh_rate, w_base_18_34, exp_n_total) %>%
mutate(across(where(is.numeric), ~round(., 4))) %>%
print(n = Inf)
## # A tibble: 40 × 9
## psu_id psu_type bg_oper_id TotHH pik_psu pik_bg hh_rate w_base_18_34
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 26163500200 single_… 261635002… 764 0.175 0.709 0.0705 226.
## 2 26163500900 single_… 261635009… 661 0.215 0.448 0.0815 226.
## 3 26163501400 single_… 261635014… 652 0.224 0.462 0.0827 226.
## 4 26163501900 single_… 261635019… 502 0.144 0.435 0.107 226.
## 5 26163505100 single_… 261635051… 689 0.187 0.413 0.0782 226.
## 6 26163506900 single_… 261635069… 430 0.213 0.297 0.125 226.
## 7 26163508000 single_… LBG_26163… 629 0.0695 1 0.0857 226.
## 8 26163511400 single_… 261635114… 625 0.139 0.502 0.0862 226.
## 9 26163513900 single_… 261635139… 453 0.0966 0.524 0.119 226.
## 10 26163515700 single_… 261635157… 1429 0.363 0.379 0.0377 226.
## 11 26163516700 single_… 261635167… 465 0.216 0.268 0.116 226.
## 12 26163517500 single_… 261635175… 921 0.208 0.492 0.0585 226.
## 13 26163520200 single_… 261635202… 415 0.293 0.730 0.130 226.
## 14 26163523200 single_… 261635232… 347 0.189 0.315 0.155 226.
## 15 26163524200 single_… 261635242… 547 0.243 0.372 0.0985 226.
## 16 26163525700 single_… LBG_26163… 570 0.245 0.368 0.0946 226.
## 17 26163526400 single_… 261635264… 260 0.0808 0.474 0.207 226.
## 18 26163530800 single_… 261635308… 545 0.0906 0.593 0.0989 226.
## 19 26163531800 single_… 261635318… 465 0.0936 0.512 0.116 226.
## 20 26163533600 single_… 261635336… 414 0.0864 0.422 0.130 226.
## 21 26163534700 single_… 261635347… 360 0.184 0.254 0.150 226.
## 22 26163535600 single_… 261635356… 299 0.240 0.183 0.180 226.
## 23 26163536500 single_… 261635365… 644 0.133 0.568 0.0837 226.
## 24 26163538100 single_… 261635381… 522 0.150 0.503 0.103 226.
## 25 26163538600 single_… 261635386… 414 0.301 0.227 0.130 226.
## 26 26163539500 single_… 261635395… 450 0.198 0.327 0.120 226.
## 27 26163540400 single_… 261635404… 610 0.168 0.526 0.0884 226.
## 28 26163540900 single_… 261635409… 378 0.220 0.280 0.143 226.
## 29 26163541500 single_… 261635415… 980 0.240 0.534 0.055 226.
## 30 26163542300 single_… 261635423… 679 0.145 0.617 0.0794 226.
## 31 26163543100 single_… 261635431… 546 0.159 0.543 0.0987 226.
## 32 26163545500 single_… 261635455… 545 0.217 0.360 0.0989 226.
## 33 26163545900 single_… 261635459… 826 0.212 0.507 0.0653 226.
## 34 26163546700 single_… 261635467… 740 0.174 0.606 0.0728 226.
## 35 LPSU_261635034… paired_… 261635036… 643 0.146 0.616 0.0838 226.
## 36 LPSU_261635061… paired_… 261635061… 744 0.228 0.505 0.0724 226.
## 37 LPSU_261635218… paired_… 261635218… 901 0.149 0.656 0.0598 226.
## 38 LPSU_261635372… paired_… 261635388… 678 0.139 0.729 0.0795 226.
## 39 LPSU_261635391… paired_… 261635391… 804 0.252 0.449 0.067 226.
## 40 LPSU_261635440… paired_… 261635440… 1250 0.276 0.598 0.0431 226.
## # ℹ 1 more variable: exp_n_total <dbl>
library(tigris)
library(sf)
library(leaflet)
library(ggplot2)
options(tigris_class = "sf")
options(tigris_use_cache = TRUE)
# Download Wayne County shapefiles
wayne_tracts <- tracts(state = "MI", county = "Wayne", year = 2020,
progress_bar = FALSE)
wayne_bgs <- block_groups(state = "MI", county = "Wayne", year = 2020,
progress_bar = FALSE)
# Filter to Detroit only
detroit_tracts_sf <- wayne_tracts %>%
right_join(tract_fips, by = "GEOID")
detroit_bgs_sf <- wayne_bgs %>%
mutate(GEOIDt = substr(GEOID, 1, 11)) %>%
filter(GEOIDt %in% tract_fips$GEOID)
# Project to WGS84
detroit_tracts_trans <- st_transform(detroit_tracts_sf, 4326)
detroit_bgs_trans <- st_transform(detroit_bgs_sf, 4326)
cat("Detroit tracts in shapefile:", nrow(detroit_tracts_trans), "\n")
## Detroit tracts in shapefile: 276
cat("Detroit BGs in shapefile:", nrow(detroit_bgs_trans), "\n")
## Detroit BGs in shapefile: 627
ggplot() +
geom_sf(data = detroit_bgs_trans,
fill = NA,
linetype = "dotted",
linewidth = 0.3,
color = "grey50") +
geom_sf(data = detroit_tracts_trans,
fill = NA,
color = "black",
linewidth = 0.7) +
labs(title = "Detroit Tracts (Solid) and Block Groups (Dotted)",
caption = "Source: 2020 Census TIGER/Line Shapefiles") +
theme_minimal() +
theme(plot.title = element_text(face = "bold", size = 12))
Figure 1: City of Detroit Census Tracts (solid) and Block Groups (dotted)
# ---------------------------------------------------------------
# FIX for 3-stage design:
#
# In 745_final_3stage.Rmd the relevant objects are:
# psu_map -> maps tract_id to psu_id
# psu_sample -> the 40 selected PSUs (has psu_id)
# bg_sample -> the 40 selected BGs (has bg_oper_id, NOT bg_id directly)
# bg_in_sample_linked -> original bg_id rows with their bg_oper_id
#
# The GEOID column in tigris block_groups() is 12-digit (= original bg_id).
# bg_oper_id may be "LBG_..." for linked BGs, which won't match tigris.
# Solution: trace back from bg_sample$bg_oper_id to original bg_id(s)
# via bg_in_sample_linked, then match those to tigris GEOID.
# ---------------------------------------------------------------
# Step 1: selected PSU tract GEOIDs
selected_tract_geoids <- psu_map %>%
filter(psu_id %in% psu_sample$psu_id) %>%
pull(tract_id)
# Step 2: map selected bg_oper_ids back to original 12-digit bg_ids
# bg_in_sample_linked has columns: bg_id, bg_oper_id, psu_id
selected_oper_ids <- bg_sample$bg_oper_id
listing_bg_geoids <- bg_in_sample_linked %>%
filter(psu_id %in% psu_sample$psu_id,
bg_oper_id %in% selected_oper_ids) %>%
pull(bg_id) %>%
unique()
cat("Selected PSU tracts:", length(selected_tract_geoids), "\n")
## Selected PSU tracts: 46
cat("Listing BG original GEOIDs:", length(listing_bg_geoids), "\n")
## Listing BG original GEOIDs: 42
# Mark flags on spatial data
detroit_tracts_map <- detroit_tracts_trans %>%
mutate(selected_psu = GEOID %in% selected_tract_geoids)
detroit_bgs_map <- detroit_bgs_trans %>%
mutate(is_listing_bg = GEOID %in% listing_bg_geoids)
selected_tracts_sf <- detroit_tracts_map %>% filter(selected_psu)
listing_bgs_sf <- detroit_bgs_map %>% filter(is_listing_bg)
cat("Tracts matched as selected:", nrow(selected_tracts_sf),
"(should be close to", length(selected_tract_geoids), ")\n")
## Tracts matched as selected: 46 (should be close to 46 )
cat("BGs matched as listing:", nrow(listing_bgs_sf),
"(may be > 40 if any listing BG spans a linked operational unit)\n")
## BGs matched as listing: 42 (may be > 40 if any listing BG spans a linked operational unit)
leaflet() %>%
addTiles() %>%
# Base: all Detroit tracts
addPolygons(
data = detroit_tracts_trans,
color = "grey40",
weight = 0.8,
fillOpacity = 0.0,
popup = ~NAME,
group = "All Tracts") %>%
# Selected PSUs (blue)
addPolygons(
data = selected_tracts_sf,
color = "blue",
weight = 2,
fillColor = "blue",
fillOpacity = 0.25,
popup = ~paste0("<b>Selected PSU</b><br>GEOID: ", GEOID,
"<br>", NAME),
group = "Selected PSUs (Blue)") %>%
# Listing BGs (red)
addPolygons(
data = listing_bgs_sf,
color = "darkred",
weight = 2,
fillColor = "red",
fillOpacity = 0.70,
popup = ~paste0("<b>Listing Block Group</b><br>GEOID: ", GEOID),
group = "Listing BGs (Red)") %>%
addLayersControl(
overlayGroups = c("All Tracts",
"Selected PSUs (Blue)",
"Listing BGs (Red)"),
options = layersControlOptions(collapsed = FALSE)) %>%
addLegend(
position = "bottomright",
colors = c("blue", "red"),
labels = c("Selected PSU (Tract)", "Designated Listing BG"),
title = "Sample Units",
opacity = 0.8)
Figure 2: Selected Sample PSUs (blue) and Designated Listing Block Groups (red)