# Adding Treatment Indicator to Data
# Load the rent data and flag treated observations.
# treat is 1 for rows with state == "LA" and YEAR >= 2005, and 0 otherwise.
rent.df <- read.csv("Rent All.csv") %>%
  mutate(treat = ifelse(state == "LA" & YEAR >= 2005, 1, 0)) %>%
  # Drop every AREANAME that has at least one missing Unemployee.Rate value;
  # any() is evaluated per group, so an area is kept or removed as a whole.
  group_by(AREANAME) %>%
  filter(!any(is.na(Unemployee.Rate))) %>%
  # Remove the grouping so later steps operate on an ungrouped table.
  ungroup()
# Propensity Score Calculation
# Step 1: Estimate propensity scores
# Fit the matching model of treat on the covariates listed in the formula,
# using optimal matching; the fitted object is kept in ps_model.
ps_model <- matchit(
  formula = treat ~ RENT_0 + Price + Unemployee.Rate + Personal.Income +
    Resident.Population,
  data = rent.df,
  method = "optimal"
)
# Step 2: Rank propensity scores
# Attach the distance measure stored on the fitted model as a new column,
# then sort the rows from the highest score to the lowest.
rent.df[["propensity_score"]] <- ps_model[["distance"]]
score_order <- order(rent.df[["propensity_score"]], decreasing = TRUE)
ranked_data <- rent.df[score_order, ]
# Step 3: Set threshold
# Share of the ranked control rows to keep (example: 0.5 retains the top 50%).
threshold <- 0.5

# Step 4: Select control counties based on threshold
# Split the ranked rows by treatment status; subsetting keeps the existing
# row order, so each subset stays in the order of ranked_data.
treated_counties <- subset(ranked_data, treat == 1)
control_counties <- subset(ranked_data, treat == 0)

# Use nrow(): length() on a data frame counts columns, not rows, which would
# tie the cut-off to the number of variables instead of the number of
# control rows.
threshold_index <- ceiling(threshold * nrow(control_counties))

# seq_len() yields an empty selection when there are no control rows,
# whereas 1:0 would expand to c(1, 0).
reduced_control_counties <- control_counties[seq_len(threshold_index), ]

# Preview the area name and score by column name rather than by position.
# NOTE(review): the same MSA can appear more than once in this preview, so
# rows look like repeated observations per area rather than unique
# counties -- confirm that ranking at row level is intended.
head(reduced_control_counties[, c("MSA", "propensity_score")], n = 5)
## # A tibble: 5 × 2
## MSA propensity_score
## <chr> <dbl>
## 1 Salinas, CA 0.396
## 2 Casper, WY 0.379
## 3 Boulder, CO 0.372
## 4 Casper, WY 0.344
## 5 Bismarck, ND 0.300