Adding Treatment Indicator to Data

# Load the rent data and flag treated observations: treat is 1 for rows with
# state "LA" and YEAR >= 2005, and 0 otherwise.
rent.df <- read.csv("Rent All.csv") %>%
    mutate(treat = ifelse(state == "LA" & YEAR >= 2005, 1, 0)) %>%
    # Drop every AREANAME group that has at least one missing Unemployee.Rate.
    group_by(AREANAME) %>%
    filter(!any(is.na(Unemployee.Rate))) %>%
    # Remove the grouping once the per-group filter is done, so later steps
    # operate on the whole table instead of carrying the AREANAME grouping.
    ungroup()

Propensity Score Calculation

# Step 1: Estimate Propensity Scores
# Fit the matching model for the treatment indicator on the covariates
# listed in the formula, using the "optimal" matching method.
ps_model <- matchit(
    formula = treat ~ RENT_0 + Price + Unemployee.Rate + Personal.Income +
        Resident.Population,
    data = rent.df,
    method = "optimal"
)

# Step 2: Rank Propensity Scores
# Store the model's distance values as each row's propensity score, then
# sort the rows from the highest score to the lowest.
rent.df[["propensity_score"]] <- ps_model[["distance"]]
ranked_data <- rent.df[
    order(rent.df[["propensity_score"]], decreasing = TRUE),
]

# Step 3: Set Threshold
# Fraction of control rows to keep, counted from the top of the ranking
# (example value: 0.5 retains the top 50% of control rows).
threshold <- 0.5

# Step 4: Select Control Counties based on Threshold
# Split the ranked rows by treatment status; each subset keeps the ordering
# of ranked_data (sorted by descending propensity score in Step 2).
treated_counties <- subset(ranked_data, treat == 1)
control_counties <- subset(ranked_data, treat == 0)

# Use nrow(), not length(): length() on a data frame returns the number of
# columns, which would make the cutoff unrelated to how many controls exist.
threshold_index <- ceiling(threshold * nrow(control_counties))
# seq_len() gives an empty selection when threshold_index is 0, whereas
# 1:0 would expand to c(1, 0) and still pick the first row.
reduced_control_counties <- control_counties[seq_len(threshold_index), ]
# Select the displayed columns by name so the preview does not depend on
# column positions.
head(reduced_control_counties[, c("MSA", "propensity_score")], n = 5)
## # A tibble: 5 × 2
##   MSA          propensity_score
##   <chr>                   <dbl>
## 1 Salinas, CA             0.396
## 2 Casper, WY              0.379
## 3 Boulder, CO             0.372
## 4 Casper, WY              0.344
## 5 Bismarck, ND            0.300