#install.packages("fredr")Final Project
Project Proposal
This project aims to analyze the housing affordability crisis in New York State by integrating housing market data, demographic statistics, and macroeconomic indicators to understand the relationship between income levels, housing costs, and broader economic conditions. The motivation for performing this analysis is to better understand the widening gap between household incomes and housing prices/rents in New York, and to use data-driven methods to identify how affordability varies across regions and over time. This topic is important because housing costs directly impact financial stability, migration patterns, and quality of life, and combining multiple data sources provides a more complete picture than any single dataset alone. Data will be sourced from Zillow housing data (for rent and home price estimates), the U.S. Census American Community Survey 5-Year Estimates accessed via the Census API (for income, rent burden, and housing value indicators at county and state levels), and the Federal Reserve Economic Data (FRED) API (for macroeconomic variables such as interest rates, inflation, and mortgage rates). The analysis will follow a structured data science workflow involving data acquisition through APIs, data cleaning and transformation to align geographic and temporal scales, exploratory data analysis, and statistical modeling to examine affordability ratios and trends. Visualizations such as income-to-housing-cost ratios, geographic affordability comparisons, and macroeconomic trend overlays will be used to support findings, demonstrating how multi-source data integration can provide insight into housing affordability pressures in New York State.
Running Code
library(tidyverse)
library(httr)
library(jsonlite)
library(tidycensus)
library(fredr)
library(lubridate)
library(zoo)
library(gt)############################################################
# ACS 5-YEAR - MULTI-YEAR PULL (NY COUNTIES)
# CLEAN COUNTY NAMES FOR JOINING
############################################################
library(tidyverse)
library(jsonlite)
years <- 2009:2024
acs_5yr_all <- list()
for (yr in years) {
url <- paste0(
"https://api.census.gov/data/", yr, "/acs/acs5?",
"get=NAME,B19013_001E,B25077_001E,B25064_001E&",
"for=county:*&in=state:36"
)
try({
raw <- fromJSON(url)
df <- as.data.frame(raw[-1, ], stringsAsFactors = FALSE)
colnames(df) <- raw[1, ]
df <- df %>%
mutate(year = yr) %>%
rename(
county_name = NAME,
median_income = B19013_001E,
median_home = B25077_001E,
median_rent = B25064_001E,
state_fips = state,
county_fips = county
) %>%
# CLEAN COUNTY NAME
mutate(
county_name = county_name %>%
str_remove(", New York") %>%
str_trim()
) %>%
# NUMERIC CONVERSION
mutate(
median_income = as.numeric(median_income),
median_home = as.numeric(median_home),
median_rent = as.numeric(median_rent)
)
acs_5yr_all[[as.character(yr)]] <- df
}, silent = TRUE)
}
############################################################
# COMBINE ALL YEARS
############################################################
acs_ny_5yr <- bind_rows(acs_5yr_all)
############################################################
# EXPORT CSV
############################################################
write_csv(acs_ny_5yr, "acs_ny_5year_2005_2024.csv")
############################################################
# CHECK
############################################################
acs_ny_5yr |> head(5) |> gt()| county_name | median_income | median_home | median_rent | state_fips | county_fips | year |
|---|---|---|---|---|---|---|
| Albany County | 55350 | 192500 | 830 | 36 | 001 | 2009 |
| Allegany County | 40917 | 65100 | 566 | 36 | 003 | 2009 |
| Bronx County | 33794 | 369600 | 885 | 36 | 005 | 2009 |
| Broome County | 43467 | 95600 | 598 | 36 | 007 | 2009 |
| Cattaraugus County | 41482 | 76000 | 576 | 36 | 009 | 2009 |
############################################################
# ACS 5-YEAR EDA — STATEWIDE TRENDS + AFFORDABILITY RATIO
############################################################
library(tidyverse)
library(scales)
library(patchwork)
# ── PREP ─────────────────────────────────────────────────
acs_clean <- acs_ny_5yr |>
filter(median_income > 0, median_home > 0, median_rent > 0)
state_trends <- acs_clean |>
group_by(year) |>
summarise(
`Median Income` = mean(median_income, na.rm = TRUE),
`Median Home Value` = mean(median_home, na.rm = TRUE),
`Median Monthly Rent` = mean(median_rent, na.rm = TRUE),
.groups = "drop"
) |>
pivot_longer(-year, names_to = "Metric", values_to = "Value")
# ── PLOT 1: TRENDS ───────────────────────────────────────
p1 <- ggplot(state_trends, aes(x = year, y = Value, color = Metric)) +
geom_line(linewidth = 1.3) +
geom_point(size = 2) +
facet_wrap(~Metric, scales = "free_y", ncol = 1) +
scale_color_manual(values = c(
"Median Income" = "#1B9E77",
"Median Home Value" = "#D95F02",
"Median Monthly Rent" = "#7570B3"
)) +
scale_x_continuous(breaks = seq(2009, 2024, 2)) +
scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
labs(
title = "NY Housing and Income Trends",
subtitle = "State averages across all NY counties",
x = "Year", y = "USD"
) +
theme_minimal(base_size = 13) +
theme(
legend.position = "none",
strip.text = element_text(face = "bold", size = 11),
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 11, color = "gray40"),
axis.title = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
# ── PLOT 2: PRICE-TO-INCOME RATIO ────────────────────────
pti_summary <- acs_clean |>
mutate(price_to_income = median_home / median_income) |>
group_by(year) |>
summarise(
pti_mean = mean(price_to_income, na.rm = TRUE),
pti_lo = quantile(price_to_income, 0.25, na.rm = TRUE),
pti_hi = quantile(price_to_income, 0.75, na.rm = TRUE),
.groups = "drop"
)
p2 <- ggplot(pti_summary, aes(x = year, y = pti_mean)) +
geom_ribbon(aes(ymin = pti_lo, ymax = pti_hi), fill = "#D95F02", alpha = 0.15) +
geom_line(color = "#D95F02", linewidth = 1.3) +
geom_point(color = "#D95F02", size = 2) +
geom_hline(yintercept = 3, linetype = "dashed", color = "gray50") +
annotate("text", x = 2009, y = 3.15, label = "3× affordability threshold",
hjust = 0, size = 3.5, color = "gray40") +
scale_x_continuous(breaks = seq(2009, 2024, 2)) +
scale_y_continuous(labels = label_number(suffix = "×", accuracy = 0.1)) +
labs(
title = "Home Price-to-Income Ratio (2009–2024)",
subtitle = "Mean across NY counties; shaded band = interquartile range",
x = "Year", y = "Home Value ÷ Median Income"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 11, color = "gray40"),
axis.title = element_text(face = "bold"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()
)
# ── COMBINE ──────────────────────────────────────────────
p1 + p2 +
plot_layout(widths = c(1, 1)) +
plot_annotation(
caption = "Note: ACS 5-Year data averages survey responses over 5 years, producing smoother trends that reduce year-to-year volatility but may lag real-time market changes.",
theme = theme(
plot.caption = element_text(size = 10, color = "gray40", hjust = 0, face = "italic")
)
)############################################################
# ROBUST FRED PULL (FIXES RANDOM 500 SERVER ERROR)
############################################################
library(tidyverse)
library(httr)
library(jsonlite)
library(lubridate)
library(zoo)
############################################################
# SAFE FUNCTION WITH RETRY LOGIC
############################################################
get_fred_data <- function(series_id, retries = 5, wait_sec = 10) {
api_key <- Sys.getenv("FRED_API_KEY")
url <- paste0(
"https://api.stlouisfed.org/fred/series/observations?",
"series_id=", series_id,
"&api_key=", api_key,
"&file_type=json"
)
for(i in 1:retries){
cat("Trying", series_id, "- Attempt", i, "\n")
result <- try(GET(url), silent = TRUE)
if(!inherits(result, "try-error") && status_code(result) == 200){
txt <- content(result, "text", encoding = "UTF-8")
data_raw <- fromJSON(txt)
df <- data_raw$observations %>%
select(date, value) %>%
mutate(
date = as.Date(date),
value = as.numeric(value)
)
return(df)
}
Sys.sleep(wait_sec)
}
stop(paste("Failed after retries:", series_id))
}
############################################################
# PULL DATA
############################################################
mortgage30 <- get_fred_data("MORTGAGE30US")Trying MORTGAGE30US - Attempt 1
Sys.sleep(5)
fed_funds <- get_fred_data("FEDFUNDS")Trying FEDFUNDS - Attempt 1
Sys.sleep(5)
cpi <- get_fred_data("CPIAUCSL")Trying CPIAUCSL - Attempt 1
############################################################
# MONTHLY CLEAN DATASET
############################################################
mortgage_m <- mortgage30 %>%
mutate(month = floor_date(date, "month")) %>%
group_by(month) %>%
summarise(mortgage_rate = mean(value, na.rm = TRUE), .groups = "drop")
fed_m <- fed_funds %>%
mutate(month = floor_date(date, "month")) %>%
group_by(month) %>%
summarise(fed_rate = mean(value, na.rm = TRUE), .groups = "drop")
cpi_m <- cpi %>%
mutate(month = floor_date(date, "month")) %>%
group_by(month) %>%
summarise(cpi = mean(value, na.rm = TRUE), .groups = "drop")
fred_full_monthly <- mortgage_m %>%
inner_join(fed_m, by = "month") %>%
inner_join(cpi_m, by = "month") %>%
arrange(month) %>%
mutate(
mortgage_rate = na.approx(mortgage_rate, na.rm = FALSE),
fed_rate = na.approx(fed_rate, na.rm = FALSE),
cpi = na.approx(cpi, na.rm = FALSE)
)
############################################################
# EXPORT
############################################################
write_csv(fred_full_monthly, "fred_full_monthly_1971_present.csv")
fred_full_monthly |> head(5) |> gt()| month | mortgage_rate | fed_rate | cpi |
|---|---|---|---|
| 1971-04-01 | 7.3100 | 4.16 | 40.1 |
| 1971-05-01 | 7.4250 | 4.63 | 40.3 |
| 1971-06-01 | 7.5300 | 4.91 | 40.5 |
| 1971-07-01 | 7.6040 | 5.31 | 40.6 |
| 1971-08-01 | 7.6975 | 5.57 | 40.7 |
############################################################
# FRED EDA — INTEREST RATES + CPI (YoY INFLATION)
############################################################
library(tidyverse)
library(scales)
library(patchwork)
# Okabe-Ito colorblind-friendly palette
COL_MORTGAGE <- "#0072B2" # blue
COL_FED <- "#D55E00" # vermillion
COL_CPI_POS <- "#D55E00" # vermillion (inflation up)
COL_CPI_NEG <- "#0072B2" # blue (deflation)
# ── PLOT 1: MORTGAGE RATE vs FED FUNDS RATE ──────────────
p1 <- fred_full_monthly |>
select(month,
`30-Yr Mortgage Rate` = mortgage_rate,
`Fed Funds Rate` = fed_rate) |>
pivot_longer(-month, names_to = "Series", values_to = "Rate") |>
ggplot(aes(x = month, y = Rate, color = Series)) +
geom_line(linewidth = 0.9) +
scale_color_manual(values = c(
"30-Yr Mortgage Rate" = COL_MORTGAGE,
"Fed Funds Rate" = COL_FED
)) +
scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
scale_y_continuous(labels = label_percent(scale = 1, accuracy = 1)) +
labs(
title = "Interest Rates Over Time",
subtitle = "30-year fixed mortgage rate closely tracks Fed policy cycles",
x = NULL, y = "Rate (%)", color = NULL
) +
theme_minimal(base_size = 13) +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 11, color = "gray40"),
axis.title = element_text(face = "bold"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()
)
# ── PLOT 2: CPI YEAR-OVER-YEAR % CHANGE ──────────────────
cpi_yoy <- fred_full_monthly |>
arrange(month) |>
mutate(
cpi_yoy = (cpi / lag(cpi, 12) - 1) * 100,
positive = cpi_yoy >= 0
) |>
filter(!is.na(cpi_yoy))
p2 <- ggplot(cpi_yoy, aes(x = month, y = cpi_yoy)) +
geom_hline(yintercept = 0, color = "gray60", linewidth = 0.5) +
geom_hline(yintercept = 2, linetype = "dashed", color = "gray50", linewidth = 0.5) +
geom_col(aes(fill = positive), width = 20, show.legend = FALSE) +
annotate("text", x = min(cpi_yoy$month), y = 2.4,
label = "2% Fed target", hjust = 0, size = 3.2, color = "gray40") +
scale_fill_manual(values = c("TRUE" = COL_CPI_POS, "FALSE" = COL_CPI_NEG)) +
scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
scale_y_continuous(labels = label_percent(scale = 1, accuracy = 1)) +
labs(
title = "Annual Inflation Rate (CPI Year-over-Year)",
subtitle = "Monthly % change vs. same month prior year; 2022 spike reflects post-COVID surge",
x = NULL, y = "YoY Change (%)"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 11, color = "gray40"),
axis.title = element_text(face = "bold"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()
)
# ── COMBINE ───────────────────────────────────────────────
p1 / p2 +
plot_annotation(
caption = "Note: Monthly FRED data. Mortgage rate averaged to monthly from weekly Freddie Mac survey. Fed Funds Rate is the effective rate. CPI inflation = YoY % change in CPIAUCSL (seasonally adjusted).",
theme = theme(
plot.caption = element_text(size = 10, color = "gray40", hjust = 0, face = "italic")
)
)############################################################
# DATASET 2: ACS-ALIGNED YEARLY FRED (CLEAN VERSION)
############################################################
library(dplyr)
library(lubridate)
start_date <- as.Date("2009-01-01")
fred_acs_ready <- fred_full_monthly %>%
filter(month >= start_date) %>%
mutate(year = year(month)) %>%
group_by(year) %>%
summarise(
# macroeconomic level indicators (core variables)
mortgage_rate = mean(mortgage_rate, na.rm = TRUE),
fed_rate = mean(fed_rate, na.rm = TRUE),
cpi = mean(cpi, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(year)
# View
head(fred_acs_ready, 5)# A tibble: 5 × 4
year mortgage_rate fed_rate cpi
<dbl> <dbl> <dbl> <dbl>
1 2009 5.04 0.16 215.
2 2010 4.69 0.175 218.
3 2011 4.46 0.102 225.
4 2012 3.66 0.14 230.
5 2013 3.98 0.108 233.
# Save
write_csv(fred_acs_ready, "fred_acs_ready_2009_present.csv")
fred_acs_ready |> head(5) |> gt()| year | mortgage_rate | fed_rate | cpi |
|---|---|---|---|
| 2009 | 5.041375 | 0.1600000 | 214.5647 |
| 2010 | 4.690583 | 0.1750000 | 218.0762 |
| 2011 | 4.455833 | 0.1016667 | 224.9230 |
| 2012 | 3.655917 | 0.1400000 | 229.5861 |
| 2013 | 3.981917 | 0.1075000 | 232.9518 |
library(tidyverse)
############################################################
# PULL HUD FMR DATA FROM GITHUB (2009–2024)
############################################################
years <- sprintf("%02d", 9:24)
base_url <- "https://raw.githubusercontent.com/NafeesKhandker/DATA-607-Final-Project/main/HUD%20dataset/"
############################################################
# FUNCTION TO READ EACH FILE
############################################################
get_hud_year <- function(yy){
file_url <- paste0(base_url, "FY", yy, "_FMRs.csv")
message("Reading: ", file_url)
df <- read_csv(file_url, show_col_types = FALSE)
df %>%
mutate(
year = as.integer(paste0("20", yy))
)
}
############################################################
# COMBINE ALL YEARS
############################################################
hud_all <- map_dfr(years, get_hud_year)
############################################################
# CLEAN COLUMN NAMES
############################################################
names(hud_all) <- names(hud_all) %>%
str_to_lower() %>%
str_replace_all(" ", "_")
############################################################
# VIEW DATA
############################################################
hud_all |> head(n = 5) |> gt()| stusps | state | countyname | pop | fmr_0 | fmr_1 | fmr_2 | fmr_3 | fmr_4 | year |
|---|---|---|---|---|---|---|---|---|---|
| NY | 36 | Albany County | 294565 | 686 | 711 | 868 | 1039 | 1135 | 2009 |
| NY | 36 | Allegany County | 49927 | 553 | 555 | 665 | 829 | 1018 | 2009 |
| NY | 36 | Bronx County | 1332650 | 1091 | 1180 | 1313 | 1615 | 1817 | 2009 |
| NY | 36 | Broome County | 200536 | 580 | 583 | 697 | 910 | 1067 | 2009 |
| NY | 36 | Cattaraugus County | 83955 | 560 | 562 | 676 | 888 | 1019 | 2009 |
############################################################
# HUD FMR — FMR BY COUNTY & BEDROOM SIZE (2024)
############################################################
library(tidyverse)
library(scales)
library(patchwork)
# ── PREP ─────────────────────────────────────────────────
hud_avg <- hud_all |>
filter(stusps == "NY", year == 2024) |>
mutate(county_label = str_remove(countyname, " County")) |>
group_by(county_label) |>
summarise(across(fmr_0:fmr_4, ~ mean(.x, na.rm = TRUE)), .groups = "drop") |>
pivot_longer(fmr_0:fmr_4, names_to = "bedroom", values_to = "avg_fmr") |>
mutate(bedroom = recode(bedroom,
fmr_0 = "Studio", fmr_1 = "1-BR", fmr_2 = "2-BR",
fmr_3 = "3-BR", fmr_4 = "4-BR"
))
county_order <- hud_avg |>
filter(bedroom == "2-BR") |>
arrange(avg_fmr) |>
pull(county_label)
hud_avg <- hud_avg |>
mutate(
county_label = factor(county_label, levels = county_order),
bedroom = factor(bedroom, levels = c("Studio","1-BR","2-BR","3-BR","4-BR")),
panel = ifelse(
as.integer(county_label) > length(county_order) / 2,
"Higher-Rent Counties", "Lower-Rent Counties"
)
)
COLOR_VALS <- c(
"Studio" = "#332288", "1-BR" = "#44AA99", "2-BR" = "#117733",
"3-BR" = "#DDCC77", "4-BR" = "#CC6677"
)
# ── SHARED PLOT FUNCTION ──────────────────────────────────
dot_panel <- function(data, show_legend = FALSE) {
ggplot(data, aes(x = avg_fmr, y = county_label, color = bedroom)) +
geom_line(aes(group = county_label), color = "gray80", linewidth = 0.5) +
geom_point(size = 2.2, alpha = 0.9) +
scale_color_manual(values = COLOR_VALS, name = "Bedroom Size") +
scale_x_continuous(
labels = label_dollar(),
expand = expansion(mult = c(0.02, 0.05))
) +
scale_y_discrete(drop = TRUE) +
facet_wrap(~ panel, scales = "free_y") +
labs(x = "Monthly FMR (USD)", y = NULL) +
theme_minimal(base_size = 11) +
theme(
legend.position = if (show_legend) "top" else "none",
legend.title = element_text(face = "bold", size = 9),
strip.text = element_text(face = "bold", size = 11),
axis.text.y = element_text(size = 8.5),
axis.title.x = element_text(face = "bold"),
axis.ticks.y = element_blank(),
panel.grid.major.y = element_line(color = "gray93"),
panel.grid.major.x = element_line(color = "gray90"),
panel.grid.minor = element_blank()
)
}
# ── BUILD TWO PANELS ──────────────────────────────────────
p_high <- dot_panel(filter(hud_avg, panel == "Higher-Rent Counties"), show_legend = TRUE)
p_low <- dot_panel(filter(hud_avg, panel == "Lower-Rent Counties"), show_legend = FALSE)
# ── COMBINE ───────────────────────────────────────────────
p_high + p_low +
plot_annotation(
title = "HUD Fair Market Rents by NY County & Bedroom Size (2024)", # changed
subtitle = "Counties ordered by 2-BR FMR (ascending); connecting line shows spread across bedroom sizes",
caption = "Note: 2024 HUD Fair Market Rents represent the 40th percentile gross rent for standard units.\nCounties with wider horizontal spans have a larger bedroom-size premium, often reflecting tighter family-sized unit supply.", # changed
theme = theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10, color = "gray40"),
plot.caption = element_text(size = 9, color = "gray40", hjust = 0, face = "italic")
)
)ggsave("hud_fmr_dotplot.png", width = 16, height = 10, dpi = 150)############################################################
# FINAL MASTER DATASET (ACS5 VERSION)
# YEARS RESTRICTED TO 2009–2024
# CLEAN FEATURE SET (LOW MULTICOLLINEARITY)
############################################################
library(tidyverse)
library(lubridate)
library(zoo)
############################################################
# 1. CLEAN HUD DATA
############################################################
hud_clean <- hud_all %>%
filter(
stusps == "NY",
year >= 2009,
year <= 2024
) %>%
mutate(
county_name = countyname %>%
str_remove(", NY") %>%
str_remove(", New York") %>%
str_trim()
) %>%
select(
county_name,
year,
pop,
fmr_0,
fmr_1,
fmr_2,
fmr_3,
fmr_4
) %>%
distinct()
############################################################
# 2. CLEAN ACS5 DATA
############################################################
acs_clean <- acs_ny_5yr %>%
filter(
year >= 2009,
year <= 2024
) %>%
distinct()
############################################################
# 3. CLEAN FRED DATA
############################################################
fred_clean <- fred_acs_ready %>%
filter(
year >= 2009,
year <= 2024
) %>%
distinct()
############################################################
# 4. BUILD FULL COUNTY-YEAR GRID
############################################################
all_counties <- sort(unique(hud_clean$county_name))
all_years <- 2009:2024
full_grid <- expand_grid(
county_name = all_counties,
year = all_years
)
############################################################
# 5. JOIN ACS5
############################################################
master1 <- full_grid %>%
left_join(acs_clean, by = c("county_name", "year"))
############################################################
# 6. JOIN HUD
############################################################
master2 <- master1 %>%
left_join(hud_clean, by = c("county_name", "year"))
############################################################
# 7. JOIN FRED
############################################################
final_master <- master2 %>%
left_join(fred_clean, by = "year")
############################################################
# 8. INFLATION RATE (FIXED)
############################################################
inflation_tbl <- fred_clean %>%
arrange(year) %>%
mutate(
inflation_rate = (cpi / lag(cpi) - 1) * 100
) %>%
select(year, inflation_rate)
final_master <- final_master %>%
select(-any_of("inflation_rate")) %>%
left_join(inflation_tbl, by = "year")
############################################################
# 9. MINIMAL & NON-REDUNDANT DERIVED FEATURES
############################################################
final_master <- final_master %>%
mutate(
# 1. Housing burden (market-based)
housing_burden = (median_rent * 12) / median_income,
# 2. HUD affordability burden (policy benchmark)
hud_burden = (fmr_2 * 12) / median_income,
# 3. Real income (CPI-adjusted purchasing power)
real_income_index = (median_income / cpi) * 100,
# 4. Housing stress from macro environment
macro_housing_pressure = housing_burden * mortgage_rate
)
############################################################
# 10. FINAL SORT
############################################################
final_master <- final_master %>%
arrange(county_name, year)
############################################################
# 11. EXPORT
############################################################
write_csv(
final_master,
"ny_housing_final_master_dataset_acs5_2009_2024.csv"
)
final_master# A tibble: 992 × 21
county_name year median_income median_home median_rent state_fips
<chr> <dbl> <dbl> <dbl> <dbl> <chr>
1 Albany County 2009 55350 192500 830 36
2 Albany County 2010 56090 202500 855 36
3 Albany County 2011 57715 207300 880 36
4 Albany County 2012 59359 210200 890 36
5 Albany County 2013 59394 209300 904 36
6 Albany County 2014 59940 208600 918 36
7 Albany County 2015 59887 208400 919 36
8 Albany County 2016 60904 211100 931 36
9 Albany County 2017 62293 214400 969 36
10 Albany County 2018 64535 218100 993 36
# ℹ 982 more rows
# ℹ 15 more variables: county_fips <chr>, pop <dbl>, fmr_0 <dbl>, fmr_1 <dbl>,
# fmr_2 <dbl>, fmr_3 <dbl>, fmr_4 <dbl>, mortgage_rate <dbl>, fed_rate <dbl>,
# cpi <dbl>, inflation_rate <dbl>, housing_burden <dbl>, hud_burden <dbl>,
# real_income_index <dbl>, macro_housing_pressure <dbl>
############################################################
# SHINY APP — NY HOUSING AFFORDABILITY
############################################################
library(shiny)
library(leaflet)
library(tigris)
library(sf)
library(tidyverse)
options(tigris_use_cache = TRUE)
ny_sf <- counties(state = "NY", cb = TRUE, year = 2020, progress_bar = FALSE) |>
st_transform(crs = 4326) |>
mutate(county_name = paste0(NAME, " County"))
metric_choices <- c(
"Median Household Income" = "median_income",
"Median Home Value" = "median_home",
"Median Monthly Rent" = "median_rent",
"FMR — Studio (0 BR)" = "fmr_0",
"FMR — 1 Bedroom" = "fmr_1",
"FMR — 2 Bedroom" = "fmr_2",
"FMR — 3 Bedroom" = "fmr_3",
"FMR — 4 Bedroom" = "fmr_4",
"Housing Burden (Rent/Income)" = "housing_burden",
"HUD Burden (FMR-2/Income)" = "hud_burden",
"Real Income Index" = "real_income_index"
)
dollar_metrics <- c("median_income","median_home","median_rent",
"fmr_0","fmr_1","fmr_2","fmr_3","fmr_4")
ratio_metrics <- c("housing_burden","hud_burden")
fmt_display <- function(m, val) {
if (is.na(val) || !is.finite(val)) return("N/A")
if (m %in% dollar_metrics)
paste0("$", formatC(val, format = "f", digits = 0, big.mark = ","))
else if (m %in% ratio_metrics)
paste0(round(val * 100, 1), "%")
else
as.character(round(val, 2))
}
# ── UI ─────────────────────────────────────────────────────
ui <- fluidPage(
tags$head(tags$style(HTML("
body, .container-fluid { margin: 0; padding: 0; }
#controls {
display: flex;
align-items: flex-end;
gap: 32px; /* more breathing room between items */
padding: 14px 24px; /* more vertical + horizontal padding */
background: #f8f9fa;
border-bottom: 2px solid #dee2e6;
flex-wrap: wrap;
}
#controls h4 {
margin: 0 16px 6px 0; /* push title away from dropdowns */
font-size: 1.05em;
font-weight: 700;
align-self: center;
white-space: nowrap;
color: #1e2d40;
}
.ctrl-group {
display: flex;
flex-direction: column;
gap: 3px; /* small gap between label and input */
}
.ctrl-group label {
font-size: 0.78em;
font-weight: 600;
color: #555;
margin: 0;
}
.form-group { margin-bottom: 0; }
.irs--shiny .irs-single { font-size: 11px; }
"))),
div(id = "controls",
tags$h4("NY Housing Affordability Explorer"),
div(class = "ctrl-group",
tags$label("Metric"),
selectInput("metric", label = NULL,
choices = metric_choices,
selected = "housing_burden",
width = "230px")
),
div(class = "ctrl-group",
tags$label("Year"),
sliderInput("year", label = NULL,
min = 2009, max = 2024, value = 2024,
step = 1, sep = "", width = "320px",
animate = animationOptions(interval = 1000, loop = FALSE))
)
),
leafletOutput("map", width = "100%", height = "calc(100vh - 85px)")
)
# ── SERVER ─────────────────────────────────────────────────
server <- function(input, output, session) {
year_data <- reactive({
final_master |>
filter(year == input$year) |>
transmute(
county_name,
pop = as.numeric(pop),
median_income = as.numeric(median_income),
median_rent = as.numeric(median_rent),
value = as.numeric(.data[[input$metric]])
)
})
map_df <- reactive({
ny_sf |> left_join(year_data(), by = "county_name")
})
output$map <- renderLeaflet({
leaflet() |>
addProviderTiles(providers$CartoDB.Positron) |>
setView(lng = -75.5, lat = 42.9, zoom = 7)
})
observe({
req(input$metric, input$year)
df <- map_df()
m <- input$metric
mlabel <- names(metric_choices)[metric_choices == m]
finite_vals <- df$value[is.finite(df$value)]
req(length(finite_vals) > 0)
pal <- colorNumeric("YlOrRd", domain = finite_vals, na.color = "#cccccc")
if (m %in% ratio_metrics) {
leg_vals <- finite_vals * 100
leg_pal <- colorNumeric("YlOrRd", domain = leg_vals, na.color = "#cccccc")
leg_title <- paste0(mlabel, " (%)<br/><small>", input$year, "</small>")
} else {
leg_vals <- finite_vals
leg_pal <- pal
leg_title <- paste0(mlabel, "<br/><small>", input$year, "</small>")
}
# ── TOOLTIPS ─────────────────────────────────────────
labels <- lapply(seq_len(nrow(df)), function(i) {
cname <- df$county_name[i]
pop <- df$pop[i]
inc <- df$median_income[i]
rent <- df$median_rent[i]
val <- df$value[i]
pop_fmt <- if (is.na(pop) || !is.finite(pop)) "N/A" else
formatC(pop, format = "f", digits = 0, big.mark = ",")
inc_fmt <- fmt_display("median_income", inc)
rent_fmt <- fmt_display("median_rent", rent)
val_fmt <- fmt_display(m, val)
htmltools::HTML(paste0(
"<div style='font-family:Arial,sans-serif; font-size:12px;",
"min-width:180px; line-height:1.5;'>",
"<div style='font-size:13px; font-weight:700; color:#1e2d40;",
"border-bottom:1px solid #ddd; padding-bottom:4px;",
"margin-bottom:6px;'>",
cname,
"</div>",
"<div style='color:#555;'>",
"<b>Population:</b> ", pop_fmt, "<br/>",
"<b>Median Income:</b> ", inc_fmt, "<br/>",
"<b>Median Rent:</b> ", rent_fmt, "<br/>",
"</div>",
"<div style='margin-top:6px; padding-top:5px;",
"border-top:1px solid #ddd; color:#1e2d40;'>",
"<b>", mlabel, ":</b> ",
"<span style='color:#c0392b; font-weight:700;'>", val_fmt, "</span>",
"</div>",
"</div>"
))
})
leafletProxy("map", session) |>
clearShapes() |>
clearControls() |>
addPolygons(
data = df,
fillColor = ~pal(value),
fillOpacity = 0.72,
color = "white",
weight = 1,
highlight = highlightOptions(
weight = 2.5,
color = "#1e2d40",
fillOpacity = 0.9,
bringToFront = TRUE
),
label = labels,
labelOptions = labelOptions(
style = list(
"background-color" = "white",
"border" = "1px solid #ccc",
"border-radius" = "6px",
"padding" = "8px",
"box-shadow" = "2px 2px 6px rgba(0,0,0,0.15)"
),
textsize = "12px",
direction = "auto",
opacity = 1
),
layerId = ~county_name
) |>
addLegend(
pal = leg_pal,
values = leg_vals,
title = leg_title,
position = "bottomright",
opacity = 0.85
)
})
}
shinyApp(ui, server)