Final Project

Author

Khandker Qaiduzzaman

Project Proposal

This project aims to analyze the housing affordability crisis in New York State by integrating housing market data, demographic statistics, and macroeconomic indicators to understand the relationship between income levels, housing costs, and broader economic conditions. The motivation for performing this analysis is to better understand the widening gap between household incomes and housing prices/rents in New York, and to use data-driven methods to identify how affordability varies across regions and over time. This topic is important because housing costs directly impact financial stability, migration patterns, and quality of life, and combining multiple data sources provides a more complete picture than any single dataset alone. Data will be sourced from Zillow housing data (for rent and home price estimates), the U.S. Census American Community Survey 5-Year Estimates accessed via the Census API (for income, rent burden, and housing value indicators at county and state levels), and the Federal Reserve Economic Data (FRED) API (for macroeconomic variables such as interest rates, inflation, and mortgage rates). The analysis will follow a structured data science workflow involving data acquisition through APIs, data cleaning and transformation to align geographic and temporal scales, exploratory data analysis, and statistical modeling to examine affordability ratios and trends. Visualizations such as income-to-housing-cost ratios, geographic affordability comparisons, and macroeconomic trend overlays will be used to support findings, demonstrating how multi-source data integration can provide insight into housing affordability pressures in New York State.

Running Code

#install.packages("fredr")
library(tidyverse)
library(httr)
library(jsonlite)
library(tidycensus)
library(fredr)
library(lubridate)
library(zoo)
library(gt)
############################################################
# ACS 5-YEAR - MULTI-YEAR PULL (NY COUNTIES)
# CLEAN COUNTY NAMES FOR JOINING
############################################################

library(tidyverse)
library(jsonlite)

years <- 2009:2024

acs_5yr_all <- list()

for (yr in years) {
  
  url <- paste0(
    "https://api.census.gov/data/", yr, "/acs/acs5?",
    "get=NAME,B19013_001E,B25077_001E,B25064_001E&",
    "for=county:*&in=state:36"
  )
  
  try({
    
    raw <- fromJSON(url)
    
    df <- as.data.frame(raw[-1, ], stringsAsFactors = FALSE)
    colnames(df) <- raw[1, ]
    
    df <- df %>%
      mutate(year = yr) %>%
      rename(
        county_name   = NAME,
        median_income = B19013_001E,
        median_home   = B25077_001E,
        median_rent   = B25064_001E,
        state_fips    = state,
        county_fips   = county
      ) %>%
      
      # CLEAN COUNTY NAME
      mutate(
        county_name = county_name %>%
          str_remove(", New York") %>%
          str_trim()
      ) %>%
      
      # NUMERIC CONVERSION
      mutate(
        median_income = as.numeric(median_income),
        median_home   = as.numeric(median_home),
        median_rent   = as.numeric(median_rent)
      )
    
    acs_5yr_all[[as.character(yr)]] <- df
    
  }, silent = TRUE)
}

############################################################
# COMBINE ALL YEARS
############################################################

acs_ny_5yr <- bind_rows(acs_5yr_all)

############################################################
# EXPORT CSV
############################################################

write_csv(acs_ny_5yr, "acs_ny_5year_2005_2024.csv")

############################################################
# CHECK
############################################################

acs_ny_5yr |> head(5) |> gt()
county_name median_income median_home median_rent state_fips county_fips year
Albany County 55350 192500 830 36 001 2009
Allegany County 40917 65100 566 36 003 2009
Bronx County 33794 369600 885 36 005 2009
Broome County 43467 95600 598 36 007 2009
Cattaraugus County 41482 76000 576 36 009 2009
############################################################
# ACS 5-YEAR EDA — STATEWIDE TRENDS + AFFORDABILITY RATIO
############################################################
library(tidyverse)
library(scales)
library(patchwork)

# ── PREP ─────────────────────────────────────────────────
acs_clean <- acs_ny_5yr |>
  filter(median_income > 0, median_home > 0, median_rent > 0)

state_trends <- acs_clean |>
  group_by(year) |>
  summarise(
    `Median Income`       = mean(median_income, na.rm = TRUE),
    `Median Home Value`   = mean(median_home,   na.rm = TRUE),
    `Median Monthly Rent` = mean(median_rent,   na.rm = TRUE),
    .groups = "drop"
  ) |>
  pivot_longer(-year, names_to = "Metric", values_to = "Value")

# ── PLOT 1: TRENDS ───────────────────────────────────────
p1 <- ggplot(state_trends, aes(x = year, y = Value, color = Metric)) +
  geom_line(linewidth = 1.3) +
  geom_point(size = 2) +
  facet_wrap(~Metric, scales = "free_y", ncol = 1) +
  scale_color_manual(values = c(
    "Median Income"       = "#1B9E77",
    "Median Home Value"   = "#D95F02",
    "Median Monthly Rent" = "#7570B3"
  )) +
  scale_x_continuous(breaks = seq(2009, 2024, 2)) +
  scale_y_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  labs(
    title    = "NY Housing and Income Trends",
    subtitle = "State averages across all NY counties",
    x = "Year", y = "USD"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position  = "none",
    strip.text       = element_text(face = "bold", size = 11),
    plot.title       = element_text(face = "bold", size = 14),
    plot.subtitle    = element_text(size = 11, color = "gray40"),
    axis.title       = element_text(face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# ── PLOT 2: PRICE-TO-INCOME RATIO ────────────────────────
pti_summary <- acs_clean |>
  mutate(price_to_income = median_home / median_income) |>
  group_by(year) |>
  summarise(
    pti_mean = mean(price_to_income, na.rm = TRUE),
    pti_lo   = quantile(price_to_income, 0.25, na.rm = TRUE),
    pti_hi   = quantile(price_to_income, 0.75, na.rm = TRUE),
    .groups  = "drop"
  )

p2 <- ggplot(pti_summary, aes(x = year, y = pti_mean)) +
  geom_ribbon(aes(ymin = pti_lo, ymax = pti_hi), fill = "#D95F02", alpha = 0.15) +
  geom_line(color = "#D95F02", linewidth = 1.3) +
  geom_point(color = "#D95F02", size = 2) +
  geom_hline(yintercept = 3, linetype = "dashed", color = "gray50") +
  annotate("text", x = 2009, y = 3.15, label = "3× affordability threshold",
           hjust = 0, size = 3.5, color = "gray40") +
  scale_x_continuous(breaks = seq(2009, 2024, 2)) +
  scale_y_continuous(labels = label_number(suffix = "×", accuracy = 0.1)) +
  labs(
    title    = "Home Price-to-Income Ratio (2009–2024)",
    subtitle = "Mean across NY counties; shaded band = interquartile range",
    x = "Year", y = "Home Value ÷ Median Income"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title       = element_text(face = "bold", size = 14),
    plot.subtitle    = element_text(size = 11, color = "gray40"),
    axis.title       = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# ── COMBINE ──────────────────────────────────────────────
p1 + p2 +
  plot_layout(widths = c(1, 1)) +
  plot_annotation(
    caption = "Note: ACS 5-Year data averages survey responses over 5 years, producing smoother trends that reduce year-to-year volatility but may lag real-time market changes.",
    theme = theme(
      plot.caption = element_text(size = 10, color = "gray40", hjust = 0, face = "italic")
    )
  )

############################################################
# ROBUST FRED PULL (FIXES RANDOM 500 SERVER ERROR)
############################################################

library(tidyverse)
library(httr)
library(jsonlite)
library(lubridate)
library(zoo)

############################################################
# SAFE FUNCTION WITH RETRY LOGIC
############################################################

get_fred_data <- function(series_id, retries = 5, wait_sec = 10) {
  
  api_key <- Sys.getenv("FRED_API_KEY")
  
  url <- paste0(
    "https://api.stlouisfed.org/fred/series/observations?",
    "series_id=", series_id,
    "&api_key=", api_key,
    "&file_type=json"
  )
  
  for(i in 1:retries){
    
    cat("Trying", series_id, "- Attempt", i, "\n")
    
    result <- try(GET(url), silent = TRUE)
    
    if(!inherits(result, "try-error") && status_code(result) == 200){
      
      txt <- content(result, "text", encoding = "UTF-8")
      data_raw <- fromJSON(txt)
      
      df <- data_raw$observations %>%
        select(date, value) %>%
        mutate(
          date = as.Date(date),
          value = as.numeric(value)
        )
      
      return(df)
    }
    
    Sys.sleep(wait_sec)
  }
  
  stop(paste("Failed after retries:", series_id))
}

############################################################
# PULL DATA
############################################################

mortgage30 <- get_fred_data("MORTGAGE30US")
Trying MORTGAGE30US - Attempt 1 
Sys.sleep(5)

fed_funds <- get_fred_data("FEDFUNDS")
Trying FEDFUNDS - Attempt 1 
Sys.sleep(5)

cpi <- get_fred_data("CPIAUCSL")
Trying CPIAUCSL - Attempt 1 
############################################################
# MONTHLY CLEAN DATASET
############################################################

mortgage_m <- mortgage30 %>%
  mutate(month = floor_date(date, "month")) %>%
  group_by(month) %>%
  summarise(mortgage_rate = mean(value, na.rm = TRUE), .groups = "drop")

fed_m <- fed_funds %>%
  mutate(month = floor_date(date, "month")) %>%
  group_by(month) %>%
  summarise(fed_rate = mean(value, na.rm = TRUE), .groups = "drop")

cpi_m <- cpi %>%
  mutate(month = floor_date(date, "month")) %>%
  group_by(month) %>%
  summarise(cpi = mean(value, na.rm = TRUE), .groups = "drop")

fred_full_monthly <- mortgage_m %>%
  inner_join(fed_m, by = "month") %>%
  inner_join(cpi_m, by = "month") %>%
  arrange(month) %>%
  mutate(
    mortgage_rate = na.approx(mortgage_rate, na.rm = FALSE),
    fed_rate      = na.approx(fed_rate, na.rm = FALSE),
    cpi           = na.approx(cpi, na.rm = FALSE)
  )

############################################################
# EXPORT
############################################################

write_csv(fred_full_monthly, "fred_full_monthly_1971_present.csv")

fred_full_monthly |> head(5) |> gt()
month mortgage_rate fed_rate cpi
1971-04-01 7.3100 4.16 40.1
1971-05-01 7.4250 4.63 40.3
1971-06-01 7.5300 4.91 40.5
1971-07-01 7.6040 5.31 40.6
1971-08-01 7.6975 5.57 40.7
############################################################
# FRED EDA — INTEREST RATES + CPI (YoY INFLATION)
############################################################
library(tidyverse)
library(scales)
library(patchwork)

# Okabe-Ito colorblind-friendly palette
COL_MORTGAGE <- "#0072B2"   # blue
COL_FED      <- "#D55E00"   # vermillion
COL_CPI_POS  <- "#D55E00"   # vermillion (inflation up)
COL_CPI_NEG  <- "#0072B2"   # blue (deflation)

# ── PLOT 1: MORTGAGE RATE vs FED FUNDS RATE ──────────────
p1 <- fred_full_monthly |>
  select(month,
         `30-Yr Mortgage Rate` = mortgage_rate,
         `Fed Funds Rate`      = fed_rate) |>
  pivot_longer(-month, names_to = "Series", values_to = "Rate") |>
  ggplot(aes(x = month, y = Rate, color = Series)) +
  geom_line(linewidth = 0.9) +
  scale_color_manual(values = c(
    "30-Yr Mortgage Rate" = COL_MORTGAGE,
    "Fed Funds Rate"      = COL_FED
  )) +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(labels = label_percent(scale = 1, accuracy = 1)) +
  labs(
    title    = "Interest Rates Over Time",
    subtitle = "30-year fixed mortgage rate closely tracks Fed policy cycles",
    x = NULL, y = "Rate (%)", color = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position  = "bottom",
    plot.title       = element_text(face = "bold", size = 14),
    plot.subtitle    = element_text(size = 11, color = "gray40"),
    axis.title       = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# ── PLOT 2: CPI YEAR-OVER-YEAR % CHANGE ──────────────────
cpi_yoy <- fred_full_monthly |>
  arrange(month) |>
  mutate(
    cpi_yoy = (cpi / lag(cpi, 12) - 1) * 100,
    positive = cpi_yoy >= 0
  ) |>
  filter(!is.na(cpi_yoy))

p2 <- ggplot(cpi_yoy, aes(x = month, y = cpi_yoy)) +
  geom_hline(yintercept = 0, color = "gray60", linewidth = 0.5) +
  geom_hline(yintercept = 2, linetype = "dashed", color = "gray50", linewidth = 0.5) +
  geom_col(aes(fill = positive), width = 20, show.legend = FALSE) +
  annotate("text", x = min(cpi_yoy$month), y = 2.4,
           label = "2% Fed target", hjust = 0, size = 3.2, color = "gray40") +
  scale_fill_manual(values = c("TRUE" = COL_CPI_POS, "FALSE" = COL_CPI_NEG)) +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(labels = label_percent(scale = 1, accuracy = 1)) +
  labs(
    title    = "Annual Inflation Rate (CPI Year-over-Year)",
    subtitle = "Monthly % change vs. same month prior year; 2022 spike reflects post-COVID surge",
    x = NULL, y = "YoY Change (%)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title       = element_text(face = "bold", size = 14),
    plot.subtitle    = element_text(size = 11, color = "gray40"),
    axis.title       = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

# ── COMBINE ───────────────────────────────────────────────
p1 / p2 +
  plot_annotation(
    caption = "Note: Monthly FRED data. Mortgage rate averaged to monthly from weekly Freddie Mac survey. Fed Funds Rate is the effective rate. CPI inflation = YoY % change in CPIAUCSL (seasonally adjusted).",
    theme = theme(
      plot.caption = element_text(size = 10, color = "gray40", hjust = 0, face = "italic")
    )
  )

############################################################
# DATASET 2: ACS-ALIGNED YEARLY FRED (CLEAN VERSION)
############################################################

library(dplyr)
library(lubridate)

start_date <- as.Date("2009-01-01")

fred_acs_ready <- fred_full_monthly %>%
  
  filter(month >= start_date) %>%
  mutate(year = year(month)) %>%
  
  group_by(year) %>%
  summarise(
    
    # macroeconomic level indicators (core variables)
    mortgage_rate = mean(mortgage_rate, na.rm = TRUE),
    fed_rate      = mean(fed_rate, na.rm = TRUE),
    cpi           = mean(cpi, na.rm = TRUE),
    
    .groups = "drop"
  ) %>%
  
  arrange(year)

# View
head(fred_acs_ready, 5)
# A tibble: 5 × 4
   year mortgage_rate fed_rate   cpi
  <dbl>         <dbl>    <dbl> <dbl>
1  2009          5.04    0.16   215.
2  2010          4.69    0.175  218.
3  2011          4.46    0.102  225.
4  2012          3.66    0.14   230.
5  2013          3.98    0.108  233.
# Save
write_csv(fred_acs_ready, "fred_acs_ready_2009_present.csv")

fred_acs_ready |> head(5) |> gt()
year mortgage_rate fed_rate cpi
2009 5.041375 0.1600000 214.5647
2010 4.690583 0.1750000 218.0762
2011 4.455833 0.1016667 224.9230
2012 3.655917 0.1400000 229.5861
2013 3.981917 0.1075000 232.9518
library(tidyverse)

############################################################
# PULL HUD FMR DATA FROM GITHUB (2009–2024)
############################################################

years <- sprintf("%02d", 9:24)   

base_url <- "https://raw.githubusercontent.com/NafeesKhandker/DATA-607-Final-Project/main/HUD%20dataset/"

############################################################
# FUNCTION TO READ EACH FILE
############################################################

get_hud_year <- function(yy){

  file_url <- paste0(base_url, "FY", yy, "_FMRs.csv")
  
  message("Reading: ", file_url)

  df <- read_csv(file_url, show_col_types = FALSE)

  df %>%
    mutate(
      year = as.integer(paste0("20", yy))
    )
}

############################################################
# COMBINE ALL YEARS
############################################################

hud_all <- map_dfr(years, get_hud_year)

############################################################
# CLEAN COLUMN NAMES
############################################################

names(hud_all) <- names(hud_all) %>%
  str_to_lower() %>%
  str_replace_all(" ", "_")

############################################################
# VIEW DATA
############################################################

hud_all |> head(n = 5) |> gt()
stusps state countyname pop fmr_0 fmr_1 fmr_2 fmr_3 fmr_4 year
NY 36 Albany County 294565 686 711 868 1039 1135 2009
NY 36 Allegany County 49927 553 555 665 829 1018 2009
NY 36 Bronx County 1332650 1091 1180 1313 1615 1817 2009
NY 36 Broome County 200536 580 583 697 910 1067 2009
NY 36 Cattaraugus County 83955 560 562 676 888 1019 2009
############################################################
# HUD FMR — FMR BY COUNTY & BEDROOM SIZE (2024)
############################################################
library(tidyverse)
library(scales)
library(patchwork)

# ── PREP ─────────────────────────────────────────────────
hud_avg <- hud_all |>
  filter(stusps == "NY", year == 2024) |>                   
  mutate(county_label = str_remove(countyname, " County")) |>
  group_by(county_label) |>
  summarise(across(fmr_0:fmr_4, ~ mean(.x, na.rm = TRUE)), .groups = "drop") |>
  pivot_longer(fmr_0:fmr_4, names_to = "bedroom", values_to = "avg_fmr") |>
  mutate(bedroom = recode(bedroom,
    fmr_0 = "Studio", fmr_1 = "1-BR", fmr_2 = "2-BR",
    fmr_3 = "3-BR",   fmr_4 = "4-BR"
  ))

county_order <- hud_avg |>
  filter(bedroom == "2-BR") |>
  arrange(avg_fmr) |>
  pull(county_label)

hud_avg <- hud_avg |>
  mutate(
    county_label = factor(county_label, levels = county_order),
    bedroom      = factor(bedroom, levels = c("Studio","1-BR","2-BR","3-BR","4-BR")),
    panel        = ifelse(
      as.integer(county_label) > length(county_order) / 2,
      "Higher-Rent Counties", "Lower-Rent Counties"
    )
  )

COLOR_VALS <- c(
  "Studio" = "#332288", "1-BR" = "#44AA99", "2-BR" = "#117733",
  "3-BR"   = "#DDCC77", "4-BR" = "#CC6677"
)

# ── SHARED PLOT FUNCTION ──────────────────────────────────
dot_panel <- function(data, show_legend = FALSE) {
  ggplot(data, aes(x = avg_fmr, y = county_label, color = bedroom)) +
    geom_line(aes(group = county_label), color = "gray80", linewidth = 0.5) +
    geom_point(size = 2.2, alpha = 0.9) +
    scale_color_manual(values = COLOR_VALS, name = "Bedroom Size") +
    scale_x_continuous(
      labels = label_dollar(),
      expand = expansion(mult = c(0.02, 0.05))
    ) +
    scale_y_discrete(drop = TRUE) +
    facet_wrap(~ panel, scales = "free_y") +
    labs(x = "Monthly FMR (USD)", y = NULL) +
    theme_minimal(base_size = 11) +
    theme(
      legend.position    = if (show_legend) "top" else "none",
      legend.title       = element_text(face = "bold", size = 9),
      strip.text         = element_text(face = "bold", size = 11),
      axis.text.y        = element_text(size = 8.5),
      axis.title.x       = element_text(face = "bold"),
      axis.ticks.y       = element_blank(),
      panel.grid.major.y = element_line(color = "gray93"),
      panel.grid.major.x = element_line(color = "gray90"),
      panel.grid.minor   = element_blank()
    )
}

# ── BUILD TWO PANELS ──────────────────────────────────────
p_high <- dot_panel(filter(hud_avg, panel == "Higher-Rent Counties"), show_legend = TRUE)
p_low  <- dot_panel(filter(hud_avg, panel == "Lower-Rent Counties"),  show_legend = FALSE)

# ── COMBINE ───────────────────────────────────────────────
p_high + p_low +
  plot_annotation(
    title    = "HUD Fair Market Rents by NY County & Bedroom Size (2024)",          # changed
    subtitle = "Counties ordered by 2-BR FMR (ascending); connecting line shows spread across bedroom sizes",
    caption  = "Note: 2024 HUD Fair Market Rents represent the 40th percentile gross rent for standard units.\nCounties with wider horizontal spans have a larger bedroom-size premium, often reflecting tighter family-sized unit supply.",  # changed
    theme = theme(
      plot.title    = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(size = 10, color = "gray40"),
      plot.caption  = element_text(size = 9, color = "gray40", hjust = 0, face = "italic")
    )
  )

ggsave("hud_fmr_dotplot.png", width = 16, height = 10, dpi = 150)
############################################################
# FINAL MASTER DATASET (ACS5 VERSION)
# YEARS RESTRICTED TO 2009–2024
# CLEAN FEATURE SET (LOW MULTICOLLINEARITY)
############################################################

library(tidyverse)
library(lubridate)
library(zoo)

############################################################
# 1. CLEAN HUD DATA
############################################################

hud_clean <- hud_all %>%
  filter(
    stusps == "NY",
    year >= 2009,
    year <= 2024
  ) %>%
  mutate(
    county_name = countyname %>%
      str_remove(", NY") %>%
      str_remove(", New York") %>%
      str_trim()
  ) %>%
  select(
    county_name,
    year,
    pop,
    fmr_0,
    fmr_1,
    fmr_2,
    fmr_3,
    fmr_4
  ) %>%
  distinct()

############################################################
# 2. CLEAN ACS5 DATA
############################################################

acs_clean <- acs_ny_5yr %>%
  filter(
    year >= 2009,
    year <= 2024
  ) %>%
  distinct()

############################################################
# 3. CLEAN FRED DATA
############################################################

fred_clean <- fred_acs_ready %>%
  filter(
    year >= 2009,
    year <= 2024
  ) %>%
  distinct()

############################################################
# 4. BUILD FULL COUNTY-YEAR GRID
############################################################

all_counties <- sort(unique(hud_clean$county_name))
all_years    <- 2009:2024

full_grid <- expand_grid(
  county_name = all_counties,
  year = all_years
)

############################################################
# 5. JOIN ACS5
############################################################

master1 <- full_grid %>%
  left_join(acs_clean, by = c("county_name", "year"))

############################################################
# 6. JOIN HUD
############################################################

master2 <- master1 %>%
  left_join(hud_clean, by = c("county_name", "year"))

############################################################
# 7. JOIN FRED
############################################################

final_master <- master2 %>%
  left_join(fred_clean, by = "year")

############################################################
# 8. INFLATION RATE (FIXED)
############################################################

inflation_tbl <- fred_clean %>%
  arrange(year) %>%
  mutate(
    inflation_rate = (cpi / lag(cpi) - 1) * 100
  ) %>%
  select(year, inflation_rate)

final_master <- final_master %>%
  select(-any_of("inflation_rate")) %>%
  left_join(inflation_tbl, by = "year")

############################################################
# 9. MINIMAL & NON-REDUNDANT DERIVED FEATURES
############################################################

final_master <- final_master %>%
  mutate(
    
    # 1. Housing burden (market-based)
    housing_burden = (median_rent * 12) / median_income,
    
    # 2. HUD affordability burden (policy benchmark)
    hud_burden = (fmr_2 * 12) / median_income,
    
    # 3. Real income (CPI-adjusted purchasing power)
    real_income_index = (median_income / cpi) * 100,
    
    # 4. Housing stress from macro environment
    macro_housing_pressure = housing_burden * mortgage_rate
  )

############################################################
# 10. FINAL SORT
############################################################

final_master <- final_master %>%
  arrange(county_name, year)

############################################################
# 11. EXPORT
############################################################

write_csv(
  final_master,
  "ny_housing_final_master_dataset_acs5_2009_2024.csv"
)

final_master
# A tibble: 992 × 21
   county_name    year median_income median_home median_rent state_fips
   <chr>         <dbl>         <dbl>       <dbl>       <dbl> <chr>     
 1 Albany County  2009         55350      192500         830 36        
 2 Albany County  2010         56090      202500         855 36        
 3 Albany County  2011         57715      207300         880 36        
 4 Albany County  2012         59359      210200         890 36        
 5 Albany County  2013         59394      209300         904 36        
 6 Albany County  2014         59940      208600         918 36        
 7 Albany County  2015         59887      208400         919 36        
 8 Albany County  2016         60904      211100         931 36        
 9 Albany County  2017         62293      214400         969 36        
10 Albany County  2018         64535      218100         993 36        
# ℹ 982 more rows
# ℹ 15 more variables: county_fips <chr>, pop <dbl>, fmr_0 <dbl>, fmr_1 <dbl>,
#   fmr_2 <dbl>, fmr_3 <dbl>, fmr_4 <dbl>, mortgage_rate <dbl>, fed_rate <dbl>,
#   cpi <dbl>, inflation_rate <dbl>, housing_burden <dbl>, hud_burden <dbl>,
#   real_income_index <dbl>, macro_housing_pressure <dbl>
############################################################
# SHINY APP — NY HOUSING AFFORDABILITY
############################################################
library(shiny)
library(leaflet)
library(tigris)
library(sf)
library(tidyverse)

options(tigris_use_cache = TRUE)

ny_sf <- counties(state = "NY", cb = TRUE, year = 2020, progress_bar = FALSE) |>
  st_transform(crs = 4326) |>
  mutate(county_name = paste0(NAME, " County"))

metric_choices <- c(
  "Median Household Income"      = "median_income",
  "Median Home Value"            = "median_home",
  "Median Monthly Rent"          = "median_rent",
  "FMR — Studio (0 BR)"          = "fmr_0",
  "FMR — 1 Bedroom"              = "fmr_1",
  "FMR — 2 Bedroom"              = "fmr_2",
  "FMR — 3 Bedroom"              = "fmr_3",
  "FMR — 4 Bedroom"              = "fmr_4",
  "Housing Burden (Rent/Income)" = "housing_burden",
  "HUD Burden (FMR-2/Income)"    = "hud_burden",
  "Real Income Index"            = "real_income_index"
)

dollar_metrics <- c("median_income","median_home","median_rent",
                    "fmr_0","fmr_1","fmr_2","fmr_3","fmr_4")
ratio_metrics  <- c("housing_burden","hud_burden")

fmt_display <- function(m, val) {
  if (is.na(val) || !is.finite(val)) return("N/A")
  if (m %in% dollar_metrics)
    paste0("$", formatC(val, format = "f", digits = 0, big.mark = ","))
  else if (m %in% ratio_metrics)
    paste0(round(val * 100, 1), "%")
  else
    as.character(round(val, 2))
}

# ── UI ─────────────────────────────────────────────────────
ui <- fluidPage(
  tags$head(tags$style(HTML("
    body, .container-fluid { margin: 0; padding: 0; }

    #controls {
      display: flex;
      align-items: flex-end;
      gap: 32px;                          /* more breathing room between items */
      padding: 14px 24px;                 /* more vertical + horizontal padding */
      background: #f8f9fa;
      border-bottom: 2px solid #dee2e6;
      flex-wrap: wrap;
    }

    #controls h4 {
      margin: 0 16px 6px 0;              /* push title away from dropdowns */
      font-size: 1.05em;
      font-weight: 700;
      align-self: center;
      white-space: nowrap;
      color: #1e2d40;
    }

    .ctrl-group {
      display: flex;
      flex-direction: column;
      gap: 3px;                           /* small gap between label and input */
    }

    .ctrl-group label {
      font-size: 0.78em;
      font-weight: 600;
      color: #555;
      margin: 0;
    }

    .form-group { margin-bottom: 0; }
    .irs--shiny .irs-single { font-size: 11px; }
  "))),

  div(id = "controls",
    tags$h4("NY Housing Affordability Explorer"),
    div(class = "ctrl-group",
      tags$label("Metric"),
      selectInput("metric", label = NULL,
                  choices  = metric_choices,
                  selected = "housing_burden",
                  width    = "230px")
    ),
    div(class = "ctrl-group",
      tags$label("Year"),
      sliderInput("year", label = NULL,
                  min = 2009, max = 2024, value = 2024,
                  step = 1, sep = "", width = "320px",
                  animate = animationOptions(interval = 1000, loop = FALSE))
    )
  ),

  leafletOutput("map", width = "100%", height = "calc(100vh - 85px)")
)

# ── SERVER ─────────────────────────────────────────────────
server <- function(input, output, session) {

  year_data <- reactive({
    final_master |>
      filter(year == input$year) |>
      transmute(
        county_name,
        pop           = as.numeric(pop),
        median_income = as.numeric(median_income),
        median_rent   = as.numeric(median_rent),
        value         = as.numeric(.data[[input$metric]])
      )
  })

  map_df <- reactive({
    ny_sf |> left_join(year_data(), by = "county_name")
  })

  output$map <- renderLeaflet({
    leaflet() |>
      addProviderTiles(providers$CartoDB.Positron) |>
      setView(lng = -75.5, lat = 42.9, zoom = 7)
  })

  observe({
    req(input$metric, input$year)

    df     <- map_df()
    m      <- input$metric
    mlabel <- names(metric_choices)[metric_choices == m]

    finite_vals <- df$value[is.finite(df$value)]
    req(length(finite_vals) > 0)

    pal <- colorNumeric("YlOrRd", domain = finite_vals, na.color = "#cccccc")

    if (m %in% ratio_metrics) {
      leg_vals  <- finite_vals * 100
      leg_pal   <- colorNumeric("YlOrRd", domain = leg_vals, na.color = "#cccccc")
      leg_title <- paste0(mlabel, " (%)<br/><small>", input$year, "</small>")
    } else {
      leg_vals  <- finite_vals
      leg_pal   <- pal
      leg_title <- paste0(mlabel, "<br/><small>", input$year, "</small>")
    }

    # ── TOOLTIPS ─────────────────────────────────────────
    labels <- lapply(seq_len(nrow(df)), function(i) {
      cname <- df$county_name[i]
      pop   <- df$pop[i]
      inc   <- df$median_income[i]
      rent  <- df$median_rent[i]
      val   <- df$value[i]

      pop_fmt  <- if (is.na(pop)  || !is.finite(pop))  "N/A" else
                    formatC(pop, format = "f", digits = 0, big.mark = ",")
      inc_fmt  <- fmt_display("median_income", inc)
      rent_fmt <- fmt_display("median_rent",   rent)
      val_fmt  <- fmt_display(m, val)

      htmltools::HTML(paste0(
        "<div style='font-family:Arial,sans-serif; font-size:12px;",
                     "min-width:180px; line-height:1.5;'>",
          "<div style='font-size:13px; font-weight:700; color:#1e2d40;",
                      "border-bottom:1px solid #ddd; padding-bottom:4px;",
                      "margin-bottom:6px;'>",
            cname,
          "</div>",
          "<div style='color:#555;'>",
            "<b>Population:</b> ",    pop_fmt,  "<br/>",
            "<b>Median Income:</b> ", inc_fmt,  "<br/>",
            "<b>Median Rent:</b> ",   rent_fmt, "<br/>",
          "</div>",
          "<div style='margin-top:6px; padding-top:5px;",
                      "border-top:1px solid #ddd; color:#1e2d40;'>",
            "<b>", mlabel, ":</b> ",
            "<span style='color:#c0392b; font-weight:700;'>", val_fmt, "</span>",
          "</div>",
        "</div>"
      ))
    })

    leafletProxy("map", session) |>
      clearShapes() |>
      clearControls() |>
      addPolygons(
        data         = df,
        fillColor    = ~pal(value),
        fillOpacity  = 0.72,
        color        = "white",
        weight       = 1,
        highlight    = highlightOptions(
          weight       = 2.5,
          color        = "#1e2d40",
          fillOpacity  = 0.9,
          bringToFront = TRUE
        ),
        label        = labels,
        labelOptions = labelOptions(
          style     = list(
            "background-color" = "white",
            "border"           = "1px solid #ccc",
            "border-radius"    = "6px",
            "padding"          = "8px",
            "box-shadow"       = "2px 2px 6px rgba(0,0,0,0.15)"
          ),
          textsize  = "12px",
          direction = "auto",
          opacity   = 1
        ),
        layerId = ~county_name
      ) |>
      addLegend(
        pal      = leg_pal,
        values   = leg_vals,
        title    = leg_title,
        position = "bottomright",
        opacity  = 0.85
      )
  })
}

shinyApp(ui, server)