library(tidyverse)
library(usmap)
library(viridis)
library(patchwork)
# Load data
df <- read.csv("C:/Users/mferdo2/OneDrive - Louisiana State University/Finance_PhD/Real_Estate_project/02_data/processed/broadband_annual_2018_2024.csv")

# Calculate broadband penetration: consumer connections per 100 housing units
df <- df %>%
  mutate(
    fips = sprintf("%05d", county_fips),
    bbd_penetration = (consumer_bbd / Housing_Units) * 100
  )

# Summary by year
df %>%
  group_by(year) %>%
  summarise(
    counties = n(),
    mean = round(mean(bbd_penetration, na.rm = TRUE), 1),
    median = round(median(bbd_penetration, na.rm = TRUE), 1),
    min = round(min(bbd_penetration, na.rm = TRUE), 1),
    max = round(max(bbd_penetration, na.rm = TRUE), 1)
  ) %>%
  print()
## # A tibble: 7 × 6
##    year counties  mean median   min   max
##   <int>    <int> <dbl>  <dbl> <dbl> <dbl>
## 1  2018     3234   0.1    0.1     0   0.1
## 2  2019     3234   0.1    0.1     0   0.1
## 3  2020     3234   0.1    0.1     0   0.1
## 4  2021     3234   0.1    0.1     0   0.3
## 5  2022     3236   0.1    0.1     0   0.5
## 6  2023     3236   0.1    0.1     0   0.1
## 7  2024     3236   0.1    0.1     0   0.1

Individual Maps by Year

# Create map for each year
for (yr in 2018:2024) {
  
  df_year <- df %>% filter(year == yr)
  
  p <- plot_usmap(
    data = df_year, 
    values = "bbd_penetration",
    regions = "counties",
    color = "white",
    size = 0.1
  ) +
    scale_fill_viridis_c(
      name = "Residential\nBroadband\nper 100 Homes",
      option = "plasma",
      na.value = "gray90",
      limits = c(0, 120),
      breaks = seq(0, 120, 30)
    ) +
    labs(
      title = paste0("Broadband Penetration by County (", yr, ")"),
      subtitle = "Consumer broadband connections per 100 housing units",
      caption = paste0("Data: FCC | N = ", sum(!is.na(df_year$bbd_penetration)), " counties")
    ) +
    theme(
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = 12, hjust = 0.5),
      legend.position = "right"
    )
  
  ggsave(
    filename = paste0("broadband_penetration_", yr, ".png"),
    plot = p,
    width = 12,
    height = 8,
    dpi = 300
  )
  
  print(p)
}

All Years Combined

plot_list <- list()

for (yr in 2018:2024) {
  
  df_year <- df %>% filter(year == yr)
  
  p <- plot_usmap(
    data = df_year, 
    values = "bbd_penetration",
    regions = "counties",
    color = "white",
    size = 0.05
  ) +
    scale_fill_viridis_c(
      name = "Per 100\nHomes",
      option = "plasma",
      na.value = "gray90",
      limits = c(0, 120)
    ) +
    labs(title = yr) +
    theme(
      plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
      legend.position = "right",
      legend.title = element_text(size = 9),
      legend.text = element_text(size = 8)
    )
  
  plot_list[[length(plot_list) + 1]] <- p
}

p_combined <- wrap_plots(plot_list, ncol = 3) +
  plot_annotation(
    title = "Broadband Penetration by County (2018-2024)",
    subtitle = "Consumer broadband connections per 100 housing units",
    theme = theme(
      plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = 14, hjust = 0.5)
    )
  )

ggsave(
  filename = "broadband_penetration_all_years.png",
  plot = p_combined,
  width = 18,
  height = 12,
  dpi = 300
)

print(p_combined)

National Trend

trend <- df %>%
  group_by(year) %>%
  summarise(
    mean = mean(bbd_penetration, na.rm = TRUE),
    median = median(bbd_penetration, na.rm = TRUE)
  )

ggplot(trend, aes(x = year)) +
  geom_line(aes(y = mean, color = "Mean"), linewidth = 1.5) +
  geom_line(aes(y = median, color = "Median"), linewidth = 1.5) +
  geom_point(aes(y = mean, color = "Mean"), size = 3) +
  geom_point(aes(y = median, color = "Median"), size = 3) +
  scale_color_manual(values = c("Mean" = "#e63946", "Median" = "#457b9d")) +
  scale_x_continuous(breaks = 2018:2024) +
  labs(
    title = "National Broadband Penetration Trend (2018-2024)",
    subtitle = "Consumer broadband connections per 100 housing units",
    x = "Year",
    y = "Connections per 100 Homes",
    color = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "top"
  )

ggsave("broadband_trend.png", width = 10, height = 6, dpi = 300)

Top and Bottom Counties (2024)

# Top 20
df %>%
  filter(year == 2024, !is.na(bbd_penetration)) %>%
  arrange(desc(bbd_penetration)) %>%
  select(countyname, statename, bbd_penetration, Housing_Units, consumer_bbd) %>%
  head(20) %>%
  print()
##                    countyname    statename bbd_penetration Housing_Units
## 1  Aleutians West Census Area       Alaska       0.1493652          1339
## 2      Aleutians East Borough       Alaska       0.1472754           679
## 3               Oldham County        Texas       0.1356852           737
## 4             Mellette County South Dakota       0.1277139           783
## 5             Garfield County      Montana       0.1256281           796
## 6             Ouachita Parish    Louisiana       0.1255300         71696
## 7                 Rock County     Nebraska       0.1254705           797
## 8             San Juan County     Colorado       0.1234568           810
## 9               Carter County      Montana       0.1213592           824
## 10          Rio Blanco County     Colorado       0.1202646          3326
## 11               Caddo Parish    Louisiana       0.1196968        112785
## 12            Hodgeman County       Kansas       0.1193317           838
## 13           Stonewall County        Texas       0.1184834           844
## 14                Real County        Texas       0.1183432          1690
## 15           Armstrong County        Texas       0.1169591           855
## 16               Story County         Iowa       0.1162405         42154
## 17             Briscoe County        Texas       0.1140251           877
## 18              Cottle County        Texas       0.1129944           885
## 19             Ziebach County South Dakota       0.1127396           887
## 20              Pitkin County     Colorado       0.1111029         13501
##    consumer_bbd
## 1             2
## 2             1
## 3             1
## 4             1
## 5             1
## 6            90
## 7             1
## 8             1
## 9             1
## 10            4
## 11          135
## 12            1
## 13            1
## 14            2
## 15            1
## 16           49
## 17            1
## 18            1
## 19            1
## 20           15
# Bottom 20
df %>%
  filter(year == 2024, !is.na(bbd_penetration)) %>%
  arrange(bbd_penetration) %>%
  select(countyname, statename, bbd_penetration, Housing_Units, consumer_bbd) %>%
  head(20) %>%
  print()
##              countyname            statename bbd_penetration Housing_Units
## 1          Kiowa County             Colorado               0           748
## 2  District of Columbia District of Columbia               0        367147
## 3          Baker County              Georgia               0          1455
## 4       Glascock County              Georgia               0          1417
## 5          Camas County                Idaho               0           731
## 6          Clark County                Idaho               0           490
## 7        Greeley County               Kansas               0           637
## 8        Wallace County               Kansas               0           739
## 9      Robertson County             Kentucky               0          1046
## 10     Issaquena County          Mississippi               0           445
## 11 Golden Valley County              Montana               0           475
## 12     Petroleum County              Montana               0           334
## 13       Prairie County              Montana               0           673
## 14      Treasure County              Montana               0           449
## 15        Wibaux County              Montana               0           521
## 16        Arthur County             Nebraska               0           225
## 17        Banner County             Nebraska               0           326
## 18        Blaine County             Nebraska               0           299
## 19         Hayes County             Nebraska               0           441
## 20        Hooker County             Nebraska               0           418
##    consumer_bbd
## 1             0
## 2             0
## 3             0
## 4             0
## 5             0
## 6             0
## 7             0
## 8             0
## 9             0
## 10            0
## 11            0
## 12            0
## 13            0
## 14            0
## 15            0
## 16            0
## 17            0
## 18            0
## 19            0
## 20            0

Metric Used: Consumer broadband connections per 100 housing units

Formula: (consumer_bbd / Housing_Units) × 100

Why this metric?
- Focuses on residential connections (relevant for real estate) - Housing units is the appropriate denominator for county-level real estate analysis - Simple, interpretable, and industry-standard