SIFEA <- read_excel("/Users/ryan/Desktop/Winterdatachallenge/SIFEA.xlsx")
## New names:
## • `Score` -> `Score...2`
## • `Decile` -> `Decile...3`
## • `Score` -> `Score...4`
## • `Decile` -> `Decile...5`
## • `Score` -> `Score...6`
## • `Decile` -> `Decile...7`
## • `Score` -> `Score...8`
## • `Decile` -> `Decile...9`
bus_trip_counts <- read.csv("/Users/ryan/Desktop/Winterdatachallenge/WDAC_2025_data/busTripcounts.csv", stringsAsFactors = FALSE)
transport_routes <- read.csv("/Users/ryan/Desktop/Winterdatachallenge/WDAC_2025_data/routes_1.csv")
location_covid_cases <- read.csv("/Users/ryan/Desktop/Winterdatachallenge/WDAC_2025_data/confirmed_cases_table1_location_agg.csv")
AusPostCodes <- read.csv("/Users/ryan/Desktop/Winterdatachallenge/WDAC_2025_data/australian_postcodes.csv")
opal_patronage_path <- "/Users/ryan/Desktop/Winterdatachallenge/WDAC_2025_data/OpalPatronage"

nsw_lga_shapes <- nswgeo::lga_nsw
lga_to_ti_region_lookup <- tribble(
  ~lga_name,               ~ti_region,
  # --- Key Commercial Hubs ---
  "Sydney",                "Sydney CBD",
  "North Sydney",          "North Sydney",
  "Parramatta",            "Parramatta",
  "Strathfield",           "Strathfield",
  "Burwood",               "Strathfield",
  "Ryde",                  "Macquarie Park",
  "Willoughby",            "Chatswood",
  "Newcastle",             "Newcastle and surrounds",
  "Lake Macquarie",        "Newcastle and surrounds",
  "Cessnock",              "Newcastle and surrounds",
  "Maitland",              "Newcastle and surrounds",
  "Wollongong",            "Wollongong and surrounds",
  "Shellharbour",          "Wollongong and surrounds"
)

bus_trip_counts <- bus_trip_counts %>%
  mutate(
    Time.period = str_trim(Time.period),
    Time.period = dmy(paste0("01-", Time.period)),
    Trip = str_trim(Trip),
    Trip = na_if(Trip, ""),
    Trip = na_if(Trip, "N/A"),
    Trip = na_if(Trip, "unknown"),
    Trip = as.numeric(Trip)
  ) %>%
  filter(Time.period > as.Date("2019-12-01")) %>%
  filter(!is.na(Trip))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Trip = as.numeric(Trip)`.
## Caused by warning:
## ! NAs introduced by coercion
AusPostCodes <- AusPostCodes %>%
  mutate(postcode = as.character(postcode))

full_opal_patronage <-list.files(path = opal_patronage_path, pattern = "\\.txt$", full.names = TRUE, recursive = TRUE)
opal_patronage_data <- map_dfr(full_opal_patronage, ~ read_delim(., delim = "|", col_types = cols(.default = "c")))
location_covid_cases <- left_join(location_covid_cases, AusPostCodes, by = "postcode", relationship = "many-to-many") %>%
  mutate(notification_date = as.Date(notification_date))%>%
  group_by(postcode, lat, long, locality) %>%
  summarise(total_confirmed_cases = sum(confirmed_cases_count, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'postcode', 'lat', 'long'. You can override
## using the `.groups` argument.
SIFEAL <- anti_join(SIFEA, AusPostCodes, by = "postcode") 

summary_bus_trip_counts <- bus_trip_counts %>%
  group_by(Time.period, Card.Type) %>%
  summarise(TotalTrips = sum(Trip), .groups = "drop")

routes_clean <- transport_routes %>%
  select(operator_name,operator_id, my_timetable_route_name,route_variant_type,transport_name,mot_for_interchange,
    route_search_name,
    service_direction_name,
    start_date,
    gtfs_route_id_in
  )
postcode_to_lga_lookup <- AusPostCodes %>%
  filter(state == "NSW") %>%
  select(postcode, lga_name = lgaregion) %>% 
  mutate(lga_name = str_remove(lga_name, " \\(C\\)| \\(A\\)| \\(NSW\\)")) %>%
  distinct(postcode, .keep_all = TRUE)

regional_covid_data <- location_covid_cases %>%
  group_by(postcode) %>%
  summarise(total_cases = sum(total_confirmed_cases, na.rm = TRUE), .groups = "drop") %>%
  left_join(postcode_to_lga_lookup, by = "postcode") %>%
  left_join(lga_to_ti_region_lookup, by = "lga_name") %>%
  filter(!is.na(ti_region)) %>%
  group_by(ti_region) %>%
  summarise(total_cases = sum(total_cases, na.rm = TRUE), .groups = "drop")

1 The traditional 9-to-5 commute has fractured post-COVID, with higher socio-economic regions showing the most significant declines in peak-hour public transport use.

Key questions explored:

ggplot(summary_bus_trip_counts, aes(x = Time.period, y = TotalTrips, fill = Card.Type)) + 
  geom_bar(stat = "identity") + 
  labs(title = "Annual Bus Trips by Card Type (2020–2025)",
       x = "Year", y = "Total Trips") +
  theme_minimal() +
  theme(legend.position = "right")

pre_covid_start <- ymd("2020-01-01")
pre_covid_end <- ymd("2020-03-12")

post_covid_start <- ymd("2025-01-01")
post_covid_end <- ymd("2025-07-01")


hourly_summary <- opal_patronage_data %>%
  mutate(
    trip_origin_date = ymd(trip_origin_date),
    tap_hour = as.numeric(tap_hour),
    Tap_Ons = as.numeric(Tap_Ons)
  ) %>%
  mutate(Period = case_when(
    trip_origin_date >= pre_covid_start & trip_origin_date <= pre_covid_end ~ "Pre-COVID (Jan-Mar 2020)",
    trip_origin_date >= post_covid_start & trip_origin_date <= post_covid_end ~ "Post-COVID (Jan-Jul 2025)",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(Period)) %>%
  mutate(weekday = wday(trip_origin_date, label = TRUE, week_start = 1)) %>%
  filter(!weekday %in% c("Sat", "Sun")) %>%
  group_by(Period, ti_region, tap_hour) %>%
  summarise(
    average_tap_ons = mean(Tap_Ons, na.rm = TRUE),
    .groups = 'drop'
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Tap_Ons = as.numeric(Tap_Ons)`.
## Caused by warning:
## ! NAs introduced by coercion
head(hourly_summary)
## # A tibble: 6 × 4
##   Period                    ti_region tap_hour average_tap_ons
##   <chr>                     <chr>        <dbl>           <dbl>
## 1 Post-COVID (Jan-Jul 2025) All - NSW        0           1257.
## 2 Post-COVID (Jan-Jul 2025) All - NSW        1            832.
## 3 Post-COVID (Jan-Jul 2025) All - NSW        2            569.
## 4 Post-COVID (Jan-Jul 2025) All - NSW        3            712.
## 5 Post-COVID (Jan-Jul 2025) All - NSW        4           4461.
## 6 Post-COVID (Jan-Jul 2025) All - NSW        5          10120.
base_data <- opal_patronage_data %>%
  mutate(trip_origin_date = ymd(trip_origin_date),tap_hour = as.numeric(tap_hour),Tap_Ons = as.numeric(Tap_Ons)) %>%
  mutate(Period = case_when(
    trip_origin_date >= pre_covid_start & trip_origin_date <= pre_covid_end ~ "Pre-COVID",
    trip_origin_date >= post_covid_start & trip_origin_date <= post_covid_end ~ "Post-COVID",
    TRUE ~ NA_character_)) %>%
  filter(!is.na(Period)) %>%
  mutate(weekday = wday(trip_origin_date, label = TRUE, week_start = 1)) %>%
  filter(!weekday %in% c("Sat", "Sun"))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Tap_Ons = as.numeric(Tap_Ons)`.
## Caused by warning:
## ! NAs introduced by coercion
peak_summary <- base_data %>%
  mutate(Peak_Type = case_when(tap_hour %in% 7:9 ~ "Morning Peak (7-9am)",tap_hour %in% 16:18 ~ "Afternoon Peak (4-6pm)",TRUE ~ "Off-Peak")) %>%
  group_by(Period, ti_region, Peak_Type) %>%
  summarise(Total_Taps = sum(Tap_Ons, na.rm = TRUE),Num_Days = n_distinct(trip_origin_date),.groups = 'drop') %>%
  mutate(Average_Daily_Taps = Total_Taps / Num_Days)

overall_summary <- base_data %>%
  group_by(Period, ti_region) %>%
  summarise(Total_Taps = sum(Tap_Ons, na.rm = TRUE),Num_Days = n_distinct(trip_origin_date),.groups = 'drop') %>%
  mutate(Average_Daily_Taps = Total_Taps / Num_Days) %>%
  mutate(Peak_Type = "Overall Day")

final_summary <- bind_rows(peak_summary, overall_summary) %>%
  select(ti_region, Period, Peak_Type, Average_Daily_Taps)

percentage_change_table <- final_summary %>%
  pivot_wider(names_from = Period,values_from = Average_Daily_Taps) %>%
  filter(!is.na(`Pre-COVID`) & !is.na(`Post-COVID`)) %>%
  mutate(pct_change = (`Post-COVID` - `Pre-COVID`) / `Pre-COVID` * 100) %>%
  select(ti_region, Peak_Type, `Pre-COVID`, `Post-COVID`, pct_change) %>%
  arrange(ti_region, Peak_Type)


print(percentage_change_table, n = 40)
## # A tibble: 40 × 5
##    ti_region                Peak_Type        `Pre-COVID` `Post-COVID` pct_change
##    <chr>                    <chr>                  <dbl>        <dbl>      <dbl>
##  1 All - NSW                Afternoon Peak …     584456.      498652.     -14.7 
##  2 All - NSW                Morning Peak (7…     598006.      508615.     -14.9 
##  3 All - NSW                Off-Peak            1046952.     1012403.      -3.30
##  4 All - NSW                Overall Day         2229413.     2019669.      -9.41
##  5 Chatswood                Afternoon Peak …      12123.       10701.     -11.7 
##  6 Chatswood                Morning Peak (7…       7267.        7088.      -2.47
##  7 Chatswood                Off-Peak              17206.       19174.      11.4 
##  8 Chatswood                Overall Day           36596.       36962.       1.00
##  9 Macquarie Park           Afternoon Peak …      11942.       10970       -8.14
## 10 Macquarie Park           Morning Peak (7…       5267.        6608.      25.5 
## 11 Macquarie Park           Off-Peak              12196.       15142.      24.2 
## 12 Macquarie Park           Overall Day           29406.       32721.      11.3 
## 13 Newcastle and surrounds  Afternoon Peak …       2240.        1425.     -36.4 
## 14 Newcastle and surrounds  Morning Peak (7…       1496.        1005.     -32.8 
## 15 Newcastle and surrounds  Off-Peak               3723.        2726.     -26.8 
## 16 Newcastle and surrounds  Overall Day            7460.        5156.     -30.9 
## 17 North Sydney             Afternoon Peak …      25100        15669.     -37.6 
## 18 North Sydney             Morning Peak (7…       8744.        6773.     -22.5 
## 19 North Sydney             Off-Peak              20204.       16967.     -16.0 
## 20 North Sydney             Overall Day           54048.       39409.     -27.1 
## 21 Other                    Afternoon Peak …     270569.      251583.      -7.02
## 22 Other                    Morning Peak (7…     495410.      416847.     -15.9 
## 23 Other                    Off-Peak             697423.      667628.      -4.27
## 24 Other                    Overall Day         1463402.     1336058.      -8.70
## 25 Parramatta               Afternoon Peak …      25215.       20563.     -18.5 
## 26 Parramatta               Morning Peak (7…      17027.       13328.     -21.7 
## 27 Parramatta               Off-Peak              32738.       32072.      -2.04
## 28 Parramatta               Overall Day           74981.       65962.     -12.0 
## 29 Strathfield              Afternoon Peak …       6173.        5886.      -4.65
## 30 Strathfield              Morning Peak (7…       9465.        7676.     -18.9 
## 31 Strathfield              Off-Peak              14519.       14195.      -2.23
## 32 Strathfield              Overall Day           30158.       27758.      -7.96
## 33 Sydney CBD               Afternoon Peak …     229677.      178930      -22.1 
## 34 Sydney CBD               Morning Peak (7…      51590.       46115.     -10.6 
## 35 Sydney CBD               Off-Peak             243585.      234858.      -3.58
## 36 Sydney CBD               Overall Day          524852.      459903.     -12.4 
## 37 Wollongong and surrounds Afternoon Peak …       1415.        2925.     107.  
## 38 Wollongong and surrounds Morning Peak (7…       1738.        3175.      82.6 
## 39 Wollongong and surrounds Off-Peak               5358.        9641.      79.9 
## 40 Wollongong and surrounds Overall Day            8512.       15740       84.9
plot_data <- percentage_change_table %>%
  mutate(Peak_Type = factor(Peak_Type, levels = c("Morning Peak (7-9am)", "Afternoon Peak (4-6pm)", "Off-Peak", "Overall Day")))

ggplot(data = plot_data, mapping = aes(x = pct_change, y = fct_reorder(ti_region, pct_change), fill = Peak_Type)) +
  geom_col(position = "dodge") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +

  labs(
    title = "The Uneven Recovery of Sydney's Commute",
    subtitle = "Percentage Change in Average Weekday Patronage (Pre vs. Post-COVID)",
    x = "Percentage Change (%)",
    y = "Transport Region",
    fill = "Time Period"
  ) +
  theme_minimal()

ggplot(data = hourly_summary, aes(x = tap_hour, y = average_tap_ons, color = Period, group = Period)) +
  geom_line(linewidth = 1) +
  geom_point() +
  facet_wrap(~ ti_region, scales = "free_y", ncol = 3) +
  geom_vline(xintercept = 7, linetype = "dashed", color = "grey60") +
  geom_vline(xintercept = 9, linetype = "dashed", color = "grey60") +
  geom_vline(xintercept = 16, linetype = "dashed", color = "grey30") +
  geom_vline(xintercept = 18, linetype = "dashed", color = "grey30") +
  scale_color_manual(values = c(
    "Pre-COVID (Jan-Mar 2020)" = "#0072B2",
    "Post-COVID (Jan-Jul 2025)" = "#D55E00" )) +
  
  scale_x_continuous(breaks = seq(0, 23, by = 4)) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(face = "bold", size = 10)
  ) +
  labs(
    title = "The Fracturing of the 9-to-5 Commute Across Sydney",
    subtitle = "Comparing Average Weekday Hourly Tap-Ons by Transport Region",
    x = "Hour of the Day",
    y = "Average Tap-Ons",
    color = "Period" 
  )
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).

seifa_filepath <- "/Users/ryan/Desktop/Winterdatachallenge/SIFEA.xlsx"
regional_socio_economic_data <- read_excel(seifa_filepath, sheet = 1) %>%
  select(
    postcode,
    seifa_score = `Score...2`, 
    population = `Usual Resident Population`
  ) %>%
  mutate(postcode = as.character(postcode), across(c(seifa_score, population), as.numeric)) %>%
  filter(!is.na(seifa_score) & !is.na(population)) %>%
  left_join(postcode_to_lga_lookup, by = "postcode") %>%
  left_join(lga_to_ti_region_lookup, by = "lga_name") %>%
  filter(!is.na(ti_region)) %>%
  group_by(ti_region) %>%
  summarise(
    avg_seifa_score = weighted.mean(seifa_score, w = population, na.rm = TRUE),
    total_population = sum(population, na.rm = TRUE),
    .groups = "drop"  )
## New names:
## • `Score` -> `Score...2`
## • `Decile` -> `Decile...3`
## • `Score` -> `Score...4`
## • `Decile` -> `Decile...5`
## • `Score` -> `Score...6`
## • `Decile` -> `Decile...7`
## • `Score` -> `Score...8`
## • `Decile` -> `Decile...9`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(c(seifa_score, population), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
final_analysis_df <- percentage_change_table %>%
  filter(Peak_Type == "Morning Peak (7-9am)") %>%
  filter(ti_region != "Other") %>%
  left_join(regional_socio_economic_data, by = "ti_region") %>%
  left_join(regional_covid_data, by = "ti_region") %>%
  mutate(
    cases_per_10k_people = (total_cases / total_population) * 10000
  ) %>%
  select(
    ti_region,
    pct_change,
    avg_seifa_score,
    cases_per_10k_people,
    total_population
  )

vars_to_correlate <- c("pct_change", "avg_seifa_score", "cases_per_10k_people")

correlation_data <- final_analysis_df %>%
  filter(if_all(all_of(vars_to_correlate), is.finite)) %>%
  select(all_of(vars_to_correlate))

if (nrow(correlation_data) > 3) {
  corr_matrix <- round(cor(correlation_data), 2)
  
  ggcorrplot(
    corr_matrix, 
    hc.order = TRUE,
    type = "lower",
    lab = TRUE,
    lab_size = 4,
    colors = c("#D55E00", "white", "#0072B2"),
    title = "Correlation of Commuting Shift Drivers",
    ggtheme = theme_minimal(base_size = 14)
  )
} else {
  message("Correlation analysis cancelled: Not enough complete data rows.")
}

ggplot(
  data = final_analysis_df %>% filter(is.finite(avg_seifa_score) & is.finite(pct_change)), 
  mapping = aes(x = avg_seifa_score, y = pct_change)
) +
  geom_point(aes(color = ti_region, size = total_population), alpha = 0.8) + 
  geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "dashed") + 
  ggrepel::geom_text_repel(aes(label = ti_region), size = 3.5) +
  labs(
    title = "The Socio-Economic Divide in Sydney's Commute Recovery",
    subtitle = "Higher advantage regions saw a much larger drop in morning peak patronage",
    x = "Average Socio-Economic Score (SEIFA)",
    y = "Percentage Change in Morning Peak Patronage (%)",
    color = "Transport Region",
    size = "Total Population"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'

glimpse(final_analysis_df)
## Rows: 9
## Columns: 5
## $ ti_region            <chr> "All - NSW", "Chatswood", "Macquarie Park", "Newc…
## $ pct_change           <dbl> -14.948209, -2.471553, 25.461847, -32.802057, -22…
## $ avg_seifa_score      <dbl> NA, 1099.2894, 1052.2391, 995.7994, 1093.4241, 10…
## $ cases_per_10k_people <dbl> NA, 32638.98, 17269.49, 68541.79, 16631.96, 22767…
## $ total_population     <dbl> NA, 98902, 92741, 538902, 53405, 448404, 60101, 2…
census_data_folder <- "/Users/ryan/Desktop/Winterdatachallenge/2021_GCP_all_for_NSW_short-header/2021 Census GCP All Geographies for NSW/LGA/NSW"
file_pattern <- "G62" 
files_to_load <- list.files(
  path = census_data_folder,
  pattern = file_pattern,
  full.names = TRUE
)
travel_to_work_lga <- map_dfr(files_to_load, read.csv)
lga_transport_baseline <- travel_to_work_lga %>%
  clean_names() %>%
  
  mutate(lga_code_2021 = as.character(lga_code_2021)) %>%
  
  select(
    lga_code_2021,
    train = one_method_train_p,
    bus = one_method_bus_p,
    ferry = one_method_ferry_p,
    tram = one_met_tram_or_lt_rail_p,
    worked_from_home = worked_home_p,
    did_not_go_to_work = did_not_go_to_work_p,
    total_workforce = tot_p
  ) %>%
  
  mutate(across(-lga_code_2021, as.numeric)) %>%
  mutate(across(-lga_code_2021, ~replace_na(., 0))) %>%

  mutate(
    total_public_transport = train + bus + ferry + tram,
    total_commuters = total_workforce - worked_from_home - did_not_go_to_work,
    pct_public_transport_2021 = (total_public_transport / (total_commuters + 0.001)) * 100
  ) %>%

  mutate(join_key = str_remove(lga_code_2021, "LGA")) %>%
  left_join(nswgeo::lga_nsw, by = c("join_key" = "LGA_CODE_2024")) %>%
  select(lga_name = LGA_NAME_2024, pct_public_transport_2021) %>%
  mutate(lga_name = str_remove(lga_name, " \\(C\\)| \\(A\\)| \\(NSW\\)")) %>%
  filter(!is.na(lga_name))
regional_transport_baseline <- lga_transport_baseline %>%
  left_join(lga_to_ti_region_lookup, by = "lga_name") %>%
  filter(!is.na(ti_region)) %>%
  group_by(ti_region) %>%
  summarise(
    avg_pct_public_transport_2021 = mean(pct_public_transport_2021, na.rm = TRUE),
    .groups = "drop"
  )

master_analysis_df <- final_analysis_df %>%
  left_join(regional_transport_baseline, by = "ti_region")
ggplot(
  data = master_analysis_df %>% filter(is.finite(avg_pct_public_transport_2021)),
  mapping = aes(x = avg_pct_public_transport_2021, y = pct_change)
) +
  geom_smooth(method = "lm", se = FALSE, color = "#E69F00", linetype = "dashed") +
  geom_point(aes(color = avg_seifa_score, size = total_population), alpha = 0.8) +
  ggrepel::geom_text_repel(aes(label = ti_region), size = 3.5, max.overlaps = 15) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  scale_color_viridis_c() +
  labs(
    title = "The Vulnerability of Sydney's Commuting Habits",
    subtitle = "Regions with higher pre-pandemic PT usage saw a larger drop in patronage",
    x = "Baseline Public Transport Use in 2021 (%)",
    y = "Percentage Change in Morning Peak Patronage (2020 vs. 2025)",
    color = "SEIFA Score",
    size = "Total Population"
  ) +
  theme_minimal(base_size = 14)
## `geom_smooth()` using formula = 'y ~ x'

map_data <- nsw_lga_shapes %>%
  mutate(lga_name = str_remove(LGA_NAME_2024, " \\(C\\)| \\(A\\)| \\(NSW\\)")) %>%
  left_join(lga_to_ti_region_lookup, by = "lga_name") %>%
  left_join(master_analysis_df, by = "ti_region") %>%
  filter(!is.na(pct_change))

ggplot() +
  geom_sf(data = nsw_lga_shapes, fill = "grey85", color = "white", linewidth = 0.2) +
  geom_sf(data = map_data, aes(fill = pct_change), color = "black", linewidth = 0.4) +
  scale_fill_gradient2(
    midpoint = 0, 
    low = "#D55E00", 
    mid = "white", 
    high = "#0072B2",
    name = "Patronage \nChange (%)"
  ) +
  
  labs(
    title = "Sydney's Commuting Shift: A Regional View",
    subtitle = "Mapping the percentage change in morning peak patronage"
  ) +
  coord_sf(xlim = c(150.0, 151.8), ylim = c(-34.5, -32.7), expand = FALSE) +
  
  theme_void(base_size = 14)

pca_data <- master_analysis_df %>%
  filter(!ti_region %in% c("All - NSW", "Other")) %>%
  filter(is.finite(avg_seifa_score) & is.finite(avg_pct_public_transport_2021)) %>%
  select(ti_region, pct_change, avg_seifa_score, avg_pct_public_transport_2021)

numeric_pca_data <- pca_data %>%
  select(-ti_region) %>%
  scale()

pca_result <- prcomp(numeric_pca_data, center = FALSE, scale. = FALSE)

pca_scores <- as.data.frame(pca_result$x)

indexed_df <- pca_data %>%
  select(ti_region) %>%
  mutate(disruption_index = pca_scores$PC1)

map_data_indexed <- nsw_lga_shapes %>%
  mutate(lga_name = str_remove(LGA_NAME_2024, " \\(C\\)| \\(A\\)| \\(NSW\\)")) %>%
  left_join(lga_to_ti_region_lookup, by = "lga_name") %>%
  left_join(indexed_df, by = "ti_region") %>%
  filter(!is.na(disruption_index))
ggplot() +

  geom_sf(data = nsw_lga_shapes, fill = "grey85", color = "white", linewidth = 0.2) +
  geom_sf(data = map_data_indexed, aes(fill = disruption_index), color = "black", linewidth = 0.4) +
  scale_fill_viridis_c(
    option = "plasma", 
    name = "Remote Work \nDisruption Index"
  ) +
  
  labs(
    title = "A Spectrum of Change Across Sydney's Commuting Zones",
    subtitle = "Regions ranked by a PCA-derived 'Disruption Index' based on SEIFA, patronage change, and baseline usage"
  ) +
  coord_sf(xlim = c(150.0, 151.8), ylim = c(-34.5, -32.7), expand = FALSE) +
  
  theme_void(base_size = 14)