library(dplyr)
library(readr)
library(tidyr)
library(plotly)
library(geosphere)
library(readxl)
library(rvec)
central_lat = -34.9211
central_long = 138.5964

Adding in Referendum Results

pp <- read_csv("data/GeneralPollingPlacesDownload-29581.csv", skip=1)
pp_primary <- read_csv("data/ReferendumPollingPlaceResultsByStateDownload-29581-SA.csv", skip=1) %>%
  left_join(pp, by=c("PollingPlaceId" = "PollingPlaceID")) %>%
  mutate(
    Latitude = as.numeric(Latitude),
    Longitude = as.numeric(Longitude),
  ) %>%
  filter(!is.na(Latitude)) %>%
  filter(!is.na(Longitude)) %>% rowwise() %>% mutate(
  dist = distm(c(central_long, central_lat), c(Longitude, Latitude), fun=distHaversine)[,1]/1000
) %>% select(
  c(DivisionName, PollingPlaceNm.x, PremisesSuburb, YesVotes, YesPercentage, NoVotes, NoPercentage, dist)
) %>% pivot_longer(
    cols = c(YesVotes, YesPercentage, NoVotes, NoPercentage),
    names_to = c("Response", ".value"),
    names_pattern = "(Yes|No)(Votes|Percentage)"
  ) %>% rename(
    PollingPlaceName = PollingPlaceNm.x,
    Suburb=PremisesSuburb,
    Party = "Response"
  ) %>% mutate(
      election = "23Voice"
    )

Adding federal 25

pp <- read_csv("data/GeneralPollingPlacesDownload-31496.csv", skip=1)
pp_to_add <- read_csv("data/HouseStateFirstPrefsByPollingPlaceDownload-31496-SA.csv", skip=1) %>%
  filter(StateAb == "SA") %>%
  filter(CandidateID != 999) %>%
  filter(PartyAb %in% c("ALP", "LP", "ON", "GRN")) %>%
  group_by(DivisionID, PollingPlaceID) %>%
  mutate(
    pp_percent = OrdinaryVotes*100/sum(OrdinaryVotes)
  ) %>% ungroup() %>%
  left_join(pp, by=c("PollingPlaceID"="PollingPlaceID")) %>%
  mutate(
    Latitude = as.numeric(Latitude),
    Longitude = as.numeric(Longitude),
  ) %>%
  filter(!is.na(Latitude)) %>%
  filter(!is.na(Longitude)) %>% rowwise() %>% mutate(
  dist = distm(c(central_long, central_lat), c(Longitude, Latitude), fun=distHaversine)[,1]/1000
) %>% select(DivisionNm.x, PollingPlace, PremisesSuburb, dist, PartyAb, OrdinaryVotes, pp_percent) %>%
  rename(
    DivisionName = DivisionNm.x,
    PollingPlaceName = PollingPlace,
    Suburb=PremisesSuburb,
    Party = PartyAb,
    Votes = OrdinaryVotes,
    Percentage = pp_percent
  ) %>% mutate(
    election = "25Fed"
  )

pp_primary <- pp_primary %>% rbind(pp_to_add)

Adding SA26

pp <- read_csv("data/SA-2026-Pollingplaces.xlsx - Sheet1.csv")
pp_to_add <- read_csv("data/SA-2026-HA-Primary-PollingPlace.xlsx - Sheet1.csv") %>% 
  filter(candidate_id != 999) %>%
  filter(party_code %in% c("ALP", "LIB", "ON", "GRN")) %>%
  group_by(district_id, pp_id) %>%
  mutate(
    pp_percent = votes*100/sum(votes)
  ) %>% ungroup() %>%
  left_join(pp, by = c("pp_id" = "pp_id")) %>%
  mutate(
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude),
  ) %>%
  filter(!is.na(latitude)) %>%
  filter(!is.na(longitude)) %>% rowwise() %>% mutate(
  dist = distm(c(central_long, central_lat), c(longitude, latitude), fun=distHaversine)[,1]/1000
) %>% select(district_name.x, pp_name.x, locality, dist, party_code, votes, pp_percent) %>%
  rename(
    DivisionName = district_name.x,
    PollingPlaceName = pp_name.x,
    Suburb=locality,
    Party = party_code,
    Votes = votes,
    Percentage = pp_percent
  ) %>% mutate(
    election = "26SA",
    Party = recode(Party, "LIB" = "LP")
  )

pp_primary <- pp_primary %>% rbind(pp_to_add)
pp_primary$Party = factor(pp_primary$Party)
house_prices <- read_excel("data/lsg_stats_2026_q1.xlsx") %>%
  select(!"Median Change") %>% select(!c("Sales 1Q 2025", "Median 1Q 2025")) %>%
  mutate(
    Year = 2026,
    Quarter = 1
  ) %>% rename(
    Sales = "Sales 1Q 2026",
    Median = "Median 1Q 2026"
  )
house_prices = rbind(
  read_excel("data/lsg_stats_2026_q1.xlsx") %>%
  select(!"Median Change") %>% select(!c("Sales 1Q 2025", "Median 1Q 2025")) %>%
  mutate(
    Year = 2026,
    Quarter = 1
  ) %>% rename(
    Sales = "Sales 1Q 2026",
    Median = "Median 1Q 2026"
  ),
  read_excel("data/lsg_stats_2025_q4.xlsx") %>%
  select(!"Median Change") %>% select(!c("Sales 4Q 2024", "Median 4Q 2024")) %>%
  mutate(
    Year = 2025,
    Quarter = 4
  ) %>% rename(
    Sales = "Sales 4Q 2025",
    Median = "Median 4Q 2025"
  ),
  read_excel("data/lsg_stats_2025_q3.xlsx") %>%
  select(!"Median Change") %>% select(!c("Sales 3Q 2024", "Median 3Q 2024")) %>%
  mutate(
    Year = 2025,
    Quarter = 3
  ) %>% rename(
    Sales = "Sales 3Q 2025",
    Median = "Median 3Q 2025"
  ),
  read_excel("data/lsg_stats_2025_q2.xlsx") %>%
  select(!"Median Change") %>% select(!c("Sales 2Q 2024", "Median 2Q 2024")) %>%
  mutate(
    Year = 2025,
    Quarter = 2
  ) %>% rename(
    Sales = "Sales 2Q 2025",
    Median = "Median 2Q 2025"
  )
) %>% group_by(City,Suburb) %>%
  replace_na(list(Sales = 0, Median=0)) %>%
  summarise(
    Median = weighted_median(Median, wt=Sales),
    Sales = sum(Sales),
  )
party_pal <- c("#d8352a", "#505aaf", "orange", "#29a37a", "#b81f14", "#29339b")
party_pal <- setNames(party_pal, c("ALP", "LP", "ON", "GRN", "Yes", "No"))

election_pal <- c("#b81f14", "#29339b", "#1f7a5c")
election_pal <- setNames(election_pal, c("23Voice", "25Fed", "26SA"))

pp_primary %>%
  filter(Party %in% c("ALP", "LP", "ON", "GRN")) %>%
  filter(!grepl('PPVC', PollingPlaceName)) %>%
  filter(!grepl('Early Voting', PollingPlaceName)) %>%
  arrange(dist) %>%
  group_by(Party) %>%
  group_map(~plot_ly(data=., 
        x=~dist, y=~Percentage, color=~Party,
        text = ~paste("District: ", DivisionName, '<br>Polling Place:', PollingPlaceName, "<br>Distance:", round(dist, digits=3), "km<br>Percent:", round(Percentage, digits=2)),
        size=~Votes,
        colors=party_pal,
        frame = ~election
        ) %>% layout(xaxis=list(type="log")), .keep=TRUE) %>%
  subplot(nrows = 2, shareX = TRUE, shareY=TRUE, titleX = FALSE, titleY = FALSE) %>%
  layout(
    annotations=list(
      list(
        text = "Distance from Adelaide (log km)",
        x = 0.5, y = -0.1,
        xref = "paper", yref = "paper",
        showarrow = FALSE
      ),
      list(
        text = "Polling Place Primary Result (%)",
        x = -0.08, y = 0.5,
        xref = "paper", yref = "paper",
        textangle = -90,
        showarrow = FALSE
      )
    ),
    width=600
  )
pp_primary %>%
  filter(Party %in% c("ALP", "LP", "ON", "GRN")) %>%
  filter(!grepl('PPVC', PollingPlaceName)) %>%
  filter(!grepl('Early Voting', PollingPlaceName)) %>% 
  left_join(house_prices, by = c("Suburb" = "Suburb")) %>%
  filter(!is.na(Median)) %>%
  arrange(Median) %>%
  group_by(Party) %>%
  group_map(~plot_ly(data=., 
        x=~Median, y=~Percentage, color=~Party,
        text = ~paste("District: ", DivisionName, '<br>Polling Place:', PollingPlaceName, "<br>Price:", paste0("$", Median), "<br>Percent:", round(Percentage, digits=2)),
        size=~Votes,
        colors=party_pal,
        frame = ~election
        ) %>% layout(xaxis=list(type="log")), .keep=TRUE) %>%
  subplot(nrows = 2, shareX = TRUE, shareY=TRUE, titleX = FALSE, titleY = FALSE) %>%
  layout(
    annotations=list(
      list(
        text = "Median House Price (Log) (Q1 2025-Q1 2026)",
        x = 0.5, y = -0.15,
        xref = "paper", yref = "paper",
        showarrow = FALSE
      ),
      list(
        text = "Polling Place Primary Result (%)",
        x = -0.08, y = 0.5,
        xref = "paper", yref = "paper",
        textangle = -90,
        showarrow = FALSE
      )
    ),
    width=600
  )
pp_primary %>%
  filter(Party %in% c("Yes", "No")) %>%
  filter(!grepl('PPVC', PollingPlaceName)) %>%
  filter(!grepl('Early Voting', PollingPlaceName)) %>% 
  left_join(house_prices, by = c("Suburb" = "Suburb")) %>%
  filter(!is.na(Median)) %>%
  arrange(Median) %>%
  group_by(Party) %>%
  plot_ly(data=., 
      x=~Median, y=~Percentage, color=~Party,
      text = ~paste("District: ", DivisionName, '<br>Polling Place:', PollingPlaceName, "<br>Price:", paste0("$", Median), "<br>Percent:", round(Percentage, digits=2)),
      size=~Votes,
      colors=party_pal
      ) %>% 
  layout(
      xaxis=list(type="log", title="Median House Price (Log) (Q1 2025-Q1 2026)"),
      yaxis=list(title="Polling Place Primary Result (%)"),
      width=600
  )
pp_primary %>%
  filter(Party %in% c("Yes", "No")) %>%
  filter(!grepl('PPVC', PollingPlaceName)) %>%
  filter(!grepl('Early Voting', PollingPlaceName)) %>% 
  left_join(house_prices, by = c("Suburb" = "Suburb")) %>%
  filter(!is.na(dist)) %>%
  arrange(dist) %>%
  group_by(Party) %>%
  plot_ly(data=., 
        x=~dist, y=~Percentage, color=~Party,
        text = ~paste("District: ", DivisionName, '<br>Polling Place:', PollingPlaceName, "<br>Distance:", round(dist, digits=3), "km<br>Percent:", round(Percentage, digits=2)),
        size=~Votes,
        colors=party_pal
        ) %>% layout(
          xaxis=list(type="log", title="Distance from Adelaide (log km)"),
          yaxis=list(title="Polling Place Primary Result (%)"),
          width=600
        )
pp_primary %>% select(Suburb, election, dist, Party, Votes) %>%
  group_by(Suburb, election, Party) %>%
  summarise(
    Votes = sum(Votes)
  ) %>% ungroup() %>%
  group_by(Suburb, election) %>%
  mutate(
    suburb_percent = Votes*100/sum(Votes)
  ) %>% filter(Party %in% c("No", "ON")) %>%
  mutate(
    Party = recode(Party, "No" = "ON")
  ) %>% ungroup() %>% pivot_wider(
    names_from = election,
    values_from = c(Votes, suburb_percent)
  ) %>% 
  plot_ly(
    data=.,
    x=~suburb_percent_23Voice
  ) %>%
    add_markers(y=~suburb_percent_25Fed, name="Federal 25") %>%
    add_markers(y=~suburb_percent_26SA, name="South Australia 26") %>%
  layout(yaxis = list(title = "One Nation Suburb Result (%)"),
         xaxis = list(title = "No Voice Referendum Suburb Result (%)"), width=600, legend = list(x = 0.1, y = 0.9))
pp_primary %>% select(Suburb, election, dist, Party, Votes) %>%
  group_by(Suburb, election, Party) %>%
  summarise(
    Votes = sum(Votes)
  ) %>% ungroup() %>%
  group_by(Suburb, election) %>%
  mutate(
    suburb_percent = Votes*100/sum(Votes)
  ) %>% filter(Party %in% c("No", "LP")) %>%
  mutate(
    Party = recode(Party, "No" = "LP")
  ) %>% ungroup() %>% pivot_wider(
    names_from = election,
    values_from = c(Votes, suburb_percent)
  ) %>% 
  plot_ly(
    data=.,
    x=~suburb_percent_23Voice
  ) %>%
    add_markers(y=~suburb_percent_25Fed, name="Federal 25") %>%
    add_markers(y=~suburb_percent_26SA, name="South Australia 26") %>%
  layout(yaxis = list(title = "Liberal Suburb Result (%)"),
         xaxis = list(title = "No Voice Referendum Suburb Result (%)"), width=600, legend = list(x = 0.1, y = 0.9))