# Read Cities_CPI.csv (row1 = categories, row2 = regions; col2=Year, col3=Quarter; values from col4)
raw <- readr::read_csv("Cities_CPI.csv", col_names = FALSE, show_col_types = FALSE)

hdr_cat <- clean_header(unlist(raw[1, ]))
hdr_reg <- clean_header(unlist(raw[2, ]))

year_col <- which(trimws(hdr_cat) == "Year")[1]
qtr_col  <- which(trimws(hdr_cat) == "Quarter")[1]
if (is.na(year_col) | is.na(qtr_col)) stop("Could not locate Year/Quarter columns in row 1")

val_idx <- which(seq_along(hdr_cat) > qtr_col & !is.na(hdr_cat) & trimws(hdr_cat) != "")
comb_names <- paste0(clean_header(hdr_cat[val_idx]), "__", clean_header(hdr_reg[val_idx]))

dat <- raw[-c(1,2), c(year_col, qtr_col, val_idx)]
names(dat) <- c("Year","Quarter", comb_names)

# make names unique so mutate() works even if headers repeat
names(dat) <- make.unique(names(dat), sep = "__dup")

# Long form, strip de-dup tag, split Category/Region, collapse true duplicates
long <- dat %>%
  mutate(Year = clean_header(Year),
         Quarter = clean_header(Quarter)) %>%
  pivot_longer(-c(Year, Quarter), names_to = "CatReg", values_to = "Value") %>%
  mutate(CatReg = sub("__dup\\d+$", "", CatReg)) %>%
  tidyr::separate(CatReg, into = c("Category","Region"), sep = "__", fill = "right") %>%
  mutate(
    Category = clean_header(Category),
    Region   = clean_header(Region),
    Value    = suppressWarnings(as.numeric(as.character(Value)))
  ) %>%
  filter(Category != "", Region != "", !is.na(Value)) %>%
  group_by(Year, Quarter, Category, Region) %>%
  summarise(Value = mean(Value, na.rm = TRUE), .groups = "drop")

# Add Category_key for joining with weights and for merges
long <- long %>% mutate(Category_key = norm_cat(Category))

# Parse time
qt <- parse_quarter_cols(long$Year, long$Quarter)
long <- long %>% mutate(Time = qt$date, QuarterLabel = qt$label) %>% filter(!is.na(Time))

# Read Weights.csv and reshape to long with Category_key
w_raw <- readr::read_csv("Weights.csv", show_col_types = FALSE)

weights_long <- w_raw %>%
  select(any_of(c("Groups","Sydney","Melbourne","Brisbane","Adelaide","Perth","Hobart","Darwin","Canberra"))) %>%
  rename(Category_raw = Groups) %>%
  pivot_longer(-Category_raw, names_to = "Region", values_to = "Weight") %>%
  mutate(
    Category_key = norm_cat(Category_raw),
    Region = stringr::str_squish(Region),
    Weight = suppressWarnings(as.numeric(Weight))
  ) %>%
  filter(!is.na(Weight), Category_key != "", Region != "")

# Define merges and keepers
merge_specs <- list(
  "Food, Beverages and Alcohol" = c("food and beverages","alcohol and tobacco"),
  "Transport and Communication" = c("transport","communication"),
  "Clothing and footwear household equipment" = c("clothing and footwear","furnishings and household equipment")
)
keep_keys   <- c("housing","health","education")
keep_labels <- c("housing"="Housing","health"="Health","education"="Education")

# Build merged blocks using region weights renormalised within each pair
build_merged_block <- function(target_label, source_keys, dat_long, w_long){
  dat_long %>%
    filter(Category_key %in% source_keys) %>%
    left_join(w_long, by = c("Region","Category_key")) %>%
    group_by(Time, QuarterLabel, Region) %>%
    mutate(w_norm = Weight / sum(Weight, na.rm = TRUE)) %>%
    summarise(Value = sum(Value * w_norm, na.rm = TRUE), .groups = "drop") %>%
    mutate(Category = target_label)
}

# Keep categories as-is for Housing, Health, Education
keep_block <- long %>%
  filter(Category_key %in% keep_keys) %>%
  mutate(Category = keep_labels[Category_key]) %>%
  select(Time, QuarterLabel, Region, Category, Value)

# Merge the specified pairs
merged_blocks <- purrr::imap_dfr(merge_specs, ~build_merged_block(.y, .x, long, weights_long))

# Final dataset for plotting
cpi_final <- bind_rows(keep_block, merged_blocks) %>%
  mutate(
    Category = factor(Category,
      levels = c("Food, Beverages and Alcohol",
                 "Clothing and footwear household equipment",
                 "Housing",
                 "Transport and Communication",
                 "Health",
                 "Education"))
  ) %>%
  arrange(Category, Time, Region)

region_levels <- c("Sydney","Melbourne","Brisbane","Adelaide","Perth","Hobart","Darwin","Canberra")
cpi_final <- cpi_final %>% mutate(Region = factor(Region, levels = region_levels %||% unique(Region)))

Bar animation

region_palette <- c(
  "Sydney"   = "#1b9e77",
  "Melbourne"= "#d95f02",
  "Brisbane" = "#7570b3",
  "Adelaide" = "#e7298a",
  "Perth"    = "#66a61e",
  "Hobart"   = "#e6ab02",
  "Darwin"   = "#a6761d",
  "Canberra" = "#1f78b4"
)

bar_anim <- function(cat_name, dat = cpi_final){
  d <- dat %>% dplyr::filter(Category == cat_name)
  if (nrow(d) == 0) {
    return(plotly::plot_ly() %>% plotly::layout(title = list(text = paste0(cat_name, " — no data found")),
                                                 showlegend = FALSE))
  }
  xmax <- max(d$Value, na.rm = TRUE) * 1.05

  plotly::plot_ly(
    data   = d,
    frame  = ~QuarterLabel,
    x      = ~Value,
    y      = ~Region,
    type   = "bar",
    orientation = "h",
    color  = ~Region,
    colors = region_palette,
    hovertemplate = paste0(
      cat_name, "<br>Region: %{y}<br>Quarter: %{frame}<br>Index: %{x:.1f}<extra></extra>"
    )
  ) %>%
    plotly::layout(
      title = list(text = paste0(cat_name, " — CPI by Region (Quarterly)")),
      xaxis = list(title = "Index", range = c(0, xmax), tickformat = ",.1f"),
      yaxis = list(
        title = "",
        categoryorder = "array",
        categoryarray = levels(d$Region)
      ),
      margin = list(l = 90, r = 20, t = 30, b = 40),
      showlegend = FALSE   # <- hide legend
    ) %>%
    plotly::animation_opts(frame = 500, transition = 200, easing = "linear", redraw = FALSE) %>%
    plotly::animation_slider(currentvalue = list(prefix = "Quarter: ")) %>%
    plotly::animation_button(x = 1, xanchor = "right", y = 1.1, yanchor = "top")
}

Food, Beverages and Alcohol

Clothing and footwear household equipment

Housing

Transport and Communication

Health

Education

knitr::opts_chunk$set(fig.width = 12, fig.height = 6, dpi = 96)

median house price

Median house price vs Year (bubble = population)