# Read Cities_CPI.csv (row1 = categories, row2 = regions; col2=Year, col3=Quarter; values from col4)
raw <- readr::read_csv("Cities_CPI.csv", col_names = FALSE, show_col_types = FALSE)
hdr_cat <- clean_header(unlist(raw[1, ]))
hdr_reg <- clean_header(unlist(raw[2, ]))
year_col <- which(trimws(hdr_cat) == "Year")[1]
qtr_col <- which(trimws(hdr_cat) == "Quarter")[1]
if (is.na(year_col) | is.na(qtr_col)) stop("Could not locate Year/Quarter columns in row 1")
val_idx <- which(seq_along(hdr_cat) > qtr_col & !is.na(hdr_cat) & trimws(hdr_cat) != "")
comb_names <- paste0(clean_header(hdr_cat[val_idx]), "__", clean_header(hdr_reg[val_idx]))
dat <- raw[-c(1,2), c(year_col, qtr_col, val_idx)]
names(dat) <- c("Year","Quarter", comb_names)
# make names unique so mutate() works even if headers repeat
names(dat) <- make.unique(names(dat), sep = "__dup")
# Long form, strip de-dup tag, split Category/Region, collapse true duplicates
long <- dat %>%
mutate(Year = clean_header(Year),
Quarter = clean_header(Quarter)) %>%
pivot_longer(-c(Year, Quarter), names_to = "CatReg", values_to = "Value") %>%
mutate(CatReg = sub("__dup\\d+$", "", CatReg)) %>%
tidyr::separate(CatReg, into = c("Category","Region"), sep = "__", fill = "right") %>%
mutate(
Category = clean_header(Category),
Region = clean_header(Region),
Value = suppressWarnings(as.numeric(as.character(Value)))
) %>%
filter(Category != "", Region != "", !is.na(Value)) %>%
group_by(Year, Quarter, Category, Region) %>%
summarise(Value = mean(Value, na.rm = TRUE), .groups = "drop")
# Add Category_key for joining with weights and for merges
long <- long %>% mutate(Category_key = norm_cat(Category))
# Parse time
qt <- parse_quarter_cols(long$Year, long$Quarter)
long <- long %>% mutate(Time = qt$date, QuarterLabel = qt$label) %>% filter(!is.na(Time))
# Read Weights.csv and reshape to long with Category_key
w_raw <- readr::read_csv("Weights.csv", show_col_types = FALSE)
weights_long <- w_raw %>%
select(any_of(c("Groups","Sydney","Melbourne","Brisbane","Adelaide","Perth","Hobart","Darwin","Canberra"))) %>%
rename(Category_raw = Groups) %>%
pivot_longer(-Category_raw, names_to = "Region", values_to = "Weight") %>%
mutate(
Category_key = norm_cat(Category_raw),
Region = stringr::str_squish(Region),
Weight = suppressWarnings(as.numeric(Weight))
) %>%
filter(!is.na(Weight), Category_key != "", Region != "")
# Define merges and keepers
merge_specs <- list(
"Food, Beverages and Alcohol" = c("food and beverages","alcohol and tobacco"),
"Transport and Communication" = c("transport","communication"),
"Clothing and footwear household equipment" = c("clothing and footwear","furnishings and household equipment")
)
keep_keys <- c("housing","health","education")
keep_labels <- c("housing"="Housing","health"="Health","education"="Education")
# Build merged blocks using region weights renormalised within each pair
build_merged_block <- function(target_label, source_keys, dat_long, w_long){
dat_long %>%
filter(Category_key %in% source_keys) %>%
left_join(w_long, by = c("Region","Category_key")) %>%
group_by(Time, QuarterLabel, Region) %>%
mutate(w_norm = Weight / sum(Weight, na.rm = TRUE)) %>%
summarise(Value = sum(Value * w_norm, na.rm = TRUE), .groups = "drop") %>%
mutate(Category = target_label)
}
# Keep categories as-is for Housing, Health, Education
keep_block <- long %>%
filter(Category_key %in% keep_keys) %>%
mutate(Category = keep_labels[Category_key]) %>%
select(Time, QuarterLabel, Region, Category, Value)
# Merge the specified pairs
merged_blocks <- purrr::imap_dfr(merge_specs, ~build_merged_block(.y, .x, long, weights_long))
# Final dataset for plotting
cpi_final <- bind_rows(keep_block, merged_blocks) %>%
mutate(
Category = factor(Category,
levels = c("Food, Beverages and Alcohol",
"Clothing and footwear household equipment",
"Housing",
"Transport and Communication",
"Health",
"Education"))
) %>%
arrange(Category, Time, Region)
region_levels <- c("Sydney","Melbourne","Brisbane","Adelaide","Perth","Hobart","Darwin","Canberra")
cpi_final <- cpi_final %>% mutate(Region = factor(Region, levels = region_levels %||% unique(Region)))
Bar animation
region_palette <- c(
"Sydney" = "#1b9e77",
"Melbourne"= "#d95f02",
"Brisbane" = "#7570b3",
"Adelaide" = "#e7298a",
"Perth" = "#66a61e",
"Hobart" = "#e6ab02",
"Darwin" = "#a6761d",
"Canberra" = "#1f78b4"
)
bar_anim <- function(cat_name, dat = cpi_final){
d <- dat %>% dplyr::filter(Category == cat_name)
if (nrow(d) == 0) {
return(plotly::plot_ly() %>% plotly::layout(title = list(text = paste0(cat_name, " — no data found")),
showlegend = FALSE))
}
xmax <- max(d$Value, na.rm = TRUE) * 1.05
plotly::plot_ly(
data = d,
frame = ~QuarterLabel,
x = ~Value,
y = ~Region,
type = "bar",
orientation = "h",
color = ~Region,
colors = region_palette,
hovertemplate = paste0(
cat_name, "<br>Region: %{y}<br>Quarter: %{frame}<br>Index: %{x:.1f}<extra></extra>"
)
) %>%
plotly::layout(
title = list(text = paste0(cat_name, " — CPI by Region (Quarterly)")),
xaxis = list(title = "Index", range = c(0, xmax), tickformat = ",.1f"),
yaxis = list(
title = "",
categoryorder = "array",
categoryarray = levels(d$Region)
),
margin = list(l = 90, r = 20, t = 30, b = 40),
showlegend = FALSE # <- hide legend
) %>%
plotly::animation_opts(frame = 500, transition = 200, easing = "linear", redraw = FALSE) %>%
plotly::animation_slider(currentvalue = list(prefix = "Quarter: ")) %>%
plotly::animation_button(x = 1, xanchor = "right", y = 1.1, yanchor = "top")
}
Food, Beverages and Alcohol
Clothing and footwear household equipment
Housing
Transport and Communication
Health
Education
knitr::opts_chunk$set(fig.width = 12, fig.height = 6, dpi = 96)
median house price
Median house price vs Year (bubble = population)