# =============================================================================
# Even Money: How Sport Became Australia's Biggest Betting Shop
# Author : Arya Khamkar
# Data   : Australian Gambling Statistics, 40th Edition (1998-99 to 2023-24)
#          Queensland Government Statistician's Office, QLD Treasury
# =============================================================================
# 0. Libraries
library(tidyverse)    # dplyr, ggplot2, tidyr, stringr, purrr, readr
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)       # read the AGS Excel file
## Warning: package 'readxl' was built under R version 4.5.2
library(plotly)       # interactive charts
## Warning: package 'plotly' was built under R version 4.5.2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(scales)       # nice $ and % axis labels
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# 1. Colour palette & factor order
#    Wagering is the hero colour (red); everything else is muted.
type_pal <- c(
  "Wagering"           = "#D8352A",
  "Gaming machines"    = "#33485A",
  "Casino"             = "#6B7E8C",
  "Lotteries"          = "#A9B6BE",
  "Keno"               = "#C9D1D3",
  "Interactive gaming" = "#F39237",
  "Minor gaming"       = "#E3E7E9"
)

order_lvls <- c(
  "Gaming machines",
  "Casino",
  "Lotteries",
  "Keno",
  "Minor gaming",
  "Interactive gaming",
  "Wagering"
)
# 2. Data ingestion
# Path to the downloaded AGS Excel file — adjust if your folder is different
state_file <- "data/australian-gambling-statistics-40th-edn-1998-99-2023-24-state-tables.xlsx"

# The 7 product categories live in columns B, D, F, H, J, L, N of every sheet.
# (Odd columns in between hold footnote flags — we skip them.)
products <- c(
  "Casino", "Gaming machines", "Interactive gaming",
  "Keno",   "Lotteries",       "Minor gaming",       "Wagering"
)
val_cols <- c(2, 4, 6, 8, 10, 12, 14)

# Helper: reads one AGS sheet, keeps year + the 7 value columns, tidies it.
read_ags <- function(sheet_name, value_name = "value") {
  
  raw <- read_excel(state_file, sheet = sheet_name, skip = 7, col_names = FALSE)
  
  dat <- raw[, c(1, val_cols)]          # year column + 7 dollar columns
  names(dat) <- c("year", products)
  
  dat %>%
    filter(str_detect(as.character(year), "^[0-9]{4}")) %>%   # keep data rows only
    mutate(
      year_end = as.integer(str_sub(year, 1, 4)) + 1,         # "1998-99" -> 1999
      across(all_of(products), ~ suppressWarnings(as.numeric(.)))
    ) %>%
    pivot_longer(
      cols      = all_of(products),
      names_to  = "gambling_type",
      values_to = value_name
    ) %>%
    select(year_end, gambling_type, all_of(value_name))
}

# States to loop over
states <- c("NSW", "VIC", "QLD", "WA", "SA", "TAS", "ACT", "NT")

# Table "X 5" = Total Gambling Expenditure (A$ million)
exp_state <- map_dfr(
  states,
  ~ read_ags(paste(.x, "5"), "expenditure") %>% mutate(state = .x)
)
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
exp_aus <- read_ags("AUS 5", "expenditure")        # national totals
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
# Table "X 7" = Per-Capita Gambling Expenditure ($ per adult)
pc_state <- map_dfr(
  states,
  ~ read_ags(paste(.x, "7"), "per_adult") %>% mutate(state = .x)
)
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
# 3. World comparison data  (H2 Gambling Capital estimates — confirm/update)
world <- tibble(
  country        = c("Australia",   "Hong Kong", "Singapore", "Ireland",
                     "United States", "Finland", "New Zealand", "Norway",
                     "Italy",        "Canada"),
  loss_per_adult = c(1635, 1430, 1170, 800, 760, 720, 690, 600, 560, 540)
)

# =============================================================================
# Charts
# =============================================================================
# Chart 1 sets the scene: how does Australia compare to the rest of the world?
# A simple ranking is the "hook"- it grabs attention before we go deeper.
# Australia is coloured red so the eye lands on it instantly.
# Chart 1 — Australia leads the world for losing
p1 <- world %>%
  mutate(
    country   = fct_reorder(country, loss_per_adult),
    highlight = if_else(country == "Australia", "Australia", "Other")
  ) %>%
  ggplot(aes(
    x    = loss_per_adult,
    y    = country,
    fill = highlight,
    text = paste0(country, ": US$", comma(loss_per_adult), " per adult")
  )) +
  geom_col() +
  scale_fill_manual(
    values = c("Australia" = "#D8352A", "Other" = "#b0b0b0"),
    guide  = "none"
  ) +
  scale_x_continuous(labels = label_dollar(prefix = "US$")) +
  labs(
    title = "Australia loses more to gambling than any country",
    x     = "Gambling loss per adult (USD)",
    y     = NULL
  ) +
  theme_minimal(base_size = 12)

ggplotly(p1, tooltip = "text") %>% config(displayModeBar = FALSE)
#--------------------------------------------------------------------------
# We've shown Australia loses the most. The natural next question is:
# WHERE is all that money going? Chart 2 breaks the national gambling bill
# into its parts, year by year, so we can see which type is growing.
# (Stacked columns: each colour is one gambling type; bar height = the total.)
# Chart 2 — A $32-billion-a-year habit  (stacked area)
exp_aus2 <- exp_aus %>%
  mutate(gambling_type = factor(gambling_type, levels = order_lvls)) %>%
  arrange(year_end, gambling_type)

p2 <- plot_ly()

for (gt in order_lvls) {
  df_sub <- exp_aus2 %>% filter(gambling_type == gt)
  p2 <- add_trace(p2,
                  data       = df_sub,
                  x          = ~year_end,
                  y          = ~expenditure,
                  name       = gt,
                  type       = "scatter",
                  mode       = "none",
                  stackgroup = "one",
                  fillcolor  = type_pal[gt],
                  hovertemplate = paste0(gt, ", FY%{x}: $%{y:,.0f}M<extra></extra>")
  )
}

p2 <- p2 %>% layout(
  title = list(
    text    = "Australia's gambling bill has reached $32 billion a year",
    x       = 0.5,
    xanchor = "center",
    xref    = "paper"
  ),
  xaxis = list(
    title = "Financial year ending"
  ),
  yaxis = list(
    title      = "Expenditure (A$ millions)",
    tickprefix = "$",
    ticksuffix = "M",
    tickformat = ","
  ),
  legend = list(traceorder = "normal")
)

p2
# Chart 2 showed pokies still dominate, but WAGERING (the red band - betting
# on racing and sport) is the part climbing fastest. Chart 3 zooms into
# wagering alone and asks: is this happening everywhere, or just one state?
# One line per state lets the reader compare trends side by side.
# Chart 3 — Betting is rising in every state
p3 <- exp_state %>%
  filter(gambling_type == "Wagering") %>%
  ggplot(aes(
    x      = year_end,
    y      = expenditure,
    colour = state,
    group  = state,
    text   = paste0(state, ", FY", year_end,
                    ": $", comma(round(expenditure)), "M")
  )) +
  geom_line(linewidth = 1) +
  scale_y_continuous(labels = label_dollar(suffix = "M")) +
  labs(
    title    = "Betting spend is climbing across the states",
    subtitle = "Wagering expenditure (racing + sports betting)",
    x        = "Financial year ending",
    y        = "Wagering expenditure (A$M)",
    colour   = NULL
  ) +
  theme_minimal(base_size = 12)

ggplotly(p3, tooltip = "text")
# Big states naturally spend more simply because they have more people.
# To compare fairly, Chart 4 switches to spending PER ADULT (the "X 7"
# tables). The heat-map shows state x gambling type at a glance - darker red
# means a heavier loss per person.
# Chart 4 — Who loses the most per adult  (heat-map tile)
latest <- max(pc_state$year_end)

p4 <- pc_state %>%
  filter(year_end == latest) %>%
  mutate(
    state         = factor(state, levels = rev(states)),
    gambling_type = factor(gambling_type, levels = order_lvls)
  ) %>%
  ggplot(aes(
    x    = gambling_type,
    y    = state,
    fill = per_adult,
    text = paste0(state, " - ", gambling_type,
                  ": $", comma(round(per_adult)), " per adult")
  )) +
  geom_tile(colour = "white") +
  scale_fill_gradient(
    low    = "#fde3e1",
    high   = "#D8352A",
    name   = "$/adult",
    labels = label_dollar()
  ) +
  labs(
    title = paste0("Where losses land hardest, per adult (FY", latest, ")"),
    x     = NULL,
    y     = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

ggplotly(p4, tooltip = "text")
# Finally, the payoff. Chart 5 turns the dollars into a share: how much of
# every gambling dollar now goes on betting? The dashed line marks 25% so
# the reader can see wagering has reached a quarter of all gambling losses -
# nearly double its 1990s level. This is the point the 2026 ad reforms target.
# Chart 5 — Betting now eats a quarter of every gambling dollar
share <- exp_aus %>%
  group_by(year_end) %>%
  mutate(share = expenditure / sum(expenditure) * 100) %>%
  filter(gambling_type == "Wagering") %>%
  ungroup()

p5 <- share %>%
  ggplot(aes(
    x     = year_end,
    y     = share,
    group = 1,
    text  = paste0("FY", year_end, ": ", round(share, 1),
                   "% of all gambling losses")
  )) +
  geom_hline(yintercept = 25, linetype = "dashed",
             colour = "grey60", linewidth = 0.5) +
  geom_line(colour = "#D8352A", linewidth = 1.3) +
  scale_y_continuous(labels = label_percent(scale = 1)) +
  labs(
    title    = "Betting now eats a quarter of every gambling dollar",
    subtitle = "Wagering as a share of total gambling expenditure, Australia",
    x        = "Financial year ending",
    y        = "Share of all gambling losses"
  ) +
  theme_minimal(base_size = 12)

ggplotly(p5, tooltip = "text")
# =============================================================================
# End of script
# =============================================================================