# CHART 1
# Australians lost a record $32.2 billion to gambling

state <- read_excel(
  "C:/Users/evita/OneDrive/Desktop/Data Vis A3/DATA/australian-gambling-statistics-40th-edn-1998-99-2023-24-state-tables (1).xlsx",
  sheet = "AUS 20",
  skip = 6
)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
# Clean data
state_clean <- state %>%
  rename(
    year = `Value ($ million)`,
    gaming = ...2,
    wagering = ...4,
    total = ...6
  ) %>%
  select(year, gaming, wagering, total) %>%
  
  # Keep only rows that start with a year
  filter(!is.na(year)) %>%
  filter(str_detect(year, "^[0-9]")) %>%
  
  # Convert columns to numeric
  mutate(
    gaming = as.numeric(gaming),
    wagering = as.numeric(wagering),
    total = as.numeric(total)
  )

chart1 <- state_clean %>%
  select(
    year,
    gaming,
    wagering
  ) %>%
  pivot_longer(
    cols = c(gaming, wagering),
    names_to = "type",
    values_to = "expenditure"
  )

chart1$type <- recode(
  chart1$type,
  gaming = "Gaming",
  wagering = "Wagering"
)

#Plotting Chart 1
p1 <-ggplot(chart1,
       aes(x = year,
           y = expenditure,
           colour = type,
           group = type)) +
  
  geom_line(linewidth = 1.6) +
  
  scale_color_manual(
    values = c(
      "Gaming" = "darkorange3",
      "Wagering" = "dodgerblue3"
    )
  ) +
  
  scale_y_continuous(
    labels = function(x) paste0("$", x / 1000, "B"),
    limits = c(0, 30000),
    breaks = seq(0, 30000, 5000)
  ) +
  
  annotate(
    "segment",
    x = "2019–20",
    xend = "2019–20",
    y = 18800,
    yend = 17200,
    colour = "grey40",
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  
  annotate(
    "text",
    x = "2016–17",
    y = 14500,
    label = "Losses briefly dipped during\nCOVID-19 venue closures",
    size = 3.8,
    colour = "grey20",
    hjust = 0.5
  ) +
  
  annotate(
    "text",
    x = "2021–22",
    y = 25500,
    label = "Gaming\n$23.8B",
    colour = "darkorange3",
    fontface = "bold",
    size = 4
  ) +
  
  annotate(
    "text",
    x = "2021–22",
    y = 2500,
    label = "Wagering\n$8.4B",
    colour = "dodgerblue3",
    fontface = "bold",
    size = 4
  ) +
  
  labs(
    title = "Australians lost a record $32.2 billion to gambling",
    x = "Financial Year",
    y = "Gambling expenditure ($ billion)",
    caption = "Source: Queensland Government Statistician's Office, Australian Gambling Statistics, 40th Edition (2024)"
  ) +
  
  coord_cartesian(clip = "off") +
  
  theme_minimal(base_size = 13) +
  
  theme(
    plot.title = element_text(
      face = "bold",
      size = 16,
      hjust = 0.5
    ),
    
    plot.subtitle = element_text(
      size = 12,
      colour = "grey30",
      hjust = 0.5
    ),
    
    axis.title.y = element_text(
      face = "bold",
      size = 11
    ),
    
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      size = 9
    ),
    
    axis.text.y = element_text(
      size = 10
    ),
    
    plot.caption = element_text(
      size = 10,
      colour = "grey30",
      hjust = 0,
      face = "italic",
      margin = margin(t = 10)
    ),
    
    legend.position = "none",
    
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    
    plot.margin = margin(
      t = 10,
      r = 40,
      b = 10,
      l = 10
    )
  )
ggplotly(p1) %>%
  layout(
    title = list(
      text = paste(
        "Australians lost a record $32.2 billion to gambling",
        "<br><sup>Gaming remains dominant, while wagering expenditure has grown rapidly since 1998–99</sup>"
      ),
      x = 0.5
    ),
    margin = list(
      l = 80,
      r = 80,
      t = 100,
      b = 80
    )
  )
# CHART 2
# Gaming machines account for half of Australia's gambling losses
product_file <- "C:/Users/evita/OneDrive/Desktop/Data Vis A3/DATA/australian-gambling-statistics-40th-edn-1998-99-2023-24-product-tables.xlsx"

gaming_machines <- read_excel(
  product_file,
  sheet = "GAMING_MACHINES 5",
  skip = 5
) %>%
  select(
    year = `Value ($ million)`,
    expenditure = ...18
  ) %>%
  filter(!is.na(year)) %>%
  filter(str_detect(year, "^[0-9]")) %>%
  mutate(product = "Gaming Machines")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
wagering <- read_excel(
  product_file,
  sheet = "WAGERING 5",
  skip = 5
) %>%
  select(
    year = `Value ($ million)`,
    expenditure = ...18
  ) %>%
  filter(!is.na(year)) %>%
  filter(str_detect(year, "^[0-9]")) %>%
  mutate(product = "Wagering")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
casino <- read_excel(
  product_file,
  sheet = "CASINO 5",
  skip = 5
) %>%
  select(
    year = `Value ($ million)`,
    expenditure = ...18
  ) %>%
  filter(!is.na(year)) %>%
  filter(str_detect(year, "^[0-9]")) %>%
  mutate(product = "Casino")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
lotteries <- read_excel(
  product_file,
  sheet = "LOTTERIES 5",
  skip = 5
) %>%
  select(
    year = `Value ($ million)`,
    expenditure = ...18
  ) %>%
  filter(!is.na(year)) %>%
  filter(str_detect(year, "^[0-9]")) %>%
  mutate(product = "Lotteries")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
keno <- read_excel(
  product_file,
  sheet = "KENO 5",
  skip = 5
) %>%
  select(
    year = `Value ($ million)`,
    expenditure = ...18
  ) %>%
  filter(!is.na(year)) %>%
  filter(str_detect(year, "^[0-9]")) %>%
  mutate(product = "Keno")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
chart2 <- bind_rows(
  gaming_machines,
  wagering,
  casino,
  lotteries,
  keno
)

chart2_2024 <- chart2 %>%
  filter(year == "2023–24") %>%
  arrange(desc(expenditure)) %>%
  mutate(
    product = factor(
      product,
      levels = rev(product)
    )
  )

#Plotting Chart 2

product_order <- c(
  "Gaming Machines",
  "Wagering",
  "Casino",
  "Lotteries",
  "Keno"
)

product_colours <- c(
  "Gaming Machines" = "#D55E00",
  "Wagering"        = "#0072B2",
  "Casino"          = "#009E73",
  "Lotteries"       = "#E69F00",
  "Keno"            = "#CC79A7"
)

chart2_plot <- chart2 %>%
  mutate(
    product = factor(product, levels = product_order),
    expenditure_b = expenditure / 1000
  )

p2 <- ggplot(
  chart2_plot,
  aes(
    x = year,
    y = expenditure_b,
    fill = product,
    group = product
  )
) +
  
  geom_area(
    alpha = 0.9,
    colour = "white",
    linewidth = 0.3
  ) +
  
  scale_fill_manual(
    values = product_colours
  ) +
  
  scale_y_continuous(
    labels = function(x) paste0("$", x, "B"),
    breaks = seq(0, 35, 5),
    expand = expansion(mult = c(0, 0.01))
  ) +
  
  labs(
  x = NULL,
  y = "Gambling expenditure ($ billion)",
  fill = NULL
  ) +
  
  scale_x_discrete(
    breaks = c(
      "1998–99",
      "2003–04",
      "2008–09",
      "2013–14",
      "2018–19",
      "2023–24"
    )
  ) +
  
  theme_minimal(base_size = 13) +
  
  theme(
    plot.title = element_text(
      face = "bold",
      size = 15,
      hjust = 0.5
    ),
    
    plot.subtitle = element_text(
      size = 12,
      colour = "grey30",
      hjust = 0.5,
      margin = margin(b = 12)
    ),
    
    axis.title.y = element_text(
      face = "bold",
      size = 11,
      margin = margin(r = 15)
    ),
    
    axis.text.x = element_text(
      angle = 45,
      hjust = 1,
      size = 6
    ),
    
    axis.text.y = element_text(
      size = 10
    ),
    
    legend.position = "bottom",
    
    legend.text = element_text(
      size = 10
    ),
    
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    
    plot.caption = element_text(
      size = 10,
      colour = "grey30",
      hjust = 0,
      face = "italic",
      margin = margin(t = 10)
    ),
    
    plot.margin = margin(
      t = 10,
      r = 20,
      b = 10,
      l = 10
    )
  )
ggplotly(p2) %>%
  layout(
    title = list(
      text = paste(
        "Gaming machines account for half of gambling losses",
        "<br><sup>Australian gambling expenditure by product type, 1998–99 to 2023–24</sup>"
      ),
      x = 0.5
    ),

    xaxis = list(
      title = "",
      tickangle = -45
    ),

    yaxis = list(
      title = list(
        text = "Gambling expenditure ($ billion)",
        font = list(size = 16)
      )
    ),

    legend = list(
      title = list(text = "")
    ),

    margin = list(
      l = 100,
      r = 80,
      t = 120,
      b = 120
    )
  )
# CHART 3
# Relationship between gaming machine density and gambling losses
density <- read_excel(
  "C:/Users/evita/OneDrive/Desktop/Data Vis A3/DATA/yearly_density_statistical_release_nov_24-(2).xlsx",
  sheet = "Detail Data 2023-24",
  skip = 11
)
## New names:
## • `as at June 2024` -> `as at June 2024...17`
## • `as at June 2024` -> `as at June 2024...18`
## • `as at June 2024` -> `as at June 2024...19`
chart3_scatter <- density %>%
  select(
    lga = `LGA Name`,
    region = Region,
    expenditure_per_adult = `per Adult 2024`,
    egm_density = `per 1,000 Adults 2024`
  ) %>%
  filter(
    !is.na(expenditure_per_adult),
    !is.na(egm_density),
    !is.na(region),
    expenditure_per_adult > 0
  )
ggplot(
  chart3_scatter,
  aes(
    x = egm_density,
    y = expenditure_per_adult,
    colour = region
  )
) +
  
  geom_point(
    size = 3.5,
    alpha = 0.75
  ) +
  
  geom_smooth(
    method = "lm",
    se = FALSE,
    colour = "grey30",
    linewidth = 1
  ) +
  
  
  annotate(
    "text",
    x = 3,
    y = 900,
    label = "Highest losses occur\nin large Metro LGAs",
    size = 3,
    colour = "steelblue4",
    hjust = 0,
    fontface = "italic"
  ) +
  
  scale_colour_manual(
    values = c(
      "Metro" = "steelblue",
      "Country" = "darkorange"
    )
  ) +
  
  scale_x_continuous(
    breaks = seq(0, 10, 2)
  ) +
  
  scale_y_continuous(
    labels = scales::dollar_format()
  ) +
  
  labs(
    title = "Communities with more poker machines lose more money",
    subtitle = "Higher gaming machine density is associated with greater gambling losses per adult",
    x = "EGMs per 1,000 adults",
    y = "Gambling expenditure per adult ($)",
    colour = "Region",
    caption = "Source: Victorian Gambling and Casino Control Commission (2024)"
  ) +
  
  theme_minimal(base_size = 13) +
  
  theme(
    plot.title = element_text(
      face = "bold",
      size = 16,
      margin = margin(b = 6)
    ),
    
    plot.subtitle = element_text(
      size = 12,
      colour = "grey30",
      margin = margin(b = 10)
    ),
    
    axis.title.x = element_text(
      face = "bold",
      size = 11
    ),
    
    axis.title.y = element_text(
      face = "bold",
      size = 11
    ),
    
    axis.text = element_text(
      size = 10
    ),
    
    legend.position = c(0.83, 0.22),
    
    legend.title = element_text(
      face = "bold"
    ),
    
    panel.grid.minor = element_blank(),
    
    plot.caption = element_text(
      size = 10,
      colour = "grey30",
      face = "italic",
      hjust = 0,
      margin = margin(t = 10)
    ),
    
    plot.margin = margin(
      t = 10,
      r = 20,
      b = 10,
      l = 10
    )
  )
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'

# CHART 4
# Governments collect up to one-third of gambling losses
state_file <- "C:/Users/evita/OneDrive/Desktop/Data Vis A3/DATA/australian-gambling-statistics-40th-edn-1998-99-2023-24-state-tables (1).xlsx"

states <- c(
  "NSW",
  "VIC",
  "QLD",
  "SA",
  "WA",
  "TAS",
  "ACT",
  "NT"
)

chart4 <- lapply(states, function(state) {
  
  exp_data <- read_excel(
    state_file,
    sheet = paste0(state, " 20"),
    skip = 5
  )
  
  rev_data <- read_excel(
    state_file,
    sheet = paste0(state, " 26"),
    skip = 5
  )
  
  tibble(
    state = state,
    
    expenditure = exp_data %>%
      filter(...1 == "2023–24") %>%
      pull(TOTAL),
    
    revenue = rev_data %>%
      filter(...1 == "2023–24") %>%
      pull(TOTAL)
  )
  
}) %>%
  bind_rows() %>%
  mutate(
    revenue_pct = (revenue / expenditure) * 100,
    revenue_billion = revenue / 1000
  )
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...3`
## • `` -> `...5`
## • `` -> `...7`
#Plotting Chart 4
ggplot(
  chart4,
  aes(
    x = revenue_pct,
    y = reorder(state, revenue_pct),
    fill = revenue_billion
  )
) +
  
  geom_col() +
  
  geom_text(
    aes(label = paste0(round(revenue_pct, 1), "%")),
    hjust = -0.2,
    size = 4
  ) +
  
  scale_fill_gradient(
    low = "#fee8c8",
    high = "#d7301f",
    name = "Government revenue\n($ billion)"
  ) +
  
  scale_x_continuous(
    limits = c(0, max(chart4$revenue_pct) + 8)
  ) +
  
  labs(
    title = "Governments collect up to one-third of gambling losses",
    subtitle = "SA captures the largest share of gambling losses, while NSW collects the most revenue",
    x = "Government revenue (% of gambling expenditure)",
    y = NULL,
    caption = "Source: Queensland Government Statistician's Office, Australian Gambling Statistics, 40th Edition (2024)"
  )+
  
  theme_minimal(base_size = 13) +
  
  theme(
    plot.title = element_text(
      face = "bold",
      size = 15,
      hjust = 0
    ),
    
    plot.subtitle = element_text(
      size = 12,
      colour = "grey30",
      margin = margin(b = 10)
    ),
    
    legend.position = "right",
    
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    
    plot.caption = element_text(
      size = 10,
      colour = "grey30",
      face = "italic",
      hjust = 0
    )
  )

# CHART 5
# Who pays the price of gambling?
chart5 <- density %>%
  transmute(
    region = Region,
    disadvantage = `DIS Score`,
    expenditure_per_adult = `per Adult 2024`,
    egm_density = `per 1,000 Adults 2024`
  ) %>%
  filter(
    !is.na(region),
    !is.na(disadvantage),
    !is.na(expenditure_per_adult),
    !is.na(egm_density)
  ) %>%
  mutate(
    disadvantage_group = ntile(disadvantage, 5),
    disadvantage_group = factor(
      disadvantage_group,
      levels = 1:5,
      labels = c(
        "Most disadvantaged",
        "Disadvantaged",
        "Average",
        "Advantaged",
        "Most advantaged"
      )
    )
  )

chart5_summary <- chart5 %>%
  group_by(disadvantage_group, region) %>%
  summarise(
    avg_expenditure = mean(expenditure_per_adult, na.rm = TRUE),
    avg_egm = mean(egm_density, na.rm = TRUE),
    .groups = "drop"
  )

#Plotting Chart 5
ggplot(
  chart5_summary,
  aes(
    x = region,
    y = disadvantage_group,
    fill = avg_expenditure
  )
) +
  
  geom_tile(
    colour = "white",
    linewidth = 1.5
  ) +
  
  geom_text(
    aes(
      label = paste0(
        "$", round(avg_expenditure),
        "\n",
        round(avg_egm, 1),
        " EGMs"
      )
    ),
    colour = "black",
    fontface = "bold",
    lineheight = 0.9,
    size = 4
  ) +
  
  scale_fill_gradientn(
    colours = c(
      "#f2f0f7",
      "#cbc9e2",
      "#9e9ac8",
      "#6a51a3"
    ),
    name = "Losses ($)",
    labels = scales::dollar_format()
  ) +
  
  labs(
    title = "Who pays the price of gambling?",
    subtitle = "Gambling losses and EGM density are highest in disadvantaged Metro communities",
    x = NULL,
    y = NULL,
    caption = "Source: Victorian Gambling and Casino Control Commission (2024)\nSEIFA disadvantage score used to classify LGAs. Lower scores indicate greater disadvantage."
  ) +
  
  theme_minimal(base_size = 13) +
  
  theme(
    plot.title = element_text(
      hjust = 0.5,
      face = "bold",
      size = 14,
      margin = margin(b = 6)
    ),
    
    plot.subtitle = element_text(
      hjust = 0.5,
      size = 11,
      colour = "grey30",
      margin = margin(b = 12)
    ),
    
    axis.text = element_text(
      size = 10,
      face = "bold"
    ),
    
    axis.title = element_blank(),
    
    panel.grid = element_blank(),
    
    legend.position = "right",
    
    plot.caption = element_text(
      size = 9,
      colour = "grey35",
      face = "italic",
      hjust = 0,
      margin = margin(t = 12)
    ),
    
    plot.margin = margin(
      t = 10,
      r = 20,
      b = 35,
      l = 10
    )
  )