# Load required libraries
library(readxl)
library(dplyr)
library(ggplot2)
library(plotly)
library(tidyr)
library(scales)

Data Preprocessing:

# Load table 2 data
energy_table2 <- read_excel("Data/46040DO0002.xlsx", sheet = "Table 2", skip = 14, n_max = 15, col_names = FALSE)

# Extract only required columns
energy_table2 <- energy_table2[, c(1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 
                                                 19, 21, 22, 23, 24, 25, 26, 27, 28, 29)]

# Rename column names
colnames(energy_table2) <- c("Year", "Black_coal_PJ", "Black_coal_m", "Brown_coal_PJ", "Brown_coal_m", 
                                    "Crude_oil_PJ", "Crude_oil_m", "Condensate_PJ", "Condensate_m", "LPG_PJ", "LPG_m",
                                    "Natural_gas_PJ","Natural_gas_m", "Uranium_PJ", "Uranium_m","Remaining_Black_coal",
                                    "Remaining_Brown_coal", "Remaining_Crude_oil", "Remaining_Condensate",
                                    "Remaining_LPG","Remaining_Natural_gas","Remaining_Uranium")

# Convert Year from date to numeric
energy_table2$Year <- as.numeric(format(energy_table2$Year, "%Y"))
# Load table 1 data
energy_table1 <- read_excel("Data/46040DO0001.xlsx", sheet = "Table 1.1", skip = 14, n_max = 14, col_names = FALSE)
# Extract only required columns
energy_table1 <- energy_table1[, c(1, 14, 15, 16, 29)]

# Rename column names
colnames(energy_table1) <- c("Year", "Hydro_PJ", "Solar_PJ", "Wind_PJ", "Self_sufficiency_percent")

# Convert Year from date to numeric
energy_table1$Year <- as.numeric(format(energy_table1$Year, "%Y"))
# Load table 4 data
energy_table4 <- read_excel("Data/46040DO0004.xlsx", sheet = "Table 4", skip = 4, n_max = 72, col_names = TRUE)

# Extract only required columns
energy_table4 <- energy_table4[, c("Product", "Industry_name", "2023-24")]

# Rename column names
colnames(energy_table4) <- c("Product", "Industry", "Physical_use_PJ_2024")

# Filter out unwanted rows and convert Physical_use_PJ_2024 to numeric
energy_table4 <- energy_table4 %>%
  filter(Physical_use_PJ_2024 != "-",
         Industry != "Total - All industries",
         Industry != "Commercial and services (a)") %>%
  mutate(Physical_use_PJ_2024 = as.numeric(Physical_use_PJ_2024))

Chart 1: Remaining resource life by fuel type (2025)

# Prepare data
chart1_data <- energy_table2 %>%
  filter(Year == 2025) %>%
  select(Remaining_Black_coal, Remaining_Brown_coal, 
         Remaining_Crude_oil, Remaining_Condensate, 
         Remaining_LPG, Remaining_Natural_gas, Remaining_Uranium) %>%
  pivot_longer(everything(), names_to = "Fuel", values_to = "Years") %>%
  mutate(Fuel = gsub("Remaining_", "", Fuel)) %>%
  mutate(Fuel = gsub("_", " ", Fuel)) %>%
  arrange(Years)

# Create chart
chart1 <- ggplot(chart1_data, aes(x = reorder(Fuel, Years), y = Years, fill = Fuel)) + 
  geom_col() +
  coord_flip(clip = "off") +
  labs(title = "How long will Australia's energy resources last?",
       x = "Fuel Type", y = "Years remaining") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"), legend.position = "none")

ggplotly(chart1, tooltip = c("x", "y"))

Chart 2: Self-sufficiency vs total remaining stock (2011-2024)

# Prepare data
chart2_data <- energy_table1 %>%
  select(Year, Self_sufficiency_percent) %>%
  left_join(energy_table2 %>%
    mutate(Total_Stock_PJ = Black_coal_PJ + Brown_coal_PJ + Crude_oil_PJ + 
             Condensate_PJ + LPG_PJ + Natural_gas_PJ + Uranium_PJ) %>%
    select(Year, Total_Stock_PJ), by = "Year") %>%
  filter(between(Year, 2011, 2024)) %>%
  pivot_longer(c(Self_sufficiency_percent, Total_Stock_PJ), names_to = "Metric", values_to = "Value") %>%
  mutate(Metric = recode(Metric, "Self_sufficiency_percent" = "Self-sufficiency (%)", 
                         "Total_Stock_PJ" = "Total stock (million PJ)"),
         Value = ifelse(Metric == "Total stock (million PJ)", Value / 1e6, Value))


# Create chart
chart2 <- ggplot(chart2_data, aes(x = Year, y = Value)) +
  geom_line(colour = "#0072B2", linewidth = 1.2) +
  geom_point(colour = "#0072B2", size = 2) +
  facet_wrap(~Metric, scales = "free_y", ncol = 1) +
  scale_x_continuous(breaks = seq(2011, 2024, by = 1)) +
  labs(title = "Australia's energy self-sufficiency remains high despite falling reserves", x = "Year", y = NULL) +
  theme_minimal(base_size = 12) +
  theme(strip.text = element_text(face = "bold"), 
        plot.title = element_text(face = "bold", hjust = 0.48, size = 12),
        panel.grid.minor = element_blank())

ggplotly(chart2, tooltip = c("x", "y"))

Chart 3: Fuel value vs remaining lifespan

# Prepare data
data_2025 <- energy_table2 %>% filter(Year == 2025)

chart3_data <- tibble(
  Fuel = c("Black coal", "Brown coal", "Crude oil", "Condensate", 
           "LPG", "Natural gas", "Uranium"),
  Years   = c(data_2025$Remaining_Black_coal,
              data_2025$Remaining_Brown_coal,
              data_2025$Remaining_Crude_oil,
              data_2025$Remaining_Condensate,
              data_2025$Remaining_LPG,
              data_2025$Remaining_Natural_gas,
              data_2025$Remaining_Uranium),
  Value_B = c(data_2025$Black_coal_m,
              data_2025$Brown_coal_m,
              data_2025$Crude_oil_m,
              data_2025$Condensate_m,
              data_2025$LPG_m,
              data_2025$Natural_gas_m,
              data_2025$Uranium_m) / 1000
)

# Create chart
chart3 <- ggplot(chart3_data, aes(x = Years, y = Value_B, colour = Fuel)) +
  geom_point(size = 4, alpha = 0.75) +
  scale_x_log10(limits = c(20, max(chart3_data$Years) * 1.2), 
                breaks = c(25, 50, 100, 200, 500, 1000, 2000)) +
  scale_y_continuous(limits = c(0, max(chart3_data$Value_B) * 1.2), 
                     labels = label_number(suffix = "B")) +
  labs(title = "The fuel paradox: Short lifespan, high value", 
       x = "Years remaining (log scale)", 
       y = "Economic value ($ billion)", colour = "Fuel") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "right", plot.title = element_text(face = "bold"))

ggplotly(chart3, tooltip = c("Fuel", "Years", "Value_B"))

Chart 4: Industry fossil fuel consumption

# Prepare data
chart4_data <- energy_table4 %>%
  filter(!Product %in% c("Electricity (b)")) %>%
  mutate(Product = case_when(
    Product == "Brown coal (b)" ~ "Brown coal", 
    Product == "Natural gas (b)" ~ "Natural gas",
    Product == "Crude oil, condensates and other petroleum products" ~ "Crude oil & condensates",
    TRUE ~ Product))

# Create heatmap
chart4 <- ggplot(chart4_data, aes(x = Product, y = Industry, fill = Physical_use_PJ_2024)) +
  geom_tile() +
  geom_text(aes(label = round(Physical_use_PJ_2024, 0)), size = 3) +
  scale_fill_gradient(low = "lightyellow", high = "darkred", 
                      name = "PJ",
                      labels = scales::label_number()) +
  labs(title = "Which industries consume the most fossil fuel energy?", x = NULL, y = NULL,) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10), 
        axis.text.y = element_text(size = 10), 
        plot.title = element_text(face = "bold", hjust = 0.9), 
        legend.position = "right", 
        panel.grid = element_blank())

ggplotly(chart4, tooltip = c("x", "y", "fill"))

Chart 5: Renewable energy growth (2011-2024)

# Prepare data
chart5_data <- energy_table1 %>%
  select(Year, Solar_PJ, Wind_PJ, Hydro_PJ) %>%
  mutate(
    Hydro_cum = Hydro_PJ,
    Wind_cum = Hydro_PJ + Wind_PJ,
    Solar_cum = Hydro_PJ + Wind_PJ + Solar_PJ
  )

# Create chart
chart5 <- ggplot() +
  geom_ribbon(aes(x = Year, ymin = 0, ymax = Hydro_cum), 
              data = chart5_data, fill = "#1B9E77", alpha = 0.6) + 
  geom_ribbon(aes(x = Year, ymin = Hydro_cum, ymax = Wind_cum), 
              data = chart5_data, fill = "#7570B3", alpha = 0.6) + 
  geom_ribbon(aes(x = Year, ymin = Wind_cum, ymax = Solar_cum), 
              data = chart5_data, fill = "#D95F02", alpha = 0.6) + 
  geom_line(aes(x = Year, y = Hydro_cum, colour = "Hydro", 
                text = paste("Year:", Year, "<br>Hydro:", Hydro_PJ, "PJ")), data = chart5_data, size = 1.1) +
  geom_line(aes(x = Year, y = Wind_cum, colour = "Wind", 
                text = paste("Year:", Year, "<br>Wind:", Wind_PJ, "PJ")), data = chart5_data, size = 1.1) + 
  geom_line(aes(x = Year, y = Solar_cum, colour = "Solar", 
                text = paste("Year:", Year, "<br>Solar:", Solar_PJ, "PJ")), data = chart5_data, size = 1.1) + 
  scale_colour_manual(values = c("Hydro" = "#1B9E77", "Wind" = "#7570B3", "Solar" = "#D95F02")) + 
  scale_x_continuous(breaks = seq(2011, 2024, by = 1)) + 
  labs(title = "Renewable energy is rapidly expanding", x = "Year", 
       y = "Cumulative energy supply (PJ)", colour = "Source") + 
  theme_minimal() + 
  theme(legend.position = "right", plot.title = element_text(face = "bold"))

ggplotly(chart5, tooltip = "text")