1 Load libraries

library(tidyverse)
library(lubridate)
library(knitr)
library(ggplot2)

2 Load data

df <- read_csv("full_panel.csv")

#view dataframe
str(df)

#need to convert data to date data type 
emissions_full <- df %>%
  #keep only one row per date + settlement period
  distinct(Date, SettlementPeriod, .keep_all = TRUE) %>% 
  
  #convert to date data type
  mutate(Date = mdy(Date),
         half_hour = SettlementPeriod - 1,
         DATETIME = as.POSIXct(Date) + minutes(30 * half_hour),
         intensity = emissions_intensity_kg_per_MW)

#view dataframe again
str(emissions_full)

2.1 Quick snapshot of intensity:

summary(emissions_full$intensity)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   61.96  105.30  114.72  155.29  483.28       2

3 Top 100 most carbon-intensive periods

top100 <- emissions_full %>%
  arrange(desc(intensity)) %>%
  slice_head(n = 100)

top100 %>% 
  select(Date, InitialDemandOutturn, intensity) %>% 
  head() %>% 
  kable()
Date InitialDemandOutturn intensity
2024-12-13 40591 483.2759
2025-01-20 41955 477.0998
2024-12-12 41269 473.7999
2025-10-13 34143 473.5352
2024-11-05 37691 466.1016
2024-12-11 42509 465.3683

4 Visualizing the highest intensity periods over the year

ggplot(emissions_full, aes(DATETIME, intensity))+
  geom_line()+
  geom_point(data = top100, aes(DATETIME,intensity), color = "red")+
  scale_x_datetime(date_breaks = "2 month",
                   date_labels = "%m-%Y")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(title = "Grid Emissions Intensity Over Time",
       subtitle = "Red points = 100 most emissions intensive settlement periods",
       y = 'kg CO2 per MWh',
       x = 'date')+
  theme_bw()


The most emissions-intensive settlement periods appear to be concentrated between September and May.

5 Identify patterns by time of day (across the year)

hourly_intensity <- emissions_full %>% 
  mutate(hour = hour(DATETIME)) %>% 
  group_by(hour) %>% 
  summarize(avg_intensity = mean(intensity, na.rm = TRUE)) 

#what hours have the maximum average emissions intensity?
max_hr_emissions <- hourly_intensity %>% 
  slice_max(avg_intensity, n = 1) 

max_hr <- max_hr_emissions %>% pull(hour)
  
#plot
hourly_intensity %>% 
  ggplot (aes(hour, avg_intensity))+
  geom_line()+
  geom_vline (xintercept = max_hr, linetype ="dashed")+
  labs(title = "Average emissions intensity by hour of day",
       y = 'kg CO2 per MWh')+
  theme_bw()

Hour 17 has the greatest emissions, at 172.45 kgCO2 per MWh. (Note: need to figure out what time zone this is in…)

We can also look at the three hours with the greatest emissions:

kable(hourly_intensity %>% slice_max(avg_intensity, n = 3))
hour avg_intensity
17 172.4518
16 146.8558
19 138.8560

6 Now, we can also look into patterns by month or season

monthly_intensity <- emissions_full %>% 
  mutate(month = month(DATETIME, label = TRUE)) %>% 
  group_by(month) %>% 
  summarize(avg_intensity = mean(intensity, na.rm = TRUE)) 
  
#let's compute the overall mean
overall_avg <- mean(monthly_intensity$avg_intensity)
overall_avg
## [1] 116.959
#add a flag for above/below average
monthly_intensity <- monthly_intensity %>%
  mutate(
    status = ifelse(avg_intensity > overall_avg, "Above Average", "Below Average")
  )

#plot
ggplot(monthly_intensity, aes(x = month, y = avg_intensity, fill = status)) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = overall_avg, linetype = "dashed", color = "red", linewidth = 1) +
  scale_fill_manual(values = c("Above Average" = "orange", 
                               "Below Average" = "steelblue")) +
  labs(
    title = "Average Emissions Intensity by Month",
    y = "kg COâ‚‚ per MWh",
    x = "Month",
    fill = "Intensity vs Annual Avg"
  ) +
  theme_minimal(base_size = 14)

7 Combine hour and month to find when the worst periods occur

hour_month_intensity <- emissions_full %>%
  mutate(hour = hour(DATETIME),
    month = month(DATETIME, label = TRUE)) %>%
  
  group_by(month, hour) %>%
  summarise(avg_intensity = mean(intensity, na.rm = TRUE))

#plot
ggplot(hour_month_intensity, aes(hour, avg_intensity, color = month)) +
  geom_line() +
  labs(title = "Hourly emissions intensity across months",
       y = "kg COâ‚‚ per MWh")

8 When does the mix cause high-intensity outcomes?

#plot
ggplot(top100_mix, aes(x = DATETIME, y = gen_MW, fill = tech_group)) +
  geom_area(alpha = 0.9, colour = "white", linewidth = 0.2) +
  labs(
    title    = "Generation mix during 100 most carbon-intensive periods",
    subtitle = "Stacked by major fuel type",
    x        = "Datetime",
    y        = "Generation (MW)",
    fill     = "Technology group"
  ) +
  scale_x_datetime(date_breaks = "1 month", date_labels = "%b\n%Y") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "bottom",
    legend.title    = element_text(face = "bold")
  )