df <- read_csv("full_panel.csv")
#view dataframe
str(df)
#need to convert data to date data type
emissions_full <- df %>%
#keep only one row per date + settlement period
distinct(Date, SettlementPeriod, .keep_all = TRUE) %>%
#convert to date data type
mutate(Date = mdy(Date),
half_hour = SettlementPeriod - 1,
DATETIME = as.POSIXct(Date) + minutes(30 * half_hour),
intensity = emissions_intensity_kg_per_MW)
#view dataframe again
str(emissions_full)top100 <- emissions_full %>%
arrange(desc(intensity)) %>%
slice_head(n = 100)
top100 %>%
select(Date, InitialDemandOutturn, intensity) %>%
head() %>%
kable()| Date | InitialDemandOutturn | intensity |
|---|---|---|
| 2024-12-13 | 40591 | 483.2759 |
| 2025-01-20 | 41955 | 477.0998 |
| 2024-12-12 | 41269 | 473.7999 |
| 2025-10-13 | 34143 | 473.5352 |
| 2024-11-05 | 37691 | 466.1016 |
| 2024-12-11 | 42509 | 465.3683 |
ggplot(emissions_full, aes(DATETIME, intensity))+
geom_line()+
geom_point(data = top100, aes(DATETIME,intensity), color = "red")+
scale_x_datetime(date_breaks = "2 month",
date_labels = "%m-%Y")+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
labs(title = "Grid Emissions Intensity Over Time",
subtitle = "Red points = 100 most emissions intensive settlement periods",
y = 'kg CO2 per MWh',
x = 'date')+
theme_bw()
The most emissions-intensive settlement periods appear to be
concentrated between September and May.
hourly_intensity <- emissions_full %>%
mutate(hour = hour(DATETIME)) %>%
group_by(hour) %>%
summarize(avg_intensity = mean(intensity, na.rm = TRUE))
#what hours have the maximum average emissions intensity?
max_hr_emissions <- hourly_intensity %>%
slice_max(avg_intensity, n = 1)
max_hr <- max_hr_emissions %>% pull(hour)
#plot
hourly_intensity %>%
ggplot (aes(hour, avg_intensity))+
geom_line()+
geom_vline (xintercept = max_hr, linetype ="dashed")+
labs(title = "Average emissions intensity by hour of day",
y = 'kg CO2 per MWh')+
theme_bw()
Hour 17 has the greatest emissions, at 172.45 kgCO2 per MWh. (Note:
need to figure out what time zone this is in…)
We can also look at the three hours with the greatest emissions:
| hour | avg_intensity |
|---|---|
| 17 | 172.4518 |
| 16 | 146.8558 |
| 19 | 138.8560 |
monthly_intensity <- emissions_full %>%
mutate(month = month(DATETIME, label = TRUE)) %>%
group_by(month) %>%
summarize(avg_intensity = mean(intensity, na.rm = TRUE))
#let's compute the overall mean
overall_avg <- mean(monthly_intensity$avg_intensity)
overall_avg## [1] 116.959
#add a flag for above/below average
monthly_intensity <- monthly_intensity %>%
mutate(
status = ifelse(avg_intensity > overall_avg, "Above Average", "Below Average")
)
#plot
ggplot(monthly_intensity, aes(x = month, y = avg_intensity, fill = status)) +
geom_col(width = 0.7) +
geom_hline(yintercept = overall_avg, linetype = "dashed", color = "red", linewidth = 1) +
scale_fill_manual(values = c("Above Average" = "orange",
"Below Average" = "steelblue")) +
labs(
title = "Average Emissions Intensity by Month",
y = "kg COâ‚‚ per MWh",
x = "Month",
fill = "Intensity vs Annual Avg"
) +
theme_minimal(base_size = 14)hour_month_intensity <- emissions_full %>%
mutate(hour = hour(DATETIME),
month = month(DATETIME, label = TRUE)) %>%
group_by(month, hour) %>%
summarise(avg_intensity = mean(intensity, na.rm = TRUE))
#plot
ggplot(hour_month_intensity, aes(hour, avg_intensity, color = month)) +
geom_line() +
labs(title = "Hourly emissions intensity across months",
y = "kg COâ‚‚ per MWh")#plot
ggplot(top100_mix, aes(x = DATETIME, y = gen_MW, fill = tech_group)) +
geom_area(alpha = 0.9, colour = "white", linewidth = 0.2) +
labs(
title = "Generation mix during 100 most carbon-intensive periods",
subtitle = "Stacked by major fuel type",
x = "Datetime",
y = "Generation (MW)",
fill = "Technology group"
) +
scale_x_datetime(date_breaks = "1 month", date_labels = "%b\n%Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
theme_minimal(base_size = 13) +
theme(
legend.position = "bottom",
legend.title = element_text(face = "bold")
)