# Load necessary libraries
pacman::p_load(pacman, ggplot2, dplyr, lubridate, sf, gridExtra, rnaturalearth, rnaturalearthdata)
# Read the raw spill records from disk
data <- read.csv("full_scottish_sewage_spills.csv")
# Parse both event timestamps as UTC date-times in a single pass; values that
# do not match the expected "YYYY-MM-DD HH:MM:SS" layout become NA
data[c("Overflow.Event.Start.Time", "Overflow.Event.End.Time")] <- lapply(
  data[c("Overflow.Event.Start.Time", "Overflow.Event.End.Time")],
  as.POSIXct,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)
# Basemap: medium-scale Natural Earth country polygons, narrowed to the
# "United Kingdom" entry (so the outline covers the whole UK, not Scotland alone)
world <- ne_countries(returnclass = "sf", scale = "medium")
scotland_map <- filter(world, admin == "United Kingdom")
# Keep only the records whose longitude and latitude are both present and finite
data <- filter(
  data,
  !is.na(Longitude) & is.finite(Longitude),
  !is.na(Latitude) & is.finite(Latitude)
)
# Build point geometries (EPSG:4326) from the cleaned coordinates;
# remove = FALSE keeps the Longitude/Latitude columns alongside the geometry
data_sf <- data %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# Fixed colour for each Source Type label, shared by every plot below
source_colors <- setNames(
  c("#00ff00", "#990000", "#000000", "#ff33cc"),
  c("Combined Sewage Overflow", "WWTW", "Storm Overflow", "Unknown Source Type")
)
# Derive calendar month and season from the overflow start time.
# month() returns 1-12, or NA where the start time could not be parsed.
start_month <- month(data$Overflow.Event.Start.Time)

# Build the month labels as plain character before creating the factor.
# Assigning "Unknown" directly into the factor returned by
# month(label = TRUE) is rejected as an invalid level and leaves those rows NA,
# so the "Unknown" category would never appear. Indexing month.abb also keeps
# the labels English regardless of the session locale, so they always match the
# levels below.
month_label <- month.abb[start_month]
month_label[is.na(month_label)] <- "Unknown"
# Calendar order, with "Unknown" placed last
data$Month <- factor(month_label, levels = c(month.abb, "Unknown"))

# Meteorological seasons; rows without a usable start time match no branch
# and therefore get NA
data$Season <- case_when(
  start_month %in% c(12, 1, 2) ~ "Winter",
  start_month %in% c(3, 4, 5) ~ "Spring",
  start_month %in% c(6, 7, 8) ~ "Summer",
  start_month %in% c(9, 10, 11) ~ "Autumn"
)

# Records with a known season (used by the seasonal frequency plot)
data_filtered <- data %>%
  filter(!is.na(Season))
# 1. Box plots of discharged volume for each month, split by Source Type
p1 <- data %>%
  ggplot(
    aes(
      x = Month,
      y = Volume.Discharged,
      colour = Source.Type,
      fill = Source.Type
    )
  ) +
  geom_boxplot() +
  # Thousands separators on the volume axis
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = source_colors) +
  scale_colour_manual(values = source_colors) +
  labs(
    title = "Monthly Volume Discharged by Source Type",
    x = "Month",
    y = "Volume Discharged (litres)"
  ) +
  theme(legend.position = "bottom")
# 2. Frequency of overflow events by area, stacked by Source Type
p2 <- data %>%
  # One row per Area / Source Type pair with its event count in `n`
  count(Area, Source.Type) %>%
  # Order areas by their TOTAL event count. reorder() summarises with mean()
  # by default, which would rank areas by the average count per source type
  # and disagree with the stacked bar lengths.
  ggplot(aes(x = reorder(Area, n, FUN = sum), y = n, fill = Source.Type)) +
  # geom_col() draws the precomputed counts as-is (stacked by default)
  geom_col() +
  # Horizontal bars so the area names stay readable
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = source_colors) +
  labs(
    title = "Frequency of Overflow Events by Area and Source Type",
    x = "Area",
    y = "Number of Events"
  ) +
  theme(legend.position = "bottom")
# 3. Scatter of discharged volume against event duration, coloured by Source Type
p3 <- ggplot(data) +
  aes(x = Duration.Hours, y = Volume.Discharged, colour = Source.Type) +
  # Semi-transparent points so overlapping events remain visible
  geom_point(alpha = 0.6) +
  scale_y_continuous(labels = scales::comma) +
  scale_colour_manual(values = source_colors) +
  labs(
    title = "Volume Discharged by Duration of Overflow Event by Source Type",
    x = "Duration (Hours)",
    y = "Volume Discharged (litres)"
  ) +
  theme(legend.position = "bottom")
# 4. Event frequency by season, with side-by-side bars per Source Type
p4 <- ggplot(
  data_filtered,
  aes(
    # Season is stored as character, which ggplot would sort alphabetically
    # (Autumn, Spring, Summer, Winter); fix the axis to chronological order
    x = factor(Season, levels = c("Winter", "Spring", "Summer", "Autumn")),
    fill = Source.Type
  )
) +
  # geom_bar() counts the rows in each Season / Source Type combination
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = source_colors) +
  labs(
    title = "Overflow Events by Season and Source Type",
    x = "Season",
    y = "Number of Events"
  ) +
  theme(legend.position = "bottom")
# Draw the four summary plots together on a 2 x 2 grid
grid.arrange(grobs = list(p1, p2, p3, p4), nrow = 2, ncol = 2)
# Recorded output from a previous run (emitted by the geom_point() layer of p3):
## Warning: Removed 184 rows containing missing values or values outside the scale range
## (`geom_point()`).

# 5. Map of overflow events: one point per event, coloured by Source Type and
# sized by discharged volume, drawn on top of the basemap polygons
ggplot() +
  geom_sf(data = scotland_map, colour = "#000000", fill = "#cccccc") +
  geom_sf(
    data = data_sf,
    mapping = aes(colour = Source.Type, size = Volume.Discharged),
    alpha = 0.7
  ) +
  scale_colour_manual(values = source_colors) +
  labs(
    title = "Map of Sewage Overflow Events in Scotland by Source Type",
    x = "Longitude",
    y = "Latitude",
    colour = "Source Type",
    size = "Volume Discharged (litres)"
  ) +
  theme(legend.position = "bottom")
