library(tidyverse)
library(here)
library(ggbeeswarm)
library(RColorBrewer)
# preset graph theme
theme_set(theme_classic())
plotsites <- read_csv(here("data", "sites.csv"))
main story: more impervious + more slope, with rain, tends to mean
higher runoff and erosion.
audience: public works (stormwater) staff prioritizing inspections and
reporting.
## Extra code, preprocessing ---
# # Impervious surface vs runoff risk
# plotsites %>%
# ggplot(aes(x = impervious_pct, y = runoff_risk)) +
# geom_point() +
# geom_smooth()
# # records by lc type
# plotsites %>%
# group_by(land_cover) %>%
# summarise(obs = n())
# # jitter
# plotsites %>%
# ggplot(aes(x = impervious_pct, y = runoff_risk)) +
# geom_jitter()
# # runoff risk by landcover
# plotsites %>%
# na.omit() %>%
# ggplot(aes(x = land_cover, y = erosion_score, color = land_cover))+
# geom_jitter() +
# coord_flip()
# # facet wrap
# plotsites %>%
# na.omit() %>%
# ggplot(aes(x = slope_deg, y = erosion_score, colour = land_cover)) +
# geom_jitter() +
# facet_wrap(~ land_cover)
# # filter lower erosion, facet
# plotsites %>%
# na.omit() %>%
# filter(erosion_score < 90) %>%
# ggplot(aes(x = slope_deg, y = erosion_score, colour = land_cover)) +
# geom_jitter() +
# facet_wrap(~ land_cover)
# # by type
# plot residential and industrial sites with an erosion score <60, x axis set as slope of terrain at site.
plotsites %>%
na.omit() %>%
filter(erosion_score < 60) %>%
filter(land_cover %in% c("residential", "industrial")) %>%
ggplot(aes(x = slope_deg, y = erosion_score, colour = land_cover)) +
geom_jitter() +
# create separate plots for residential and industrial land use
facet_wrap(~ land_cover)
ggsave(here("output", "erosion_res_ind.png"))
## Saving 7 x 5 in image
## Extra code, preprocessing ---
# # boxes and violins
# plotsites %>%
# na.omit() %>%
# ggplot(aes(x = land_cover, y = erosion_score)) +
# geom_boxplot() +
# coord_flip()
# plotsites %>%
# na.omit() %>%
# ggplot(aes(x = land_cover, y = runoff_risk)) +
# geom_violin()
# # histogram
# hist(plotsites$slope_deg)
# plotsites %>%
# na.omit() %>%
# filter(land_cover == "residential",
# rainfall_mm_72h > 10,
# erosion_score > 0) %>%
# ggplot(aes(x = erosion_score)) +
# geom_histogram(binwidth = 5)
# combo plot, points on same plot
# plot sites with an erosion score >0, where x axis = land cover and y axis = erosion score.
plotsites %>%
na.omit() %>%
filter(erosion_score > 0) %>%
ggplot(aes(x = land_cover, y = erosion_score)) +
geom_boxplot() +
# display graph colors based on rainfall volume
geom_point(aes(colour = rainfall_mm_72h)) +
# flip x and y axis for better visualisation
coord_flip()
# plot residential sites, where x axis = % impervious surface and y axis = erosion score.
plotsites %>%
na.omit() %>%
filter(land_cover == "residential") %>%
ggplot(aes(x = impervious_pct, y = erosion_score)) +
# Create violin plot
geom_violin() +
# Color graph based on runoff risk.
geom_quasirandom(aes(colour = runoff_risk > 0.6))
Erosion scores are lower in residential and open space land covers
compared to industrial and commercial use land.
## Extra code, preprocessing ---
# # bar and column plots
# # geombar for count obs/frequency
# plotsites %>%
# na.omit() %>%
# ggplot(aes(x = land_cover)) +
# geom_bar() +
# facet_wrap(~ (runoff_risk))
# # geomcol for summary stat
# plotsites %>%
# na.omit() %>%
# ggplot(aes(x = land_cover, y = erosion_score)) +
# geom_col()
# # summarise, check what geomcol is plotting
# plotsites %>%
# na.omit() %>%
# group_by(land_cover) %>%
# summarise(total_erosion = sum(erosion_score))
# plot mean erosion based on land cover and runoff risk, where x axis = land cover and y axis = mean erosion.
plotsites %>%
na.omit() %>%
group_by(land_cover, runoff_risk) %>%
summarise(mean_erosion = mean(erosion_score)) %>%
ggplot(aes(x = land_cover, y = mean_erosion)) +
geom_col() +
# create separate plots based on runoff risk
facet_wrap(~ runoff_risk)
## `summarise()` has grouped output by 'land_cover'. You can override using the
## `.groups` argument.
# error bars
## Extra code, preprocessing ---
# plotsites %>%
# na.omit() %>%
# group_by(land_cover) %>%
# summarise(mean_erosion = mean(erosion_score)) %>%
# ggplot(aes(x = land_cover, y = mean_erosion)) +
# geom_col() +
# facet_wrap(~ land_cover)
# # error bars
# plotsites %>%
# na.omit() %>%
# group_by(land_cover) %>%
# summarise(mean = mean(erosion_score),
# sd = sd(erosion_score),
# n = n(),
# stderr = sd / sqrt(n)) %>%
# ggplot(aes(x = land_cover, y = mean)) +
# geom_col() +
# coord_flip() +
# geom_errorbar(aes(x = land_cover, ymin = mean - stderr, ymax = mean + stderr))
#correlation/scatter plots - rainfall vs erosion
# plot rainfall based on erosion score and color graph by slope.
plotsites %>%
na.omit() %>%
filter(erosion_score > 60) %>%
ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Generally, a higher slope means a higher erosion score. Higher rainfall
means higher erosion score but only until approx 45 mm rain, then the
score goes down.
## Extra code, preprocessing ---
# get rid of grey background and grid lines
# plotsites %>%
# na.omit() %>%
# filter(erosion_score > 50) %>%
# ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
# geom_point() +
# geom_smooth() +
# theme_classic()
# # change color to reflect low = blue, high = red
# plotsites %>%
# na.omit() %>%
# filter(erosion_score > 50) %>%
# ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
# geom_point() +
# geom_smooth() +
# theme_classic() +
# scale_color_gradient(low = "blue", high = "red")
# # Use palette for color setting
# display.brewer.all()
# plotsites %>%
# na.omit() %>%
# filter(erosion_score > 60) %>%
# ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
# geom_point() +
# geom_smooth() +
# theme_classic() +
# scale_color_distiller(palette = "RdYlBu")
# # titles and labels
# display.brewer.all()
# plot erosion scores >50 with x axis as rainfall, y axis as erosion score, graph color as slope
plotsites %>%
na.omit() %>%
filter(erosion_score > 50) %>%
ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
geom_point() +
geom_smooth() +
theme_classic() +
scale_color_distiller(palette = "RdYlBu") +
labs(title = "Erosion score as a function of rainfall and slope",
subtitle = "Sites with erosion > 60",
caption = "data: City of Bloomington stormwater inspection sites",
x = "Rainfall (mm, last 72h)",
y = "Erosion score (0–100)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Higher rainfall means higher erosion scores. Slope appears to have little determining effect on erosion scores or relation to rainfall amount.