library(tidyverse)
library(here)
library(ggbeeswarm)
library(RColorBrewer)
# preset graph theme
theme_set(theme_classic())
plotsites <- read_csv(here("data", "sites.csv"))

Introduction

main story: more impervious + more slope, with rain, tends to mean higher runoff and erosion.
audience: public works (stormwater) staff prioritizing inspections and reporting.

## Extra code, preprocessing ---
# # Impervious surface vs runoff risk
# plotsites %>% 
#   ggplot(aes(x = impervious_pct, y = runoff_risk)) + 
#   geom_point() +
#   geom_smooth()
# # records by lc type
# plotsites %>%
#   group_by(land_cover) %>%
#   summarise(obs = n())
# # jitter
# plotsites %>% 
#   ggplot(aes(x = impervious_pct, y = runoff_risk)) + 
#   geom_jitter()
# # runoff risk by landcover
# plotsites %>%
#   na.omit() %>%
#   ggplot(aes(x = land_cover, y = erosion_score, color = land_cover))+
#   geom_jitter() +
#   coord_flip()
# # facet wrap
# plotsites %>%
#   na.omit() %>%
#   ggplot(aes(x = slope_deg, y = erosion_score, colour = land_cover)) +
#   geom_jitter() +
#   facet_wrap(~ land_cover)
# # filter lower erosion, facet
# plotsites %>%
#   na.omit() %>%
#   filter(erosion_score < 90) %>%
#   ggplot(aes(x = slope_deg, y = erosion_score, colour = land_cover)) +
#   geom_jitter() +
#   facet_wrap(~ land_cover)
# # by type

# plot residential and industrial sites with an erosion score <60, x axis set as slope of terrain at site. 
plotsites %>%
  na.omit() %>%
  filter(erosion_score < 60) %>%
  filter(land_cover %in% c("residential", "industrial")) %>%
  ggplot(aes(x = slope_deg, y = erosion_score, colour = land_cover)) +
  geom_jitter() +
  # create separate plots for residential and industrial land use
  facet_wrap(~ land_cover)

ggsave(here("output", "erosion_res_ind.png"))
## Saving 7 x 5 in image

Erosion risk vs slope (degrees), industrial v residential. Lower slope in industrial areas has a higher erosion risk than residential areas with a higher slope.

## Extra code, preprocessing ---
# # boxes and violins
# plotsites %>%
#   na.omit() %>%
#   ggplot(aes(x = land_cover, y = erosion_score)) +
#   geom_boxplot() +
#   coord_flip()
# plotsites %>%
#   na.omit() %>%
#   ggplot(aes(x = land_cover, y = runoff_risk)) +
#   geom_violin()
# # histogram
# hist(plotsites$slope_deg)
# plotsites %>%  
#   na.omit() %>%
#   filter(land_cover == "residential",
#          rainfall_mm_72h > 10,
#          erosion_score > 0) %>%
#   ggplot(aes(x = erosion_score)) +
#   geom_histogram(binwidth = 5)

# combo plot, points on same plot

# plot sites with an erosion score >0, where x axis = land cover and y axis = erosion score. 
plotsites %>%
  na.omit() %>%
  filter(erosion_score > 0) %>%
  ggplot(aes(x = land_cover, y = erosion_score)) +
  geom_boxplot() +
  # display graph colors based on rainfall volume
  geom_point(aes(colour = rainfall_mm_72h)) +
  # flip x and y axis for better visualisation
  coord_flip()

# plot residential sites, where x axis = % impervious surface and y axis = erosion score. 
plotsites %>%
  na.omit() %>%
  filter(land_cover == "residential") %>%
  ggplot(aes(x = impervious_pct, y = erosion_score)) +
  # Create violin plot
  geom_violin() +
  # Color graph based on runoff risk.
  geom_quasirandom(aes(colour = runoff_risk > 0.6))

Erosion scores are lower in residential and open space land covers compared to industrial and commercial use land.

## Extra code, preprocessing ---
# # bar and column plots
# # geombar for count obs/frequency
# plotsites %>%
#   na.omit() %>%
#   ggplot(aes(x = land_cover)) +
#   geom_bar() +
#   facet_wrap(~ (runoff_risk))
# # geomcol for summary stat
# plotsites %>%
#   na.omit() %>%
#   ggplot(aes(x = land_cover, y = erosion_score)) +
#   geom_col()
# # summarise, check what geomcol is plotting
# plotsites %>%
#   na.omit() %>%
#   group_by(land_cover) %>%
#   summarise(total_erosion = sum(erosion_score))
# plot mean erosion based on land cover and runoff risk, where x axis = land cover and y axis = mean erosion.
plotsites %>%
  na.omit() %>%
  group_by(land_cover, runoff_risk) %>%
  summarise(mean_erosion = mean(erosion_score)) %>%
  ggplot(aes(x = land_cover, y = mean_erosion)) +
  geom_col() +
  # create separate plots based on runoff risk
  facet_wrap(~ runoff_risk)
## `summarise()` has grouped output by 'land_cover'. You can override using the
## `.groups` argument.

# error bars

## Extra code, preprocessing ---
# plotsites %>%
#   na.omit() %>%
#   group_by(land_cover) %>%
#   summarise(mean_erosion = mean(erosion_score)) %>%
#   ggplot(aes(x = land_cover, y = mean_erosion)) +
#   geom_col() +
#   facet_wrap(~ land_cover)
# # error bars
# plotsites %>%
#   na.omit() %>%
#   group_by(land_cover) %>%
#   summarise(mean = mean(erosion_score),
#             sd = sd(erosion_score),
#             n = n(),
#             stderr = sd / sqrt(n)) %>%
#   ggplot(aes(x = land_cover, y = mean)) +
#   geom_col() +
#   coord_flip() +
#   geom_errorbar(aes(x = land_cover, ymin = mean - stderr, ymax = mean + stderr))
#correlation/scatter plots - rainfall vs erosion
# plot rainfall based on erosion score and color graph by slope.
plotsites %>%
  na.omit() %>%
  filter(erosion_score > 60) %>%
  ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Generally, a higher slope means a higher erosion score. Higher rainfall means higher erosion score but only until approx 45 mm rain, then the score goes down.

## Extra code, preprocessing ---
# get rid of grey background and grid lines
# plotsites %>%
#   na.omit() %>%
#   filter(erosion_score > 50) %>%
#   ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
#   geom_point() +
#   geom_smooth() +
#   theme_classic()
# # change color to reflect low = blue, high = red
# plotsites %>%
#   na.omit() %>%
#   filter(erosion_score > 50) %>%
#   ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
#   geom_point() +
#   geom_smooth() +
#   theme_classic() +
#   scale_color_gradient(low = "blue", high = "red")
# # Use palette for color setting
# display.brewer.all()
# plotsites %>%
#   na.omit() %>%
#   filter(erosion_score > 60) %>%
#   ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
#   geom_point() +
#   geom_smooth() +
#   theme_classic() +
#   scale_color_distiller(palette = "RdYlBu")
# # titles and labels
# display.brewer.all()
# plot erosion scores >50 with x axis as rainfall, y axis as erosion score, graph color as slope
plotsites %>%
  na.omit() %>%
  filter(erosion_score > 50) %>%
  ggplot(aes(x = rainfall_mm_72h, y = erosion_score, color = slope_deg)) +
  geom_point() +
  geom_smooth() +
  theme_classic() +
  scale_color_distiller(palette = "RdYlBu") +
  labs(title = "Erosion score as a function of rainfall and slope",
       subtitle = "Sites with erosion > 60",
       caption = "data: City of Bloomington stormwater inspection sites",
       x = "Rainfall (mm, last 72h)",
       y = "Erosion score (0–100)")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Higher rainfall means higher erosion scores. Slope appears to have little determining effect on erosion scores or relation to rainfall amount.