This document summarizes historical hotspot data collected by MyRWA at storm water outfalls in Arlington, MA.
First, load water quality data, narrow to municipality, and summarize bacteria results.
library(RODBC)
library(plyr)
library(dplyr)
library(lubridate)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scales)
library(gridExtra)
library(pander)
setwd("\\\\psf/Home/Dropbox/MysticDB")
source("./Rcode/Sandbox/Jeff/load_wq.R")
wq <- load_wq()
## Loading wq database: \\psf\Home\Dropbox\MysticDB\MysticDB_20150227.accdb
## Fetching tables...done
## Merging tables...done
## Excluding field blanks, duplicates...done
## Excluding flagged results...done
source('./Rcode/Sandbox/Jeff/load_precip.R')
precip <- load_precip()
## Loading precip file: \\psf\Home\Dropbox\MysticDB\Processed\Precip\LoganPrecip.xlsx
wq2 <- append_weather(wq,precip)
## Computing antecedent precip...done
## Computing DateHour column in wq dataframe...done
## Merging wq and precip...done
municipality <- "Arlington" #FILL IN MUNICPALITY HERE
df <- wq2 %>%
select(WaterBodyID, LocationID, LocationTypeID, Latitude, Longitude,
VisitID, Datetime, CharacteristicID, Units, ResultValue,
ProjectID, SampleTypeID, MunicipalityID, Weather) %>%
filter(ProjectID == "HOTSPOT", SampleTypeID == "S",
CharacteristicID == "ECOLI", LocationTypeID == "27",
MunicipalityID == municipality)
# aggregate by location, summarize on median and n, order by median, and join to table with location information, using nested dplyr commands
df_dplyr <- df %>%
group_by(LocationID) %>%
summarize(MedianECOLI = median(ResultValue, na.rm = TRUE), n = n()) %>%
arrange(desc(MedianECOLI)) %>%
left_join(df, by = "LocationID")
# select columns of interest, and get rid of duplicate rows from join
df_dplyr <- df_dplyr %>%
select(LocationID, MedianECOLI, n, WaterBodyID, LocationTypeID, Latitude, Longitude,
ProjectID, SampleTypeID, MunicipalityID)
df_dplyr <- unique(df_dplyr)
df6 <- df_dplyr %>% mutate(color = "color")
for (i in seq(1,nrow(df6), by = 1)) {
if (df6[i,2] < 235) {
df6[i,11] <- "blue"
}
if (df6[i,2] > 235 & df6[i,2] < 1260) {
df6[i,11] <- "orange"
}
if (df6[i,2] > 1260 & df6[i,2] < 3600) {
df6[i,11] <- "red"
}
if (df6[i,2] > 3600) {
df6[i,11] <- "darkred"
}
}
coord <- barplot(df6$MedianECOLI, col = df6$color, border = df6$color,
cex.axis = 1,
axes = TRUE, space = 1, ylim=c(-1999,30000))
text(coord, seq(-350,-350,length = nrow(df)),
srt = 60, adj= 1, xpd = TRUE,
labels = df6$LocationID, cex=.7)
text(coord, df6$MedianECOLI + 500, df6$n, cex =.6)
abline(h = 235, col = "orange")
abline(h = 1260, col = "red")
abline(h = 3600, col = "darkred")
bacteria_type <- 'ECOLI'
# data frame containing standards and labels for bacteria
bac_standards <- data.frame(CharacteristicID=c('ECOLI', 'ENT'),
BoatingStandard=c(1260, 350),
SwimmingStandard=c(235, 104),
BacteriaLabel=c('E. coli', 'Enterococcus'))
# create bacteria label for plots and tables
bac_label <- filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel
if (bac_label=='Enterococcus') bac_label <- 'Entero.'
wq3 <- left_join(wq2, bac_standards) %>%
mutate(ExceedanceLevel=ifelse(ResultValue>=BoatingStandard,
'high',
ifelse(ResultValue>=SwimmingStandard,
'med', 'low')),
ExceedanceLevel=ordered(ExceedanceLevel, levels=c('low', 'med', 'high')))
## Joining by: "CharacteristicID"
## Warning: joining factors with different levels, coercing to character
## vector
# filter(MedianEcoli>200960)
#
# wq3 <- subset(wq3, LocationID %in% c('MALMR2','MALMR0','MALMR0E','MALMR0W',
# 'MALMR12.2','MALMR8', 'MALTL9', 'MALUIN',
# 'MALMR4.1', 'MALLC1'))
wq_summ <- wq3 %>%
group_by(LocationID) %>%
summarise(MedianEcoli = median(ResultValue, na.rm = TRUE)) %>%
arrange(desc(MedianEcoli))
wq.historical <- wq3 %>%
filter(CharacteristicID==bacteria_type,
MunicipalityID== municipality,
LocationTypeID==27) %>%
left_join(wq_summ, by ="LocationID")
wq.historical.count <- group_by(wq.historical, LocationID) %>%
summarize(N=n())
p.box <- wq.historical %>%
ggplot(aes(LocationID, ResultValue)) +
geom_boxplot(fill='grey90') +
geom_point(mapping=aes(color=Weather), size=4) +
geom_point(data=wq.historical, pch=1, size=4, color='black') +
geom_hline(aes(yintercept=BoatingStandard),
data=filter(bac_standards, CharacteristicID==bacteria_type),
color='red', size=1) +
geom_hline(aes(yintercept=SwimmingStandard),
data=filter(bac_standards, CharacteristicID==bacteria_type),
color='orange') +
geom_text(aes(x=1, y=BoatingStandard,
label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')),
data=filter(bac_standards, CharacteristicID==bacteria_type),
hjust=0, vjust=1.1, alpha=0.8, size=3) +
geom_text(aes(x=1, y=SwimmingStandard,
label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')),
data=filter(bac_standards, CharacteristicID==bacteria_type),
hjust=0, vjust=1.1, alpha=0.8, size=3) +
labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'),
x='Site ID') +
scale_y_log10(breaks=10^seq(0, 7), labels=comma) +
scale_color_manual('Weather', values=c('Dry'='cadetblue2', 'Wet'='deepskyblue4'), drop=FALSE) +
theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL))
theme_set(theme_bw())
# get ymax for placing sample counts
ymax <- 10^ggplot_build(p.box)$panel$ranges[[1]]$y.range[2]
p.box <- p.box +
geom_text(mapping=aes(x=LocationID, y=ymax, label=paste0('(',N,')')),
data=wq.historical.count, size=4)
p.box
df_table <- df_dplyr %>%
select(LocationID, MedianECOLI, n, WaterBodyID)
pander(df_table)
| LocationID | MedianECOLI | n | WaterBodyID |
|---|---|---|---|
| ARL024 | 81640 | 1 | Mill Brook |
| ARL048 | 46110 | 1 | Mill Brook |
| ARL113 | 14390 | 1 | Mill Brook |
| ARLSPRT2 | 7078 | 8 | Spy Pond |
| ARL137 | 5830 | 2 | Mill Brook |
| ARL115 | 4813 | 1 | Mill Brook |
| ARLX11 | 4185 | 1 | Alewife Brook |
| ARL117 | 2203 | 10 | Mill Brook |
| ARL011 | 1700 | 8 | Alewife Brook |
| ARL106 | 1549 | 7 | Mill Brook |
| ARL013 | 1390 | 10 | Alewife Brook |
| ARL044 | 1106 | 4 | Mill Brook |
| ARL007 | 1027 | 1 | Alewife Brook |
| ARL043 | 844.5 | 10 | Lower Mystic Lake |
| ARL078 | 663 | 1 | Mill Brook |
| ARL088 | 588.5 | 2 | Mill Brook |
| ARL029 | 568 | 5 | Alewife Brook |
| ARLRECRR | 556 | 1 | Alewife Brook |
| ARL122 | 540 | 1 | Mill Brook |
| ARL119 | 409 | 9 | Mill Brook |
| ARL120 | 375 | 4 | Mill Brook |
| ARL026 | 326.5 | 6 | Alewife Brook |
| ARLSWMP | 228 | 1 | Alewife Brook |
| ARL014 | 198 | 9 | Alewife Brook |
| ARL091 | 131 | 5 | Mill Brook |
| ARL016 | 98 | 2 | Spy Pond |
| ARL133 | 74 | 1 | Spy Pond |
| ARL136 | 64 | 2 | Alewife Brook |
| ARL107 | 63.5 | 2 | Mill Brook |
| ARL017 | 51.5 | 6 | Alewife Brook |
| ARL083 | 48 | 3 | Mill Brook |
| ARL061 | 41.5 | 2 | Mill Brook |
| ARL085 | 34 | 2 | Mill Brook |
| ARL010 | 31 | 1 | Alewife Brook |
| ARL066 | 29 | 1 | Hill’s Pond |
| ARL027 | 12 | 2 | Alewife Brook |
| ARLSPP3 | 8 | 1 | Spy Pond |