1. Summary

This document summarizes historical hotspot data collected by MyRWA at storm water outfalls in Arlington, MA.

2. Assemble Data

First, load water quality data, narrow to municipality, and summarize bacteria results.

library(RODBC)
library(plyr)
library(dplyr)
library(lubridate)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scales)
library(gridExtra)
library(pander)


setwd("\\\\psf/Home/Dropbox/MysticDB")
source("./Rcode/Sandbox/Jeff/load_wq.R")
wq <- load_wq()
## Loading wq database: \\psf\Home\Dropbox\MysticDB\MysticDB_20150227.accdb
## Fetching tables...done
## Merging tables...done
## Excluding field blanks, duplicates...done
## Excluding flagged results...done
source('./Rcode/Sandbox/Jeff/load_precip.R')
precip <- load_precip()
## Loading precip file: \\psf\Home\Dropbox\MysticDB\Processed\Precip\LoganPrecip.xlsx
wq2 <- append_weather(wq,precip)
## Computing antecedent precip...done
## Computing DateHour column in wq dataframe...done
## Merging wq and precip...done
municipality <- "Arlington"   #FILL IN MUNICPALITY HERE

df <- wq2 %>% 
  select(WaterBodyID, LocationID, LocationTypeID, Latitude, Longitude, 
         VisitID, Datetime, CharacteristicID, Units, ResultValue, 
         ProjectID, SampleTypeID, MunicipalityID, Weather) %>%
  filter(ProjectID == "HOTSPOT", SampleTypeID == "S", 
         CharacteristicID == "ECOLI", LocationTypeID == "27", 
         MunicipalityID == municipality)

# aggregate by location, summarize on median and n, order by median, and join to table with location information, using nested dplyr commands
df_dplyr <- df %>%
  group_by(LocationID) %>%
  summarize(MedianECOLI = median(ResultValue, na.rm = TRUE), n = n()) %>%
  arrange(desc(MedianECOLI)) %>%
  left_join(df, by = "LocationID")

# select columns of interest, and get rid of duplicate rows from join
df_dplyr <- df_dplyr %>%
  select(LocationID, MedianECOLI, n, WaterBodyID, LocationTypeID, Latitude, Longitude, 
         ProjectID, SampleTypeID, MunicipalityID) 

df_dplyr <- unique(df_dplyr)

3. Barplots

df6 <- df_dplyr %>% mutate(color = "color")
for (i in seq(1,nrow(df6), by = 1)) {
  if (df6[i,2] < 235) {
    df6[i,11] <- "blue"
  }
  if (df6[i,2] > 235 & df6[i,2] < 1260) {
    df6[i,11] <- "orange"
  }
  if (df6[i,2] > 1260 & df6[i,2] < 3600) {
    df6[i,11] <- "red"
  }
  if (df6[i,2] > 3600) {
    df6[i,11] <- "darkred"
  }
}

coord <- barplot(df6$MedianECOLI, col = df6$color, border = df6$color, 
                 cex.axis = 1, 
                 axes = TRUE, space = 1, ylim=c(-1999,30000))
text(coord, seq(-350,-350,length = nrow(df)), 
     srt = 60, adj= 1, xpd = TRUE,
     labels = df6$LocationID, cex=.7)
text(coord, df6$MedianECOLI + 500, df6$n, cex =.6)
abline(h = 235, col = "orange")
abline(h = 1260, col = "red")
abline(h = 3600, col = "darkred")

3. Boxplots

bacteria_type <- 'ECOLI'

# data frame containing standards and labels for bacteria
bac_standards <- data.frame(CharacteristicID=c('ECOLI', 'ENT'),
                            BoatingStandard=c(1260, 350),
                            SwimmingStandard=c(235, 104),
                            BacteriaLabel=c('E. coli', 'Enterococcus'))

# create bacteria label for plots and tables
bac_label <- filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel
if (bac_label=='Enterococcus') bac_label <- 'Entero.'

wq3 <- left_join(wq2, bac_standards) %>%
  mutate(ExceedanceLevel=ifelse(ResultValue>=BoatingStandard,
                                'high',
                                ifelse(ResultValue>=SwimmingStandard,
                                       'med', 'low')),
         ExceedanceLevel=ordered(ExceedanceLevel, levels=c('low', 'med', 'high')))
## Joining by: "CharacteristicID"
## Warning: joining factors with different levels, coercing to character
## vector
 # filter(MedianEcoli>200960)

  
#   
# wq3 <- subset(wq3, LocationID %in% c('MALMR2','MALMR0','MALMR0E','MALMR0W',
#                                      'MALMR12.2','MALMR8', 'MALTL9', 'MALUIN',
#                                      'MALMR4.1', 'MALLC1')) 

wq_summ <- wq3 %>%
 group_by(LocationID) %>%
  summarise(MedianEcoli = median(ResultValue, na.rm = TRUE)) %>%
  arrange(desc(MedianEcoli))

wq.historical <- wq3 %>%
  filter(CharacteristicID==bacteria_type, 
                        MunicipalityID== municipality,
                        LocationTypeID==27) %>%
  left_join(wq_summ, by ="LocationID")


wq.historical.count <- group_by(wq.historical, LocationID) %>%
  summarize(N=n())

p.box <- wq.historical %>%
  ggplot(aes(LocationID, ResultValue)) +
  geom_boxplot(fill='grey90') +
  geom_point(mapping=aes(color=Weather), size=4) +
  geom_point(data=wq.historical, pch=1, size=4, color='black') +
  geom_hline(aes(yintercept=BoatingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='red', size=1) +
  geom_hline(aes(yintercept=SwimmingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='orange') +
  geom_text(aes(x=1, y=BoatingStandard,
                label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  geom_text(aes(x=1, y=SwimmingStandard,
                label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'),
       x='Site ID') +
  scale_y_log10(breaks=10^seq(0, 7), labels=comma) +
  scale_color_manual('Weather', values=c('Dry'='cadetblue2', 'Wet'='deepskyblue4'), drop=FALSE) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL))
  theme_set(theme_bw())

# get ymax for placing sample counts
ymax <- 10^ggplot_build(p.box)$panel$ranges[[1]]$y.range[2]

p.box <- p.box + 
  geom_text(mapping=aes(x=LocationID, y=ymax, label=paste0('(',N,')')), 
            data=wq.historical.count, size=4)

p.box

5. Table

df_table <- df_dplyr %>%
  select(LocationID, MedianECOLI, n, WaterBodyID)

pander(df_table)
LocationID MedianECOLI n WaterBodyID
ARL024 81640 1 Mill Brook
ARL048 46110 1 Mill Brook
ARL113 14390 1 Mill Brook
ARLSPRT2 7078 8 Spy Pond
ARL137 5830 2 Mill Brook
ARL115 4813 1 Mill Brook
ARLX11 4185 1 Alewife Brook
ARL117 2203 10 Mill Brook
ARL011 1700 8 Alewife Brook
ARL106 1549 7 Mill Brook
ARL013 1390 10 Alewife Brook
ARL044 1106 4 Mill Brook
ARL007 1027 1 Alewife Brook
ARL043 844.5 10 Lower Mystic Lake
ARL078 663 1 Mill Brook
ARL088 588.5 2 Mill Brook
ARL029 568 5 Alewife Brook
ARLRECRR 556 1 Alewife Brook
ARL122 540 1 Mill Brook
ARL119 409 9 Mill Brook
ARL120 375 4 Mill Brook
ARL026 326.5 6 Alewife Brook
ARLSWMP 228 1 Alewife Brook
ARL014 198 9 Alewife Brook
ARL091 131 5 Mill Brook
ARL016 98 2 Spy Pond
ARL133 74 1 Spy Pond
ARL136 64 2 Alewife Brook
ARL107 63.5 2 Mill Brook
ARL017 51.5 6 Alewife Brook
ARL083 48 3 Mill Brook
ARL061 41.5 2 Mill Brook
ARL085 34 2 Mill Brook
ARL010 31 1 Alewife Brook
ARL066 29 1 Hill’s Pond
ARL027 12 2 Alewife Brook
ARLSPP3 8 1 Spy Pond