Municipal Hotspot Summary

1. Summary

This document summarizes historical hotspot data collected by MyRWA at storm water outfalls in Arlington, MA.

2. Assemble Data

First, load water quality data, narrow to municipality, and summarize bacteria results.

library(RODBC)
library(plyr)
library(dplyr)
library(lubridate)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scales)
library(gridExtra)
library(pander)


setwd("\\\\psf/Home/Dropbox/MysticDB")
source("./Rcode/Sandbox/Jeff/load_wq.R")
wq <- load_wq()

## Loading wq database: \\psf\Home\Dropbox\MysticDB\MysticDB_20150227.accdb
## Fetching tables...done
## Merging tables...done
## Excluding field blanks, duplicates...done
## Excluding flagged results...done

source('./Rcode/Sandbox/Jeff/load_precip.R')
precip <- load_precip()

## Loading precip file: \\psf\Home\Dropbox\MysticDB\Processed\Precip\LoganPrecip.xlsx

wq2 <- append_weather(wq,precip)

## Computing antecedent precip...done
## Computing DateHour column in wq dataframe...done
## Merging wq and precip...done

municipality <- "Arlington"   #FILL IN MUNICPALITY HERE

df <- wq2 %>% 
  select(WaterBodyID, LocationID, LocationTypeID, Latitude, Longitude, 
         VisitID, Datetime, CharacteristicID, Units, ResultValue, 
         ProjectID, SampleTypeID, MunicipalityID, Weather) %>%
  filter(ProjectID == "HOTSPOT", SampleTypeID == "S", 
         CharacteristicID == "ECOLI", LocationTypeID == "27", 
         MunicipalityID == municipality)

# aggregate by location, summarize on median and n, order by median, and join to table with location information, using nested dplyr commands
df_dplyr <- df %>%
  group_by(LocationID) %>%
  summarize(MedianECOLI = median(ResultValue, na.rm = TRUE), n = n()) %>%
  arrange(desc(MedianECOLI)) %>%
  left_join(df, by = "LocationID")

# select columns of interest, and get rid of duplicate rows from join
df_dplyr <- df_dplyr %>%
  select(LocationID, MedianECOLI, n, WaterBodyID, LocationTypeID, Latitude, Longitude, 
         ProjectID, SampleTypeID, MunicipalityID) 

df_dplyr <- unique(df_dplyr)

3. Barplots

df6 <- df_dplyr %>% mutate(color = "color")
for (i in seq(1,nrow(df6), by = 1)) {
  if (df6[i,2] < 235) {
    df6[i,11] <- "blue"
  }
  if (df6[i,2] > 235 & df6[i,2] < 1260) {
    df6[i,11] <- "orange"
  }
  if (df6[i,2] > 1260 & df6[i,2] < 3600) {
    df6[i,11] <- "red"
  }
  if (df6[i,2] > 3600) {
    df6[i,11] <- "darkred"
  }
}

coord <- barplot(df6$MedianECOLI, col = df6$color, border = df6$color, 
                 cex.axis = 1, 
                 axes = TRUE, space = 1, ylim=c(-1999,30000))
text(coord, seq(-350,-350,length = nrow(df)), 
     srt = 60, adj= 1, xpd = TRUE,
     labels = df6$LocationID, cex=.7)
text(coord, df6$MedianECOLI + 500, df6$n, cex =.6)
abline(h = 235, col = "orange")
abline(h = 1260, col = "red")
abline(h = 3600, col = "darkred")

3. Boxplots

bacteria_type <- 'ECOLI'

# data frame containing standards and labels for bacteria
bac_standards <- data.frame(CharacteristicID=c('ECOLI', 'ENT'),
                            BoatingStandard=c(1260, 350),
                            SwimmingStandard=c(235, 104),
                            BacteriaLabel=c('E. coli', 'Enterococcus'))

# create bacteria label for plots and tables
bac_label <- filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel
if (bac_label=='Enterococcus') bac_label <- 'Entero.'

wq3 <- left_join(wq2, bac_standards) %>%
  mutate(ExceedanceLevel=ifelse(ResultValue>=BoatingStandard,
                                'high',
                                ifelse(ResultValue>=SwimmingStandard,
                                       'med', 'low')),
         ExceedanceLevel=ordered(ExceedanceLevel, levels=c('low', 'med', 'high')))

## Joining by: "CharacteristicID"

## Warning: joining factors with different levels, coercing to character
## vector

 # filter(MedianEcoli>200960)

  
#   
# wq3 <- subset(wq3, LocationID %in% c('MALMR2','MALMR0','MALMR0E','MALMR0W',
#                                      'MALMR12.2','MALMR8', 'MALTL9', 'MALUIN',
#                                      'MALMR4.1', 'MALLC1')) 

wq_summ <- wq3 %>%
 group_by(LocationID) %>%
  summarise(MedianEcoli = median(ResultValue, na.rm = TRUE)) %>%
  arrange(desc(MedianEcoli))

wq.historical <- wq3 %>%
  filter(CharacteristicID==bacteria_type, 
                        MunicipalityID== municipality,
                        LocationTypeID==27) %>%
  left_join(wq_summ, by ="LocationID")


wq.historical.count <- group_by(wq.historical, LocationID) %>%
  summarize(N=n())

p.box <- wq.historical %>%
  ggplot(aes(LocationID, ResultValue)) +
  geom_boxplot(fill='grey90') +
  geom_point(mapping=aes(color=Weather), size=4) +
  geom_point(data=wq.historical, pch=1, size=4, color='black') +
  geom_hline(aes(yintercept=BoatingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='red', size=1) +
  geom_hline(aes(yintercept=SwimmingStandard), 
             data=filter(bac_standards, CharacteristicID==bacteria_type),
             color='orange') +
  geom_text(aes(x=1, y=BoatingStandard,
                label=paste0('Boating Standard: ', BoatingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  geom_text(aes(x=1, y=SwimmingStandard,
                label=paste0('Swimming Standard: ', SwimmingStandard, ' #/100ml')), 
            data=filter(bac_standards, CharacteristicID==bacteria_type), 
            hjust=0, vjust=1.1, alpha=0.8, size=3) +
  labs(y=paste0(filter(bac_standards, CharacteristicID==bacteria_type)$BacteriaLabel, ' #/100ml'),
       x='Site ID') +
  scale_y_log10(breaks=10^seq(0, 7), labels=comma) +
  scale_color_manual('Weather', values=c('Dry'='cadetblue2', 'Wet'='deepskyblue4'), drop=FALSE) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=NULL))
  theme_set(theme_bw())

# get ymax for placing sample counts
ymax <- 10^ggplot_build(p.box)$panel$ranges[[1]]$y.range[2]

p.box <- p.box + 
  geom_text(mapping=aes(x=LocationID, y=ymax, label=paste0('(',N,')')), 
            data=wq.historical.count, size=4)

p.box

5. Table

df_table <- df_dplyr %>%
  select(LocationID, MedianECOLI, n, WaterBodyID)

pander(df_table)

LocationID	MedianECOLI	n	WaterBodyID
ARL024	81640	1	Mill Brook
ARL048	46110	1	Mill Brook
ARL113	14390	1	Mill Brook
ARLSPRT2	7078	8	Spy Pond
ARL137	5830	2	Mill Brook
ARL115	4813	1	Mill Brook
ARLX11	4185	1	Alewife Brook
ARL117	2203	10	Mill Brook
ARL011	1700	8	Alewife Brook
ARL106	1549	7	Mill Brook
ARL013	1390	10	Alewife Brook
ARL044	1106	4	Mill Brook
ARL007	1027	1	Alewife Brook
ARL043	844.5	10	Lower Mystic Lake
ARL078	663	1	Mill Brook
ARL088	588.5	2	Mill Brook
ARL029	568	5	Alewife Brook
ARLRECRR	556	1	Alewife Brook
ARL122	540	1	Mill Brook
ARL119	409	9	Mill Brook
ARL120	375	4	Mill Brook
ARL026	326.5	6	Alewife Brook
ARLSWMP	228	1	Alewife Brook
ARL014	198	9	Alewife Brook
ARL091	131	5	Mill Brook
ARL016	98	2	Spy Pond
ARL133	74	1	Spy Pond
ARL136	64	2	Alewife Brook
ARL107	63.5	2	Mill Brook
ARL017	51.5	6	Alewife Brook
ARL083	48	3	Mill Brook
ARL061	41.5	2	Mill Brook
ARL085	34	2	Mill Brook
ARL010	31	1	Alewife Brook
ARL066	29	1	Hill’s Pond
ARL027	12	2	Alewife Brook
ARLSPP3	8	1	Spy Pond