Goal: to inform planning of 2015 hotspot surveys by looking at historical summaries of MyRWA hotspot data.
Questions:
What municipalities or classes of site or particular locations should we prioritize in 2015?
First, load water quality data.
setwd("\\\\psf/Home/Dropbox/MysticDB")
#setwd("C:/Users/Monitoring Director/Dropbox/MysticDB")
source('./Rcode/Sandbox/Jeff/load_wq.R')
wq <- load_wq()
source('./Rcode/Sandbox/Jeff/load_precip.R')
precip <- load_precip()
wq2 <- append_weather(wq,precip)
Narrow to Hotspot bacteria data, and create a subset of samples taken at outfalls only
# filter for hotspot data only
hot <- tbl_df(wq2) %>%
filter(ProjectID == "HOTSPOT") %>%
filter(CharacteristicID == "ECOLI" | CharacteristicID == "ENT")
# filter(LocationTypeID == 27)
# filter for outfalls only
hot_outfalls <- tbl_df(wq2) %>%
filter(ProjectID == "HOTSPOT") %>%
filter(CharacteristicID == "ECOLI" | CharacteristicID == "ENT") %>%
filter(LocationTypeID == 27)
# To calculate percentage of outfalls visited by town, need number of outfalls for which there are bacteria results, divided by total number of outfalls, sorted by town.
# First, start with hotspot results for outfalls, then find the number of unique outfalls associated with bacteria results, grouped by town
VisitedByTown <- hot_outfalls %>%
group_by(MunicipalityID, LocationID) %>%
summarize(n = n())
all_vis_town <- unique(VisitedByTown[,c("MunicipalityID", "LocationID")]) %>%
group_by(MunicipalityID) %>%
summarize(n = n())
#print(all_vis_town)
# For total outfalls by town, get Locations table, exported manually from the database, filter for type 27 (stormater outfall), relabel location ID field, and group by town
Location <- read.csv("//psf/Home/Dropbox/MysticDB/Rcode/Sandbox/Andy/data_scratch/Location.csv")
all_locs <- tbl_df(Location) %>%
filter(LocationTypeID == 27) %>%
select(LocationID = ID, MunicipalityID, LocationTypeID) %>%
group_by(MunicipalityID) %>%
summarize(n = n())
#print(all_locs)
#Join the two resulting tables (visited outfalls by town, all outfalls by town), relabel columns, and created a percent-sampled column by dividing Sampled by All
perc_sampled <- all_locs %>%
left_join(all_vis_town, by = "MunicipalityID") %>%
select(MunicipalityID, Total = n.x, Sampled = n.y) %>%
mutate(PercSampled = (Sampled / Total) * 100) %>%
arrange(desc(PercSampled))
## Warning: joining factors with different levels, coercing to character
## vector
pander(perc_sampled)
| MunicipalityID | Total | Sampled | PercSampled |
|---|---|---|---|
| Somerville | 29 | 29 | 100 |
| Winthrop | 5 | 5 | 100 |
| Everett | 6 | 5 | 83.33 |
| Malden | 35 | 26 | 74.29 |
| Melrose | 25 | 18 | 72 |
| Belmont | 23 | 13 | 56.52 |
| Chelsea | 84 | 47 | 55.95 |
| Revere | 49 | 24 | 48.98 |
| East Boston | 35 | 17 | 48.57 |
| Boston | 51 | 21 | 41.18 |
| Cambridge | 35 | 12 | 34.29 |
| Arlington | 130 | 37 | 28.46 |
| Winchester | 102 | 23 | 22.55 |
| Burlington | 28 | 5 | 17.86 |
| Medford | 237 | 33 | 13.92 |
| Stoneham | 54 | 7 | 12.96 |
| Woburn | 735 | 38 | 5.17 |
| Lexington | 79 | 4 | 5.063 |
| Reading | 15 | NA | NA |
Note: These are all hotspot samples (unlike summary above, which was outfalls), including stream samples.
I plan to use these and other tables in planning future surveys. Where have we not focussed in the last three years that would make sense to return to? [This will need more thought. But these results themselves are interesting to look through.]
three <- hot %>%
filter(Datetime > "2012-01-01")
byTown3yr <- three %>%
group_by(MunicipalityID) %>%
summarize(q90 = quantile(ResultValue, 0.9, na.rm = TRUE),
q75 = quantile(ResultValue, 0.75, na.rm = TRUE),
median = median(ResultValue, na.rm = TRUE), n = n()) %>%
arrange(desc(n), desc(q90))
Total number of samples in last three years = 322
| MunicipalityID | q90 | q75 | median | n |
|---|---|---|---|---|
| Medford | 34300 | 5654 | 767 | 53 |
| Malden | 51720 | 9678 | 1549 | 41 |
| Somerville | 6039 | 2975 | 1450 | 28 |
| Melrose | 12695 | 3537 | 744 | 27 |
| Belmont | 4816 | 2595 | 460.5 | 24 |
| Chelsea | 310 | 196.2 | 41.5 | 24 |
| Arlington | 14390 | 4813 | 1379 | 21 |
| Woburn | 169.3 | 111.5 | 34 | 20 |
| Winchester | 50910 | 2152 | 443.5 | 16 |
| Cambridge | 79846 | 22418 | 9472 | 14 |
| Everett | 225465 | 45113 | 1280 | 12 |
| Revere | 572.3 | 100.2 | 15 | 12 |
| Stoneham | 19909 | 7692 | 2671 | 10 |
| Lexington | 34264 | 1774 | 29.5 | 8 |
| NA | 3640 | 1828 | 270 | 7 |
| Boston | 106.5 | 101.2 | 92.5 | 2 |
| Winthrop | 28.9 | 25.75 | 20.5 | 2 |
| East Boston | 784 | 784 | 784 | 1 |
five <- hot %>%
filter(Datetime > "2010-01-01")
byTown5yr <- five %>%
group_by(MunicipalityID) %>%
summarize(q90 = quantile(ResultValue, 0.9, na.rm = TRUE),
q75 = quantile(ResultValue, 0.75, na.rm = TRUE),
median = median(ResultValue, na.rm = TRUE), n = n()) %>%
arrange(desc(n), desc(q90))
Total number of samples in last five years = 625
| MunicipalityID | q90 | q75 | median | n |
|---|---|---|---|---|
| Chelsea | 6867.0 | 2416.50 | 362.0 | 74 |
| Malden | 16366.0 | 3808.50 | 970.0 | 68 |
| Medford | 63490.0 | 7201.50 | 1159.5 | 64 |
| Somerville | 18876.0 | 4826.00 | 2069.0 | 59 |
| Arlington | 14138.0 | 2523.50 | 395.0 | 55 |
| Belmont | 7945.0 | 2456.50 | 526.0 | 51 |
| Melrose | 9128.8 | 3237.00 | 395.0 | 43 |
| Revere | 24060.0 | 833.00 | 120.0 | 37 |
| Winchester | 20486.0 | 2400.00 | 432.0 | 35 |
| Woburn | 372.0 | 196.00 | 64.0 | 35 |
| Cambridge | 35796.0 | 19945.00 | 3922.0 | 23 |
| Stoneham | 9774.6 | 5885.75 | 580.5 | 18 |
| Everett | 192475.0 | 33550.00 | 1280.0 | 14 |
| Lexington | 6932.0 | 312.50 | 34.0 | 11 |
| Boston | 41754.8 | 19863.00 | 5794.0 | 9 |
| East Boston | 30120.0 | 5040.00 | 1726.0 | 9 |
| Winthrop | 3991.6 | 1342.00 | 537.0 | 7 |
| NA | 3640.0 | 1828.00 | 270.0 | 7 |
| Burlington | 2120.0 | 1700.00 | 770.0 | 5 |
| Wakefield | 324.0 | 324.00 | 324.0 | 1 |
all <- hot
#filter(Datetime > "2012-01-01")
byTownAll <- all %>%
group_by(MunicipalityID) %>%
summarize(q90 = quantile(ResultValue, 0.9, na.rm = TRUE),
q75 = quantile(ResultValue, 0.75, na.rm = TRUE),
median = median(ResultValue, na.rm = TRUE), n = n()) %>%
arrange(desc(n), desc(q90))
Total number of samples in all years = 2089
| MunicipalityID | q90 | q75 | median | n |
|---|---|---|---|---|
| Arlington | 9678.0 | 1900.00 | 399.0 | 321 |
| Medford | 10792.0 | 1956.50 | 540.0 | 219 |
| Chelsea | 5055.8 | 1080.00 | 199.0 | 217 |
| Belmont | 8600.5 | 2098.50 | 356.0 | 180 |
| Winchester | 2418.0 | 1077.00 | 289.0 | 163 |
| Woburn | 1161.1 | 473.25 | 131.5 | 163 |
| Malden | 15531.0 | 7700.00 | 1050.0 | 153 |
| Somerville | 17765.0 | 3274.50 | 725.5 | 132 |
| Cambridge | 12200.0 | 2000.00 | 800.0 | 117 |
| Revere | 24380.2 | 2572.50 | 329.5 | 98 |
| East Boston | 9625.2 | 939.00 | 120.0 | 77 |
| Melrose | 10543.2 | 1649.50 | 593.0 | 75 |
| Boston | 9403.0 | 944.00 | 78.5 | 42 |
| Stoneham | 8118.3 | 1109.50 | 384.2 | 40 |
| Everett | 33736.0 | 6400.00 | 560.0 | 33 |
| Lexington | 6165.2 | 1709.50 | 138.0 | 27 |
| Winthrop | 1811.0 | 917.00 | 374.5 | 16 |
| Burlington | 2167.6 | 1792.00 | 936.0 | 8 |
| NA | 3640.0 | 1828.00 | 270.0 | 7 |
| Wakefield | 324.0 | 324.00 | 324.0 | 1 |