Pull data and sites from the water quality portal
library(dataRetrieval) # pulls data from usgs/nwis/water quality portal
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.8 v dplyr 1.0.9
## v tidyr 1.2.0 v stringr 1.4.1
## v readr 2.1.2 v forcats 0.5.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(leaflet)
library(htmltools)
library(DT)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(writexl)
# Pull sites based on county, site type (stream), and parameter (e coli and Total coliform)
sites <- readWQPsummary(countycode=c("US:31:029", # NE - Chase
"US:31:057", # NE - Dundy
"US:31:087", # NE - Hitchcock
"US:20:023", # KS - Cheyenne
"US:20:153", # KS - Rawlins
"US:08:063", # CO - Kit Carson
"US:08:073", # CO - Lincoln
"US:08:121", # CO - Washington
"US:08:125"), # CO - Yuma
siteType="Stream",
CharacteristicName=c("Escherichia coli","Total Coliform"))
data_ecoli <- readWQPdata(countycode=c("US:31:029", # NE - Chase
"US:31:057", # NE - Dundy
"US:31:087", # NE - Hitchcock
"US:20:023", # KS - Cheyenne
"US:20:153", # KS - Rawlins
"US:08:063", # CO - Kit Carson
"US:08:073", # CO - Lincoln
"US:08:121", # CO - Washington
"US:08:125"), # CO - Yuma
siteType="Stream",
CharacteristicName=c("Escherichia coli"))
# remove sites outside ws
data_ecoli <- subset(data_ecoli, !(MonitoringLocationIdentifier=="21NEB001_WQX-SRE3FRENC114"|
MonitoringLocationIdentifier=="21KAN001_WQX-SPB353"|
MonitoringLocationIdentifier=="21COL001-5054"))
data_ecoli<- select(data_ecoli, OrganizationIdentifier, OrganizationFormalName, ActivityStartDate, ActivityStartTime.Time, ActivityStartTime.TimeZoneCode, MonitoringLocationIdentifier,ActivityCommentText,SampleCollectionMethod.MethodName,SampleCollectionEquipmentName,ResultDetectionConditionText,CharacteristicName,ResultSampleFractionText,ResultMeasureValue,ResultMeasure.MeasureUnitCode,MeasureQualifierCode,ResultValueTypeName,ResultCommentText,ResultAnalyticalMethod.MethodIdentifier,ResultAnalyticalMethod.MethodIdentifierContext,ResultAnalyticalMethod.MethodName,LaboratoryName,ResultLaboratoryCommentText,DetectionQuantitationLimitTypeName,DetectionQuantitationLimitMeasure.MeasureValue,DetectionQuantitationLimitMeasure.MeasureUnitCode)
site_join <- select (sites,MonitoringLocationIdentifier,MonitoringLocationName,MonitoringLocationUrl,CountyName,StateName,MonitoringLocationLatitude,MonitoringLocationLongitude)
data_ecoli <- left_join(data_ecoli, site_join, by="MonitoringLocationIdentifier")
data_coliform <- readWQPdata(countycode=c("US:31:029", # NE - Chase
"US:31:057", # NE - Dundy
"US:31:087", # NE - Hitchcock
"US:20:023", # KS - Cheyenne
"US:20:153", # KS - Rawlins
"US:08:063", # CO - Kit Carson
"US:08:073", # CO - Lincoln
"US:08:121", # CO - Washington
"US:08:125"), # CO - Yuma
siteType="Stream",
CharacteristicName=c("Total Coliform"))
write_xlsx(data_ecoli,"ecoli_data_20221003.xlsx")
Table of Summary Statistics
data_ecoli <-separate(data_ecoli, ActivityStartDate, into = c('year', 'month', 'day'),remove = FALSE) %>% select(-day,-month)
data_ecoli$year <-as.numeric(data_ecoli$year)
ecoli_sum <- data_ecoli %>%
group_by(MonitoringLocationIdentifier,MonitoringLocationName) %>%
summarise(mean = round(mean(ResultMeasureValue, na.rm = TRUE),2),
median = median(ResultMeasureValue, na.rm = TRUE),
min_val = min(ResultMeasureValue, na.rm=TRUE),
max_val = max(ResultMeasureValue,na.rm = TRUE),
min_yr = min(year),
max_yr = max(year),
n_total= length(ResultMeasureValue),
n_NAs = sum(is.na(ResultMeasureValue))) %>%
as.data.frame()
## `summarise()` has grouped output by 'MonitoringLocationIdentifier'. You can
## override using the `.groups` argument.
ecoli_count <- data_ecoli %>%
group_by(year, MonitoringLocationIdentifier,MonitoringLocationName) %>%
summarise(n_total= length(ResultMeasureValue))%>%
as.data.frame()
## `summarise()` has grouped output by 'year', 'MonitoringLocationIdentifier'. You
## can override using the `.groups` argument.
ecoli_count <- spread(ecoli_count,year,n_total)
ecoli_sum <- left_join(ecoli_sum,ecoli_count)
## Joining, by = c("MonitoringLocationIdentifier", "MonitoringLocationName")
ecoli_sum <- ecoli_sum[order(ecoli_sum$MonitoringLocationName), ]
datatable(ecoli_sum)
write_xlsx(ecoli_sum,"ecoli_summary_20221003.xlsx")
Map of Sites
# remove sites outside ws
sites <- subset(sites, !(MonitoringLocationIdentifier=="21NEB001_WQX-SRE3FRENC114"|
MonitoringLocationIdentifier=="21KAN001_WQX-SPB353"|
MonitoringLocationIdentifier=="21COL001-5054"))
# prep data for map
sites$MonitoringLocationLongitude <- as.numeric(sites$MonitoringLocationLongitude)
sites$MonitoringLocationLatitude <- as.numeric(sites$MonitoringLocationLatitude)
sites_ecoli <- subset(sites, CharacteristicName == "Escherichia coli")
sites_ecoli <- distinct(sites_ecoli,MonitoringLocationIdentifier, .keep_all=TRUE)
sites_ecoli <- left_join(sites_ecoli, ecoli_sum)
## Joining, by = c("MonitoringLocationIdentifier", "MonitoringLocationName")
# prep hover labels for map
labels <- paste(
"<strong>", sites_ecoli$MonitoringLocationName,
"<br>", sites_ecoli$MonitoringLocationIdentifier,
"<br>","Sample Count: ", sites_ecoli$n_total,
"<br>","Median : ", sites_ecoli$median, "</strong> (#/100 ml)") %>%
lapply(htmltools::HTML)
pal <- colorNumeric(c("light yellow","gold","orange", "red"), 1:400)
# create map
map <- leaflet() %>%
addProviderTiles("OpenStreetMap",
group = "street") %>%
addProviderTiles("Esri.WorldImagery",
group = "imagery") %>%
addLayersControl(baseGroups = c("street", "imagery"),
options = layersControlOptions(collapsed = FALSE),
overlayGroups=c("sample count","median"))%>%
hideGroup("median")%>%
addCircleMarkers(data = sites_ecoli,
lng= ~MonitoringLocationLongitude,
lat=~MonitoringLocationLatitude,
label = ~labels,
color = "black",
opacity= 1,
fillColor = ~pal(n_total ),
fillOpacity = 1,
group = "sample count") %>%
addCircleMarkers(data = sites_ecoli,
lng= ~MonitoringLocationLongitude,
lat=~MonitoringLocationLatitude,
label = ~labels,
color = "black",
opacity= 1,
fillColor = ~pal(median),
fillOpacity = 1,
group = "median")
## Warning in pal(median): Some values were outside the color scale and will be
## treated as NA
## Warning in pal(n_total): Some values were outside the color scale and will be
## treated as NA
map
Plots, static
data_ecoli <- data_ecoli[order(data_ecoli$MonitoringLocationName), ]
# list of values to loop over
# site codes
codes = unique(data_ecoli$MonitoringLocationIdentifier)
# site names
site_codes<- distinct(data_ecoli, MonitoringLocationName, MonitoringLocationIdentifier, .keep_all = FALSE)
names <- site_codes$MonitoringLocationName
plot_list = list()
var_list <- list(
MonitoringLocationIdentifier = codes,
MonitoringLocationName = names
)
for (i in 1:length(var_list$MonitoringLocationIdentifier))
{
temp_plot = ggplot(data= subset(data_ecoli, MonitoringLocationIdentifier == var_list$MonitoringLocationIdentifier[i])) +
geom_point(size=3, aes(x=ActivityStartDate, y=ResultMeasureValue )) +
ggtitle(var_list$MonitoringLocationIdentifier[i], subtitle = var_list$MonitoringLocationName[i]) +
labs(y="Result (# / 100 ml)", x="Sample date") +
scale_x_date(date_breaks = "1 year", minor_breaks = "1 month", date_labels = "%Y")
plot_list[[i]] = temp_plot
}
# view plots in R
#plot_list
# view in pdf
#pdf("ecoli_plots_20221003.pdf")
#for (i in 1:21) {
# print(plot_list[[i]])
#}
#dev.off()
Plots, interactive
plt <- htmltools::tagList()
for (i in 1:length(var_list$MonitoringLocationIdentifier))
{
temp_plot = ggplot(data= subset(data_ecoli, MonitoringLocationIdentifier == var_list$MonitoringLocationIdentifier[i])) +
geom_point(size=3, aes(x=ActivityStartDate, y=ResultMeasureValue )) +
ggtitle(var_list$MonitoringLocationName[i]) +
labs(y="Result (# / 100 ml)", x="Sample date") +
scale_x_date(date_breaks = "1 year", minor_breaks = "1 month", date_labels = "%Y")
# Print an interactive plot
# Add to list
plt[[i]] <- as_widget(ggplotly(temp_plot))
i <- i + 1
}
plt