Pull data and sites from the water quality portal

library(dataRetrieval) # pulls data from usgs/nwis/water quality portal
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.8     v dplyr   1.0.9
## v tidyr   1.2.0     v stringr 1.4.1
## v readr   2.1.2     v forcats 0.5.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(leaflet)
library(htmltools)
library(DT)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(writexl)

# Pull sites based on county, site type (stream), and parameter (e coli and Total coliform)
sites <- readWQPsummary(countycode=c("US:31:029", # NE - Chase 
                                     "US:31:057", # NE - Dundy
                                     "US:31:087", # NE - Hitchcock 
                                     "US:20:023", # KS - Cheyenne
                                     "US:20:153", # KS - Rawlins
                                     "US:08:063", # CO - Kit Carson
                                     "US:08:073", # CO - Lincoln
                                     "US:08:121", # CO - Washington
                                     "US:08:125"), # CO - Yuma
                        siteType="Stream",
                       CharacteristicName=c("Escherichia coli","Total Coliform"))



data_ecoli <- readWQPdata(countycode=c("US:31:029", # NE - Chase 
                                     "US:31:057", # NE - Dundy
                                     "US:31:087", # NE - Hitchcock 
                                     "US:20:023", # KS - Cheyenne
                                     "US:20:153", # KS - Rawlins
                                     "US:08:063", # CO - Kit Carson
                                     "US:08:073", # CO - Lincoln
                                     "US:08:121", # CO - Washington
                                     "US:08:125"), # CO - Yuma
                        siteType="Stream",
                        CharacteristicName=c("Escherichia coli"))

# remove sites outside ws
data_ecoli <- subset(data_ecoli, !(MonitoringLocationIdentifier=="21NEB001_WQX-SRE3FRENC114"|
                            MonitoringLocationIdentifier=="21KAN001_WQX-SPB353"|
                            MonitoringLocationIdentifier=="21COL001-5054"))

data_ecoli<- select(data_ecoli, OrganizationIdentifier, OrganizationFormalName, ActivityStartDate, ActivityStartTime.Time, ActivityStartTime.TimeZoneCode, MonitoringLocationIdentifier,ActivityCommentText,SampleCollectionMethod.MethodName,SampleCollectionEquipmentName,ResultDetectionConditionText,CharacteristicName,ResultSampleFractionText,ResultMeasureValue,ResultMeasure.MeasureUnitCode,MeasureQualifierCode,ResultValueTypeName,ResultCommentText,ResultAnalyticalMethod.MethodIdentifier,ResultAnalyticalMethod.MethodIdentifierContext,ResultAnalyticalMethod.MethodName,LaboratoryName,ResultLaboratoryCommentText,DetectionQuantitationLimitTypeName,DetectionQuantitationLimitMeasure.MeasureValue,DetectionQuantitationLimitMeasure.MeasureUnitCode)

site_join <- select (sites,MonitoringLocationIdentifier,MonitoringLocationName,MonitoringLocationUrl,CountyName,StateName,MonitoringLocationLatitude,MonitoringLocationLongitude)

data_ecoli <- left_join(data_ecoli, site_join, by="MonitoringLocationIdentifier")

data_coliform <- readWQPdata(countycode=c("US:31:029", # NE - Chase 
                                       "US:31:057", # NE - Dundy
                                       "US:31:087", # NE - Hitchcock 
                                       "US:20:023", # KS - Cheyenne
                                       "US:20:153", # KS - Rawlins
                                       "US:08:063", # CO - Kit Carson
                                       "US:08:073", # CO - Lincoln
                                       "US:08:121", # CO - Washington
                                       "US:08:125"), # CO - Yuma
                          siteType="Stream",
                          CharacteristicName=c("Total Coliform"))

write_xlsx(data_ecoli,"ecoli_data_20221003.xlsx")

Table of Summary Statistics

data_ecoli <-separate(data_ecoli, ActivityStartDate, into = c('year', 'month', 'day'),remove = FALSE) %>% select(-day,-month)
data_ecoli$year <-as.numeric(data_ecoli$year)


ecoli_sum <- data_ecoli %>%               
  group_by(MonitoringLocationIdentifier,MonitoringLocationName) %>%
  summarise(mean = round(mean(ResultMeasureValue, na.rm = TRUE),2),
            median = median(ResultMeasureValue, na.rm = TRUE),
            min_val = min(ResultMeasureValue, na.rm=TRUE),
            max_val = max(ResultMeasureValue,na.rm = TRUE),
            min_yr = min(year),
            max_yr = max(year), 
            n_total= length(ResultMeasureValue),
            n_NAs = sum(is.na(ResultMeasureValue))) %>% 
  as.data.frame()
## `summarise()` has grouped output by 'MonitoringLocationIdentifier'. You can
## override using the `.groups` argument.
ecoli_count <- data_ecoli %>%
  group_by(year, MonitoringLocationIdentifier,MonitoringLocationName) %>%
  summarise(n_total= length(ResultMeasureValue))%>%
  as.data.frame()
## `summarise()` has grouped output by 'year', 'MonitoringLocationIdentifier'. You
## can override using the `.groups` argument.
ecoli_count <- spread(ecoli_count,year,n_total)

ecoli_sum <- left_join(ecoli_sum,ecoli_count)
## Joining, by = c("MonitoringLocationIdentifier", "MonitoringLocationName")
ecoli_sum <- ecoli_sum[order(ecoli_sum$MonitoringLocationName), ]

datatable(ecoli_sum)
write_xlsx(ecoli_sum,"ecoli_summary_20221003.xlsx")

Map of Sites

# remove sites outside ws
sites <- subset(sites, !(MonitoringLocationIdentifier=="21NEB001_WQX-SRE3FRENC114"|
                            MonitoringLocationIdentifier=="21KAN001_WQX-SPB353"|
                            MonitoringLocationIdentifier=="21COL001-5054"))


# prep data for map
sites$MonitoringLocationLongitude <- as.numeric(sites$MonitoringLocationLongitude)
sites$MonitoringLocationLatitude <- as.numeric(sites$MonitoringLocationLatitude)

sites_ecoli <- subset(sites, CharacteristicName == "Escherichia coli")
sites_ecoli <- distinct(sites_ecoli,MonitoringLocationIdentifier, .keep_all=TRUE)


sites_ecoli <- left_join(sites_ecoli, ecoli_sum)
## Joining, by = c("MonitoringLocationIdentifier", "MonitoringLocationName")
# prep hover labels for map
labels <- paste(
  "<strong>", sites_ecoli$MonitoringLocationName,
  "<br>", sites_ecoli$MonitoringLocationIdentifier,
  "<br>","Sample Count: ", sites_ecoli$n_total,
  "<br>","Median : ", sites_ecoli$median, "</strong> (#/100 ml)") %>%
  lapply(htmltools::HTML)



pal <-  colorNumeric(c("light yellow","gold","orange", "red"), 1:400)

# create map
map <- leaflet() %>%
  addProviderTiles("OpenStreetMap",
                   group = "street") %>%
  addProviderTiles("Esri.WorldImagery",
                   group = "imagery") %>%
  addLayersControl(baseGroups = c("street", "imagery"),
                   options = layersControlOptions(collapsed = FALSE),
                   overlayGroups=c("sample count","median"))%>%
  hideGroup("median")%>%
  addCircleMarkers(data = sites_ecoli,
             lng= ~MonitoringLocationLongitude, 
             lat=~MonitoringLocationLatitude, 
             label = ~labels,
             color = "black",
             opacity= 1,
             fillColor = ~pal(n_total ),
             fillOpacity = 1,
             group = "sample count") %>%
addCircleMarkers(data = sites_ecoli,
                 lng= ~MonitoringLocationLongitude, 
                 lat=~MonitoringLocationLatitude, 
                 label = ~labels,
                 color = "black",
                 opacity= 1,
                 fillColor = ~pal(median),
                 fillOpacity = 1,
                 group = "median")
## Warning in pal(median): Some values were outside the color scale and will be
## treated as NA
## Warning in pal(n_total): Some values were outside the color scale and will be
## treated as NA
map

Plots, static

data_ecoli <- data_ecoli[order(data_ecoli$MonitoringLocationName), ]

# list of values to loop over

# site codes
codes = unique(data_ecoli$MonitoringLocationIdentifier)

# site names
site_codes<- distinct(data_ecoli, MonitoringLocationName, MonitoringLocationIdentifier, .keep_all = FALSE)
names <- site_codes$MonitoringLocationName


plot_list = list()

var_list <- list(
  MonitoringLocationIdentifier = codes,   
  MonitoringLocationName = names
)


for (i in 1:length(var_list$MonitoringLocationIdentifier))
  {
    temp_plot = ggplot(data= subset(data_ecoli, MonitoringLocationIdentifier == var_list$MonitoringLocationIdentifier[i])) + 
      geom_point(size=3, aes(x=ActivityStartDate, y=ResultMeasureValue )) +
      ggtitle(var_list$MonitoringLocationIdentifier[i], subtitle = var_list$MonitoringLocationName[i]) +
      labs(y="Result (# / 100 ml)", x="Sample date") +
      scale_x_date(date_breaks = "1 year", minor_breaks = "1 month", date_labels = "%Y")
    plot_list[[i]] = temp_plot
}

# view plots in R
#plot_list

# view in pdf
#pdf("ecoli_plots_20221003.pdf")
#for (i in 1:21) {
#  print(plot_list[[i]])
#}
#dev.off()

Plots, interactive

plt <- htmltools::tagList()

  for (i in 1:length(var_list$MonitoringLocationIdentifier))
  {
    temp_plot = ggplot(data= subset(data_ecoli, MonitoringLocationIdentifier == var_list$MonitoringLocationIdentifier[i])) + 
      geom_point(size=3, aes(x=ActivityStartDate, y=ResultMeasureValue )) +
      ggtitle(var_list$MonitoringLocationName[i]) +
      labs(y="Result (# / 100 ml)", x="Sample date") +
      scale_x_date(date_breaks = "1 year", minor_breaks = "1 month", date_labels = "%Y")
    
          # Print an interactive plot
      # Add to list
      plt[[i]] <- as_widget(ggplotly(temp_plot))
      i <- i + 1
}
plt