Document last updated 2021-05-02 17:48:07 by Benjamin Meyer ()


Introduction

This draft document describes a data query and summary performed to generate average hardness values for select sites included in the Kenai River Baseline Water Quality Monitoring Program (KRBWQM) and supplemental Copper/Zinc monitoring. These hardness values are then interpreted as CCC values (criterion chronic concentration) for zinc and copper.

Access full code repository for this project on GitHub at https://github.com/Kenai-Watershed-Forum/KWF_Metals_2019_2020, including a draft report in progress for the copper/zinc monitoring project.


Notes:



Data Read-in

EPA Repository data

Read in Calcium and Magnesium data from EPA repository

# read in metals data 2000 - 2013
# data sourced from the following query at EPA water quality repository:

# https://www.waterqualitydata.us/portal/#countrycode=US&statecode=US%3A02&countycode=US%3A02%3A122&sampleMedia=water&sampleMedia=Water&characteristicType=Inorganics%2C%20Major%2C%20Metals&characteristicType=Inorganics%2C%20Minor%2C%20Metals&mimeType=csv&dataProfile=narrowResult

epa_dat <- read.csv("data/waterqualitydata_epa_repo_all_metals.csv")

# choose variables to retain
vars <- as.character(c(
          "OrganizationFormalName",
          "ActivityIdentifier",
          "ActivityStartDate",
          "ActivityStartTime.Time",
          "MonitoringLocationIdentifier",
          "ResultIdentifier",
          "ResultDetectionConditionText",
          "CharacteristicName",
          "ResultSampleFractionText",
          "ResultMeasureValue",
          "ResultMeasure.MeasureUnitCode",
          "MeasureQualifierCode",
          "ResultStatusIdentifier",
          "ResultValueTypeName"))

# rename variables
new_vars <- c("agency",
              "activity_id",
              "date",
              "time",
              "location_id",
              "result_id",
              "detection_qualifier_text",
              "parameter",
              "substance_condition",
              "val",
              "unit",
              "detection_qualifier_code",
              "result_status",
              "result_type")

# retain selected columns
epa_dat <- epa_dat %>%
  select(all_of(vars)) 

# rename columns
colnames(epa_dat) <- new_vars

# retain ca and mg data only
epa_dat <- epa_dat %>%
  filter(parameter %in% c("Calcium","Magnesium"))


Read in site data for Kenai River region

# we want ca and mg data for KRBWQM sites only
# examine sites
# query available sites in the region at EPA repository:
# https://www.waterqualitydata.us/portal/#bBox=-151.413081%2C60.292340%2C-149.215768%2C60.715224&mimeType=csv
# (Used bounding box)
epa_krbwqm_sites <- read.csv("data/waterqualitydata_epa_repo_sites.csv") 

# retain potentially useful columns
site_vars <- c("OrganizationFormalName",
          "MonitoringLocationIdentifier",
          "MonitoringLocationName",
          "MonitoringLocationTypeName",
          "MonitoringLocationDescriptionText",
          "HUCEightDigitCode",
          "DrainageAreaMeasure.MeasureValue",
          "DrainageAreaMeasure.MeasureUnitCode",
          "LatitudeMeasure",
          "LongitudeMeasure",
          "HorizontalCollectionMethodName",
          "HorizontalCoordinateReferenceSystemDatumName",
          "VerticalMeasure.MeasureValue",
          "VerticalMeasure.MeasureUnitCode")

# rename variables
site_new_vars <- c("agency",
                   "location_id",
                   "location_name",
                   "location_type",
                   "location_description",
                   "huc",
                   "drainage_area",
                   "drainage_area_unit",
                   "lat",
                   "long",
                   "horizontal_collection_method",
                   "horizontal_coords_system",
                   "elevation",
                   "elevation_unit")

# retain selected columns
epa_krbwqm_sites <- epa_krbwqm_sites %>%
  select(all_of(site_vars)) 

# rename columns
colnames(epa_krbwqm_sites) <- site_new_vars


Join sites with Ca/Mg data to site wqx data

epa_dat <- left_join(epa_dat,epa_krbwqm_sites, by = c("location_id","agency"))


What are all the different agencies that have collected Ca/Mg data in the Kenai Peninsula Borough?

unique(epa_dat$agency)
## [1] "USGS Alaska Water Science Center"              
## [2] "National Park Service Water Resources Division"
## [3] "Kenai Watershed Forum(Volunteer)*"             
## [4] "EPA Region 10 Superfund Historical Data"       
## [5] "Seldovia Village Tribe"


We will retain only Kenai Watershed Forum’s stream/river/lake data.

epa_dat <- epa_dat %>%
  filter(agency == "Kenai Watershed Forum(Volunteer)*")



Let’s plot our data on a leaflet map to assess location data and assess if further site name QA/QC is needed:

leaflet(data = epa_dat) %>% 
  addTiles() %>%
  addMarkers(~long, 
             ~lat,
             popup = epa_dat$location_description)
#leaflet() %>%
#  addTiles() %>%  # Add default OpenStreetMap map tiles
  #fitBounds(-150, 60.04,-149.0, 60.02) %>%
  #setView(-150.210169, 60.487694, zoom = 8) %>%
#  addMarkers(lng = all.dat$Longitude, lat = all.dat$Latitude,
#             popup = paste("SiteID = ", all.dat$SiteID, "<br>",
#                           "Data Source = ", all.dat$SourceName, "<br>",
#                           "Start Year = ", all.dat$startYear, "<br>",
#                           "End Year = ", all.dat$endYear, "<br>",
#                           "Total Years of Data = ", all.dat$totYears, "<br>"))



KWF Data Repository Read-in

Read in data from Kenai Watershed Forum server

# read in metals data 2014 - 2019 from compiled file found on Kenai Watershed Forum server
kwf_dat <- read_excel("data/Compiled_KRBWQM_data_2014_2019.xlsx", sheet = "Master")

# create format to match data imported from EPA repository
kwf_dat <- kwf_dat %>%
  filter(Parameter %in% c("Calcium","Magnesium")) %>%
  select(-Year,-Season,-ChannelType,-Lab,-TestType) %>%
  rename(date = Date,
         location_description = Site,
         parameter = Parameter,
         val = Result,
         unit = Units,
         detection_qualifier_code = Code,
         duplicate = Duplicate) %>%
  mutate(agency = "Kenai Watershed Forum(Volunteer)*")

# address ND in val column
kwf_dat <- kwf_dat %>%
  mutate(detection_qualifier_text = ifelse(val == "ND","Not Detected","")) %>%
  mutate(val = na_if(val,"ND")) %>%
  # transform column classes
  transform(date = as.Date(date),
            val = as.double(val))





Join EPA and KWF data

# join, arrange, and fill in
dat <- bind_rows(epa_dat,kwf_dat) %>%
  arrange(location_description) %>%
  fill(river_mile, .direction = "up") %>%
  left_join(site_match_table) %>%
  select(-epa_site_names,-waterbody,-river_mile)

# provide waterbody type to all sites not already designated
site_match_table <- site_match_table %>%
  select(-location_description) %>%
  rename("location_description" = "epa_site_names") 

dat <- left_join(dat,site_match_table)




Ca/Mg Data Exploration


Do we have some a consistent range of concentration values? Lets see if we can identify them

dat %>%
  ggplot(aes(date,val,color = parameter)) +
  geom_point() +
  facet_wrap(. ~ location_description)+
  ylab("Concentration (mg/L)")


It appears that Ca/Mg data from the City of Kenai Docks site is potentially affected by some hydrological process. Marine influence likely? Lets examine it more closely:

p <- dat %>%
  filter(location_description == "City_of_Kenai_Docks")  %>%
  ggplot(aes(date,val,color = parameter)) +
  geom_point() +
  ggtitle("City of Kenai Docks Ca/Mg concentrations") +
  ylab("Concentration (mg/L)")

ggplotly(p)



Next, let’s visualize our data as CCC values, and with variable y-axes to see is any other anomalies exist.

dat %>%
  ggplot(aes(date,val,color = parameter)) +
  geom_point() +
  facet_wrap(. ~ location_description, scales = "free_y") +
  ylab("Concentration (mg/L)") +
  ggtitle("Ca/Mg concentrations")

Notes:

It is beyond the scope of this current draft DEC report to diagnose (or potentially correct) these data if any anomalies are present. Such work will be conducted at a later date in 2021 pending funding from the Bureau of Restoration grant proposal or other funded projects.


Plot CCC values for all sites

#sites <- c("No_Name_Creek","City_of_Kenai_Docks")



# plot raw Ca and Mg values
#dat %>%
  #filter(location_description %in% sites) %>%
#  ggplot(aes(date,val,color = parameter)) +
#  geom_point() +
#  facet_wrap(. ~ location_description, scales = "free_y") +
#  ggtitle("Raw Ca and Mg Values 2000 - 2018")

# plot CCC values for Cu and Zn w/ all available data for odd sites
hardness_ccc_vals <- dat %>%
  filter(
    #location_description %in% sites,
         date < "2019-01-01") %>%
         
         # Address replicate observations
         # Many of the replicate samples are not designated as such.  As a temporary solution, we will simply take the average wherever two values of the same parameter were collcted on the same date.
         group_by(date,parameter,unit,location_description) %>%
         summarize(val = mean(val)) %>% 

  select(date,location_description,parameter,val,unit) %>%
  pivot_wider(names_from = parameter, values_from = val) %>%
  
  # calculate hardness values
  # forula source: draft DEC report in "Clesceri, L.S., Greenberg, A.E., Eaton, A.D. (Eds.). 1998. Standard Methods for the Examination of Water and Wastewater (20th ed.), Washington D.C. American Public Health Association, American Water Works Association, and Water Environment Federation."
  mutate(hardness = 2.487*Calcium + 4.119*Magnesium,
         Cu_CCC = exp(0.8545*log(hardness) - 1.702) * 0.96,
         Zn_CCC = exp(0.8473*log(hardness) - 0.884) * 0.986) %>%
  select(-Calcium,-Magnesium) %>%
  pivot_longer(cols = c("Cu_CCC","Zn_CCC"), values_to = "CCC")

# plot CCC values for all sites
p <- hardness_ccc_vals %>%
  ggplot(aes(date,CCC,color = name)) +
  geom_point() +
  facet_wrap(. ~ location_description, scales = "free_y") +
  ggtitle("CCC (Criterion Chronic Concentration) Values\nfor Cu and Zn, 2000 - 2018")

ggplotly(p)



Summary table of hardness and CCC Values for Cu and Zn 2000 - 2018

# summary table
z1 <- hardness_ccc_vals %>%
  group_by(location_description,name) %>%
  summarise(min_hardness = min(hardness),
            max_hardness = max(hardness),
            min_CCC = min(CCC),
            max_CCC = max(CCC),
            min_date = min(date),
            max_date = max(date))

z1 %>%
  datatable() %>%
  formatRound(columns=c('min_hardness',
                        'max_hardness',
                        'min_CCC',
                        'max_CCC'), digits=3)



Boxplots of hardness and CCC Values 2000 - 2018


Modify site names to conform with data structure in DEC report (e.g. “Lower” and “Upper” creek sites).

All sites from data that was housed in the KWF server is from the “lower” sections. Assign site names as such.


Plot hardness and CCC values 2000 - 2018

Hardness values

## reassign site info
z <- dat %>%
  distinct(waterbody,location_description,river_mile)

hardness_ccc_vals <- left_join(hardness_ccc_vals,z)

# reorder site by river mile
hardness_ccc_vals$location_description <- reorder(hardness_ccc_vals$location_description, hardness_ccc_vals$river_mile)


# mainstem hardness plot
(p1 <- hardness_ccc_vals %>%
    filter(waterbody == "Mainstem") %>%
    rename("Season" = "season") %>%
  ggplot(aes(location_description,hardness, color = Season)) +
  geom_boxplot(position = position_dodge(width = 0.7)) +
  geom_jitter(position = position_jitterdodge()) +
    facet_wrap(waterbody ~ ., scales = "free") +
  xlab("") +
  ylab("Hardness (mg/L)") +
    theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(face = "bold")) ) +
  ggtitle("Kenai River main stem hardness values 2000 - 2018")

# tributary hardness plot
(p2 <- hardness_ccc_vals %>%
    filter(waterbody == "Tributary") %>%
    rename("Season" = "season") %>%
  ggplot(aes(location_description,hardness, color = Season)) +
  geom_boxplot(position = position_dodge(width = 0.7)) +
  geom_jitter(position = position_jitterdodge()) +
    facet_wrap(waterbody ~ ., scales = "free") +
  xlab("") +
  ylab("Hardness (mg/L)") +
    theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(face = "bold"))) +
  ggtitle("Kenai River tributaries hardness values 2000 - 2018")


CCC values

# mainstem CCC plot
(p1 <- hardness_ccc_vals %>%
    filter(waterbody == "Mainstem") %>%
    rename("Season" = "season") %>%
  ggplot(aes(location_description,CCC, color = Season)) +
  geom_boxplot(position = position_dodge(width = 0.7)) +
  geom_jitter(position = position_jitterdodge()) +
    facet_wrap(waterbody ~ ., scales = "free") +
  xlab("") +
  ylab("CCC (mg/L)") +
    theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(face = "bold")) ) +
  ggtitle("Kenai River main stem CCC values 2000 - 2018")

# tributary hardness plot
(p2 <- hardness_ccc_vals %>%
    filter(waterbody == "Tributary") %>%
    rename("Season" = "season") %>%
  ggplot(aes(location_description,CCC, color = Season)) +
  geom_boxplot(position = position_dodge(width = 0.7)) +
  geom_jitter(position = position_jitterdodge()) +
    facet_wrap(waterbody ~ ., scales = "free") +
  xlab("") +
  ylab("CCC (mg/L)") +
    theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(face = "bold"))) +
  ggtitle("Kenai River tributaries CCC values 2000 - 2018")