Document last updated 2021-04-26 08:22:41 by Benjamin Meyer ()


This draft document contains preliminary data explorations of 2019-2020 copper and zinc water quality data from the Kenai River Watershed.


Notes:



Data Read in and preparation

Read in results columns directly from tabs “FINALREPORT_ZnCuSampling_19-20” and “FINALREPORT_Baseline_19-20” in the Excel document “AllData_ZnCuCaMg_19-20.xlsx” prepared by MH in Fall 2020

# specify file paths and sheet names
workbook <- "data/AllData_ZnCuCaMg_19-20.xlsx"
tab <- "BM_format_2020"

# read in tabs
tbl <- read_excel(workbook, sheet = tab, skip = 1) 

# assign new column names
vars <- c("site","rm","date","waterbody_type","hardness_mgL","copper_ugL","copper_ccc_ugL","zinc_ugL","zinc_ccc_ugL")
colnames(tbl) <- vars

# create numerical columns and clean data table
tbl <- tbl%>%
  select(-rm) %>%
  filter(!is.na(date)) %>%
  fill(site) %>%
  transform(date = as.Date(date, origin = "1899-12-30")) %>%
  
  # hardness
  separate(hardness_mgL, sep = " ", into = c("hardness_mgL_1","hardness_mgL_2"), remove = F) %>%
  mutate(hardness_mgL_2 = gsub("[()]", "", hardness_mgL_2)) %>%
  
  # copper
  mutate(copper_ugL_x = gsub("[()]", "", copper_ugL)) %>%
  mutate(copper_ugL_x = gsub("J", "", copper_ugL_x)) %>%
  mutate(copper_ugL_x = gsub("U", "", copper_ugL_x)) %>%
  mutate(copper_ugL_x = str_replace(copper_ugL_x, "\\s", "|")) %>% 
  ### replace first white space with consistent character and split on that character
  separate(copper_ugL_x, into = c("copper_ugL_1", "copper_ugL_2"), sep = "\\|") %>%
  
  # copper CCC
  mutate(copper_ccc_ugL_x = gsub("[()]", "", copper_ccc_ugL)) %>%
  mutate(copper_ccc_ugL_x = gsub("J", "", copper_ccc_ugL_x)) %>%
  mutate(copper_ccc_ugL_x = gsub("U", "", copper_ccc_ugL_x)) %>%
  mutate(copper_ccc_ugL_x = str_replace(copper_ccc_ugL_x, "\\s", "|")) %>% 
  
  ### replace first white space with consistent character and split on that character
  separate(copper_ccc_ugL_x, into = c("copper_ccc_ugL_1", "copper_ccc_ugL_2"), sep = "\\|") %>%
  
  # zinc
  mutate(zinc_ugL_x = gsub("[()]", "", zinc_ugL)) %>%
  mutate(zinc_ugL_x = gsub("J", "", zinc_ugL_x)) %>%
  mutate(zinc_ugL_x = gsub("U", "", zinc_ugL_x)) %>%
  mutate(zinc_ugL_x = str_replace(zinc_ugL_x, "\\s", "|")) %>% 
  ### replace first white space with consistent character and split on that character
  separate(zinc_ugL_x, into = c("zinc_ugL_1", "zinc_ugL_2"), sep = "\\|") %>%
  
  # zinc CCC
  mutate(zinc_ccc_ugL_x = gsub("[()]", "", zinc_ccc_ugL)) %>%
  mutate(zinc_ccc_ugL_x = gsub("J", "", zinc_ccc_ugL_x)) %>%
  mutate(zinc_ccc_ugL_x = gsub("U", "", zinc_ccc_ugL_x)) %>%
  mutate(zinc_ccc_ugL_x = str_replace(zinc_ccc_ugL_x, "\\s", "|")) %>% 
  
  ### replace first white space with consistent character and split on that character
  separate(zinc_ccc_ugL_x, into = c("zinc_ccc_ugL_1", "zinc_ccc_ugL_2"), sep = "\\|") %>%
  
  # select desired columns
  select("site","date","waterbody_type",
         "hardness_mgL_1","hardness_mgL_2",
         "copper_ugL_1","copper_ugL_2","copper_ccc_ugL_1","copper_ccc_ugL_2",
         "zinc_ugL_1","zinc_ugL_2","zinc_ccc_ugL_1","zinc_ccc_ugL_2") %>%
  
  # transform numbers to numeric column classes
  mutate_at(c("hardness_mgL_1","hardness_mgL_2",
         "copper_ugL_1","copper_ugL_2","copper_ccc_ugL_1","copper_ccc_ugL_2",
         "zinc_ugL_1","zinc_ugL_2","zinc_ccc_ugL_1","zinc_ccc_ugL_2"), 
         as.numeric) %>%
  
  # modify site names
  mutate(site = as.factor(str_replace_all(site," ","_"))) %>%
  mutate(site = as.factor(str_replace_all(site,"-","_"))) %>%
  mutate(site = as.factor(str_replace_all(site,"'","")))


Export hardness values for use in the script “calcium_magnesium_historical_data.Rmd”

hardness_tbl <- tbl %>%
  select(site,date,waterbody_type,hardness_mgL_1,hardness_mgL_2) %>%
  pivot_longer(cols = c("hardness_mgL_1","hardness_mgL_2"), names_to = "obs", values_to = "val") %>%
  filter(!is.na(val))

write.csv(hardness_tbl,"output/hardness_2019_2020.csv", row.names = F)

Duplicate sample % differences

  • Calculate RDL values for Zn and Cu duplicate samples

  • In cases where a “_2” value exists, this is the “replicate” value

tbl <- tbl %>%
  ## copper rdl
  mutate(cu_rdl = paste(sprintf((abs(copper_ugL_1 - copper_ugL_2) / ((copper_ugL_1 + copper_ugL_2) / 2)*100), fmt = '%#.2f'),"%")) %>%
  mutate(cu_rdl = str_replace(cu_rdl, "NA %", "")) %>%
  
  # zinc rdl
  mutate(zn_rdl = paste(sprintf((abs(zinc_ugL_1 - zinc_ugL_2) / ((zinc_ugL_1 + zinc_ugL_2) / 2)*100), fmt = '%#.2f'),"%")) %>%
  mutate(zn_rdl = str_replace(zn_rdl, "NA %", "")) 
  
  # ca rdl
  
  # mg rdl
  
# write rdl table 
rdl_tbl <- tbl %>%
  ungroup() %>%
  filter(!is.na(cu_rdl),
         cu_rdl != "") %>%
  arrange(site,date) %>%
  select(site,date,cu_rdl,zn_rdl) %>%
  pivot_longer(cols = c("cu_rdl","zn_rdl"), values_to = "RDL") %>%
  mutate(name = str_replace_all(name,"cu_rdl","Copper"),
         name = str_replace_all(name,"zn_rdl","Zinc")) %>%
  arrange(name,site,date)

# rename columns
colnames(rdl_tbl) <- c("Site","Date","Parameter","RDL(%)")

# export RDL table
#write.csv(rdl_tbl, "output/rdl_tbl.csv", row.names = F)

datatable(rdl_tbl,
          filter = 'top', 
          options = list(pageLength = 5, autoWidth = TRUE))


What was the average percent difference for duplicate samples?

z <- rdl_tbl %>%
  mutate(`RDL(%)` = str_remove(`RDL(%)`,"%")) %>%
  transform(`RDL(%)` = as.numeric(`RDL(%)`)) %>%
  rename(RDL_pct = `RDL...`) %>%
  group_by(Parameter) %>%
  summarise(mean = mean(RDL_pct),
            se = std.error(RDL_pct))

colnames(z) <- c("Metal", "Mean RDL (%)","Std. Error(%)")

z



Raw Data Table

# read in coordinates
coords <- read_excel("data/Sampling sites 2020.xlsx", sheet = "BM_format_Sites2020") %>%
  rename(site = Site) %>%
  transform(site = as.factor(site))

# join coordinates to data
tbl <- left_join(tbl,coords)

# present table
datatable(tbl,
          filter = 'top', 
          options = list(pageLength = 5, autoWidth = TRUE))



Map

# add site identifier
tbl <- tbl %>%
  group_by(site) %>%
  mutate(Site_ID = cur_group_id())


# used arcgis online from csv exported here
write.csv(tbl, "output/CuZn_2019_2020_results.csv", row.names = F)

# viewable at https://arcg.is/0yij15


Site coordinates

cd <- coords %>%
  select(site,Habitat,Lat,Long)

datatable(cd,
          filter = 'top', 
          options = list(pageLength = 5, autoWidth = TRUE))


ArcGIS Online map

Sampling sites and raw data on an interactive map (also at https://arcg.is/0yij15)



Plots

Prep tables for plotting

# prep table

# strategy to prep for ggplot (e.g. "long form": break up in to minimum tables and use joins to rebuild min rows?
tbl <- tbl %>%
  select(site, date, waterbody_type, 
         starts_with(c("copper","cu","zinc","zn")),
         Lat, Long, BM_Notes) 

# rdl table
rdl_tbl <- tbl %>% 
  select(site,date,contains("rdl")) %>%
  filter(zn_rdl != "") %>%
  pivot_longer(contains("rdl"),names_to = "rdl", values_to = "rdl_val") %>%
  mutate(metal = gsub("_rdl","",rdl)) %>%
  select(-rdl)

# ccc table
ccc_tbl <- tbl %>%
  select(site,date,contains("ccc")) %>%
  pivot_longer(contains("ccc"), values_to = "ccc_val") %>% 
  filter(!is.na(ccc_val)) %>%
  mutate(metal = gsub("copper","cu",name)) %>%
  mutate(metal = gsub("zinc","zn",metal)) %>%
  mutate(metal = gsub("_ccc_ugL_1","",metal)) %>%
  mutate(metal = gsub("_ccc_ugL_2","",metal)) %>%
  select(-name)

# metal values observations
met_obs <- c("copper_ugL_1","copper_ugL_2","zinc_ugL_1","zinc_ugL_2")
met_tbl <- tbl %>%
  select(site,date,all_of(met_obs)) %>%
  pivot_longer(met_obs, names_to = "metal", values_to = "met_val") %>%
  filter(!is.na(met_val)) %>%
  mutate(metal = gsub("copper","cu",metal)) %>%
  mutate(metal = gsub("zinc","zn",metal)) %>%
  mutate(metal = gsub("_ugL_1","",metal)) %>%
  mutate(metal = gsub("_ugL_2","",metal)) %>%
  distinct()

  
# join tables in long form
tbl2 <- inner_join(met_tbl,ccc_tbl) %>%
  left_join(rdl_tbl) %>%
  left_join(coords) %>%
  arrange(metal,met_val) %>%
  distinct()


Summarise the range of metal and CCC values observed

dt <- tbl2 %>%
  group_by(site,metal) %>%
  summarise(met_max = max(met_val),
            met_min = min(met_val),
            ccc_max = max(ccc_val),
            ccc_min = min(ccc_val)) %>%
  arrange(metal, met_max)

datatable(dt,
          filter = 'top', 
          options = list(pageLength = 5, autoWidth = TRUE))


Tributary site plots

## tributaries
trib_order <- c("Lower_No_Name_Creek",
                "Upper_No_Name_Creek",
                "Lower_Beaver_Creek",
                "Upper_Beaver_Creek",
                "Lower_Slikok_Creek",
                "Upper_Slikok_Creek",
                "Lower_Soldotna_Creek",
                "Upper_Soldotna_Creek",
                "Lower_Funny_River")
tbl2_trib <- tbl2 %>% filter(Habitat == "Tributary")
tbl2_trib$site <- factor(tbl2_trib$site, levels = trib_order)

# common theme
cuzn_theme <- theme(
  strip.text.y = element_text(face = "bold", angle = 0),
        axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1),

        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) 

# copper in tributaries
(cu_tribs <- tbl2_trib %>%
    filter(metal == "cu") %>%
  ggplot() +
  geom_point(aes(as.factor(date),as.numeric(met_val), color = "Copper")) +
  geom_point(aes(as.factor(date),as.numeric(ccc_val), fill = "CCC"), shape = 95, size = 4) +
    scale_color_discrete(name = "") +
    scale_fill_discrete(name = "") +
  facet_wrap(site ~ ., scales = "free_y") +
  theme_bw() +
  cuzn_theme +
  xlab("") +
  ylab("Concentration (ug/L)") +
  ggtitle("Kenai River Tributaries\nCopper Concentrations and Criterion Values"))

# save to output folder
ggsave("output/figures/cu_tribs.png")


# zn in tributaries
(zn_tribs <- tbl2_trib%>%
    filter(metal == "zn") %>%
  ggplot() +
  geom_point(aes(as.factor(date),as.numeric(met_val), color = "Zinc")) +
  geom_point(aes(as.factor(date),as.numeric(ccc_val), fill = "CCC"), shape = 95, size = 4) +
    scale_color_discrete(name = "") +
    scale_fill_discrete(name = "") +
  facet_wrap(site ~ ., scales = "free_y") +
  theme_bw() +
  cuzn_theme +
  xlab("") +
  ylab("Concentration (ug/L)") +
  ggtitle("Kenai River Tributaries\nZinc Concentrations and Criterion Values"))

# save to output folder
ggsave("output/figures/zn_tribs.png")


Mainstem site plots

## mainstem
ms_order <- c("City_of_Kenai_Docks",
              "Cunningham_Park",
              "Upstream_of_Beaver_Creek",
              "Pillars",
              "Poachers_Cove",
              "Slikok_Creek_Kenai_River_Confluence",
              "Soldotna_Bridge",
              "Swiftwater_Park",
              "Jims_Landing",
              "Skilak_Lake_Outlet")

# shorten site names
ms_sites <- c("City_of_Kenai_Docks" = "Kenai_Docks",
              "Cunningham_Park" = "Cunningham",
             "Upstream_of_Beaver_Creek" = "Upstream_of_Beaver",
              "Pillars" = "Pillars",
              "Poachers_Cove" = "Poachers",
              "Slikok_Creek_Kenai_River_Confluence" = "Slikok_Confluence",
              "Soldotna_Bridge" = "Soldotna_Bridge",
              "Swiftwater_Park" = "Swiftwater",
              "Jims_Landing" = "Jims_Landing",
              "Skilak_Lake_Outlet" = "Skilak_Lake_Outlet")

tbl2_ms <- tbl2 %>% filter(Habitat == "Mainstem")
tbl2_ms$site <- factor(tbl2_ms$site, levels = ms_order)

# common theme
cuzn_theme <- theme(
  strip.text.y = element_text(face = "bold", angle = 0),
  # modified x axis text angle
        axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1),

        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) 


# copper in mainstem
(cu_ms <- tbl2_ms %>%
    filter(metal == "cu") %>%
  ggplot() +
  geom_point(aes(as.factor(date),as.numeric(met_val), color = "Copper")) +
  geom_point(aes(as.factor(date),as.numeric(ccc_val), fill = "CCC"), shape = 95, size = 4) +
    scale_color_discrete(name = "") +
    scale_fill_discrete(name = "") +
  facet_wrap(site ~ ., scales = "free_y", labeller = as_labeller(ms_sites)) +
  theme_bw() +
  cuzn_theme +
  xlab("") +
  ylab("Concentration (ug/L)") +
  ggtitle("Kenai River Mainstem\nCopper Concentrations and Criterion Values"))

# save to output folder
ggsave("output/figures/cu_ms.png")


# zn in mainstem
(zn_ms <- tbl2_ms %>%
    filter(metal == "zn") %>%
  ggplot() +
  geom_point(aes(as.factor(date),as.numeric(met_val), color = "Zinc")) +
  geom_point(aes(as.factor(date),as.numeric(ccc_val), fill = "CCC"), shape = 95, size = 4) +
    scale_color_discrete(name = "") +
    scale_fill_discrete(name = "") +
  facet_wrap(site ~ ., scales = "free_y", labeller = as_labeller(ms_sites)) +
  theme_bw() +
  cuzn_theme +
  xlab("") +
  ylab("Concentration (ug/L)") +
  ggtitle("Kenai River Mainstem\nZinc Concentrations and Criterion Values"))

# save to output folder
ggsave("output/figures/zn_ms.png")



Overall

Is there an overall temporal trend in exceedances? (Year, season…?)

# facet labels
facet_labs <- c(
  "cu" = "Copper",
  "zn" = "Zinc")

# plot
tbl2 %>%
  mutate(excd = ifelse(met_val > ccc_val,"Y","N"),
         year = year(date)) %>%
  ggplot(aes(as.factor(date),met_val, color = excd)) +
  geom_point() +
  facet_grid(metal ~ year, scales = "free", labeller = labeller(metal = facet_labs)) +
  xlab("") +
  ylab("Concentration (ug/L)") +
  theme_bw() +
  cuzn_theme +
  labs(color = "CCC Exceedance?") +
  ggtitle("All Sites")

# save
ggsave("output/figures/overall_exceedances.png", width = 8, height = 6)


Are copper and zinc values correlated?

tbl2 %>%
  ungroup() %>%
  group_by(metal) %>%
  mutate(r = row_number()) %>%
  select(metal,met_val,r,Habitat) %>%
  spread(key = metal, val = met_val) %>%
  ggplot(aes(cu,zn, color = Habitat)) +
  geom_point(size = 2) + 
  theme_classic() +
  xlab("Copper (ug/L)") +
  ylab("Zinc (ug/L)") +
  xlim(0,2.5) +
  ggtitle("Copper vs Zinc values 2019-2020")

Relationship between copper and zinc appear to follow an approximately logistic relationship. Maybe we would we be able to determintaively observe the relationship with the full 20 year dataset?