Document last updated 2021-02-11 08:56:25 by Benjamin Meyer (ben@kenaiwatershed.org)
This draft document contains preliminary data explorations of 2019-2020 beach bacteria sampling data from the Kenai River Watershed.
Notes:
Sourced data from AWQMS files and other files found on KWF local server.
2020 Microbial Source Tracing data is present in AWQMS files, 2019 MST data is not. Sourced 2019 MST data from “MST results 2019.xlsx”
In the future, request that SGS can provide sample results in .csv or .xlsx format in addition to the PDFs to eliminate potential for data entry or data management error.
# read in coordinates
beach_sites <- read_excel("data/2020 Beach Sampling Compiled Results_FINALREPORT.xlsx", sheet = "2019-2020_FinalReport_SampSites", skip = 1)
# create map title
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 22px;
}
"))
title <- tags$div(
tag.map.title, HTML("Kenai River Beach Sampling Locations")
)
# create map
beach_sites %>%
leaflet() %>%
# choose base map
addProviderTiles(providers$Esri.WorldImagery) %>%
# set map center
setView(-151.214545, 60.538691, zoom = 12.4) %>%
# add map title
addControl(title, position = "topleft", className="map-title") %>%
addCircleMarkers(~Longitude, ~Latitude,
radius = 9,
popup = paste("Site Name" = beach_sites$`Site Name`,"<br>",
"Site Description" = beach_sites$`Site description`)) %>%
# add site labels
addLabelOnlyMarkers(~Longitude, ~Latitude, label = beach_sites$`Site ID`,
labelOptions = labelOptions(noHide = T,
direction = 'right',
textOnly = T,
textsize = "12px",
style = list("color" = "white",
"font-weight" = "bold",
"font-size" = "14px")))
# import known data from local server source
# specify column import names
vars <- as.character(c("Monitoring Location Name",
"Activity Latitude",
"Activity Longitude",
"Activity Medium",
"Activity Start Date",
"Activity Start Time",
"Characteristic Name",
"Result Analytical Method",
"Result Value",
"Result Value Units",
"Result Qualifier"))
# specify new column names
new_vars <- as.character(c("location",
"lat",
"long",
"medium",
"date",
"time",
"data_type",
"analytical_method",
"val",
"units",
"qualifier"))
# import 2019 AWQMS data
## used this excel file because it had the most recent modified date
## see notes in excel file; not sure if all available data is contained within here
bs19 <- read_excel("data/KWF_2019_Kenai-BEACH_AWQMS_forQAQC_MH101119.xlsx", sheet = "AWQMS Data Template", skip = 7) %>%
select(all_of(vars))
# import 2020 AWQMS data
## see notes in excel file; not sure if all avilable data is contained within here
bs20 <- read_excel("data/2020_KWF_Kenai-BEACH_AWQMS_SUBMITTED.xlsx", sheet = "RAW AWQMS Data Template", skip = 3) %>%
select(all_of(vars))
# combine both years
dat <- bind_rows(bs19,bs20)
# rename columns
colnames(dat) <- new_vars
## 2019 MST data absent from AWQMS file. So, import separately and format to join
## imported sheet was created manually on 12/8/20 from the sheet titled "Summary"
mst19 <- read_excel("data/MST results 2019.xlsx", sheet = "AWQMS_summary_format") %>%
select(-site_id)
# join 2019 MST data to overall data set
dat <- bind_rows(dat,mst19)
# tidy column formats
dat <- dat %>%
transform(time = hms::as_hms(time))
# replace "Gull_Gull-#" with "Gull_Gull-4"
# correct location names
dat <- dat %>%
mutate(analytical_method = str_replace_all(analytical_method,c("Gull_Gull-5" = "Gull_Gull-4",
"Gull_Gull-6" = "Gull_Gull-4"))) %>%
mutate(location = str_replace_all(location, c("North Kenai River Beach 4" = "North Kenai Beach 4"))) %>%
# include only water quality data
# bacteria concs and temperature
filter(medium == "Water")
rm(bs19,bs20,mst19)
How many bacteria sample values all together do we have; including replicates?
dat %>%
filter(data_type %in% c("Enterococci","Fecal Coliform")) %>%
group_by(data_type) %>%
summarise(n = n())
## # A tibble: 2 x 2
## data_type n
## * <chr> <int>
## 1 Enterococci 161
## 2 Fecal Coliform 162
Notes post data read-in:
Procedure:
Address replicate sample values
# how many replicate samples do we have?
rep_ct <- dat %>%
filter(data_type %in% c("Enterococci","Fecal Coliform","Microbial Source Tracking")) %>%
group_by(location,date,time,data_type) %>%
count(name = "rep_set") %>%
ungroup() %>%
filter(rep_set > 1) %>%
group_by(data_type,rep_set) %>%
count(name = "count")
datatable(rep_ct)
The 2019 - 2020 data set has a number of sample values where a paired replicate also exists, both for bacteria concentration values and microbial source tracking values.
What is the average difference between replicate values? (by sample type)
# what is the average % difference among duplicates? (by sample type)
rep_diff <- dat %>%
filter(data_type %in% c("Enterococci","Fecal Coliform","Microbial Source Tracking")) %>%
group_by(location,date,analytical_method) %>%
filter(n() > 1) %>%
arrange(location,date,analytical_method) %>%
transform(val = as.numeric(val)) %>%
select(location,date,data_type,val) %>%
group_by(location,date,data_type) %>%
# calculate percent difference between instances of two replicate samples
mutate(diff = abs(lag(val) - val)) %>%
mutate(lagsum = val + lag(val)) %>%
mutate(pct_diff = (diff / (lagsum/2))*100) %>%
select(location,date,data_type,val,pct_diff) %>%
fill(pct_diff, .direction = "up")
datatable(rep_diff,
filter = 'top',
options = list(pageLength = 5, autoWidth = TRUE))
Replicate difference summary table
# summarise
rep_diff_summary <- rep_diff %>%
group_by(data_type) %>%
summarise(n = n(),
mean = mean(pct_diff),
se = std.error(pct_diff))
colnames(rep_diff_summary) <- c("Bacteria","n","Mean % Difference","Std. Error % Difference")
# print table
datatable(rep_diff_summary,
filter = 'top',
options = list(pageLength = 5, autoWidth = TRUE))
# export table
write.csv(rep_diff_summary,"output/rep_diff_summary.csv",row.names = F)
# plot 1
rep_diff %>%
ggplot(aes(val,pct_diff,color = data_type)) +
geom_point()
# plot 2
rep_diff %>%
mutate(day = yday(date)) %>%
ggplot(aes(day,pct_diff,color = data_type)) +
geom_point()
A relationship between magnitude of sample value or date and percent difference between duplicate samples is not readily apparent.
As per ADEC CALM protocol, where two replicate values exist, only the higher of the two values are used in further analyses.
# mst sample values are in sets of 3 and have alphabetical characters for concentrations, so them treat separately
mst_dat <- dat %>%
filter(data_type == "Microbial Source Tracking")
# prep bacteria concentrations dataframe
dat <- dat %>%
filter(data_type %in% c("Enterococci","Fecal Coliform")) %>%
group_by(location,lat,long,medium,data_type,analytical_method,units,qualifier,date,time) %>%
# where pairs of two replicate samples exist, retain only the higher of the two values.
## For example: South Kenai Beach 3 Enterococci 2019-05-21, rep1 = 4, rep 2 = 5; retains only rep1 = 4
filter(val == max(val)) %>%
# in some cases, when two replicate values are present, they are identical (e.g. rep1 = 1, rep2 = 1). where equivalencies exist, we want to retain only one of the values
ungroup() %>%
group_by(location,lat,long,medium,data_type,analytical_method,units,qualifier,date,time,val) %>%
summarise(val = mean(as.numeric(val))) %>%
# and, in some cases, where two identical values exist, one may have a qualifier (e.g. "U") and the other does not
## We want to retain the one without the qualifier (e.g. "higher" value)
## remove 'qualifier' as grouping factor but keep it as a column
ungroup() %>%
distinct(location,lat,long,medium,data_type,analytical_method,units,date,time,val, .keep_all = T)
# recreate table in tab "2019-2020_FinalReport_MST2020"
mst_tbl <- mst_dat %>%
arrange(location,date,analytical_method) %>%
transform(date = as.Date(date)) %>%
select(date,location,analytical_method,val) %>%
pivot_wider(names_from = analytical_method, values_from = val) %>%
arrange(date,location)
colnames(mst_tbl) <- c("Date","Location","Dog Feces","Gull Feces","Human Feces")
# print
datatable(mst_tbl,
filter = 'top',
options = list(pageLength = 5, autoWidth = TRUE))
# save mst data table
write.csv(mst_tbl, "output/mst_summary_table.csv", row.names = F)
What is the range of concentration values for each species?
mst_dat %>%
mutate(val = str_replace_all(val,c("ND" = "0",
"DNQ" = "0"))) %>%
transform(val = as.numeric(val)) %>%
group_by(analytical_method) %>%
summarise(min = min(val),
max = max(val),
mean = mean(val))
## # A tibble: 3 x 4
## analytical_method min max mean
## * <chr> <dbl> <dbl> <dbl>
## 1 Dog_BacCan-UCD 0 4660 294.
## 2 Gull_Gull-4 0 28200 5418.
## 3 Human_HF183 0 1220 176.
# prep table for plot
mst_tbl <- mst_dat %>%
filter(data_type == "Microbial Source Tracking") %>%
arrange(location,date,analytical_method) %>%
select(date,location,analytical_method,val) %>%
# replace results DNQ and ND with zero and convert to numeric
mutate(val = as.numeric(str_replace_all(val,c("DNQ" = "0", "ND" = "0"))))
# prep custom legend format functions from stack overflow: https://stackoverflow.com/questions/11366964/is-there-a-way-to-change-the-spacing-between-legend-items-in-ggplot2
draw_key_polygon3 <- function(data, params, size) {
lwd <- min(data$size, min(size) / 4)
grid::rectGrob(
width = grid::unit(0.7, "npc"),
height = grid::unit(0.7, "npc"),
gp = grid::gpar(
col = data$colour,
fill = alpha(data$fill, data$alpha),
lty = data$linetype,
lwd = lwd * .pt,
linejoin = "mitre"
))
}
GeomBar$draw_key = draw_key_polygon3
# plot
mst_tbl %>%
ggplot(aes(as.factor(date),val, fill = analytical_method)) +
geom_col(position = "dodge", key_glyph = "polygon3") +
facet_grid(.~location, labeller = labeller(location = label_wrap_gen(5))) +
xlab("") +
ylab("Concentration (Copies/100 mL)") +
theme_bw() +
# modify legend
scale_fill_discrete(name="Source",
breaks=c("Dog_BacCan-UCD", "Gull_Gull-4", "Human_HF183"),
labels=c("Dog", "Gull", "Human")) +
# theme
theme(axis.text.x = element_text(vjust = .5, angle = 85),
strip.text.y = element_text(angle = 0),
legend.key.size = unit(1.4,"cm")) +
col_theme
# save plot
ggsave("output/mst_kenai_beach_2019_2020.png", width = 12, height = 6)
# prep table
bac_tbl <- dat %>%
filter(data_type %in% c("Enterococci","Fecal Coliform")) %>%
arrange(location,date,data_type) %>%
select(date,location,data_type,val) %>%
# replace results DNQ and ND with zero and convert to numeric
mutate(val = as.numeric(str_replace_all(val,c("DNQ" = "0", "ND" = "0")))) %>%
# use day of year for x axis and year for facet grid
mutate(day = yday(date),
year = year(date))
# print
datatable(bac_tbl,
filter = 'top',
options = list(pageLength = 5, autoWidth = TRUE))
# plot 1
bac_tbl %>%
ggplot(aes(x = as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
y = val,
color = as.factor(year))) +
geom_jitter(size = 3) +
facet_grid(location~data_type, labeller = labeller(location = label_wrap_gen(5))) +
xlab("") +
ylab("Individual Sample Concentration (CFU/100 mL)") +
scale_color_discrete(name = "Year")+
# theme
theme_bw() +
theme(axis.text.x = element_text(vjust = 1),
strip.text.y = element_text(angle = 0)) +
points_theme +
ggtitle("Bacteria Raw Concentrations")
# save plot
ggsave("output/bacteria_conc_2019_2020_v1.png", width = 10, height = 10)
# plot 2, enterococci
bac_tbl %>%
filter(data_type == "Enterococci") %>%
# note; use of year for date format is "fake" placeholder
ggplot(aes(x = as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
y = val,
color = as.factor(year))) +
geom_jitter(size = 2) +
facet_grid(. ~ location, scales = "free_y", labeller = labeller(location = label_wrap_gen(5))) +
xlab("") +
ylab("Individual Sample\nConcentration (CFU/100 mL)") +
ylim(0,625) +
scale_color_discrete(name = "Year") +
# single-sample threshold
# geom_hline(aes(yintercept = 130, linetype = "Raw seafood consumption\n(130 CFU/100 mL)"), color = "blue") +
scale_linetype_manual(name = "Threshold", values = c(2, 2),
guide = guide_legend(override.aes = list(color = c("blue")))) +
# overall theme
theme_bw() +
theme(axis.text.x = element_text(vjust = 1,hjust = 1, angle = 35),
strip.text.y = element_text(angle = 0)) +
ggtitle("Enterococci Concentrations") +
points_theme
# save plot
ggsave("output/enterococci_conc_2019_2020.png", width = 10, height = 4)
# plot 3, fecal coliform
bac_tbl %>%
filter(data_type == "Fecal Coliform") %>%
# note; use of year for date format is "fake" placeholder
ggplot(aes(x = as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
y = val,
color = as.factor(year))) +
geom_jitter(size = 2) +
facet_grid(. ~ location, scales = "free_y", labeller = labeller(location = label_wrap_gen(5))) +
xlab("") +
ylab("Individual Sample\nConcentration (CFU/100 mL)") +
scale_color_discrete(name = "Year") +
# single-sample thresholds
geom_hline(aes(yintercept = 31, linetype = "Raw Seafood Consumption\n(31 CFU/100 mL)"), color = "blue") +
geom_hline(aes(yintercept = 400, linetype = "Water recreation, secondary\n(400 CFU/100 mL)"), color = "red") +
scale_linetype_manual(name = "Threshold", values = c(2, 2),
guide = guide_legend(override.aes = list(color = c("blue","red")))) +
# overall theme
theme_bw() +
theme(axis.text.x = element_text(vjust = 1,hjust = 1, angle = 35),
strip.text.y = element_text(angle = 0)) +
points_theme +
ggtitle("Fecal Coliform Concentrations")
# save plot
ggsave("output/fecal_coliform_conc_2019_2020.png", width = 10, height = 4)
Define ADEC threshold values
Fecal coliform and enterococci analyses results from the 2020 monitoring seasons were evaluated based on the Alaska Water Quality Standards for marine water (18 AAC 70 (14)) (Table 2). Results were evaluated both for in-season management standards, and post-season retrospective analysis. A minimum of five samples were used to calculate a geometric mean, and when two sub-criteria are specified (i.e., geometric mean and 10% of samples in a season), both criteria must both be met or the site fails the standard.
Results were evaluated as follows:
Fecal coliform
Fecal coliform exceedances were determined based on standards set for:
Secondary water recreation
a.) In-season exceedances for secondary contact recreation were identified when
i.) Individual fecal coliform sample values exceeded 400 CFU/100 mL, or
ii.) The geometric mean of fecal coliform samples exceeded 200 CFU/100 mL over a 30 day period.
b.) Post-season site exceedances were identified post-season if 10% of fecal coliform sample values exceeded 400 CFU/100 mL.
Harvesting raw aquatic life for consumption
a.) In-season exceedances for harvesting raw aquatic life for consumption were identified when
i.) Individual fecal coliform samples exceeded 31 CFU/100 mL
b.) Post-season site exceedances were identified when
i.) 10% of fecal coliform samples from season exceeded 31 CFU/100 mL, or
ii.) The geometric mean of fecal coliform samples from the season exceeded 14 CFU/100 mL.
Enterococci
Enterococci exceedances were determined based on standards set for:
Contact recreation
a.) In-season exceedances for contact recreation were identified when
i.) Individual enterococci samples exceeded 130 CFU/100 mL, or
ii.) The geometric mean of enterococci samples exceeded 35 CFU/100 mL over a 30 day period.
b.) Post-season site exceedances were identified when 10% of enterococci samples from the season exceeded 130 CFU/ 100 mL
Other notes:
General Data Notes
Calculate 30-day rolling geometric mean values for Enterococci and Fecal Coliform concentrations
dat <- dat %>%
filter(data_type %in% c("Fecal Coliform","Enterococci")) %>%
mutate(year = year(date)) %>%
transform(val = as.numeric(val)) %>%
# use 50% of value if it has an "Undetected" qualifier
mutate(val = ifelse(is.na(qualifier),val,
ifelse(qualifier == "U",(val*.5),val))) %>%
group_by(data_type,location,year) %>%
arrange(year,data_type,location,date) %>%
# calculate 30 day rolling geometric means
mutate(gm30 = rollapplyr(val, 1:n() - findInterval(date - days(30), date),
geometric.mean, fill = NA, partial = T)) %>%
# mutate values from the first annual 30 days of sampling to NA. (We need 30 days for a 30-day rolling geometric mean value).
mutate(gm30 = as.numeric(ifelse(date < min(date + days(30)),"",gm30)))
Assign exceedance criteria for 30-day geometric means and individual sample values
# create columns of Exceedance occurrences; individual and 30-day geometric means
# reshape dataframe
dat <- dat %>%
rename("value" = "val") %>%
# apply inseason criteria
## individual sample exceedances
mutate(indv_excd = case_when(
(value < 31 & data_type == "Fecal Coliform") ~ "no_indv_excd",
(value >= 400 & data_type == "Fecal Coliform") ~ "fc_excd_400",
(value >= 31 & value < 400 & data_type == "Fecal Coliform") ~ "fc_excd_31")) %>%
## 30 day rolling geometric mean exceedances
mutate(gm30_excd = case_when(
(gm30 < 200 & data_type == "Fecal Coliform") ~ "no_gm_excd",
(gm30 >= 200 & data_type == "Fecal Coliform") ~ "gm_fc_excd_200")) %>%
# enterococci
## individual sample Exceedances
mutate(indv_excd = case_when(
(indv_excd != "NA") ~ indv_excd,
(value < 130 & data_type == "Enterococci") ~ "no_indv_excd",
(value >= 130 & data_type == "Enterococci") ~ "ec_excd_130")) %>%
## 30 day rolling geometric mean exceedances
mutate(gm30_excd = case_when(
(gm30_excd != "NA") ~ gm30_excd,
(gm30 < 35 & data_type == "Enterococci") ~ "no_gm_excd",
(gm30 >= 35 & data_type == "Enterococci") ~ "gm_ec_excd_35"))
Export data table for annual report appendix
# prep table for export
excd_tbl <- dat %>%
# select columns
select(location,data_type,date,value,gm30,indv_excd,gm30_excd) %>%
transform(gm30 = format(round(as.numeric(gm30),1))) %>%
mutate(gm30_excd = case_when(
(gm30_excd == "gm_fc_excd_200") ~ "> 200",
(gm30_excd == "gm_ec_excd_35") ~ "> 35")) %>%
mutate(indv_excd = case_when(
(indv_excd == "fc_excd_31") ~ "> 31",
(indv_excd == "ec_excd_130") ~ "> 130",
(indv_excd == "fc_excd_400") ~ "> 400")) %>%
# replace NAs with blank spaces
mutate_at(vars(gm30_excd,indv_excd), ~replace_na(., "")) %>%
rename(Location = location,
Year = year,
Bacteria = data_type,
Date = date,
`30 Day Rolling Geometric Mean Value` = gm30,
`Individual Sample Value` = value,
`30 Day Geometric Mean Exceedance` = gm30_excd,
`Individual Sample Value Exceedance` = indv_excd)
# save table
write.csv(excd_tbl,"output/exceedance_table.csv", row.names = F)
## manually format conditional color and page sizes in excel
Secondary Water Recreation
In-season exceedances
# render plot
bact <- "Fecal Coliform"
dat %>%
filter(data_type == bact) %>%
ggplot(aes(as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
value,
color = indv_excd)) +
geom_point(size = 3) +
# indv. sample std for harvesting raw aquatic life = 31 CFU/100 mL
geom_hline(yintercept = 31, lty = "dashed") +
# indv. sample std for secondary water recreation = 400 CFU/100 mL
geom_hline(yintercept = 400, lty = "dashed") +
facet_grid(location ~ year, labeller = labeller(location = label_wrap_gen(5))) +
xlab("") +
ylab("Sample Concentration\n(CFU/100 mL)") +
theme_bw() +
theme(strip.text.y = element_text(angle = 0),
legend.direction = "vertical",
legend.position = "bottom") +
scale_color_discrete(name="Exceedance Type (CFU/100 mL)",
breaks = c("fc_excd_31",
"fc_excd_400",
"no_indv_excd"),
labels = c("Raw Aquatic Life Consumption Advisory (> 31)",
"Secondary Water Recreation Advisory (> 400)",
"No Advisory")) +
col_theme +
# top title
ggtitle("Fecal Coliform") +
# top and bottom subtitles
labs(subtitle = "Individual Samples",
caption = "18 AAC 70 Alaska Water Quality Standards (Amended as of April 6, 2018): Bacteria, for Marine Water Uses") +
theme(plot.caption = element_text(hjust=0.5, size=rel(0.8)))
# save
ggsave("output/fc_indv.png", width = 16, height = 22, units = "cm")
# prep for legend
## values of >200 rare for geometric mean. add dummy row so that it still shows up in legend
dat <- dat %>%
ungroup() %>%
add_row(gm30_excd = "gm_fc_excd_200", gm30 = 0)
# render plot
bact <- "Fecal Coliform"
dat %>%
filter(data_type == bact) %>%
ggplot(aes(as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
gm30,
color = gm30_excd)) +
geom_point(size = 3) +
# indv. sample std for harvesting raw aquatic life = 31 CFU/100 mL
geom_hline(yintercept = 200, lty = "dashed") +
facet_grid(location ~ year, labeller = labeller(location = label_wrap_gen(5))) +
ylim(0,250) +
xlab("") +
ylab("30 Day Geometric Mean Value\n(CFU/100 mL)") +
theme_bw() +
theme(strip.text.y = element_text(angle = 0),
legend.direction = "vertical",
legend.position = "bottom") +
scale_color_discrete(name="Exceedance Type (CFU/100 mL)",
breaks = c("fc_excd_200",
"no_gm_excd"),
labels = c("Secondary Water Recreation Advisory (> 200)",
"No Advisory"),
# force legend to show all values even if no exceedances present
limits = c("fc_excd_200", "no_gm_excd")) +
col_theme +
# top title
ggtitle("Fecal Coliform") +
# top and bottom subtitles
labs(subtitle = "30 Day Geometric Mean Sample Values",
caption = "18 AAC 70 Alaska Water Quality Standards (Amended as of April 6, 2018): Bacteria, for Marine Water Uses") +
theme(plot.caption = element_text(hjust=0.5, size=rel(0.8)))
# save
ggsave("output/fc_geomean.png", width = 16, height = 22, units = "cm")
2.) Post-season exceedances
At which sites do 10% of all samples in a season exceed
a.) 31 CFU/100 mL (Secondary Water Recreation)
b.) 400 CFU/100 mL (Harvesting Raw Aquatic Life for Consumption)
library(extdplyr)
# render table
bact <- "Fecal Coliform"
z <- dat %>%
filter(data_type == bact) %>%
# calculate percentages by exceedance type
pct_routine(location,year,indv_excd,
ungroup = T) %>%
transform(year = as.factor(year)) %>%
# assign pass/fail
filter(indv_excd != "no_indv_excd") %>%
separate(indv_excd, sep = "_",into = c("a","b","std")) %>%
mutate(pass_fail = ifelse(pct > .1, "fail","pass")) %>%
select(-a,-b) %>%
adorn_pct_formatting() %>%
rename("Location" = "location",
"Year" = "year",
"% of Sample Above Standard" = "pct",
"Standard (CFU/100 mL)" = "std",
"Pass/Fail" = "pass_fail") %>%
arrange(Location,Year)
# save table
write.csv(z, "output/fc_site_exd_pct.csv", row.names = F)
# render table
bact <- "Fecal Coliform"
(z <- dat %>%
filter(data_type == bact) %>%
# calculate seasonal geometric means
group_by(location,year) %>%
summarise(seasonal_gm30 = mean(gm30, na.rm =T),
n = n()) %>%
# assign pass/fail
mutate(pass_fail = ifelse(seasonal_gm30 > 14,"fail","pass")) %>%
rename("Location" = "location",
"Year" = "year",
"Seasonal Average of 30 Day Geometric Mean Fecal Coliform" = "seasonal_gm30",
"Pass/Fail" = "pass_fail"))
## # A tibble: 10 x 5
## # Groups: Location [5]
## Location Year `Seasonal Average of 30 Day Geomet~ n `Pass/Fail`
## <chr> <dbl> <dbl> <int> <chr>
## 1 Kenai River Gull~ 2019 21.6 14 fail
## 2 Kenai River Gull~ 2020 61.1 13 fail
## 3 Kenai River Gull~ 2019 29.2 14 fail
## 4 Kenai River Gull~ 2020 18.9 13 fail
## 5 North Kenai Beac~ 2019 36.5 14 fail
## 6 North Kenai Beac~ 2020 12.8 13 pass
## 7 South Kenai Beac~ 2019 69.2 14 fail
## 8 South Kenai Beac~ 2020 47.0 14 fail
## 9 Warren Ames Brid~ 2019 18.2 13 fail
## 10 Warren Ames Brid~ 2020 11.2 13 pass
# save
write.csv(z, "output/fc_season_geomean.csv", row.names = F)
1.) Contact recreation; in-season exceedances
# render plot
bact <- "Enterococci"
dat %>%
filter(data_type == bact) %>%
ggplot(aes(as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
value,
color = indv_excd)) +
geom_point(size = 3) +
# indv. sample std for harvesting raw aquatic life = 130 CFU/100 mL
geom_hline(yintercept = 130, lty = "dashed") +
facet_grid(location ~ year, labeller = labeller(location = label_wrap_gen(5))) +
ylim(0,150) +
xlab("") +
ylab("Sample Concentration Value\n(CFU/100 mL)") +
theme_bw() +
theme(strip.text.y = element_text(angle = 0),
legend.direction = "vertical",
legend.position = "bottom") +
scale_color_discrete(name="Exceedance Type (CFU/100 mL)",
breaks = c("ec_excd_130",
"no_indv_excd"),
labels = c("Contact Recreation Advisory (> 130)",
"No Advisory"),
# force legend to show all values even if no exceedances present
limits = c("ec_excd_130", "no_indv_excd")) +
col_theme +
# top title
ggtitle("Enterococci") +
# top and bottom subtitles
labs(subtitle = "Individual Sample Values",
caption = "18 AAC 70 Alaska Water Quality Standards (Amended as of April 6, 2018): Bacteria, for Marine Water Uses") +
theme(plot.caption = element_text(hjust=0.5, size=rel(0.8)))
# save
ggsave("output/ec_indv.png", width = 16, height = 22, units = "cm")
# render plot
bact <- "Enterococci"
dat %>%
filter(data_type == bact) %>%
ggplot(aes(as.Date(paste(2014,strftime(date,format="%m-%d"),sep="-")),
gm30,
color = gm30_excd)) +
geom_point(size = 3) +
# 30 day geomean standard for contact recreation = 35 CFU/100 mL
geom_hline(yintercept = 35, lty = "dashed") +
facet_grid(location ~ year, labeller = labeller(location = label_wrap_gen(5))) +
ylim(0,150) +
xlab("") +
ylab("30 Day Geometric Mean Value\n(CFU/100 mL)") +
theme_bw() +
theme(strip.text.y = element_text(angle = 0),
legend.direction = "vertical",
legend.position = "bottom") +
scale_color_discrete(name="Exceedance Type (CFU/100 mL)",
breaks = c("gm_ec_excd_35",
"no_gm_excd"),
labels = c("Contact Recreation Advisory (> 35)",
"No Advisory"),
# force legend to show all possible values even if no exceedances present
limits = c("gm_ec_excd_35", "no_gm_excd")) +
col_theme +
# top title
ggtitle(bact) +
# top and bottom subtitles
labs(subtitle = "30 Day Geometric Mean Sample Values",
caption = "18 AAC 70 Alaska Water Quality Standards (Amended as of April 6, 2018): Bacteria, for Marine Water Uses") +
theme(plot.caption = element_text(hjust=0.5, size=rel(0.8)))
# save
ggsave("output/ec_geomean.png", width = 16, height = 22, units = "cm")
2.) Contact recreation, post-season exceedance
# render table
bact <- "Enterococci"
(z <- dat %>%
filter(data_type == bact) %>%
# calculate percentages by exceedance type
pct_routine(location,year,indv_excd,
ungroup = T) %>%
transform(year = as.factor(year)) %>%
# assign pass/fail
filter(indv_excd != "no_indv_excd") %>%
separate(indv_excd, sep = "_",into = c("a","b","std")) %>%
mutate(pass_fail = ifelse(pct > .1, "fail","pass")) %>%
select(-a,-b) %>%
adorn_pct_formatting() %>%
rename("Location" = "location",
"Year" = "year",
"% of Sample Above Standard" = "pct",
"Standard (CFU/100 mL)" = "std",
"Pass/Fail" = "pass_fail") %>%
arrange(Location,Year) )
## Location Year Standard (CFU/100 mL) % of Sample Above Standard
## 1 North Kenai Beach 4 2019 130 21.4%
## 2 South Kenai Beach 3 2019 130 35.7%
## 3 South Kenai Beach 3 2020 130 15.4%
## Pass/Fail
## 1 fail
## 2 fail
## 3 fail
# save table
write.csv(z, "output/ec_site_exd_pct.csv", row.names = F)
–future goal: use “heat map calendar” format: https://vietle.info/post/calendarheatmap/
What was the range of geometric mean values?
# by year
(summ_tbl <- dat %>%
group_by(location,data_type,year) %>%
filter(!is.na(gm30)) %>%
summarise(gm30_mean = mean(gm30),
gm30_se = std.error(gm30),
gm30_max = max(gm30),
gm30_min = min(gm30)))
## # A tibble: 21 x 7
## # Groups: location, data_type [11]
## location data_type year gm30_mean gm30_se gm30_max gm30_min
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Kenai River Gull Rook~ Enterococci 2019 9.37 2.62 21.2 3.38
## 2 Kenai River Gull Rook~ Enterococci 2020 7.84 0.654 10.4 4.02
## 3 Kenai River Gull Rook~ Fecal Colif~ 2019 21.6 4.73 56.0 10.9
## 4 Kenai River Gull Rook~ Fecal Colif~ 2020 61.1 8.08 94.9 33.5
## 5 Kenai River Gull Rook~ Enterococci 2019 10.4 1.26 14.9 3.18
## 6 Kenai River Gull Rook~ Enterococci 2020 4.52 0.615 8.24 2.83
## 7 Kenai River Gull Rook~ Fecal Colif~ 2019 29.2 5.86 53.6 6.66
## 8 Kenai River Gull Rook~ Fecal Colif~ 2020 18.9 1.32 24.4 12.6
## 9 North Kenai Beach 4 Enterococci 2019 37.6 7.96 69.2 9.43
## 10 North Kenai Beach 4 Enterococci 2020 6.05 0.764 10.0 2.61
## # ... with 11 more rows
# save
write.csv(summ_tbl, "output/mean_min_max.csv", row.names = F)
# arrange and save data in alternate order
summ_tbl_v2 <- summ_tbl%>%
arrange(year,location,data_type) %>%
select(year,location,data_type,gm30_mean,gm30_se,gm30_max,gm30_min)
write.csv(summ_tbl_v2, "output/mean_min_max_v2.csv", row.names = F)
# overall
dat %>%
group_by(data_type) %>%
filter(!is.na(gm30)) %>%
summarise(gm30_mean = mean(gm30),
gm30_max = max(gm30),
gm30_min = min(gm30)) %>%
filter(!is.na(data_type))
## # A tibble: 2 x 4
## data_type gm30_mean gm30_max gm30_min
## <chr> <dbl> <dbl> <dbl>
## 1 Enterococci 20.1 126. 1
## 2 Fecal Coliform 32.9 182. 5.38