R Session Info
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.4.3 backports_1.1.2 magrittr_1.5 rprojroot_1.3-2
## [5] tools_3.4.3 htmltools_0.3.6 yaml_2.1.16 Rcpp_0.12.15
## [9] stringi_1.1.6 rmarkdown_1.8 knitr_1.19 stringr_1.2.0
## [13] digest_0.6.15 evaluate_0.10.1
Bioenergetics modeling efforts will require the following fieldwork-generated data as inputs: mean weight at start and end of simulation, proportional daily mean diet inputs, and daily mean water temperature.
When these data sources are grouped by Year, Species, Age, and River (e.g. “Beaver Creek Coho 2015”), a total of 31 groupings emerge (all age 2 fish excluded). However, sufficiently robust data does not exist to support modeling effort in all of these iterations (low sample size; temperature data missing; etc.) Additionally, in some of these groupings an increasing size trend (positive slope) is not observed; likely due to insufficent sample size or complex habitat.
Table X below
# struggling to automate the process of reading in final csv temperature files from local folder. Accomplished with manual read.csv for now
dir15 <-"/Users/bmeyer/Google Drive/Thesis/R Analysis/Temperature Data/Final Temperature Data/2015/"
dir16 <-"/Users/bmeyer/Google Drive/Thesis/R Analysis/Temperature Data/Final Temperature Data/2016/"
BC15 <- read.csv(paste0(dir15,"2015_","LBC","_H2OTemp_Final",".csv"))
RR15 <- read.csv(paste0(dir15,"2015_","LRR","_H2OTemp_Final",".csv"))
PC15 <- read.csv(paste0(dir15,"2015_","LPC","_H2OTemp_Final",".csv"))
LKR15 <- read.csv(paste0(dir15,"2015_","LKR","_H2OTemp_Final",".csv"))
BC16 <- read.csv(paste0(dir16,"2016_","LBC","_H2OTemp_Final",".csv"))
RR16 <- read.csv(paste0(dir16,"2016_","LRR","_H2OTemp_Final",".csv"))
PC16 <- read.csv(paste0(dir16,"2016_","MPC","_H2OTemp_Final",".csv"))
LKR16 <- read.csv(paste0(dir16,"2016_","LKR","_H2OTemp_Final",".csv"))
# bind all temperature files together
# binding and transformation performed seperately for 2015 and 2016 because for some reason POSIX format of DateTime column between the years is incompatible... (2/8/2018)
fnl_temps15 <- bind_rows(BC15,RR15,PC15,LKR15) %>%
transform(DateTime = mdy_hm(DateTime)) %>%
mutate(year = year(DateTime)) %>%
transform(Stream_Name= as.factor(Stream_Name),
year = as.factor(year),
Date = mdy(Date)) %>%
select(-X,-X.1,-X.2,-X.3)
fnl_temps16 <- bind_rows(BC16,RR16,PC16,LKR16) %>%
# something weird here eliminates 2016 datetime data
transform(DateTime = as.POSIXct(DateTime)) %>%
mutate(year = year(DateTime)) %>%
transform(Stream_Name= as.factor(Stream_Name),
year = as.factor(year),
Date = as.Date(Date)) %>%
select(-X)
# bind together 2015 and 2016
fnl_temps <- bind_rows(fnl_temps15,fnl_temps16)
# remove temporalry objects
rm(fnl_temps15,fnl_temps16,BC15,RR15,PC15,LKR15,BC16,RR16,PC16,LKR16)
# calculate min and max extent of water temperature logger deployments
h2o_logger_times <- fnl_temps %>%
group_by(Stream_Name,year) %>%
summarize(logger.deploy.start = min(Date),
logger.deploy.end = max(Date)) %>%
transform(year = as.factor(year))
# to do: make chart that shows temporal extent of logger deployments
# in progress
# munge
#h2o_logger_times_x <- h2o_logger_times %>%
# transform(logger.deploy.start = yday(logger.deploy.start),
# logger.deploy.end = yday(logger.deploy.end),
# year = as.factor(year))
#p <- ggplot(h2o_logger_times_x, aes(x=logger.deploy.start, xend = logger.deploy.end,
# y=Stream_Name, yend=Stream_Name)) +
# geom_segment(aes(color = Stream_Name),size = 24)
# Import resultant data from "fish_ages_all.R" script. The script uses methods described in Ch. 5 of Ogle 2016 ("Introductory fisheries analyses with R") to age all captured fish from a subset of fish scale samples. Resultant csv file was copied to the google sheet referenced here.
# ID googlesheet with fish age data
fish_ages <- gs_title("all_fish_ages.csv")
Sys.sleep(6)
#read in worksheet containing diet data
dat <- fish_ages %>%
gs_read(ws = "all_fish_ages.csv",col_names=TRUE) %>%
select (-X1) %>%
separate(sample.event, c("reach","year","sample.event.num"), sep = "-", remove = F) %>%
unite(river_spp, river, spp, remove = F, sep = " ") %>%
# manually order appearance of seasons and rivers for plots
transform(year = as.factor(year),
river = factor(river, levels=c("Beaver Creek",
'Russian River',
'Ptarmigan Creek',
'Kenai River')),
season = factor(season, levels = c("Early Summer",
"Mid-Summer",
"Late Summer",
"Fall")),
river_spp = as.factor(river_spp)) %>%
mutate(river_spp_yr = paste(year,river,"\n",spp)) %>%
# age 2 fish: this cohort is sparse and likely departing for the ocean.
filter(age != 2) %>%
# mutate column for yes/no diet sampled
mutate(diet_sample= ifelse(!is.na(sample.id), "Y", "N")) %>%
mutate(Count = 1) %>%
transform(Date = mdy(Date)) %>%
mutate(doy = yday(Date)) %>%
# filter all except coho and chinook
filter(spp %in% c("Chinook", "Coho"))
# catch summary table
catch_summary <- dat %>%
group_by(year,river,spp,age,diet_sample) %>%
summarise(ct = sum(Count)) %>%
spread(diet_sample,ct) %>%
replace(is.na(.), 0) %>%
mutate(total_fish = sum(N,Y)) %>%
rename(diet_sample = Y,
len_wt_only = N)
# calc slope of each size trend for len and wt...??
all_slopes <- dat %>%
transform(river_spp_yr = as.factor(river_spp_yr),
year = as.factor(year),
spp = as.factor(spp),
len_mm = as.numeric(len_mm)) %>%
select(river_spp_yr,river,year,spp,age,len_mm,wt_g,doy)
# length model formulation for growth trend
len_growth_model <- function(df) {
lm(len_mm ~ doy, data = df)
}
# weight model formulation for growth trend
wt_growth_model <- function(df) {
lm(wt_g ~ doy, data = df)
}
# create table of length and weight trend slopes for each simulation
size_age_spp_year <- all_slopes %>%
group_by(river, spp, age, year) %>%
nest() %>%
# calculate slopes & intercept for len_mm ~ day of year
mutate(
age_len = map(data, len_growth_model),
tidy_age_len = map(age_len, broom::tidy),
len_slope = map_dbl(tidy_age_len, ~.$estimate[2]),
len_intercept = map_dbl(tidy_age_len, ~.$estimate[[1]])) %>%
# calculate slopes for wt_g ~ day of year
mutate(
age_wt = map(data, wt_growth_model),
tidy_age_wt = map(age_wt, broom::tidy),
wt_slope = map_dbl(tidy_age_wt, ~.$estimate[2]),
wt_intercept = map_dbl(tidy_age_wt, ~.$estimate[[1]])) %>%
select(year,river, spp, age, len_slope, len_intercept, wt_slope, wt_intercept)
# Define temporal extents of proposed simulations
# NOTE 1/22/18: this code also exists as a script at "/Users/bmeyer/Google Drive/Thesis/R Analysis/Thesis Analyses R Project/Overall_scripts_2015_2016/sampling_periods.R". Attempted to just source("\path") this script in to the markdown document but, 'source' function prevents knitting through markdown for some reason.
############
#
# 2015
#
###########
# Verify that sheet is present, create object of sheet with all tabs
SCTC2015 <- gs_title("2015 EPSCoR SCTC.xlsx")
#read in specific worksheet containing fishing data
effort2015 <- SCTC2015 %>%
gs_read(ws = "B Fishing Data",col_names=TRUE, skip=2) %>%
#rename needed columns
rename(site.id=Site_ID,
sample.event=Sample_Event) %>%
#select desired columns
select(site.id,River,sample.event,sample.event.num,deploy_dt,collect_dt) %>%
#filter out blanks
filter(!is.na(sample.event)) %>%
# transform columns to desired classes
transform(site.id=as.factor(site.id),
sample.event=as.factor(sample.event),
sample.event.num=as.factor(sample.event.num))
###################
#
# 2016
#
###################
# Verify that sheet is present, create object of sheet with all tabs
SCTC2016 <- gs_title("2016_EPSCoR_Aquatic_Ecology_Database")
#read in specific worksheet containing fishing data
effort2016 <- SCTC2016 %>%
gs_read(ws = "B Fishing Data",col_names=TRUE, skip=2) %>%
#rename needed columns
rename(site.id=Site_ID,
sample.event=Sample_Event) %>%
#select desired columns
select(site.id,River,sample.event,sample.event.num,deploy_dt,collect_dt) %>%
#filter our blanks
filter(!is.na(sample.event)) %>%
# transform columns to desired classes
transform(site.id=as.factor(site.id),
sample.event=as.factor(sample.event),
sample.event.num=as.factor(sample.event.num))
# bind 2015 & 2016 data; format datetimes
effort <- bind_rows(effort2015,effort2016) %>%
transform(collect_dt = anytime(collect_dt),
deploy_dt = anytime(deploy_dt)) %>%
mutate(year = year(collect_dt))
# Define Temporal Extents of proposed simulations
# "Simulation" temporal extent
# Time period from first date of each season to first date of following season
# prepare data to tibble form
sim_extents <- effort %>%
select(-site.id) %>%
group_by(River, year, sample.event.num) %>%
nest() %>%
mutate(season.start = map_dbl(data, ~min(.$deploy_dt)),
season.end = map_dbl(data, ~min(.$deploy_dt))
) %>%
transform(season.start = anytime(season.start),
season.end = anytime(season.end)) %>%
arrange(River,year,sample.event.num)
# prepare data such that end of one season is beginning of next
t <- sim_extents %>%
mutate(season.end_x = lead(season.end)) %>%
select(-season.end) %>%
arrange(River,year,sample.event.num)
# prepare data such that final season of each year has duration of 30 days.
# 2015 had 3 seasons, 2016 had 4 seasons.
t_end15 <- t %>%
filter(sample.event.num == 3,
year == 2015) %>%
mutate(season.end = season.start + days(30)) %>%
select(-season.end_x)
t_end16 <- t %>%
filter(sample.event.num == 4,
year == 2016) %>%
mutate(season.end = season.start + days(30)) %>%
select(-season.end_x)
t_end15_1 <- t %>%
filter(sample.event.num != 3,
year != 2016) %>%
mutate(season.end = season.start + days(30)) %>%
select(-season.end_x)
t_end16_1 <- t %>%
filter(sample.event.num != 4,
year != 2015) %>%
mutate(season.end = season.start + days(30)) %>%
select(-season.end_x)
# final version of simulation extents
sim_extents <- bind_rows(t_end15_1,t_end15,t_end16_1,t_end16) %>%
rename(Stream_Name = River,
sim.start = season.start,
sim.end = season.end) %>%
transform(year = as.factor(year))
# remove temporary objects
rm(t,t_end15,t_end15_1,t_end16,t_end16_1,effort2015,effort2016)
# to do: make plot that visualizes extent of each simulation period faceted by year and river
# does water temperature data overlap with all of the simulation extents?
# NOTE: to do; fix kenai river water temp import for longer time extent
# by year, river, season, species
# join to simulation selection datatable when considering at scale of SEASON start -> finish
sim_logger_overlap <- full_join(sim_extents,h2o_logger_times,by=c("Stream_Name","year")) %>%
mutate(start_overlap = ifelse(sim.start > logger.deploy.start,"Yes","No")) %>%
mutate(end_overlap = ifelse(sim.end < logger.deploy.end,"Yes","No")) %>%
mutate(full_overlap = ifelse(start_overlap == "Yes" & end_overlap == "Yes","Yes","No")) %>%
rename(river = Stream_Name) %>%
select(river,year,sample.event.num,full_overlap) %>%
transform(year = as.factor(year),
river = as.factor(river))
# by year, river, species (NOT season)
# join to simulation selection datatable when considering at scale of YEAR start -> finish
sim_logger_overlap_yr <- sim_extents %>%
group_by(Stream_Name,year) %>%
summarise(yr.sim.start = min(sim.start),
yr.sim.end = max(sim.end)) %>%
full_join(h2o_logger_times,by=c("Stream_Name","year")) %>%
mutate(start_overlap = ifelse(yr.sim.start > logger.deploy.start,"Yes","No")) %>%
mutate(end_overlap = ifelse(yr.sim.end < logger.deploy.end,"Yes","No")) %>%
mutate(full_overlap = ifelse(start_overlap == "Yes" & end_overlap == "Yes","Yes","No")) %>%
rename(river = Stream_Name) %>%
select(river,year,full_overlap) %>%
transform(year = as.factor(year),
river = as.factor(river))
# join catch table and size trend slopes table
tbl <- full_join(catch_summary, size_age_spp_year, by=c("year","river","spp","age"))
# join water temperature extent/overlap data summary to catch/growth data summary
tbl <- full_join(tbl,sim_logger_overlap_yr,by=c("river","year"))
datatable(tbl, caption = "Juvenile Chinook and Coho Catch and Size Trend Summary Data",
filter = 'top', options = list(
pageLength = 5, autoWidth = TRUE)) %>%
formatSignif(c("len_slope","wt_slope","len_intercept","wt_intercept"), digits = 2)
The following filters are applied to the data table below:
tbl_filt <- tbl %>%
filter(total_fish > 10,
len_slope > 0,
wt_slope > 0)
datatable(tbl_filt, filter = 'top', options = list(
pageLength = 5, autoWidth = TRUE)) %>%
formatSignif(c("len_slope","wt_slope","len_intercept","wt_intercept"), digits = 2)
At the grouping level of year ~ river ~ species ~ age, about 1/3 (13 of 31) of the total potential simulations meet the ad hoc standards for sufficient data to perform a bioenergetics simulation. An additional consideration is that not all of these simulations will have 100% coverage for water temperature data.
# find start and end weights
# summary table
size_summary <- dat %>%
group_by(river, season, age, spp, year) %>%
summarise(average_wt = mean(wt_g),
stdev_wt = sd(wt_g),
average_len = mean(len_mm),
stdev_len = sd(len_mm),
count = n()) %>%
select(river,season,year,spp,age,count,average_wt,stdev_wt,average_len,stdev_len) %>%
rename("_age_" = age)
# join chosen simulations (from filter) to seasonal mean length/weight data;
# fix "_age_" name
size_summary <- size_summary %>%
rename(age = `_age_`)
# join tables; use inner_join becuase we want to include only the simulations selected by the previous table
tbl2 <- inner_join(size_summary,tbl_filt,by=c("river","year","spp","age")) %>%
transform(year = as.factor(year))
#from start doy to end doy
#want output for wt from y=mx+b
#need: day of year for the sim extent start and finish
sim_extents <- sim_extents %>%
mutate(sim.start.doy = yday(sim.start),
sim.end.doy = yday(sim.end)) %>%
mutate(season = fct_recode(as.factor(sample.event.num),
"Early Summer"="1",
"Mid-Summer" = "2",
"Late Summer" ="3",
"Fall" = "4")) %>%
rename(river = Stream_Name)
# use slope and intercept values from empirical growth observations to generate simulation start and end weights/lengths; use y = mx + b linear model
s <- left_join(tbl2,sim_extents,by=c("season","river","year")) %>%
mutate(lm_wt_start = wt_slope*sim.start.doy + wt_intercept,
lm_wt_end = wt_slope*sim.end.doy + wt_intercept,
lm_len_start = len_slope*sim.start.doy + len_intercept,
lm_len_end = len_slope*sim.end.doy + len_intercept)
s1 <- s %>%
select(-c(data,sample.event.num))
datatable(s1, filter = 'top', options = list(
pageLength = 5)) %>%
formatSignif(c("average_wt","stdev_wt","average_len","stdev_len","len_slope","wt_slope","len_intercept","wt_intercept","lm_wt_start","lm_wt_end","lm_len_start","lm_len_end"), digits = 2)