R Session Info
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.4.3 backports_1.1.2 magrittr_1.5 rprojroot_1.3-2
## [5] tools_3.4.3 htmltools_0.3.6 yaml_2.1.16 Rcpp_0.12.15
## [9] stringi_1.1.6 rmarkdown_1.8 knitr_1.20 stringr_1.3.0
## [13] digest_0.6.15 evaluate_0.10.1
This script imports and analysis results generated by two other scripts, a.) fishbioenergetics_run_template_coho.Rmd and fishbioenergetics_run_template_chnk.Rmd .
# read in coho bioenergetics results
bioenerg_results_coho <- gs_title("fishbioenergetics_results_coho")
bioenerg_results_coho <- bioenerg_results_coho %>%
gs_read(ws = "coho_sims")
# read in Chinook bioenergetics results
bioenerg_results_chnk <- gs_title("fishbioenergetics_results_chnk")
bioenerg_results_chnk <- bioenerg_results_chnk %>%
gs_read(ws = "chnk_sims")
# bind all results
results <- bind_rows(bioenerg_results_coho,bioenerg_results_chnk) %>%
# split sim name in to factors
separate(col = sim_name, into = c("year","season","river","spp","agex","age"), sep = "\\_", remove = F) %>%
select(-agex) %>%
rename(SimDay = Sim.Day)
# import table to associate Sim.Day for each season with a day of year
sim_dates <- gs_title("sim_dates") %>%
gs_read(ws = "Sheet1",col_names=TRUE) %>%
transform(sim_name = as.factor(sim_name),
Date = mdy(Date)) %>%
mutate(doy = yday(Date))
# join sim_dates table with results
results <- left_join(results,sim_dates, by = c("sim_name","SimDay")) %>%
mutate(sim_year_spp_age = paste0(year,"_",spp,"_",age))
#Import water temperatures
# struggling to automate the process of reading in final csv temperature files from local folder. Accomplished with manual read.csv for now
dir15 <-"/Users/bmeyer/Google Drive/Thesis/R Analysis/Temperature Data/Final Temperature Data/2015/"
dir16 <-"/Users/bmeyer/Google Drive/Thesis/R Analysis/Temperature Data/Final Temperature Data/2016/"
BC15 <- read.csv(paste0(dir15,"2015_","LBC","_H2OTemp_Final",".csv"))
RR15 <- read.csv(paste0(dir15,"2015_","LRR","_H2OTemp_Final",".csv"))
PC15 <- read.csv(paste0(dir15,"2015_","LPC","_H2OTemp_Final",".csv"))
LKR15 <- read.csv(paste0(dir15,"2015_","LKR","_H2OTemp_Final",".csv"))
BC16 <- read.csv(paste0(dir16,"2016_","LBC","_H2OTemp_Final",".csv"))
RR16 <- read.csv(paste0(dir16,"2016_","LRR","_H2OTemp_Final",".csv"))
PC16 <- read.csv(paste0(dir16,"2016_","MPC","_H2OTemp_Final",".csv"))
LKR16 <- read.csv(paste0(dir16,"2016_","LKR","_H2OTemp_Final",".csv"))
# bind all temperature files together
# binding and transformation performed seperately for 2015 and 2016 because for some reason POSIX format of DateTime column between the years is incompatible... (2/8/2018)
fnl_temps15 <- bind_rows(BC15,RR15,PC15,LKR15) %>%
transform(DateTime = mdy_hm(DateTime)) %>%
mutate(year = year(DateTime)) %>%
transform(Stream_Name= as.factor(Stream_Name),
year = as.factor(year),
Date = mdy(Date)) %>%
dplyr::select(-c(X,X.1,X.2,X.3))
fnl_temps16 <- bind_rows(BC16,RR16,PC16,LKR16) %>%
# something weird here eliminates 2016 datetime data
transform(DateTime = as.POSIXct(DateTime)) %>%
mutate(year = year(DateTime)) %>%
transform(Stream_Name= as.factor(Stream_Name),
year = as.factor(year),
Date = as.Date(Date)) %>%
dplyr::select(-X)
# bind together 2015 and 2016
fnl_temps <- bind_rows(fnl_temps15,fnl_temps16) %>%
mutate(Stream_Name_Date = paste0(Stream_Name,"_",Date))
# calculate daily mean temps
daily_mean_temps <- fnl_temps %>%
filter(!is.na(Temp_C)) %>%
group_by(Stream_Name,Date) %>%
summarise(avg_temp = mean(Temp_C)) %>%
select(Date,Stream_Name,avg_temp) %>%
#rename Stream_Name
rename(river = Stream_Name) %>%
# add column for day of year (doy) %>%
mutate(doy = yday(Date))
# remove temporalry objects
rm(fnl_temps15,fnl_temps16,BC15,RR15,PC15,LKR15,BC16,RR16,PC16,LKR16,dir15,dir16)
results <- inner_join(results,daily_mean_temps,by=c("Date","doy","river"))
# read in start/end weight data
sim_size_inputs <- gs_title("sim_size_inputs")
sim_size_inputs <- sim_size_inputs %>%
gs_read(ws = "lw_data") %>%
select(sim_name,
lm_wt_start,lm_wt_end,lm_len_start,lm_len_end)
#join tables
results <- inner_join(results,sim_size_inputs,by=c("sim_name"))
#
theme_bm <- theme(axis.text.x = element_text(size=18),
axis.text.y = element_text(size=18), # x axis text
axis.title.x = element_text(size=18, face = "bold"), # x axis title
axis.title.y = element_text(size=18, face = "bold"),
strip.text.x = element_text(size = 16, face = "bold"),
strip.text.y = element_text(size = 16, face = "bold"),
legend.text = element_text(size = 16))
# Correct Facet Labels
#age
age_facets <- c(
`0` = "Age 0",
`1` = "Age 1",
`2` = "Age 2"
)
# rivers
river_facets <- c(
`Beaver Creek` = "Beaver Creek",
`Russian River` = "Russian River",
`Ptarmigan Creek` = "Ptarmigan Creek",
`Kenai River` = "Kenai River"
)
#manual order of seasons
results$season <- factor(results$season,
levels = c('Early Summer',
'Mid-Summer',
'Late Summer',
'Fall'), ordered = TRUE)
Glance at statistcial characteristics of data…
# multivariate regression daily growth ~ food + temperature + others
fit <- lm(`W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + river + season + spp + age, data = results)
z <- summary(fit)
z
##
## Call:
## lm(formula = `W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp +
## river + season + spp + age, data = results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03810 -0.00589 0.00045 0.00472 0.36030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0384131 0.0049699 7.729 2.45e-14 ***
## `Total.Con.(g/d)` 0.1740399 0.0090342 19.265 < 2e-16 ***
## avg_temp -0.0027656 0.0003928 -7.041 3.37e-12 ***
## riverKenai River -0.0044485 0.0017650 -2.520 0.0119 *
## riverPtarmigan Creek -0.0176650 0.0020821 -8.484 < 2e-16 ***
## riverRussian River -0.0010276 0.0017394 -0.591 0.5548
## seasonFall 0.0017498 0.0019800 0.884 0.3770
## seasonLate Summer -0.0101071 0.0015014 -6.732 2.71e-11 ***
## seasonMid-Summer -0.0087932 0.0016560 -5.310 1.33e-07 ***
## sppCoho -0.0032044 0.0015725 -2.038 0.0418 *
## age1 -0.0186578 0.0017196 -10.850 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01671 on 1090 degrees of freedom
## Multiple R-squared: 0.3703, Adjusted R-squared: 0.3646
## F-statistic: 64.11 on 10 and 1090 DF, p-value: < 2.2e-16
glance(z)
# diagnotsic plots
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
plot(fit)
# consider excluding obs 808 & 809 ...?
# diagnostic plots indicate there are some outliers in rows 808 & 809
# trying to ID these two data points and filter out??!!! How do I do this?
#results <- results %>%
# filter(sim_name != "2016_Fall_Russian River_Chinook_Age_0")
# check diagnotsic plots again
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
plot(fit)
Plot simulation results
# need to arrange so that seasons are continuous along doy axis!
p <- ggplot(results, aes(doy,(`W.Gain.(g/d)`/`Daily.Weight.(g)`), color = season, linetype = age)) +
geom_line(size = 1.2) +
facet_grid(river ~ year) +
xlab("Day of Year") +
ylab("Mass Specific Growth Rate (g/g/day)") +
scale_y_continuous(breaks=c(0,0.0025,0.0075)) +
theme_bw() +
theme_bm +
theme(plot.title = element_text(size = 16, face = "bold"))
p %+% subset(results, spp %in% c("Coho")) +
ggtitle("Coho Simulation Results, Mass-Specific Growth") +
ylim(-0.01,0.025)
p %+% subset(results, spp %in% c("Chinook")) +
ggtitle("Chinook Simulation Results, Mass-Specific Growth") +
ylim(-0.01,0.025)
Thought:: It would be ideal to create plots identical to the above ones, but with trends in water temp and diet mass superimposed. But, anti-multiple-yaxes philosophy embedded in ggplot package precludes this being easy to do…
# plot daily growth vs temperature
p2 <- ggplot(results, aes(avg_temp,(`W.Gain.(g/d)`/`Daily.Weight.(g)`), color = river, shape = year)) +
facet_grid(spp~age, labeller = labeller(age = as_labeller(age_facets))) +
geom_jitter(width = 0.1) +
theme_bm +
xlab("Daily Mean Temperature (C)") +
ylab("Mass Specific Growth Rate (g/g/day)") +
#excludes outliers from visual field
ylim(0,0.025) +
theme_bw() +
theme_bm +
theme(plot.title = element_text(size = 16, face = "bold")) +
ggtitle("Simulation Results, Mass Specific Growth")
p2
Comment: from these plots it appears that the temperature threshold is around ~12 C… with parameter updates from Plumb & Moffitt should actually be higher threhold though?!
# boxplot bionergetics results
p3 <- ggplot(results, aes(season,(`W.Gain.(g/d)`/`Daily.Weight.(g)`), fill = as.factor(river)), color = year) +
geom_boxplot(outlier.size = 0) +
#geom_point(pch = 21, position = position_jitterdodge()) +
facet_grid(spp~age, labeller = labeller(age = as_labeller(age_facets))) +
ylim(0,0.025) +
theme_bw() +
theme_bm +
# legend
scale_fill_discrete(name="River") +
theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
theme(plot.title = element_text(size = 16, face = "bold")) +
xlab("Season") +
ylab("Mass Specific Growth Rate (g/g/day)") +
ggtitle("Simulation Results, Mass Specific Growth")
# print plot
p3
# plot daily growth vs day of year
# in progress 3/8/2018, lines plot funny
p3 <- ggplot(results, aes(doy,`Daily.Weight.(g)`, color = river, linetype = season)) +
facet_grid(spp~age, labeller = labeller(age = as_labeller(age_facets))) +
geom_line() +
theme_bm +
xlab("Day of Year") +
ylab("Daily Weight (g)") +
theme_bw() +
theme_bm +
ggtitle("Simulation Results - Daily Weight")
p3
#This is not plotting correctly.
# try AIC selection
# multivariate regression daily growth ~ food + temperature
library(MASS)
fit <- lm(`W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + year + river + season + spp + age, data = results)
step <- stepAIC(fit)
## Start: AIC=-9000.9
## `W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + year + river +
## season + spp + age
##
## Df Sum of Sq RSS AIC
## <none> 0.30330 -9000.9
## - year 1 0.000980 0.30428 -8999.4
## - spp 1 0.001993 0.30529 -8995.7
## - avg_temp 1 0.013638 0.31694 -8954.5
## - season 3 0.017353 0.32065 -8945.6
## - age 1 0.023751 0.32705 -8919.9
## - river 3 0.029569 0.33287 -8904.5
## - `Total.Con.(g/d)` 1 0.100804 0.40410 -8687.0