EPSCoR SCTC Aquatic Ecology

Bioenergetics data selection and subsetting – Analysis in progress

R Session Info

sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] compiler_3.4.3  backports_1.1.2 magrittr_1.5    rprojroot_1.3-2
##  [5] tools_3.4.3     htmltools_0.3.6 yaml_2.1.16     Rcpp_0.12.15   
##  [9] stringi_1.1.6   rmarkdown_1.8   knitr_1.20      stringr_1.3.0  
## [13] digest_0.6.15   evaluate_0.10.1



This script imports and analysis results generated by two other scripts, a.) fishbioenergetics_run_template_coho.Rmd and fishbioenergetics_run_template_chnk.Rmd .


# read in coho bioenergetics results
bioenerg_results_coho <- gs_title("fishbioenergetics_results_coho")
bioenerg_results_coho <- bioenerg_results_coho %>%
  gs_read(ws = "coho_sims")

# read in Chinook bioenergetics results
bioenerg_results_chnk <- gs_title("fishbioenergetics_results_chnk")
bioenerg_results_chnk <- bioenerg_results_chnk %>%
  gs_read(ws = "chnk_sims")

# bind all results
results <- bind_rows(bioenerg_results_coho,bioenerg_results_chnk) %>% 
  # split sim name in to factors
  separate(col = sim_name, into = c("year","season","river","spp","agex","age"), sep = "\\_", remove = F) %>%
  select(-agex) %>%
  rename(SimDay = Sim.Day)

# import table to associate Sim.Day for each season with a day of year
sim_dates <- gs_title("sim_dates") %>%
  gs_read(ws = "Sheet1",col_names=TRUE) %>%
  transform(sim_name = as.factor(sim_name),
            Date = mdy(Date)) %>%
  mutate(doy = yday(Date))

# join sim_dates table with results
results <- left_join(results,sim_dates, by = c("sim_name","SimDay")) %>%
  mutate(sim_year_spp_age = paste0(year,"_",spp,"_",age))

#Import water temperatures

# struggling to automate the process of reading in final csv temperature files from local folder.  Accomplished with manual read.csv for now

dir15 <-"/Users/bmeyer/Google Drive/Thesis/R Analysis/Temperature Data/Final Temperature Data/2015/"
dir16 <-"/Users/bmeyer/Google Drive/Thesis/R Analysis/Temperature Data/Final Temperature Data/2016/"

BC15 <- read.csv(paste0(dir15,"2015_","LBC","_H2OTemp_Final",".csv"))
RR15 <- read.csv(paste0(dir15,"2015_","LRR","_H2OTemp_Final",".csv"))
PC15 <- read.csv(paste0(dir15,"2015_","LPC","_H2OTemp_Final",".csv"))
LKR15 <- read.csv(paste0(dir15,"2015_","LKR","_H2OTemp_Final",".csv"))

BC16 <- read.csv(paste0(dir16,"2016_","LBC","_H2OTemp_Final",".csv"))
RR16 <- read.csv(paste0(dir16,"2016_","LRR","_H2OTemp_Final",".csv"))
PC16 <- read.csv(paste0(dir16,"2016_","MPC","_H2OTemp_Final",".csv"))
LKR16 <- read.csv(paste0(dir16,"2016_","LKR","_H2OTemp_Final",".csv"))

# bind all temperature files together
# binding and transformation performed seperately for 2015 and 2016 because for some reason POSIX format of DateTime column between the years is incompatible... (2/8/2018)
fnl_temps15 <- bind_rows(BC15,RR15,PC15,LKR15) %>%
transform(DateTime = mdy_hm(DateTime)) %>%
  mutate(year = year(DateTime)) %>%
  transform(Stream_Name= as.factor(Stream_Name),
            year = as.factor(year),
            Date = mdy(Date)) %>%
  dplyr::select(-c(X,X.1,X.2,X.3))

fnl_temps16 <- bind_rows(BC16,RR16,PC16,LKR16) %>%
  # something weird here eliminates 2016 datetime data
  transform(DateTime = as.POSIXct(DateTime)) %>%
  mutate(year = year(DateTime)) %>%
  transform(Stream_Name= as.factor(Stream_Name),
            year = as.factor(year),
            Date = as.Date(Date)) %>%
  dplyr::select(-X)

# bind together 2015 and 2016
fnl_temps <- bind_rows(fnl_temps15,fnl_temps16) %>%
  mutate(Stream_Name_Date = paste0(Stream_Name,"_",Date))

# calculate daily mean temps
daily_mean_temps <- fnl_temps %>%
  filter(!is.na(Temp_C)) %>%
  group_by(Stream_Name,Date) %>%
  summarise(avg_temp = mean(Temp_C)) %>%
  select(Date,Stream_Name,avg_temp) %>%
  #rename Stream_Name
  rename(river = Stream_Name) %>%
  # add column for day of year (doy) %>%
  mutate(doy = yday(Date))

# remove temporalry objects
rm(fnl_temps15,fnl_temps16,BC15,RR15,PC15,LKR15,BC16,RR16,PC16,LKR16,dir15,dir16)

results <- inner_join(results,daily_mean_temps,by=c("Date","doy","river"))

# read in start/end weight data
sim_size_inputs <- gs_title("sim_size_inputs") 
sim_size_inputs <- sim_size_inputs %>%
  gs_read(ws = "lw_data") %>%
  select(sim_name,
         lm_wt_start,lm_wt_end,lm_len_start,lm_len_end)

#join tables
results <- inner_join(results,sim_size_inputs,by=c("sim_name"))

#
theme_bm <- theme(axis.text.x = element_text(size=18),
                  axis.text.y = element_text(size=18),                       # x axis text
                  axis.title.x = element_text(size=18, face = "bold"),          # x axis title
                  axis.title.y = element_text(size=18, face = "bold"),
                  strip.text.x = element_text(size = 16, face = "bold"),
                  strip.text.y = element_text(size = 16, face = "bold"),
                  legend.text = element_text(size = 16)) 


# Correct Facet Labels
#age
age_facets <- c(
  `0` = "Age 0",
  `1` = "Age 1",
  `2` = "Age 2"
  )
# rivers
river_facets <- c(
  `Beaver Creek` = "Beaver Creek",
  `Russian River` = "Russian River",
  `Ptarmigan Creek` = "Ptarmigan Creek",
  `Kenai River` = "Kenai River"
  )

#manual order of seasons
results$season <- factor(results$season,
    levels = c('Early Summer',
               'Mid-Summer',
               'Late Summer',
               'Fall'), ordered = TRUE)

Glance at statistcial characteristics of data…


# multivariate regression daily growth ~ food + temperature + others
fit <- lm(`W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + river + season + spp + age, data = results)
z <- summary(fit)
z
## 
## Call:
## lm(formula = `W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + 
##     river + season + spp + age, data = results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03810 -0.00589  0.00045  0.00472  0.36030 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.0384131  0.0049699   7.729 2.45e-14 ***
## `Total.Con.(g/d)`     0.1740399  0.0090342  19.265  < 2e-16 ***
## avg_temp             -0.0027656  0.0003928  -7.041 3.37e-12 ***
## riverKenai River     -0.0044485  0.0017650  -2.520   0.0119 *  
## riverPtarmigan Creek -0.0176650  0.0020821  -8.484  < 2e-16 ***
## riverRussian River   -0.0010276  0.0017394  -0.591   0.5548    
## seasonFall            0.0017498  0.0019800   0.884   0.3770    
## seasonLate Summer    -0.0101071  0.0015014  -6.732 2.71e-11 ***
## seasonMid-Summer     -0.0087932  0.0016560  -5.310 1.33e-07 ***
## sppCoho              -0.0032044  0.0015725  -2.038   0.0418 *  
## age1                 -0.0186578  0.0017196 -10.850  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01671 on 1090 degrees of freedom
## Multiple R-squared:  0.3703, Adjusted R-squared:  0.3646 
## F-statistic: 64.11 on 10 and 1090 DF,  p-value: < 2.2e-16
glance(z)

# diagnotsic plots
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)


# consider excluding obs 808 & 809 ...?
# diagnostic plots indicate there are some outliers in rows 808 & 809 

# trying to ID these two data points and filter out??!!! How do I do this?

#results <- results %>%
#  filter(sim_name != "2016_Fall_Russian River_Chinook_Age_0")

# check diagnotsic plots again
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page 
plot(fit)

Plot simulation results


# need to arrange so that seasons are continuous along doy axis!

p <- ggplot(results, aes(doy,(`W.Gain.(g/d)`/`Daily.Weight.(g)`), color = season, linetype = age)) +
  geom_line(size = 1.2) +
  facet_grid(river ~ year) +
  xlab("Day of Year") +
  ylab("Mass Specific Growth Rate (g/g/day)") +
  scale_y_continuous(breaks=c(0,0.0025,0.0075)) +
  theme_bw() +
  theme_bm +
  theme(plot.title = element_text(size = 16, face = "bold")) 

 p %+% subset(results, spp %in% c("Coho")) +
  ggtitle("Coho Simulation Results, Mass-Specific Growth") +
  ylim(-0.01,0.025)


 p %+% subset(results, spp %in% c("Chinook")) +
  ggtitle("Chinook Simulation Results, Mass-Specific Growth") +
  ylim(-0.01,0.025)

Thought:: It would be ideal to create plots identical to the above ones, but with trends in water temp and diet mass superimposed. But, anti-multiple-yaxes philosophy embedded in ggplot package precludes this being easy to do…


# plot daily growth vs temperature

p2 <- ggplot(results, aes(avg_temp,(`W.Gain.(g/d)`/`Daily.Weight.(g)`), color = river, shape = year)) +  
  facet_grid(spp~age, labeller = labeller(age = as_labeller(age_facets))) +
  geom_jitter(width = 0.1) + 
  theme_bm +
  xlab("Daily Mean Temperature (C)") +
  ylab("Mass Specific Growth Rate (g/g/day)") +
  #excludes outliers from visual field
  ylim(0,0.025) +
  theme_bw() +
  theme_bm +
  theme(plot.title = element_text(size = 16, face = "bold")) +
  ggtitle("Simulation Results, Mass Specific Growth")

p2

Comment: from these plots it appears that the temperature threshold is around ~12 C… with parameter updates from Plumb & Moffitt should actually be higher threhold though?!


# boxplot bionergetics results
p3 <- ggplot(results, aes(season,(`W.Gain.(g/d)`/`Daily.Weight.(g)`), fill = as.factor(river)), color = year) +
  geom_boxplot(outlier.size = 0) +
  #geom_point(pch = 21, position = position_jitterdodge()) +
  facet_grid(spp~age, labeller = labeller(age = as_labeller(age_facets))) +
  ylim(0,0.025) +
  theme_bw() +
  theme_bm +
  # legend
  scale_fill_discrete(name="River") +
  theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
  theme(plot.title = element_text(size = 16, face = "bold")) +
  xlab("Season") +
  ylab("Mass Specific Growth Rate (g/g/day)") +
  ggtitle("Simulation Results, Mass Specific Growth")

# print plot
p3


# plot daily growth vs day of year
# in progress 3/8/2018, lines plot funny

p3 <- ggplot(results, aes(doy,`Daily.Weight.(g)`, color = river, linetype = season)) +  
  facet_grid(spp~age, labeller = labeller(age = as_labeller(age_facets))) +
  geom_line() + 
  theme_bm +
  xlab("Day of Year") +
  ylab("Daily Weight (g)") +
  theme_bw() +
  theme_bm +
  ggtitle("Simulation Results - Daily Weight")

p3

#This is not plotting correctly.

# try AIC selection
# multivariate regression daily growth ~ food + temperature
library(MASS)

fit <- lm(`W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + year + river + season + spp + age, data = results)
step <- stepAIC(fit)
## Start:  AIC=-9000.9
## `W.Gain.(g/d)` ~ `Total.Con.(g/d)` + avg_temp + year + river + 
##     season + spp + age
## 
##                     Df Sum of Sq     RSS     AIC
## <none>                           0.30330 -9000.9
## - year               1  0.000980 0.30428 -8999.4
## - spp                1  0.001993 0.30529 -8995.7
## - avg_temp           1  0.013638 0.31694 -8954.5
## - season             3  0.017353 0.32065 -8945.6
## - age                1  0.023751 0.32705 -8919.9
## - river              3  0.029569 0.33287 -8904.5
## - `Total.Con.(g/d)`  1  0.100804 0.40410 -8687.0