Load libraries

library(here)
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(forecast)
library(nlme)
library(MuMIn)

bst <- read.csv(here("Week_13", "Data", "bacteria_size_temperature_edit.csv")) %>%
  filter(!is.na(LNA.ab.uml)) #remove NAs
bst$date <- dmy(bst$date) 

Exploratory Plots

Begin by exploring seasonal patterns in LNA abundance, cell size, biomass, and percent biomass, as well as temperature. I.e., make exploratory plots of these variables that focus on seasonality. What have you learned so far?

LNA Abundance and season

ggplot(bst, aes(x = factor(season), y = LNA.ab.uml)) +
  geom_boxplot()

LNA abundance is highest season 3 and lowest season 1.

LNA cell size and season

ggplot(bst, aes(x = factor(season), y = LNA.bv)) +
  geom_boxplot()

LNA cell size is highest season 2 even though its abundance is 2nd to last.

LNA total biomass and season

ggplot(bst, aes(x = factor(season), y = LNA.B)) +
  geom_boxplot()

LNA total biomass and abundance is highest season 3.

Percent of total bacterial biomass and season

ggplot(bst, aes(x = factor(season), y = X.LNA.BB)) +
  geom_boxplot()

Temperature at 5m and season

ggplot(bst, aes(x = factor(season), y = temp.5.m.E2)) +
  geom_boxplot()

During the season 3 where LNA abundance is highest, the temperature at 5m depth is also the highest. Season 1 has the lowest abundance and lowest temperature.

Statistical models

  1. Let’s create statistical models to test for long-term trends, as well as relationships with temperature. Because the sampling is monthly, with only a couple gaps, we can treat the time series as one measured at discrete intervals. To keep track of the sample order you’ll need to construct a new column which numbers each sample in the order they were sampled. I.e., the column should look like 1, 2, 3, 4, etc., where the numbers correspond to the month in the time series. You can use the year and month columns to construct this. Use linear models to test for long term (linear) trends in the five variables: LNA abundance, LNA cell size, LNA biomass, LNA percent biomass, and temperature. For each of these models, assess whether the residuals show evidence of temporal autocorrelation. If autocorrelation is present, add a residual autocorrelation component to your model. Assess whether the model successfully accounted for autocorrelation. What have you learned from these models? Does accounting for autocorrelation change the results?
bst <- bst %>%
  arrange(year, month) %>% #chronological order
  mutate(sample_number = row_number()) #sample order

Linear Models

modlnaab = lm(LNA.ab.uml~ 1, data = bst)
modlnbv = lm(LNA.bv ~ 1, bst)
modlnab = lm(LNA.B ~ 1, bst)
modlnabb = lm(X.LNA.BB ~ 1, bst)
modtemp = lm(temp.5.m.E2 ~ 1, bst)

Residual ACF

modlnaab = lm(LNA.ab.uml~ 1, data = bst)

bst$resid.rainpredictor.raw = resid(modlnaab) 
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')

modlnbv = lm(LNA.bv ~ 1, bst)

bst$resid.rainpredictor.raw = resid(modlnbv) 
ggAcf(bst$resid.rainpredictor.raw)

modlnab = lm(LNA.B ~ 1, bst)

bst$resid.rainpredictor.raw = resid(modlnab) 
ggAcf(bst$resid.rainpredictor.raw) 

modlnabb = lm(X.LNA.BB ~ 1, bst)

bst$resid.rainpredictor.raw = resid(modlnabb) 
ggAcf(bst$resid.rainpredictor.raw)

modtemp = lm(temp.5.m.E2 ~ 1, bst)

bst$resid.rainpredictor.raw = resid(modtemp) 
ggAcf(bst$resid.rainpredictor.raw) 

All models have a higher positive value at lag-1 and then oscillate at regular intervals which indicates seasonality and temporal autocorrelation.

Statistical Models

# residual autocorrelation component

# notes example: mod1 = gls(log(Coot.Maui) ~ Rainfall, data = birds, correlation = corAR 1(form =~ Year), method = "ML") 

modlnaabgls = gls(LNA.ab.uml ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 
modlnbvgls = gls(LNA.bv ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 
modlnabgls = gls(LNA.B ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 
modlnabbgls = gls(X.LNA.BB ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 
modtempgls = gls(temp.5.m.E2 ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 

Residual ACF

modlnaabgls

# ggAcf(resid(mod1, type = 'normalized')) + labs(title = 'normalized resi duals, rainfall predictor')
ggAcf(resid(modlnaabgls, type = 'normalized'))

modlnbvgls

ggAcf(resid(modlnbvgls, type = 'normalized'))

modlnabgls

ggAcf(resid(modlnabgls, type = 'normalized'))

modlnabbgls

ggAcf(resid(modlnabbgls, type = 'normalized'))

modtempgls

ggAcf(resid(modtempgls, type = 'normalized'))

Normalized residuals do not have much higher positive values now at lag-1 when we account for temporal correlation. Most fall below the blue threshold.

AICc

modlnaab

AICc(modlnaab)
## [1] 3132.848
AICc(modlnaabgls)
## [1] 3121.856

modlnbv

AICc(modlnbv)
## [1] -822.4318
AICc(modlnbvgls)
## [1] -836.202

modlnab

AICc(modlnab)
## [1] 595.652
AICc(modlnabgls)
## [1] 586.03

modlnabb

AICc(modlnabb)
## [1] -177.2299
AICc(modlnabbgls)
## [1] -192.6451

modtemp

AICc(modtemp)
## [1] 586.0755
AICc(modtempgls)
## [1] 494.8632

Given the smaller AIC values for the models that account for autocorrelation, we see that it is important to account for it.

Temperature as the predictor

  1. Perform a similar set of analyses using temperature as the predictor, to test whether temperature can explain variation in the four LNA metrics, and whether autocorrelation needs to be accounted for when testing these relationships. Make appropriate plots of the results and discuss the magnitude of the relationships. Considering the whole set of analyses, what are your interpretations of the dynamics and drivers of LNA bacteria in this system?

Temp lm mods

tempmodlnaab = lm(LNA.ab.uml ~ temp.5.m.E2, data = bst)
tempmodlnbv = lm(LNA.bv ~ temp.5.m.E2, bst)
tempmodlnab = lm(LNA.B ~ temp.5.m.E2, bst)
tempmodlnabb = lm(X.LNA.BB ~ temp.5.m.E2, bst)
tempmodtemp = lm(temp.5.m.E2 ~ temp.5.m.E2, bst)
## Warning in model.matrix.default(mt, mf, contrasts): the response appeared on
## the right-hand side and was dropped
## Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
## model.matrix: no columns are assigned

Residual ACF

tempmodlnaab

bst$resid.rainpredictor.raw = resid(tempmodlnaab) 
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')

tempmodlnbv

bst$resid.rainpredictor.raw = resid(tempmodlnbv) 
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')

tempmodlnab

bst$resid.rainpredictor.raw = resid(tempmodlnab) 
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')

tempmodlnabb

bst$resid.rainpredictor.raw = resid(tempmodlnabb) 
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')

tempmodtemp

bst$resid.rainpredictor.raw = resid(tempmodtemp) 
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')

Statistical Models

# residual autocorrelation component

# notes example: mod1 = gls(log(Coot.Maui) ~ Rainfall, data = birds, correlation = corAR 1(form =~ Year), method = "ML") 

tempmodlnaabgls = gls(LNA.ab.uml ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 

tempmodlnbvgls = gls(LNA.bv ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 

tempmodlnabgls = gls(LNA.B ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 

tempmodlnabbgls = gls(X.LNA.BB ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML") 

Residual ACF GLS

tempmodlnaabgls

# ggAcf(resid(mod1, type = 'normalized')) + labs(title = 'normalized resi duals, rainfall predictor')
ggAcf(resid(tempmodlnaabgls, type = 'normalized'))

tempmodlnbvgls

# ggAcf(resid(mod1, type = 'normalized')) + labs(title = 'normalized resi duals, rainfall predictor')
ggAcf(resid(tempmodlnbvgls, type = 'normalized'))

tempmodlnbvgls

ggAcf(resid(tempmodlnbvgls, type = 'normalized'))

tempmodlnabgls

ggAcf(resid(tempmodlnabgls, type = 'normalized'))

tempmodlnabbgls

ggAcf(resid(tempmodlnabbgls, type = 'normalized'))

AICc

tempmodlnaab

AICc(tempmodlnaab)
## [1] 3116.686
AICc(tempmodlnaabgls)
## [1] 3114.866

tempmodlnbv

AICc(tempmodlnbv)
## [1] -825.1361
AICc(tempmodlnbvgls)
## [1] -837.432

tempmodlnab

AICc(tempmodlnab)
## [1] 581.7598
AICc(tempmodlnabgls)
## [1] 580.1918

tempmodlnabb

AICc(tempmodlnabb)
## [1] -199.999
AICc(modlnabbgls)
## [1] -192.6451