library(here)
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(forecast)
library(nlme)
library(MuMIn)
bst <- read.csv(here("Week_13", "Data", "bacteria_size_temperature_edit.csv")) %>%
filter(!is.na(LNA.ab.uml)) #remove NAs
bst$date <- dmy(bst$date)
Begin by exploring seasonal patterns in LNA abundance, cell size, biomass, and percent biomass, as well as temperature. I.e., make exploratory plots of these variables that focus on seasonality. What have you learned so far?
ggplot(bst, aes(x = factor(season), y = LNA.ab.uml)) +
geom_boxplot()
LNA abundance is highest season 3 and lowest season 1.
ggplot(bst, aes(x = factor(season), y = LNA.bv)) +
geom_boxplot()
LNA cell size is highest season 2 even though its abundance is 2nd to last.
ggplot(bst, aes(x = factor(season), y = LNA.B)) +
geom_boxplot()
LNA total biomass and abundance is highest season 3.
ggplot(bst, aes(x = factor(season), y = X.LNA.BB)) +
geom_boxplot()
ggplot(bst, aes(x = factor(season), y = temp.5.m.E2)) +
geom_boxplot()
During the season 3 where LNA abundance is highest, the temperature at 5m depth is also the highest. Season 1 has the lowest abundance and lowest temperature.
ggplot(bst, aes(x = date, y = LNA.ab.uml)) + geom_point() + geom_line()
Abundance fluctuates throughout a single year and appears to peak during the last half of a single year.
ggplot(bst, aes(x = date, y = LNA.bv)) + geom_point() + geom_line()
The cell size decreased from 2002 to 2012.
ggplot(bst, aes(x = date, y = LNA.B)) + geom_point() + geom_line()
ggplot(bst, aes(x = date, y = X.LNA.BB)) + geom_point() + geom_line()
ggplot(bst, aes(x = date, y = temp.5.m.E2)) + geom_point() + geom_line()
The temperature appears to follow a consistent pattern across the years.
bst <- bst %>%
arrange(year, month) %>% #chronological order
mutate(sample_number = row_number()) #sample order
modlnaab = lm(LNA.ab.uml~ 1, data = bst)
modlnbv = lm(LNA.bv ~ 1, bst)
modlnab = lm(LNA.B ~ 1, bst)
modlnabb = lm(X.LNA.BB ~ 1, bst)
modtemp = lm(temp.5.m.E2 ~ 1, bst)
bst$resid.rainpredictor.raw = resid(modlnaab)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')
bst$resid.rainpredictor.raw = resid(modlnbv)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modlnab)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modlnabb)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modtemp)
ggAcf(bst$resid.rainpredictor.raw)
All models have a higher positive value at lag-1 and then oscillate at regular intervals which indicates seasonality and temporal autocorrelation.
# residual autocorrelation component
# notes example: mod1 = gls(log(Coot.Maui) ~ Rainfall, data = birds, correlation = corAR 1(form =~ Year), method = "ML")
modlnaabgls = gls(LNA.ab.uml ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modlnbvgls = gls(LNA.bv ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modlnabgls = gls(LNA.B ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modlnabbgls = gls(X.LNA.BB ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modtempgls = gls(temp.5.m.E2 ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
# ggAcf(resid(mod1, type = 'normalized')) + labs(title = 'normalized resi duals, rainfall predictor')
ggAcf(resid(modlnaabgls, type = 'normalized'))
ggAcf(resid(modlnbvgls, type = 'normalized'))
ggAcf(resid(modlnabgls, type = 'normalized'))
ggAcf(resid(modlnabbgls, type = 'normalized'))
ggAcf(resid(modtempgls, type = 'normalized'))
Normalized residuals do not have much higher positive values now at lag-1 when we account for temporal correlation. Most fall below the blue threshold.
AICc(modlnaab)
## [1] 3132.848
AICc(modlnaabgls)
## [1] 3121.856
AICc(modlnbv)
## [1] -822.4318
AICc(modlnbvgls)
## [1] -836.202
AICc(modlnab)
## [1] 595.652
AICc(modlnabgls)
## [1] 586.03
AICc(modlnabb)
## [1] -177.2299
AICc(modlnabbgls)
## [1] -192.6451
AICc(modtemp)
## [1] 586.0755
AICc(modtempgls)
## [1] 494.8632
Given the smaller AIC values for the models that account for autocorrelation, we see that it is important to account for it.
tempmodlnaab = lm(LNA.ab.uml ~ temp.5.m.E2, data = bst)
tempmodlnbv = lm(LNA.bv ~ temp.5.m.E2, bst)
tempmodlnab = lm(LNA.B ~ temp.5.m.E2, bst)
tempmodlnabb = lm(X.LNA.BB ~ temp.5.m.E2, bst)
tempmodtemp = lm(temp.5.m.E2 ~ temp.5.m.E2, bst)
## Warning in model.matrix.default(mt, mf, contrasts): the response appeared on
## the right-hand side and was dropped
## Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
## model.matrix: no columns are assigned
bst$resid.rainpredictor.raw = resid(tempmodlnaab)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')
bst$resid.rainpredictor.raw = resid(tempmodlnbv)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')
bst$resid.rainpredictor.raw = resid(tempmodlnab)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')
bst$resid.rainpredictor.raw = resid(tempmodlnabb)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')
bst$resid.rainpredictor.raw = resid(tempmodtemp)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals')
# residual autocorrelation component
# notes example: mod1 = gls(log(Coot.Maui) ~ Rainfall, data = birds, correlation = corAR 1(form =~ Year), method = "ML")
tempmodlnaabgls = gls(LNA.ab.uml ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
tempmodlnbvgls = gls(LNA.bv ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
tempmodlnabgls = gls(LNA.B ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
tempmodlnabbgls = gls(X.LNA.BB ~ temp.5.m.E2, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
# ggAcf(resid(mod1, type = 'normalized')) + labs(title = 'normalized resi duals, rainfall predictor')
ggAcf(resid(tempmodlnaabgls, type = 'normalized'))
# ggAcf(resid(mod1, type = 'normalized')) + labs(title = 'normalized resi duals, rainfall predictor')
ggAcf(resid(tempmodlnbvgls, type = 'normalized'))
ggAcf(resid(tempmodlnbvgls, type = 'normalized'))
ggAcf(resid(tempmodlnabgls, type = 'normalized'))
ggAcf(resid(tempmodlnabbgls, type = 'normalized'))
AICc(tempmodlnaab)
## [1] 3116.686
AICc(tempmodlnaabgls)
## [1] 3114.866
AICc(tempmodlnbv)
## [1] -825.1361
AICc(tempmodlnbvgls)
## [1] -837.432
AICc(tempmodlnab)
## [1] 581.7598
AICc(tempmodlnabgls)
## [1] 580.1918
AICc(tempmodlnabb)
## [1] -199.999
AICc(modlnabbgls)
## [1] -192.6451