library(here)
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(forecast)
library(nlme)
library(MuMIn)
bst <- read.csv(here("Week_13", "Data", "bacteria_size_temperature_edit.csv")) %>%
filter(!is.na(LNA.ab.uml)) #remove NAs
bst$date <- dmy(bst$date)
Begin by exploring seasonal patterns in LNA abundance, cell size, biomass, and percent biomass, as well as temperature. I.e., make exploratory plots of these variables that focus on seasonality. What have you learned so far?
ggplot(bst, aes(x = factor(season), y = LNA.ab.uml)) +
geom_boxplot()
LNA abundance is highest season 3 and lowest season 1.
ggplot(bst, aes(x = factor(season), y = LNA.bv)) +
geom_boxplot()
LNA cell size is highest season 2 even though its abundance is 2nd to last.
ggplot(bst, aes(x = factor(season), y = LNA.B)) +
geom_boxplot()
LNA total biomass and abundance is highest season 3.
ggplot(bst, aes(x = factor(season), y = X.LNA.BB)) +
geom_boxplot()
ggplot(bst, aes(x = factor(season), y = temp.5.m.E2)) +
geom_boxplot()
During the season 3 where LNA abundance is highest, the temperature at 5m depth is also the highest. Season 1 has the lowest abundance and lowest temperature.
ggplot(bst, aes(x = date, y = LNA.ab.uml)) + geom_point() + geom_line()
Abundance fluctuates throughout a single year and appears to peak during the last half of a single year.
ggplot(bst, aes(x = date, y = LNA.bv)) + geom_point() + geom_line()
The cell size decreased from 2002 to 2012.
ggplot(bst, aes(x = date, y = LNA.B)) + geom_point() + geom_line()
ggplot(bst, aes(x = date, y = X.LNA.BB)) + geom_point() + geom_line()
ggplot(bst, aes(x = date, y = temp.5.m.E2)) + geom_point() + geom_line()
The temperature appears to follow a consistent pattern across the years.
bst <- bst %>%
arrange(year, month) %>%
mutate(sample_number = row_number())
modlnaab = lm(LNA.ab.uml~ 1, data = bst)
modlnbv = lm(LNA.bv ~ 1, bst)
modlnab = lm(LNA.B ~ 1, bst)
modlnabb = lm(X.LNA.BB ~ 1, bst)
modtemp = lm(temp.5.m.E2 ~ 1, bst)
bst$resid.rainpredictor.raw = resid(modlnaab)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals, no predictor and no autocorrelation for time')
bst$resid.rainpredictor.raw = resid(modlnbv)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals, no predictor and no autocorrelation for time')
bst$resid.rainpredictor.raw = resid(modlnab)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals, no predictor and no autocorrelation for time')
bst$resid.rainpredictor.raw = resid(modlnabb)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals, no predictor and no autocorrelation for time')
bst$resid.rainpredictor.raw = resid(modtemp)
ggAcf(bst$resid.rainpredictor.raw) + labs(title = 'raw residuals, no predictor and no autocorrelation for time')
modlnaabgls = gls(LNA.ab.uml ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modlnbvgls = gls(LNA.bv ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modlnabgls = gls(LNA.B ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modlnabbgls = gls(X.LNA.BB ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
modtempgls = gls(temp.5.m.E2 ~ 1, data = bst, correlation = corAR1(form =~ sample_number), method = "ML")
bst$resid.rainpredictor.raw = resid(modlnaabgls)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modlnbvgls)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modlnabgls)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modlnabbgls)
ggAcf(bst$resid.rainpredictor.raw)
bst$resid.rainpredictor.raw = resid(modtempgls)
ggAcf(bst$resid.rainpredictor.raw)
AICc(modlnaab)
## [1] 3132.848
AICc(modlnaabgls)
## [1] 3121.856
AICc(modlnbv)
## [1] -822.4318
AICc(modlnbvgls)
## [1] -836.202
AICc(modlnab)
## [1] 595.652
AICc(modlnabgls)
## [1] 586.03
AICc(modlnabb)
## [1] -177.2299
AICc(modlnabbgls)
## [1] -192.6451
AICc(modtemp)
## [1] 586.0755
AICc(modtempgls)
## [1] 494.8632