library(caTools)
library(plyr)
library(pracma)
library(rgdal)
library(leafletR)
library(maps)
library(ggplot2)
library(ggmap)
setwd("/home/duncan/Dropbox/Public/Analyses/AtlanticAlgae/Data/")
dir(pattern = "All*")
## [1] "All_phytoplankton_10June2014_P700TEST.csv"
## [2] "All_phytoplankton_10June2014_P701TEST.csv"
## [3] "All_phytoplankton_10June2014_PYETEST.csv"
a <- lapply(dir(pattern = "All*"), read.csv)
dir(pattern = "Temp*")
## [1] "TempData.csv"
Aggregate the data using the trapezoidal integration. This will work for the first file in the list. It is easy to repeat for the others. The aggregate function just provides the first value for each group of observations. These are the labels for the response variable.
d <- a[[2]]
response <- as.vector(by(d, d$BODC_stn, function(x) trapz(x$Bot_depth, x$P701A90Z)))
d1 <- aggregate(d, list(d$BODC_stn), function(x) x[1])
## Select only the most useful variables
d1 <- data.frame(d1[, c(4, 5, 6:15)], response)
## Turn the dates into proper date format.
d1$Date <- as.Date(paste("15", d1$Date, sep = "-"), format = "%d-%b-%y")
## Extract month and year
d1$Month <- format(d1$Date, "%m")
d1$Year <- format(d1$Date, "%Y")
mn <- as.numeric(d1$Month)
d1$season <- ifelse(d1$Latitude > 0, ifelse(mn < 6, "Spring", "Autumn"), ifelse(mn >
6, "Spring", "Autumn"))
## Make a spatial object in case this is useful
pts <- d1
coordinates(pts) <- ~Longitude + Latitude
This can be added to the abundance data through merging on a common field.
setwd("/home/duncan/Dropbox/Public/Analyses/AtlanticAlgae/Data/")
Temp <- read.csv("TempData.csv")
Temp <- Temp[, c(2, 4, 5)]
## I'll ust try the surface temperature for the moment by taking the
## maximum
Temp <- aggregate(Temp, list(Temp$BODC_stn), function(x) max(x))
str(Temp)
## 'data.frame': 1357 obs. of 4 variables:
## $ Group.1 : int 721436 721440 721442 721456 721458 721464 721468 721470 721474 721478 ...
## $ BODC_stn: int 721436 721440 721442 721456 721458 721464 721468 721470 721474 721478 ...
## $ Depth : num 300 300 300 299 300 ...
## $ Temp : num 23.7 23.5 23 21.5 20.8 ...
Not sure why there are more records than for the algae. Not going to do more with temperature for the moment until this is clear. Let's just try a quick map with rgb colours
d2 <- merge(d1, Temp)
plot(pts, pch = 21, cex = d2$Temp/20, bg = rgb(d2$Temp, 1, 30 - d2$Temp, max = 30))
map(add = T)
proj4string(pts) <- CRS("+init=epsg:4326")
box()
Take out the unused province levels.
provs <- levels(d1$AggProv)
d1 <- d1[d1$AggProv %in% provs[c(2, 4, 5)], ]
theme_set(theme_bw())
g0 <- ggplot(d1, aes(y = response, x = Month))
g1 <- g0 + geom_boxplot()
g1 + facet_wrap(~AggProv)
g_mean <- g0 + stat_summary(fun.y = mean, geom = "point")
g_mean <- g_mean + stat_summary(fun.data = mean_cl_normal, geom = "errorbar")
g_mean + facet_wrap(~AggProv)
g0 <- ggplot(d1, aes(y = response, x = season))
g1 <- g0 + geom_boxplot()
g1 + facet_wrap(~AggProv)
g_mean <- g0 + stat_summary(fun.y = mean, geom = "point")
g_mean <- g_mean + stat_summary(fun.data = mean_cl_normal, geom = "errorbar")
g_mean + facet_wrap(~AggProv)
The southern ocean has different seasonal pattern as would be expected. This complicates matters as we can't really just use month in an additive model.
mod <- lm(response ~ season, data = d1)
rs <- residuals(mod)
mod
##
## Call:
## lm(formula = response ~ season, data = d1)
##
## Coefficients:
## (Intercept) seasonSpring
## 15134638 -124665
g0 <- ggplot(d1, aes(y = rs, x = as.numeric(Year)))
g1 <- g0 + geom_point() + geom_smooth(method = "loess", se = TRUE) + theme_bw()
g1 + facet_wrap(~AggProv)
library(mgcv)
mod <- gam(response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) +
season, data = d1)
plot(mod)
anova(mod)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) +
## season
##
## Parametric Terms:
## df F p-value
## AggProv 2 3.19 0.042
## season 1 0.09 0.762
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Year)) 8.03 8.70 31.36 < 2e-16
## s(Latitude) 8.37 8.89 31.34 < 2e-16
## s(Longitude) 3.23 4.10 6.83 1.9e-05
acf(residuals(mod))
mod <- gamm(response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) +
season, data = d1, correlation = corAR1())
## Loading required package: nlme
## Warning: NAs introduced by coercion
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
plot(mod$gam)
anova(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) +
## season
##
## Parametric Terms:
## df F p-value
## AggProv 2 1.55 0.214
## season 1 3.61 0.058
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Year)) 1.00 1.00 43.05 1.1e-10
## s(Latitude) 7.20 7.20 20.14 < 2e-16
## s(Longitude) 2.49 2.49 4.67 0.0061
summary(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) +
## season
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14530827 895729 16.22 <2e-16 ***
## AggProvNAG 643678 1557708 0.41 0.680
## AggProvSAG 2783750 1623794 1.71 0.087 .
## seasonSpring -1321208 695208 -1.90 0.058 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Year)) 1.00 1.00 43.05 1.1e-10 ***
## s(Latitude) 7.20 7.20 20.14 < 2e-16 ***
## s(Longitude) 2.49 2.49 4.67 0.0061 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.499 Scale est. = 3.1152e+13 n = 631
mod
## $lme
## Linear mixed-effects model fit by maximum likelihood
## Data: strip.offset(mf)
## Log-likelihood: -10590
## Fixed: y ~ X - 1
## X(Intercept) XAggProvNAG XAggProvSAG
## 14530827 643678 2783750
## XseasonSpring Xs(as.numeric(Year))Fx1 Xs(Latitude)Fx1
## -1321208 2840484 8713052
## Xs(Longitude)Fx1
## -691303
##
## Random effects:
## Formula: ~Xr - 1 | g
## Structure: pdIdnot
## Xr1 Xr2 Xr3 Xr4 Xr5 Xr6 Xr7 Xr8
## StdDev: 21515 21515 21515 21515 21515 21515 21515 21515
##
## Formula: ~Xr.0 - 1 | g.0 %in% g
## Structure: pdIdnot
## Xr.01 Xr.02 Xr.03 Xr.04 Xr.05 Xr.06
## StdDev: 227623784 227623784 227623784 227623784 227623784 227623784
## Xr.07 Xr.08
## StdDev: 227623784 227623784
##
## Formula: ~Xr.1 - 1 | g.1 %in% g.0 %in% g
## Structure: pdIdnot
## Xr.11 Xr.12 Xr.13 Xr.14 Xr.15 Xr.16 Xr.17
## StdDev: 15680866 15680866 15680866 15680866 15680866 15680866 15680866
## Xr.18 Residual
## StdDev: 15680866 5581375
##
## Correlation Structure: AR(1)
## Formula: ~1 | g/g.0/g.1
## Parameter estimate(s):
## Phi
## 0.5684
## Number of Observations: 631
## Number of Groups:
## g g.0 %in% g g.1 %in% g.0 %in% g
## 1 1 1
##
## $gam
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) +
## season
##
## Estimated degrees of freedom:
## 1.00 7.20 2.49 total = 14.69
##
## attr(,"class")
## [1] "gamm" "list"
mod <- gamm(response ~ s(as.numeric(Year)) + season + s(Latitude) + s(Longitude),
data = d1, correlation = corGaus(form = ~jitter(Latitude) + jitter(Longitude)))
## Warning: NAs introduced by coercion
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
plot(mod$gam)
anova(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Year)) + season + s(Latitude) + s(Longitude)
##
## Parametric Terms:
## df F p-value
## season 1 0.12 0.73
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Year)) 7.62 7.62 32.75 < 2e-16
## s(Latitude) 8.10 8.10 41.10 < 2e-16
## s(Longitude) 3.44 3.44 9.55 1.3e-06
summary(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Year)) + season + s(Latitude) + s(Longitude)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15221698 319695 47.61 <2e-16 ***
## seasonSpring -166641 486425 -0.34 0.73
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Year)) 7.62 7.62 32.75 < 2e-16 ***
## s(Latitude) 8.10 8.10 41.10 < 2e-16 ***
## s(Longitude) 3.44 3.44 9.55 1.3e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.575 Scale est. = 2.6032e+13 n = 631