Phytoplankton

library(caTools)
library(plyr)
library(pracma)
library(rgdal)
library(leafletR)
library(maps)
library(ggplot2)
library(ggmap)
setwd("/home/duncan/Dropbox/Public/Analyses/AtlanticAlgae/Data/")
dir(pattern = "All*")
## [1] "All_phytoplankton_10June2014_P700TEST.csv"
## [2] "All_phytoplankton_10June2014_P701TEST.csv"
## [3] "All_phytoplankton_10June2014_PYETEST.csv"
a <- lapply(dir(pattern = "All*"), read.csv)
dir(pattern = "Temp*")
## [1] "TempData.csv"

Aggregate the data using the trapezoidal integration. This will work for the first file in the list. It is easy to repeat for the others. The aggregate function just provides the first value for each group of observations. These are the labels for the response variable.

d <- a[[2]]
response <- as.vector(by(d, d$BODC_stn, function(x) trapz(x$Bot_depth, x$P701A90Z)))
d1 <- aggregate(d, list(d$BODC_stn), function(x) x[1])

## Select only the most useful variables
d1 <- data.frame(d1[, c(4, 5, 6:15)], response)

## Turn the dates into proper date format.

d1$Date <- as.Date(paste("15", d1$Date, sep = "-"), format = "%d-%b-%y")

## Extract month and year

d1$Month <- format(d1$Date, "%m")
d1$Year <- format(d1$Date, "%Y")

mn <- as.numeric(d1$Month)
d1$season <- ifelse(d1$Latitude > 0, ifelse(mn < 6, "Spring", "Autumn"), ifelse(mn > 
    6, "Spring", "Autumn"))


## Make a spatial object in case this is useful
pts <- d1
coordinates(pts) <- ~Longitude + Latitude

Temperature data

This can be added to the abundance data through merging on a common field.

setwd("/home/duncan/Dropbox/Public/Analyses/AtlanticAlgae/Data/")
Temp <- read.csv("TempData.csv")

Temp <- Temp[, c(2, 4, 5)]
## I'll ust try the surface temperature for the moment by taking the
## maximum
Temp <- aggregate(Temp, list(Temp$BODC_stn), function(x) max(x))
str(Temp)
## 'data.frame':    1357 obs. of  4 variables:
##  $ Group.1 : int  721436 721440 721442 721456 721458 721464 721468 721470 721474 721478 ...
##  $ BODC_stn: int  721436 721440 721442 721456 721458 721464 721468 721470 721474 721478 ...
##  $ Depth   : num  300 300 300 299 300 ...
##  $ Temp    : num  23.7 23.5 23 21.5 20.8 ...

Not sure why there are more records than for the algae. Not going to do more with temperature for the moment until this is clear. Let's just try a quick map with rgb colours

d2 <- merge(d1, Temp)
plot(pts, pch = 21, cex = d2$Temp/20, bg = rgb(d2$Temp, 1, 30 - d2$Temp, max = 30))
map(add = T)
proj4string(pts) <- CRS("+init=epsg:4326")
box()

plot of chunk unnamed-chunk-4

Working with provinces

Take out the unused province levels.

provs <- levels(d1$AggProv)
d1 <- d1[d1$AggProv %in% provs[c(2, 4, 5)], ]

Monthly changes by province

theme_set(theme_bw())
g0 <- ggplot(d1, aes(y = response, x = Month))
g1 <- g0 + geom_boxplot()
g1 + facet_wrap(~AggProv)

plot of chunk unnamed-chunk-6

Confidence intervals

g_mean <- g0 + stat_summary(fun.y = mean, geom = "point")
g_mean <- g_mean + stat_summary(fun.data = mean_cl_normal, geom = "errorbar")
g_mean + facet_wrap(~AggProv)

plot of chunk unnamed-chunk-7

g0 <- ggplot(d1, aes(y = response, x = season))
g1 <- g0 + geom_boxplot()
g1 + facet_wrap(~AggProv)

plot of chunk unnamed-chunk-8

Confidence intervals

g_mean <- g0 + stat_summary(fun.y = mean, geom = "point")
g_mean <- g_mean + stat_summary(fun.data = mean_cl_normal, geom = "errorbar")
g_mean + facet_wrap(~AggProv)

plot of chunk unnamed-chunk-9

The southern ocean has different seasonal pattern as would be expected. This complicates matters as we can't really just use month in an additive model.


mod <- lm(response ~ season, data = d1)
rs <- residuals(mod)
mod
## 
## Call:
## lm(formula = response ~ season, data = d1)
## 
## Coefficients:
##  (Intercept)  seasonSpring  
##     15134638       -124665

g0 <- ggplot(d1, aes(y = rs, x = as.numeric(Year)))


g1 <- g0 + geom_point() + geom_smooth(method = "loess", se = TRUE) + theme_bw()
g1 + facet_wrap(~AggProv)

plot of chunk unnamed-chunk-10

library(mgcv)
mod <- gam(response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) + 
    season, data = d1)
plot(mod)

plot of chunk unnamed-chunk-11 plot of chunk unnamed-chunk-11 plot of chunk unnamed-chunk-11

anova(mod)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) + 
##     season
## 
## Parametric Terms:
##         df    F p-value
## AggProv  2 3.19   0.042
## season   1 0.09   0.762
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value
## s(as.numeric(Year)) 8.03   8.70 31.36 < 2e-16
## s(Latitude)         8.37   8.89 31.34 < 2e-16
## s(Longitude)        3.23   4.10  6.83 1.9e-05
acf(residuals(mod))

plot of chunk unnamed-chunk-11

mod <- gamm(response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) + 
    season, data = d1, correlation = corAR1())
## Loading required package: nlme
## Warning: NAs introduced by coercion
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
plot(mod$gam)

plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12 plot of chunk unnamed-chunk-12

anova(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) + 
##     season
## 
## Parametric Terms:
##         df    F p-value
## AggProv  2 1.55   0.214
## season   1 3.61   0.058
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value
## s(as.numeric(Year)) 1.00   1.00 43.05 1.1e-10
## s(Latitude)         7.20   7.20 20.14 < 2e-16
## s(Longitude)        2.49   2.49  4.67  0.0061
summary(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) + 
##     season
## 
## Parametric coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  14530827     895729   16.22   <2e-16 ***
## AggProvNAG     643678    1557708    0.41    0.680    
## AggProvSAG    2783750    1623794    1.71    0.087 .  
## seasonSpring -1321208     695208   -1.90    0.058 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value    
## s(as.numeric(Year)) 1.00   1.00 43.05 1.1e-10 ***
## s(Latitude)         7.20   7.20 20.14 < 2e-16 ***
## s(Longitude)        2.49   2.49  4.67  0.0061 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.499  Scale est. = 3.1152e+13  n = 631
mod
## $lme
## Linear mixed-effects model fit by maximum likelihood
##   Data: strip.offset(mf) 
##   Log-likelihood: -10590
##   Fixed: y ~ X - 1 
##            X(Intercept)             XAggProvNAG             XAggProvSAG 
##                14530827                  643678                 2783750 
##           XseasonSpring Xs(as.numeric(Year))Fx1         Xs(Latitude)Fx1 
##                -1321208                 2840484                 8713052 
##        Xs(Longitude)Fx1 
##                 -691303 
## 
## Random effects:
##  Formula: ~Xr - 1 | g
##  Structure: pdIdnot
##           Xr1   Xr2   Xr3   Xr4   Xr5   Xr6   Xr7   Xr8
## StdDev: 21515 21515 21515 21515 21515 21515 21515 21515
## 
##  Formula: ~Xr.0 - 1 | g.0 %in% g
##  Structure: pdIdnot
##             Xr.01     Xr.02     Xr.03     Xr.04     Xr.05     Xr.06
## StdDev: 227623784 227623784 227623784 227623784 227623784 227623784
##             Xr.07     Xr.08
## StdDev: 227623784 227623784
## 
##  Formula: ~Xr.1 - 1 | g.1 %in% g.0 %in% g
##  Structure: pdIdnot
##            Xr.11    Xr.12    Xr.13    Xr.14    Xr.15    Xr.16    Xr.17
## StdDev: 15680866 15680866 15680866 15680866 15680866 15680866 15680866
##            Xr.18 Residual
## StdDev: 15680866  5581375
## 
## Correlation Structure: AR(1)
##  Formula: ~1 | g/g.0/g.1 
##  Parameter estimate(s):
##    Phi 
## 0.5684 
## Number of Observations: 631
## Number of Groups: 
##                   g          g.0 %in% g g.1 %in% g.0 %in% g 
##                   1                   1                   1 
## 
## $gam
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Year)) + AggProv + s(Latitude) + s(Longitude) + 
##     season
## 
## Estimated degrees of freedom:
## 1.00 7.20 2.49  total = 14.69 
## 
## attr(,"class")
## [1] "gamm" "list"
mod <- gamm(response ~ s(as.numeric(Year)) + season + s(Latitude) + s(Longitude), 
    data = d1, correlation = corGaus(form = ~jitter(Latitude) + jitter(Longitude)))
## Warning: NAs introduced by coercion
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf

plot(mod$gam)

plot of chunk unnamed-chunk-13 plot of chunk unnamed-chunk-13 plot of chunk unnamed-chunk-13

anova(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Year)) + season + s(Latitude) + s(Longitude)
## 
## Parametric Terms:
##        df    F p-value
## season  1 0.12    0.73
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value
## s(as.numeric(Year)) 7.62   7.62 32.75 < 2e-16
## s(Latitude)         8.10   8.10 41.10 < 2e-16
## s(Longitude)        3.44   3.44  9.55 1.3e-06
summary(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Year)) + season + s(Latitude) + s(Longitude)
## 
## Parametric coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15221698     319695   47.61   <2e-16 ***
## seasonSpring  -166641     486425   -0.34     0.73    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value    
## s(as.numeric(Year)) 7.62   7.62 32.75 < 2e-16 ***
## s(Latitude)         8.10   8.10 41.10 < 2e-16 ***
## s(Longitude)        3.44   3.44  9.55 1.3e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.575  Scale est. = 2.6032e+13  n = 631