Phytoplankton

library(caTools)
library(plyr)
library(pracma)
library(rgdal)
library(leafletR)
library(maps)

setwd("/home/duncan/Dropbox/Public/Analyses/AtlanticAlgae/Data/")
dir(pattern = "All*")
## [1] "All_phytoplankton_10June2014_P700TEST.csv"
## [2] "All_phytoplankton_10June2014_P701TEST.csv"
## [3] "All_phytoplankton_10June2014_PYETEST.csv"
a <- lapply(dir(pattern = "All*"), read.csv)

As I understand it these three files contain the same information, but have been split according to whether there is any missing data for the species in the file name. Not sure why this was necessary as it would be easier to just use one file and drop out NAs later in the analysis. The third line here reads in all the files and forms a list.

Now to aggregate the data using the trapezoidal integration. This will work for the first file in the list. It is easy to repeat for the others. The aggregate function just provides the first value for each group of observations. These are the labels for the response variable.

d <- a[[2]]
response <- as.vector(by(d, d$BODC_stn, function(x) trapz(x$Bot_depth, x$P701A90Z)))
d1 <- aggregate(d, list(d$BODC_stn), function(x) x[1])
d1 <- data.frame(d1[, c(4, 5, 6:15)], response)
d1$Date <- as.Date(paste("15", d1$Date, sep = "-"), format = "%d-%b-%y")
head(d1)
##         Date BODC_stn Orig_stn BODC_bot Latitude Longitude Bot_depth Prov
## 1 2005-10-15   721436  CTD047s   183926   -22.14   -20.199       1.4 SATL
## 2 2005-10-15   721440  CTD048t   183951   -22.61   -19.128       2.8 SATL
## 3 2005-10-15   721442  CTD049s   183976   -23.76   -16.527       1.4 SATL
## 4 2005-10-15   721456  CTD053s   184070   -28.85    -4.688       0.9 SATL
## 5 2005-10-15   721458  CTD054t   184095   -29.32    -3.579       2.3 SATL
## 6 2005-10-15   721464  CTD056t   184144   -31.17     0.923       1.5 SATL
##   AggProv NTRZAATX P701A90Z P700A90Z response
## 1     SAG     0.03   117360   1757.9 24337015
## 2     SAG     0.03   110251   1244.7 22762698
## 3     SAG       NA    85656    874.2 23049343
## 4     SAG       NA    86415    383.5 22078503
## 5     SAG     0.03    91505    556.3 25473738
## 6     SAG       NA   113356   8580.8 13233675
pts <- d1

coordinates(pts) <- ~Longitude + Latitude
plot(pts)
map(add = T)

plot of chunk unnamed-chunk-2

proj4string(pts) <- CRS("+init=epsg:4326")
provs <- levels(d1$AggProv)
d1 <- d1[d1$AggProv %in% provs[c(2, 4, 5)], ]
# d1<-d1[d1$response>0,]
d1$dt <- as.factor(d1$Date)
library(ggplot2)
g0 <- ggplot(d1, aes(y = response, x = Date))
g1 <- g0 + geom_point() + geom_smooth(method = "loess", se = TRUE) + theme_bw()
g1 + facet_wrap(~AggProv)

plot of chunk unnamed-chunk-3

library(mgcv)
mod <- gam(response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude), 
    data = d1)
plot(mod)

plot of chunk unnamed-chunk-4 plot of chunk unnamed-chunk-4 plot of chunk unnamed-chunk-4

anova(mod)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
## 
## Parametric Terms:
##         df    F p-value
## AggProv  2 2.81   0.061
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value
## s(as.numeric(Date)) 7.81   8.57 32.42 < 2e-16
## s(Latitude)         8.35   8.88 31.53 < 2e-16
## s(Longitude)        3.42   4.33  6.54 2.4e-05
acf(residuals(mod))

plot of chunk unnamed-chunk-4

mod <- gamm(response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude), 
    data = d1, correlation = corAR1())
## Loading required package: nlme
plot(mod$gam)

plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5 plot of chunk unnamed-chunk-5

anova(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
## 
## Parametric Terms:
##         df    F p-value
## AggProv  2 1.39    0.25
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value
## s(as.numeric(Date)) 1.00   1.00 50.20 3.6e-12
## s(Latitude)         7.15   7.15 21.59 < 2e-16
## s(Longitude)        2.44   2.44  4.14   0.012
summary(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13923178     833668   16.70   <2e-16 ***
## AggProvNAG    652944    1558371    0.42     0.68    
## AggProvSAG   2629722    1622453    1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value    
## s(as.numeric(Date)) 1.00   1.00 50.20 3.6e-12 ***
## s(Latitude)         7.15   7.15 21.59 < 2e-16 ***
## s(Longitude)        2.44   2.44  4.14   0.012 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.506  Scale est. = 3.0795e+13  n = 631
mod <- gamm(response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude), 
    data = d1, correlation = corGaus(form = ~jitter(Latitude) + jitter(Longitude)))

plot(mod$gam)

plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6

anova(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
## 
## Parametric Terms:
##         df    F p-value
## AggProv  2 2.64   0.072
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value
## s(as.numeric(Date)) 7.45   7.45 34.15 < 2e-16
## s(Latitude)         7.97   7.97 33.73 < 2e-16
## s(Longitude)        3.45   3.45  8.37 7.7e-06
summary(mod$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13590474     859523   15.81   <2e-16 ***
## AggProvNAG   -575786    1712706   -0.34    0.737    
## AggProvSAG   4269312    1885586    2.26    0.024 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df     F p-value    
## s(as.numeric(Date)) 7.45   7.45 34.15 < 2e-16 ***
## s(Latitude)         7.97   7.97 33.73 < 2e-16 ***
## s(Longitude)        3.45   3.45  8.37 7.7e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.579  Scale est. = 2.5951e+13  n = 631