library(caTools)
library(plyr)
library(pracma)
library(rgdal)
library(leafletR)
library(maps)
setwd("/home/duncan/Dropbox/Public/Analyses/AtlanticAlgae/Data/")
dir(pattern = "All*")
## [1] "All_phytoplankton_10June2014_P700TEST.csv"
## [2] "All_phytoplankton_10June2014_P701TEST.csv"
## [3] "All_phytoplankton_10June2014_PYETEST.csv"
a <- lapply(dir(pattern = "All*"), read.csv)
As I understand it these three files contain the same information, but have been split according to whether there is any missing data for the species in the file name. Not sure why this was necessary as it would be easier to just use one file and drop out NAs later in the analysis. The third line here reads in all the files and forms a list.
Now to aggregate the data using the trapezoidal integration. This will work for the first file in the list. It is easy to repeat for the others. The aggregate function just provides the first value for each group of observations. These are the labels for the response variable.
d <- a[[2]]
response <- as.vector(by(d, d$BODC_stn, function(x) trapz(x$Bot_depth, x$P701A90Z)))
d1 <- aggregate(d, list(d$BODC_stn), function(x) x[1])
d1 <- data.frame(d1[, c(4, 5, 6:15)], response)
d1$Date <- as.Date(paste("15", d1$Date, sep = "-"), format = "%d-%b-%y")
head(d1)
## Date BODC_stn Orig_stn BODC_bot Latitude Longitude Bot_depth Prov
## 1 2005-10-15 721436 CTD047s 183926 -22.14 -20.199 1.4 SATL
## 2 2005-10-15 721440 CTD048t 183951 -22.61 -19.128 2.8 SATL
## 3 2005-10-15 721442 CTD049s 183976 -23.76 -16.527 1.4 SATL
## 4 2005-10-15 721456 CTD053s 184070 -28.85 -4.688 0.9 SATL
## 5 2005-10-15 721458 CTD054t 184095 -29.32 -3.579 2.3 SATL
## 6 2005-10-15 721464 CTD056t 184144 -31.17 0.923 1.5 SATL
## AggProv NTRZAATX P701A90Z P700A90Z response
## 1 SAG 0.03 117360 1757.9 24337015
## 2 SAG 0.03 110251 1244.7 22762698
## 3 SAG NA 85656 874.2 23049343
## 4 SAG NA 86415 383.5 22078503
## 5 SAG 0.03 91505 556.3 25473738
## 6 SAG NA 113356 8580.8 13233675
pts <- d1
coordinates(pts) <- ~Longitude + Latitude
plot(pts)
map(add = T)
proj4string(pts) <- CRS("+init=epsg:4326")
provs <- levels(d1$AggProv)
d1 <- d1[d1$AggProv %in% provs[c(2, 4, 5)], ]
# d1<-d1[d1$response>0,]
d1$dt <- as.factor(d1$Date)
library(ggplot2)
g0 <- ggplot(d1, aes(y = response, x = Date))
g1 <- g0 + geom_point() + geom_smooth(method = "loess", se = TRUE) + theme_bw()
g1 + facet_wrap(~AggProv)
library(mgcv)
mod <- gam(response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude),
data = d1)
plot(mod)
anova(mod)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
##
## Parametric Terms:
## df F p-value
## AggProv 2 2.81 0.061
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Date)) 7.81 8.57 32.42 < 2e-16
## s(Latitude) 8.35 8.88 31.53 < 2e-16
## s(Longitude) 3.42 4.33 6.54 2.4e-05
acf(residuals(mod))
mod <- gamm(response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude),
data = d1, correlation = corAR1())
## Loading required package: nlme
plot(mod$gam)
anova(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
##
## Parametric Terms:
## df F p-value
## AggProv 2 1.39 0.25
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Date)) 1.00 1.00 50.20 3.6e-12
## s(Latitude) 7.15 7.15 21.59 < 2e-16
## s(Longitude) 2.44 2.44 4.14 0.012
summary(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13923178 833668 16.70 <2e-16 ***
## AggProvNAG 652944 1558371 0.42 0.68
## AggProvSAG 2629722 1622453 1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Date)) 1.00 1.00 50.20 3.6e-12 ***
## s(Latitude) 7.15 7.15 21.59 < 2e-16 ***
## s(Longitude) 2.44 2.44 4.14 0.012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.506 Scale est. = 3.0795e+13 n = 631
mod <- gamm(response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude),
data = d1, correlation = corGaus(form = ~jitter(Latitude) + jitter(Longitude)))
plot(mod$gam)
anova(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
##
## Parametric Terms:
## df F p-value
## AggProv 2 2.64 0.072
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Date)) 7.45 7.45 34.15 < 2e-16
## s(Latitude) 7.97 7.97 33.73 < 2e-16
## s(Longitude) 3.45 3.45 8.37 7.7e-06
summary(mod$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## response ~ s(as.numeric(Date)) + AggProv + s(Latitude) + s(Longitude)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13590474 859523 15.81 <2e-16 ***
## AggProvNAG -575786 1712706 -0.34 0.737
## AggProvSAG 4269312 1885586 2.26 0.024 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(Date)) 7.45 7.45 34.15 < 2e-16 ***
## s(Latitude) 7.97 7.97 33.73 < 2e-16 ***
## s(Longitude) 3.45 3.45 8.37 7.7e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.579 Scale est. = 2.5951e+13 n = 631