setwd("H:/javiera/CuSP - 15345/DV Growth rates")
suppressPackageStartupMessages(library(tidyverse))
## Warning: package 'tidyverse' was built under R version 3.3.3
## Warning: package 'purrr' was built under R version 3.3.3
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(gamlss))
suppressPackageStartupMessages(library(scales))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(forcats))
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-2
library(knitr)
source('theme_javier.R')
source('panel_cor.R')
source('corvif.R')
theme_set(theme_javier())

nelson <- 
  read.csv("nelson.csv") %>%
  mutate(dv = DID.VEX,
         fd_season = fct_recode(factor(D_season), Summer = '1', Autumn = '2' ,Winter = '3',Spring = '4'),
         S_date = as.Date(S_date,format='%d/%m/%Y'),
         id = as.factor(paste(Array,Plate,sep='-')),
         Others = ALGAE + MUSSEL + OYSTER + BRY.ENC + BRY.ERECT + HYDROID + UNKNOWN +
           AMPH + ANEMONE + BARN + EGG + SPONGE + TUBE + COLONIAL + SOLITARY,
         logSpace = log(SPACE +1),
         logSed = log(SED +1))

nelson %>% 
  dplyr::select (S_date, fd_season, dv, SPACE, Others) %>%
  gather(key, value, c(dv,Others,SPACE)) %>% 
  ggplot(.,aes(y = value, x = S_date, lty = fd_season)) +
  stat_summary(fun.y = mean, geom = 'line') +
  stat_summary(fun.data = mean_se,
               geom = "ribbon",
               alpha = 0.07) +
  ggtitle('Nelson') + xlab('') +
  ylab('Cover (%)') +
  theme_javier() +
  theme(legend.position = c(.8,.2))+
  scale_linetype_manual(values = c(1, 2, 3, 4), name = 'Deployment season') +
  scale_x_date(
    breaks = date_breaks("3 month"),
    labels = date_format("%b %y") ,
    limits = as.Date(c('2008-01-01', '2009-9-01'))
  ) +
  coord_cartesian(ylim = c(0, 100)) + 
  facet_wrap(~key,nrow = 3)

###data exploration#####
dotchart(nelson$dv,color=nelson$fd_season,xlab = 'DV % cover')## looks like there are lots of zeros

sum(nelson$dv == 0) / nrow(nelson)###47% of zeros!
## [1] 0.4729167
pairs(
  ~ Time + S_season + dv + S_month + S_date + TempChange + fd_season + DailyRecruit +
    Temp + COLONIAL + SPACE,
  data = nelson,
  panel = panel.smooth,
  upper.panel = panel.cor
)

##Beta regression######
nelson %>% 
  dplyr::select (S_date, fd_season, dv, logSpace, Others,COLONIAL,logSed, TempChange) %>%
  gather(key, value, dv:TempChange) %>% 
  ggplot(.,aes(x= value)) + 
  geom_histogram(position = 'identity',color = 1,fill= 'gray50',bins = 15)+
  facet_wrap(~key,scales= 'free')

##check for collinearity
corvif(nelson[,c('Time','S_season','S_month','DailyRecruit','TempChange','COLONIAL','SPACE','SED')])
## 
## 
## Variance inflation factors
## 
##                  GVIF
## Time         1.316346
## S_season     2.590795
## S_month      7.982347
## DailyRecruit 9.600666
## TempChange   4.438734
## COLONIAL     1.328716
## SPACE        1.292812
## SED          1.278654
corvif(nelson[,c('Time','S_season','S_month','TempChange','COLONIAL','SPACE','SED')])
## 
## 
## Variance inflation factors
## 
##                GVIF
## Time       1.281326
## S_season   1.638784
## S_month    1.706286
## TempChange 1.760999
## COLONIAL   1.311786
## SPACE      1.268116
## SED        1.255221
##Fit the full model
N = nrow(nelson)
nelson$prop.dv = ((nelson$dv / 100) * (N - 1) + 0.5) / N
n0 <- gamlss(prop.dv~
               cs(Time)+
               cs(S_month)+
               TempChange*fd_season +
               logSed+ 
               random(id),
             family = BE, 
             data =nelson)
## GAMLSS-RS iteration 1: Global Deviance = -2442.333 
## GAMLSS-RS iteration 2: Global Deviance = -2525.724 
## GAMLSS-RS iteration 3: Global Deviance = -2589.432 
## GAMLSS-RS iteration 4: Global Deviance = -2633.75 
## GAMLSS-RS iteration 5: Global Deviance = -2661.391 
## GAMLSS-RS iteration 6: Global Deviance = -2676.865 
## GAMLSS-RS iteration 7: Global Deviance = -2684.816 
## GAMLSS-RS iteration 8: Global Deviance = -2688.707 
## GAMLSS-RS iteration 9: Global Deviance = -2690.594 
## GAMLSS-RS iteration 10: Global Deviance = -2691.528 
## GAMLSS-RS iteration 11: Global Deviance = -2692.008 
## GAMLSS-RS iteration 12: Global Deviance = -2692.264 
## GAMLSS-RS iteration 13: Global Deviance = -2692.405 
## GAMLSS-RS iteration 14: Global Deviance = -2692.485 
## GAMLSS-RS iteration 15: Global Deviance = -2692.53 
## GAMLSS-RS iteration 16: Global Deviance = -2692.557 
## GAMLSS-RS iteration 17: Global Deviance = -2692.573 
## GAMLSS-RS iteration 18: Global Deviance = -2692.582 
## GAMLSS-RS iteration 19: Global Deviance = -2692.588 
## GAMLSS-RS iteration 20: Global Deviance = -2692.59
## Warning in RS(): Algorithm RS has not yet converged
plot(n0)

## ******************************************************************
##        Summary of the Quantile Residuals
##                            mean   =  -0.07115016 
##                        variance   =  0.9048583 
##                coef. of skewness  =  -0.102631 
##                coef. of kurtosis  =  3.604134 
## Filliben correlation coefficient  =  0.9945735 
## ******************************************************************
n1 <- stepGAIC(n0, direction = 'backward',trace = 0)
## Start:  AIC= -2608.79 
##  prop.dv ~ cs(Time) + cs(S_month) + TempChange * fd_season + logSed +  
##     random(id)
## Warning in RS(): Algorithm RS has not yet converged
## Warning in RS(): Algorithm RS has not yet converged

## Warning in RS(): Algorithm RS has not yet converged

## Warning in RS(): Algorithm RS has not yet converged

## Warning in RS(): Algorithm RS has not yet converged

## Warning in RS(): Algorithm RS has not yet converged

## Warning in RS(): Algorithm RS has not yet converged

## Warning in RS(): Algorithm RS has not yet converged
summary(n1)
## ******************************************************************
## Family:  c("BE", "Beta") 
## 
## Call:  gamlss(formula = prop.dv ~ cs(Time) + cs(S_month) +  
##     TempChange + fd_season + random(id) + TempChange:fd_season,  
##     family = BE, data = nelson, trace = FALSE) 
## 
## Fitting method: RS() 
## 
## ------------------------------------------------------------------
## Mu link function:  logit
## Mu Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -3.97056    0.15798 -25.133  < 2e-16 ***
## cs(Time)                    0.12712    0.01003  12.668  < 2e-16 ***
## cs(S_month)                 0.05584    0.01113   5.016 7.68e-07 ***
## TempChange                  0.15607    0.02800   5.573 4.36e-08 ***
## fd_seasonAutumn            -1.50080    0.10287 -14.590  < 2e-16 ***
## fd_seasonWinter            -1.17650    0.11300 -10.411  < 2e-16 ***
## fd_seasonSpring            -0.67150    0.12931  -5.193 3.17e-07 ***
## TempChange:fd_seasonAutumn -0.11693    0.04590  -2.548   0.0112 *  
## TempChange:fd_seasonWinter -0.25728    0.04926  -5.223 2.72e-07 ***
## TempChange:fd_seasonSpring -0.33933    0.05046  -6.724 5.51e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## Sigma link function:  logit
## Sigma Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.23918    0.04846  -25.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------------------------------
## NOTE: Additive smoothing terms exist in the formulas: 
##  i) Std. Error for smoothers are for the linear effect only. 
## ii) Std. Error for the linear terms maybe are not accurate. 
## ------------------------------------------------------------------
## No. of observations in the fit:  480 
## Degrees of Freedom for the fit:  40.77401
##       Residual Deg. of Freedom:  439.226 
##                       at cycle:  20 
##  
## Global Deviance:     -2691.563 
##             AIC:     -2610.015 
##             SBC:     -2439.833 
## ******************************************************************
coef_nelson <- exp(cbind(Estimates = coef(n1), confint(n1)))
## Warning in vcov.gamlss(object, robust = robust): Additive terms exists in the  mu formula. 
##   Standard errors for the linear terms maybe are not appropriate
kable(coef_nelson, digits = 2)
Estimates 2.5 % 97.5 %
(Intercept) 0.02 0.01 0.03
cs(Time) 1.14 1.11 1.16
cs(S_month) 1.06 1.03 1.08
TempChange 1.17 1.11 1.23
fd_seasonAutumn 0.22 0.18 0.27
fd_seasonWinter 0.31 0.25 0.38
fd_seasonSpring 0.51 0.40 0.66
random(id)1 NA NA NA
TempChange:fd_seasonAutumn 0.89 0.81 0.97
TempChange:fd_seasonWinter 0.77 0.70 0.85
TempChange:fd_seasonSpring 0.71 0.65 0.79
##Model validation by inspecting normalised quantile residuals
plot(n1)

## ******************************************************************
##        Summary of the Quantile Residuals
##                            mean   =  -0.07122958 
##                        variance   =  0.9047215 
##                coef. of skewness  =  -0.1032754 
##                coef. of kurtosis  =  3.641618 
## Filliben correlation coefficient  =  0.9945497 
## ******************************************************************
plot(n1, ts = T)

## ******************************************************************
##        Summary of the Quantile Residuals
##                            mean   =  -0.07122958 
##                        variance   =  0.9047215 
##                coef. of skewness  =  -0.1032754 
##                coef. of kurtosis  =  3.641618 
## Filliben correlation coefficient  =  0.9945497 
## ******************************************************************
#print the summary table
Rsq(n1, type = "Cox Snell")
## [1] 0.5429489
##Plot the partial effects of the most parsimonious model####
par(
  mfrow = c(2, 3),
  oma = c(0, 0, 0, 0),
  mar = c(4, 4, 2, 0.5),
  mgp = c(2.2, 1, 0)
)
term.plot(
  n1,
  se = T,
  partial = T,
  col.res = 'black',
  ylim = 'common',
  scheme = 'shade',
  col.shaded = 'gray',
  xlabs = c(
    'Deploymenet duration (months)',
    'Sampling month',
    'Temperature change',
    'Deployment season'),
  lwd.term = 1,
  pch.res = 1,
  cex.res = 1,
  ylab = rep('Partial effect', 6)
)
## Warning: contrasts dropped from factor random(id)
## Warning in term.plot(n1, se = T, partial = T, col.res = "black", ylim =
## "common", : interactions have been taken out from the plots
###Multivariate###
cap1 <- 
  capscale(sqrt(nelson[, 15:32])~ fd_season * as.numeric(S_date) + Condition(as.numeric(S_date)) + TempChange, nelson)
##get the sites scores and glue them to the data
sco1 <-  scores(cap1)
nelson$rda1 <- sco1$sites[, 1]
nelson$rda2 <- sco1$sites[, 2]


##get the species scoores
sp.sco2 <- as.data.frame(sco1$species)
mySp1 <- sp.sco2[which(abs(sp.sco2$CAP1) > 0.3),]


##All together for the capscale plot
q <- 
  ggplot(nelson, aes(x = S_date, y = rda1)) + 
  geom_smooth(aes(color = factor(fd_season)),
              size = 1,
              method = loess,
              se = F) + 
  scale_colour_brewer(name = 'Deployment season', palette = 'Paired')+
  scale_x_date(
    breaks = date_breaks("3 month"),
    labels = date_format("%b %y") ,
    limits = as.Date(c('2008-01-01', '2009-9-01'))
  )

nelson_prc <- 
  q + geom_rug(data = mySp1, aes(x = as.Date('2009-8-01'), y = CAP1),
               sides = 'right') +
  geom_text(
    data = mySp1,
    aes(x = as.Date('2009-8-01'), y = CAP1),
    size = 2.5,
    label = row.names(mySp1),
    hjust = 0,
    vjust = 0,
    size = 4
  ) + ylab('Coefficients') +
  theme_javier() +
  xlab('Sampling date')+
  ggtitle('Nelson')
## Warning: The plyr::rename operation has created duplicates for the
## following name(s): (`size`)
nelson_prc