Schefferville - generalists/specialists; pairwise co-occurrence; GAM


  These analyzes include GAMS with 34 focal species (including T. alpinum).

#Read in vegetation matrix
veg<-read.csv("Carex_2013_stats_TE.csv", head=TRUE, row.names=1)
veg<-veg[c(1:201,222:701), c(1:3,5:7, 10:12,14:22,24:39)]  #Remove all columns that do not respresent focal species so that every species has at least 20 representatives; also remove DEF and ALP
veg[veg>0]<-1
veg<-veg[, order(names(veg))]
#head(veg)



Randomize the data and calculate random means and standard deviations for z scores.

 #use permiswap from Vegan package for randomization and creation of null model
set.seed(4)## set the seed for randomization
veg.rand<-permatswap(veg, method="quasiswap", fixedmar="both",times = 999, burnin = 20000, thin = 500, mtype = "prab") ##shuffle the occurrence matrix keeping both row and colsums equal

#get 999 transposed matrices from veg.rand$perm[[i]]
#create 999 empty matrices; fill them all with i veg.rand$perm transposed;  get sd for each orf the 528 entries

veg.rand.perm<-veg.rand$perm
veg.rand<-lapply(1:999, function(x) matrix(0, 34,34))

for (i in 1:length(veg.rand)) { #loop over 999 matrices in veg.rand.perm
    for (j in 1:length(veg.rand.perm)){
    veg.rand[[j]]<-t(veg.rand.perm[[j]]) %*% veg.rand.perm[[j]]
   }
}

#calculate mean across 999 different matrices  for each indexed position
fulldim<-34*34
meanmat<-matrix(nrow=34,ncol=34)
meanmat[1:1156]<-

apply(as.matrix(1:fulldim), 1,function(y) mean(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (33*33 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
meanmat

#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
meanmat.out.ind <- which( upper.tri(meanmat,diag=F) , arr.ind = TRUE )
meanmat.out.df<-data.frame( col = dimnames(veg.out)[[2]][meanmat.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][meanmat.out.ind[,1]] ,
            val = meanmat[ meanmat.out.ind ] )
#Calculate plots that mean species data aren't in
meanmat.out.df.neg<-cbind(meanmat.out.df,681-meanmat.out.df$val)

#find how many plots either one oor the other species in, but not both
meanmat.out.df.neg.col<-revalue(meanmat.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))
meanmat.out.df.neg.row<-revalue(meanmat.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))
meanmat.out.df.neg.col.num<-as.numeric(as.character(meanmat.out.df.neg.col))
meanmat.out.df.neg.row.num<-as.numeric(as.character(meanmat.out.df.neg.row))
meanmat.out.df.neg.row.col<-(meanmat.out.df.neg.col.num+meanmat.out.df.neg.row.num)-meanmat.out.df[,3]

#calculate sd across 999 different matrices for each position
fulldim<-34*34
sdmat<-matrix(nrow=34,ncol=34)
sdmat[1:1156]<-

apply(as.matrix(1:fulldim), 1,function(y) sd(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (34*34 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
sdmat

#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
sd.out.ind <- which( upper.tri(sdmat,diag=F) , arr.ind = TRUE )
sd.out.df<-data.frame( col = dimnames(veg.out)[[2]][sd.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][sd.out.ind[,1]] ,
            val = sdmat[ sd.out.ind ] )



Calculate z cooccurrence scores for each species pair

#1. make a data frame with appropriate data
cooc.z.info<-cbind(veg.out.df, meanmat.out.df,sd.out.df)
cooc.z.comb<-(cooc.z.info[,3]-cooc.z.info[,6])/cooc.z.info[,9]

#get the cooccurrence.z list with names
cooc.z<-cbind(cooc.z.info[,c(1:2)], cooc.z.comb)

#Cyperaceae.trees<-read.nexus("Sedges_april30.nex")
#Cyperaceae.samp<-sample(Cyperaceae.trees, 100, replace = FALSE, prob = NULL)
Cyperaceae.tree<-read.nexus("Sedges_april30_comb.nex")

#calculate phy.dist for all 100 random trees
#phy.dist<- lapply(Cyperaceae.samp, cophenetic)
#drop non-carex species from tree and those not focal species: exclude C. deflexa, but include T. alpinum
pruned.cyp<-drop.tip(Cyperaceae.tree,c( "DEF", "JARC", "JTRI", "ACI", "ARC","ATR", "BRA","BUX","CAT", "CVI", "DIA", "FOE","GAR","GLA","LAC","MED","NAR","NIT","WIL"))
pruned.cyp
phy.dist.cyp<-cophenetic(pruned.cyp)

phy.dist.ind <- which( upper.tri(phy.dist.cyp,diag=F) , arr.ind = TRUE )
phy.dist.df<-data.frame( col = dimnames(phy.dist.cyp)[[2]][phy.dist.ind[,2]] ,
            row = dimnames(phy.dist.cyp)[[1]][phy.dist.ind[,1]] ,
            val = phy.dist.cyp[ phy.dist.ind ] )


#load inverse of habitat generalist/specialist data
gen.spec<-read.csv("H.inv.rescale.no.def.allplots.csv", row.names=1, head=TRUE)
gen.spec.sp<-rownames(gen.spec)

diag(final.h.inv)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#write.csv(final.h.inv, "Sedges2_final.h.inv.nodef.matrix.nonames.csv")
final.h.inv.names<-read.csv("Sedges2_final.h.inv.nodef.matrix.nonames.csv", row.names=1, head=TRUE)
#remove cases when col1 name = col2 name
 h.rows = apply(final.h.inv.names[, c(1:2)], 1, function(i) length(unique(i)) > 1)

h.inv.ind <- which( upper.tri(final.h.inv.names,diag=F) , arr.ind = TRUE )
h.inv.df<-data.frame( col = dimnames(final.h.inv.names)[[2]][h.inv.ind[,2]] ,
            row = dimnames(final.h.inv.names)[[1]][h.inv.ind[,1]] ,
            val = final.h.inv.names[ h.inv.ind ] )

#write.csv(h.inv.df, "Sedges2_final.h.inv.nodef.matrix.nonames.noselfs.csv")
#final.h.inv.names.noself<-read.csv("Sedges2_final.h.inv.nodef.matrix.nonames.noselfs.csv", row.names=1, head=TRUE)

 #sink("genspec.phydist.cooccur.results.no.def.dec2.txt", append=TRUE)

final.h.inv<-h.inv.df

data.cooc<-cbind(final.h.inv, phy.dist.df, cooc.z)

data.cooc<-data.cooc[,c(3,6,9)]
colnames(data.cooc)<-c("gen.spec", "phy.dist", "cooc.z")
head(data.cooc)



Import saved data for GAM analyzes

data.cooc<-readRDS("cooc.z.data.rds")
head(data.cooc)
##      gen.spec   phy.dist      cooc.z
## 1 0.047371556  78.891213 -2.39264565
## 2 0.274031336 121.972198  5.85930884
## 3 0.226659781 121.972198 -0.06566243
## 4 0.000248625 121.972198 -2.71153372
## 5 0.047620180 121.972198 -0.70770244
## 6 0.274279961   9.008272 -4.68947236

Visualization of data before GAMS


#The bottom two plots have a smoothing function added
phy.dist.cooc.plot<-ggplot(data=data.cooc, aes(x=phy.dist,y=cooc.z)) + geom_point(color='#1d1d1d', size=1.5 ) + theme_bw() + xlab("Phylogenetic distance") + ylab("Co-occurrence")   +
    ggtitle("Co-occurrence predicted by \nphylogenetic distance") + theme(axis.title.y = element_text(size=10, vjust=1),axis.title.x = element_text(size=10), axis.text.x  = element_text(size=8), axis.text.y= element_text(size=8,angle=90,hjust=0.5))+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line=element_line(size=1)) +
theme(plot.title = element_text(vjust = 2, size=10, face="bold")) 

gen.spec.cooc.plot<-ggplot(data=data.cooc, aes(x=gen.spec,y=cooc.z)) + geom_point(color='#1d1d1d', size=1.5 ) + theme_bw() + xlab("Difference in specialism and generalism") + ylab("Co-occurrence")   + ggtitle("Co-occurrence predicted by difference \nbetween specialism and generalism") + theme(axis.title.y = element_text(size=10, vjust=1),axis.title.x = element_text(size=10), axis.text.x  = element_text(size=8), axis.text.y= element_text(size=8,angle=90,hjust=0.5))+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line=element_line(size=1)) +
theme(plot.title = element_text(vjust = 2, size=10, face="bold")) 

phy.dist.cooc.smooth<-ggplot(data=data.cooc, aes(x=phy.dist,y=cooc.z)) + geom_point(color='#1d1d1d',size=1.5) + geom_smooth(se=F, method='gam', formula=y~s(x), color='#1d1d1d')  + theme_bw() + xlab("Phylogenetic distance") + ylab("Co-occurrence")   + ggtitle("Co-occurrence predicted by \nphylogenetic distance (smoothed)") + theme(axis.title.y = element_text(size=10, vjust=1),axis.title.x = element_text(size=10), axis.text.x  = element_text(size=8), axis.text.y= element_text(size=8,angle=90,hjust=0.5))+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line=element_line(size=1)) +
theme(plot.title = element_text(vjust = 2, size=10, face="bold"))  

gen.spec.cooc.smooth<-ggplot(data=data.cooc, aes(x=gen.spec,y=cooc.z)) + geom_point(color='#1d1d1d',size=1.5) + geom_smooth(se=F, method='gam', formula=y~s(x), color='#1d1d1d')  + theme_bw() + xlab("Difference in specialism and generalism") + ylab("Co-occurrence")   + ggtitle("Co-occurrence predicted by difference in \ngeneralism and specialism (smoothed)") + theme(axis.title.y = element_text(size=10, vjust=1),axis.title.x = element_text(size=10), axis.text.x  = element_text(size=8), axis.text.y= element_text(size=8,angle=90,hjust=0.5))+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), axis.line = element_line(size=1)) +
theme(plot.title = element_text(vjust = 2, size=10, face="bold")) 

multiplot(phy.dist.cooc.plot, gen.spec.cooc.plot, phy.dist.cooc.smooth, gen.spec.cooc.smooth, cols=2, rows=2)

## [1] 2

Basic OLS analyzes to show relations among data

#Do lm with GAM function; results same as for normal lm model because GAM just an extension of a normal OLS with link function
cooc.phydist_lm <- gam(cooc.z ~ phy.dist, data = data.cooc)
summary(cooc.phydist_lm)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ phy.dist
## 
## Parametric coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.647491   0.248055  -2.610  0.00929 **
## phy.dist     0.006849   0.003033   2.258  0.02432 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.00727   Deviance explained = 0.904%
## GCV = 7.9338  Scale est. = 7.9056    n = 561
cooc.gen.spec_lm <- gam(cooc.z ~ gen.spec, data = data.cooc)
summary(cooc.gen.spec_lm)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ gen.spec
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.3329     0.1918   1.736   0.0832 . 
## gen.spec     -1.6000     0.4949  -3.233   0.0013 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.0166   Deviance explained = 1.84%
## GCV = 7.8592  Scale est. = 7.8312    n = 561
cooc.gen.spec.phy.dist_lm <- gam(cooc.z ~ gen.spec*phy.dist, data = data.cooc)
summary(cooc.gen.spec.phy.dist_lm)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ gen.spec * phy.dist
## 
## Parametric coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.076500   0.395956   2.719  0.00676 ** 
## gen.spec          -5.427895   0.989432  -5.486 6.25e-08 ***
## phy.dist          -0.011430   0.004814  -2.374  0.01792 *  
## gen.spec:phy.dist  0.059591   0.013043   4.569 6.05e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.055   Deviance explained = 6.01%
## GCV = 7.5791  Scale est. = 7.5251    n = 561

Fit GAMS with Cubic Splines


 #fit GAMS with cubic splines
cooc.phydist_gam.cs<- gam(cooc.z ~ s(phy.dist, bs="cr"), data = data.cooc)
summary(cooc.phydist_gam.cs)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ s(phy.dist, bs = "cr")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1187  -1.311     0.19
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F p-value  
## s(phy.dist)   1      1 5.099  0.0243 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.00727   Deviance explained = 0.904%
## GCV = 7.9338  Scale est. = 7.9056    n = 561

GAMs with smoothing factors


#GAM with only phy.dist as a predictor
cooc.phy.dist.gam<-gam(cooc.z~s(phy.dist), data=data.cooc)
summary(cooc.phy.dist.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ s(phy.dist)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1187  -1.311     0.19
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F p-value  
## s(phy.dist)   1      1 5.099  0.0243 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.00727   Deviance explained = 0.904%
## GCV = 7.9338  Scale est. = 7.9056    n = 561
#GAM with only gen.spec as a predictor
cooc.gen.spec.gam <- gam(cooc.z ~ s(gen.spec), data = data.cooc)
summary(cooc.gen.spec.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ s(gen.spec)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1181  -1.317    0.188
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F p-value   
## s(gen.spec)   1      1 10.45 0.00129 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0166   Deviance explained = 1.84%
## GCV = 7.8592  Scale est. = 7.8312    n = 561
#GAM with non-linear predictors
 cooc.gen.spec.phy.dist.add.gam <- gam(cooc.z ~ s(phy.dist) + s(gen.spec), data = data.cooc)
 summary(cooc.gen.spec.phy.dist.add.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ s(phy.dist) + s(gen.spec)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1178  -1.321    0.187
## 
## Approximate significance of smooth terms:
##               edf Ref.df     F p-value   
## s(phy.dist) 1.252  1.449 2.144 0.12743   
## s(gen.spec) 1.000  1.000 9.167 0.00258 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.022   Deviance explained = 2.59%
## GCV = 7.8339  Scale est. = 7.7885    n = 561
#GAM with interaction between phy.dist and gen.spec
cooc.gam<-gam(cooc.z~s(phy.dist,gen.spec), dat=data.cooc)
summary(cooc.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ s(phy.dist, gen.spec)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1153  -1.349    0.178
## 
## Approximate significance of smooth terms:
##                        edf Ref.df     F p-value   
## s(phy.dist,gen.spec) 17.61  21.14 2.147 0.00222 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0627   Deviance explained = 9.22%
## GCV =   7.72  Scale est. = 7.4638    n = 561

Compare models with two response variables using ANOVA


anova(cooc.gen.spec.phy.dist.add.gam, cooc.gam, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: cooc.z ~ s(phy.dist) + s(gen.spec)
## Model 2: cooc.z ~ s(phy.dist, gen.spec)
##   Resid. Df Resid. Dev     Df Deviance  Pr(>Chi)    
## 1    557.75     4344.0                              
## 2    542.39     4048.3 15.362   295.75 0.0006298 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Data diagnostics for two response variables GAM models


#Non-linear predictor GAM
gam.check(cooc.gen.spec.phy.dist.add.gam, k.rep = 1000)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 12 iterations.
## The RMS GCV score gradiant at convergence was 9.242498e-07 .
## The Hessian was positive definite.
## The estimated model rank was 19 (maximum possible: 19)
## Model rank =  19 / 19 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##               k'  edf k-index p-value
## s(phy.dist) 9.00 1.25    1.08    0.98
## s(gen.spec) 9.00 1.00    1.01    0.51
#Interaction predictor GAM
gam.check(cooc.gam, k.rep = 1000)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradiant at convergence was 7.505932e-07 .
## The Hessian was positive definite.
## The estimated model rank was 30 (maximum possible: 30)
## Model rank =  30 / 30 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                         k'   edf k-index p-value
## s(phy.dist,gen.spec) 29.00 17.61    1.03    0.74

GAM visualizations


par(mfrow=c(1,2))
par(mar=c(5.1,5.1,5.6,4.1))
my.vis.gam(cooc.gen.spec.phy.dist.add.gam, type = "response", plot.type = "contour", color="gray", ylab="Difference in generalism and specialism",
main="Predicted co-occurrence based \non non-interacting predictor variables",
xlab="Phylogenetic distance",cex=3, cex.lab=1.25, cex.axis=1.25, cex.main=1.5, lwd=2)
my.vis.gam(cooc.gam, type = "response", plot.type = "contour", color="gray", ylab="Difference in generalism and specialism",
main="Predicted co-occurrence based on \ninteracting predictor variables",
xlab="Phylogenetic distance", cex.lab=1.25, cex.axis=1.25, cex.main=1.5, lwd=2, cex=2)

#textClick("a)", cex=1.5)
#textClick("b)", cex=1.5)

GAMs with tensor product smoothing (te)

#Do same GAM model with tensor product smoothing because predictors not of same unit
#First look at interacting predictors
cooc.gam.te<-gam(cooc.z~te(phy.dist,gen.spec), dat=data.cooc)
summary(cooc.gam.te)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ te(phy.dist, gen.spec)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1149  -1.355    0.176
## 
## Approximate significance of smooth terms:
##                         edf Ref.df     F  p-value    
## te(phy.dist,gen.spec) 5.072  5.495 8.523 3.36e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0704   Deviance explained = 7.88%
## GCV = 7.4837  Scale est. = 7.4027    n = 561
plot(cooc.gam.te, pages=1, residuals=TRUE, pch=16, cex=0.5, col="#000000", shade=T, shade.col='gray90')

gam.check(cooc.gam.te)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 13 iterations.
## The RMS GCV score gradiant at convergence was 6.37398e-07 .
## The Hessian was positive definite.
## The estimated model rank was 25 (maximum possible: 25)
## Model rank =  25 / 25 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                          k'   edf k-index p-value
## te(phy.dist,gen.spec) 24.00  5.07    1.06    0.92
#GAM with tensor product smoothing because predictors not of the same unit
#Non-interacting predictors
cooc.gen.spec.phy.dist.add.gam.te <- gam(cooc.z ~ te(phy.dist) + te(gen.spec), data = data.cooc)
summary(cooc.gen.spec.phy.dist.add.gam.te)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ te(phy.dist) + te(gen.spec)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1178  -1.321    0.187
## 
## Approximate significance of smooth terms:
##                edf Ref.df     F p-value   
## te(phy.dist) 1.382  1.625 3.163 0.05356 . 
## te(gen.spec) 1.000  1.000 9.245 0.00247 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0224   Deviance explained = 2.66%
## GCV = 7.8319  Scale est. = 7.7847    n = 561
plot(cooc.gen.spec.phy.dist.add.gam.te, pages=1, residuals=TRUE, pch=16, cex=0.5, col="#000000", shade=T, shade.col='gray90')

gam.check(cooc.gen.spec.phy.dist.add.gam.te)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 14 iterations.
## The RMS GCV score gradiant at convergence was 5.941743e-07 .
## The Hessian was positive definite.
## The estimated model rank was 9 (maximum possible: 9)
## Model rank =  9 / 9 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                k'  edf k-index p-value
## te(phy.dist) 4.00 1.38    1.08    1.00
## te(gen.spec) 4.00 1.00    1.01    0.56

Examine if there is an interaction between explanatory variables

#Create a perspective plot to see if there is an interaction between explanatory variables
pdist<-seq(min(data.cooc$phy.dist), max(data.cooc$phy.dist), len=25)
gspec<-seq(min(data.cooc$gen.spec), max(data.cooc$gen.spec), len=25)
#dividing the range of phy.dist and gen.spec into 25 values each
data<-expand.grid(phy.dist=pdist, gen.spec=gspec)
fit.gs.pdist<-matrix(predict(cooc.gen.spec.phy.dist.add.gam.te, data), 25, 25)

persp(pdist, gspec, fit.gs.pdist, theta=45, phi=40, ticktype="detailed", xlab="Phylogenetic distance", ylab="Difference in generalism and specialism",
zlab="Co-occurrence", main="Perspective plot showing interaction \nbetween phylogenetic distance and \ndifference in generalism and specialism",expand=2/3, shade=0.5,
cex.axis=0.8, cex.lab=1, cex.main=1.25)

#by these results it doesn't appear as there is an interaction, because the slope of gen.spec is the same for every value of phy.dist

ANOVA to compare two response variable models

anova(cooc.gen.spec.phy.dist.add.gam.te, cooc.gam.te, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: cooc.z ~ te(phy.dist) + te(gen.spec)
## Model 2: cooc.z ~ te(phy.dist, gen.spec)
##   Resid. Df Resid. Dev     Df Deviance  Pr(>Chi)    
## 1    557.62     4340.9                              
## 2    554.93     4108.0 2.6902   232.88 4.372e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(1,2))
par(mar=c(5.1,5.1,5.6,4.1))
my.vis.gam(cooc.gen.spec.phy.dist.add.gam.te, type = "response", plot.type = "contour", color="gray", ylab="Difference in generalism and specialism",
main="Predicted co-occurrence based \non non-interacting predictor variables",
xlab="Phylogenetic distance",cex=3, cex.lab=1.25, cex.axis=1.25, cex.main=1.5, lwd=2)
my.vis.gam(cooc.gam.te, type = "response", plot.type = "contour", color="gray", ylab="Difference in generalism and specialism",
main="Predicted co-occurrence based on \ninteracting predictor variables",
xlab="Phylogenetic distance", cex.lab=1.25, cex.axis=1.25, cex.main=1.5, lwd=2, cex=2)

#textClick("a)", cex=1.5)
#textClick("b)", cex=1.5)