Schefferville - generalists/specialists; pairwise co-occurrence; modelling; gen-spec based on jaccard rankings

Includes OLS analyses with 34 focal species (including T. alpinum)

Author: Tammy L. Elliott

Date: February 20, 2015

R version 3.1.0

Prepare data

#Read in vegetation matrix
veg<-read.csv("Carex_2013_stats_TE.csv", head=TRUE, row.names=1)
veg<-veg[c(1:201,222:701), c(1:3,5:7, 10:12,14:22,24:39)]  #Remove all columns that do not respresent focal species so that every species has at least 20 representatives; also remove DEF and ALP
veg[veg>0]<-1
veg<-veg[, order(names(veg))]

##Step 1: Calculating the actual co-occurrence prevelences for the data set; this creates a species*species co-occurrence matrix
##import occurrence data (species as columns and site as rows, with 1=presence and 0=absence)
veg.m<-as.matrix(veg)##convert to matrix
veg.out<-t(veg.m) %*% veg.m##multiply the occurrence matrix by the transposed matrix
diag(veg.out)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#SUM<-colSums (veg.out, na.rm = FALSE, dims = 1)##calculate the total number of occurrences per species #this was in Liam's code, instead divide by number of plots included

#get upper.triangular matrix with names; then get a dataframe showing presence and absence of a particular combination;this is one way, but doesn't account for 0-0s
veg.out.ind <- which( upper.tri(veg.out,diag=F) , arr.ind = TRUE )
veg.out.df<-data.frame( col = dimnames(veg.out)[[2]][veg.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][veg.out.ind[,1]] ,
            val = veg.out[ veg.out.ind ] )
veg.out.df.neg<-cbind(veg.out.df,661-veg.out.df$val)

colSums(veg)

## ALP ANG AQU BIG BRU CAN CAP CES CHO DIS ECH EVA EXI GYN HEL LEN LEP LIM 
##  76  57 218  51  28  74  27 189  46  31  56  25  29  77  26  29  79 167 
## LIV MAG OLI PAU RAR ROS RUS SAX SCI STY TEN TRI UTR VAG VES VIR 
##  77 201  40  75  60  66  62  33  39  30  61  60  66 143  39  38

veg.out.df.neg<-cbind(veg.out.df,681-veg.out.df$val)

#make upper triangular co-occurrence matrix; include 1:1 presence; 1:0 and 0:1 presence; and 0:0 presence
veg.out.df.neg.col<-revalue(veg.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: ALP

veg.out.df.neg.row<-revalue(veg.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: VIR

veg.out.df.neg.col.num<-as.numeric(as.character(veg.out.df.neg.col))
veg.out.df.neg.row.num<-as.numeric(as.character(veg.out.df.neg.row))
#calculate presence-absence/absence-presence; without absence-absence and presence-presence for each species pair
veg.out.df.neg.row.col<-(veg.out.df.neg.col.num-veg.out.df[,3])+(veg.out.df.neg.row.num-veg.out.df[,3])

#calculate presence-absence/absence-presence; without absence-absence and presence-presence for each species pair
veg.out.df.neg.row.col<-(veg.out.df.neg.col.num-veg.out.df[,3])+(veg.out.df.neg.row.num-veg.out.df[,3])


 #use permiswap from Vegan package for randomization and creation of null model
set.seed(4)## set the seed for randomization
veg.rand<-permatswap(veg, method="quasiswap", fixedmar="both",times = 999, burnin = 20000, thin = 500, mtype = "prab") ##shuffle the occurrence matrix keeping both row and colsums equal

#get 999 transposed matrices from veg.rand$perm[[i]]
#create 999 empty matrices; fill them all with i veg.rand$perm transposed;  get sd for each orf the 528 entries

veg.rand.perm<-veg.rand$perm
veg.rand<-lapply(1:999, function(x) matrix(0, 34,34))

for (i in 1:length(veg.rand)) { #loop over 999 matrices in veg.rand.perm
    for (j in 1:length(veg.rand.perm)){
    veg.rand[[j]]<-t(veg.rand.perm[[j]]) %*% veg.rand.perm[[j]]
   }
}

#calculate mean across 999 different matrices  for each indexed position
fulldim<-34*34
meanmat<-matrix(nrow=34,ncol=34)
meanmat[1:1156]<-

apply(as.matrix(1:fulldim), 1,function(y) mean(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (33*33 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
head(meanmat)

##           [,1]      [,2]       [,3]      [,4]      [,5]      [,6]     [,7]
## [1,] 76.000000  6.471471  24.158158  5.749750  3.110110  8.319319 3.079079
## [2,]  6.471471 57.000000  18.222222  4.282282  2.360360  6.166166 2.348348
## [3,] 24.158158 18.222222 218.000000 16.260260  8.958959 23.624625 8.658659
## [4,]  5.749750  4.282282  16.260260 51.000000  2.074074  5.519520 1.986987
## [5,]  3.110110  2.360360   8.958959  2.074074 28.000000  2.983984 1.187187
## [6,]  8.319319  6.166166  23.624625  5.519520  2.983984 74.000000 3.049049
##           [,8]      [,9]    [,10]     [,11]    [,12]    [,13]     [,14]
## [1,] 21.035035  5.328328 3.511512  6.246246 2.792793 3.196196  8.697698
## [2,] 15.686687  3.931932 2.702703  4.744745 2.105105 2.483483  6.537538
## [3,] 60.200200 14.657658 9.838839 17.801802 8.019019 9.158158 24.721722
## [4,] 14.289289  3.437437 2.377377  4.176176 1.902903 2.136136  5.798799
## [5,]  7.686687  1.913914 1.287287  2.310310 0.969970 1.185185  3.129129
## [6,] 20.523524  4.897898 3.419419  6.141141 2.800801 3.110110  8.382382
##         [,15]    [,16]     [,17]     [,18]     [,19]     [,20]     [,21]
## [1,] 2.872873 3.351351  8.924925 18.640641  8.392392 22.304304  4.354354
## [2,] 2.199199 2.489489  6.579580 13.875876  6.438438 16.806807  3.309309
## [3,] 8.315315 9.307307 25.105105 53.127127 24.732733 63.631632 12.803804
## [4,] 1.938939 2.164164  5.992993 12.388388  5.829830 15.144144  2.919920
## [5,] 1.081081 1.218218  3.211211  6.831832  3.125125  8.291291  1.608609
## [6,] 2.744745 3.260260  8.653654 17.952953  8.295295 21.718719  4.377377
##          [,22]     [,23]     [,24]     [,25]     [,26]     [,27]    [,28]
## [1,]  8.445445  6.707708  7.430430  6.897898  3.622623  4.343343 3.429429
## [2,]  6.148148  5.059059  5.554555  5.149149  2.802803  3.201201 2.558559
## [3,] 23.933934 19.029029 21.163163 19.597598 10.540541 12.522523 9.637638
## [4,]  5.673674  4.476476  4.940941  4.604605  2.521522  2.912913 2.288288
## [5,]  3.085085  2.537538  2.674675  2.581582  1.408408  1.708709 1.234234
## [6,]  8.195195  6.613614  7.168168  6.795796  3.599600  4.222222 3.211211
##          [,29]     [,30]     [,31]     [,32]     [,33]     [,34]
## [1,]  6.818819  6.927928  7.409409 16.076076  4.285285  4.329329
## [2,]  4.995996  5.132132  5.580581 12.123123  3.180180  3.213213
## [3,] 19.467467 19.114114 20.976977 45.343343 12.271271 12.265265
## [4,]  4.657658  4.529530  5.013013 10.615616  2.877878  2.890891
## [5,]  2.573574  2.449449  2.682683  5.932933  1.599600  1.622623
## [6,]  6.531532  6.592593  7.220220 15.411411  4.248248  4.097097

#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
meanmat.out.ind <- which( upper.tri(meanmat,diag=F) , arr.ind = TRUE )
meanmat.out.df<-data.frame( col = dimnames(veg.out)[[2]][meanmat.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][meanmat.out.ind[,1]] ,
            val = meanmat[ meanmat.out.ind ] )
#Calculate plots that mean species data aren't in
meanmat.out.df.neg<-cbind(meanmat.out.df,681-meanmat.out.df$val)

#find how many plots either one oor the other species in, but not both
meanmat.out.df.neg.col<-revalue(meanmat.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: ALP

meanmat.out.df.neg.row<-revalue(meanmat.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: VIR

meanmat.out.df.neg.col.num<-as.numeric(as.character(meanmat.out.df.neg.col))
meanmat.out.df.neg.row.num<-as.numeric(as.character(meanmat.out.df.neg.row))
meanmat.out.df.neg.row.col<-(meanmat.out.df.neg.col.num+meanmat.out.df.neg.row.num)-meanmat.out.df[,3]

#calculate sd across 999 different matrices for each position
fulldim<-34*34
sdmat<-matrix(nrow=34,ncol=34)
sdmat[1:1156]<-

apply(as.matrix(1:fulldim), 1,function(y) sd(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (34*34 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
head(sdmat)

##          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]
## [1,] 0.000000 2.286787 3.898385 2.120479 1.632550 2.592794 1.662010
## [2,] 2.286787 0.000000 3.384313 1.811895 1.420671 2.261115 1.432088
## [3,] 3.898385 3.384313 0.000000 3.254153 2.435813 3.801201 2.359605
## [4,] 2.120479 1.811895 3.254153 0.000000 1.363906 2.220638 1.320727
## [5,] 1.632550 1.420671 2.435813 1.363906 0.000000 1.495398 1.047947
## [6,] 2.592794 2.261115 3.801201 2.220638 1.495398 0.000000 1.616733
##          [,8]     [,9]    [,10]    [,11]     [,12]    [,13]    [,14]
## [1,] 3.842304 2.039762 1.789979 2.199796 1.5273420 1.599582 2.613113
## [2,] 3.302840 1.814927 1.565760 2.080039 1.3256994 1.475748 2.305857
## [3,] 5.434983 2.987435 2.631521 3.288523 2.3562135 2.357143 3.797299
## [4,] 3.063685 1.727950 1.441139 1.827477 1.2468725 1.404076 2.213223
## [5,] 2.301636 1.302301 1.076507 1.391285 0.9596567 1.026083 1.619750
## [6,] 3.681073 1.974686 1.719645 2.205636 1.5856822 1.618371 2.641189
##          [,15]    [,16]    [,17]    [,18]    [,19]    [,20]    [,21]
## [1,] 1.5117625 1.725001 2.711101 3.551509 2.545022 3.639778 2.029975
## [2,] 1.4287905 1.518635 2.260895 3.061594 2.239194 3.332555 1.702710
## [3,] 2.2520700 2.496622 3.686074 5.222394 3.858941 5.484213 2.925177
## [4,] 1.2831868 1.359697 2.255466 2.945957 2.161114 3.182373 1.571156
## [5,] 0.9871075 1.060508 1.664324 2.212586 1.558916 2.438879 1.208672
## [6,] 1.4974628 1.688797 2.580597 3.603159 2.520435 3.686617 1.907355
##         [,22]    [,23]    [,24]    [,25]    [,26]    [,27]    [,28]
## [1,] 2.607753 2.324103 2.390050 2.367868 1.780187 1.934611 1.763234
## [2,] 2.353115 2.066411 2.141483 2.050655 1.632631 1.717107 1.450010
## [3,] 3.828440 3.369996 3.567613 3.427788 2.643878 2.738291 2.486602
## [4,] 2.172800 1.927337 2.049858 2.103728 1.497924 1.547324 1.448160
## [5,] 1.620191 1.494258 1.435057 1.554972 1.138403 1.255143 1.076805
## [6,] 2.580747 2.262098 2.409015 2.397047 1.770341 1.873416 1.662517
##         [,29]    [,30]    [,31]    [,32]    [,33]    [,34]
## [1,] 2.345038 2.413807 2.464081 3.240249 1.913902 1.868055
## [2,] 2.077165 2.101283 2.150934 2.959285 1.670673 1.673076
## [3,] 3.440402 3.404728 3.612557 4.897674 2.882725 2.868652
## [4,] 2.007730 1.928813 2.169850 2.755646 1.516476 1.595053
## [5,] 1.462402 1.432935 1.563746 2.120259 1.216882 1.265675
## [6,] 2.357089 2.400147 2.471390 3.158776 1.884552 1.797789

#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
sd.out.ind <- which( upper.tri(sdmat,diag=F) , arr.ind = TRUE )
sd.out.df<-data.frame( col = dimnames(veg.out)[[2]][sd.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][sd.out.ind[,1]] ,
            val = sdmat[ sd.out.ind ] )

#Calculate z cooccurrence scores for each species pair

#1. make a data frame with appropriate data
cooc.z.info<-cbind(veg.out.df, meanmat.out.df,sd.out.df)
cooc.z.comb<-(cooc.z.info[,3]-cooc.z.info[,6])/cooc.z.info[,9]

#get the cooccurrence.z list with names
cooc.z<-cbind(cooc.z.info[,c(1:2)], cooc.z.comb)

Cyperaceae.trees<-read.nexus("Sedges_april30_comb.nex")


#calculate phy.dist for all 100 random trees
phy.dist<- cophenetic(Cyperaceae.trees)
#drop non-carex species from tree and those not focal species: exclude C. deflexa, but include T. alpinum
pruned.cyp<-drop.tip(Cyperaceae.trees,c( "DEF", "JARC", "JTRI", "ACI", "ARC","ATR", "BRA","BUX","CAT", "CVI", "DIA", "FOE","GAR","GLA","LAC","MED","NAR","NIT","WIL"))
pruned.cyp

## 
## Phylogenetic tree with 34 tips and 33 internal nodes.
## 
## Tip labels:
##  ALP, ANG, AQU, BIG, BRU, CAN, ...
## 
## Rooted; includes branch lengths.

phy.dist.cyp<-cophenetic(pruned.cyp)

phy.dist.ind <- which( upper.tri(phy.dist.cyp,diag=F) , arr.ind = TRUE )
phy.dist.df<-data.frame( col = dimnames(phy.dist.cyp)[[2]][phy.dist.ind[,2]] ,
            row = dimnames(phy.dist.cyp)[[1]][phy.dist.ind[,1]] ,
           val = phy.dist.cyp[ phy.dist.ind ] )


#load inverse of habitat generalist/specialist data
gen.spec<-read.csv("jaccard.rescale.allcooccurence.nodef.csv", row.names=1, head=TRUE)
gen.spec.sp<-rownames(gen.spec)
rownames(gen.spec)[rownames(gen.spec)=="ALP.jaccard"]<-"ALP"
rownames(gen.spec)[rownames(gen.spec)=="ANG.jaccard"]<-"ANG"
rownames(gen.spec)[rownames(gen.spec)=="AQU.jaccard"]<-"AQU"
rownames(gen.spec)[rownames(gen.spec)=="BIG.jaccard"]<-"BIG"
rownames(gen.spec)[rownames(gen.spec)=="BRU.jaccard"]<-"BRU"
rownames(gen.spec)[rownames(gen.spec)=="CAN.jaccard"]<-"CAN"
rownames(gen.spec)[rownames(gen.spec)=="CAP.jaccard"]<-"CAP"
rownames(gen.spec)[rownames(gen.spec)=="CES.jaccard"]<-"CES"
rownames(gen.spec)[rownames(gen.spec)=="RUS.jaccard"]<-"RUS"
rownames(gen.spec)[rownames(gen.spec)=="CHO.jaccard"]<-"CHO"
rownames(gen.spec)[rownames(gen.spec)=="DIS.jaccard"]<-"DIS"
rownames(gen.spec)[rownames(gen.spec)=="ECH.jaccard"]<-"ECH"
rownames(gen.spec)[rownames(gen.spec)=="EVA.jaccard"]<-"EVA"
rownames(gen.spec)[rownames(gen.spec)=="EXI.jaccard"]<-"EXI"
rownames(gen.spec)[rownames(gen.spec)=="GYN.jaccard"]<-"GYN"
rownames(gen.spec)[rownames(gen.spec)=="HEL.jaccard"]<-"HEL"
rownames(gen.spec)[rownames(gen.spec)=="LEN.jaccard"]<-"LEN"
rownames(gen.spec)[rownames(gen.spec)=="LEP.jaccard"]<-"LEP"
rownames(gen.spec)[rownames(gen.spec)=="LIM.jaccard"]<-"LIM"
rownames(gen.spec)[rownames(gen.spec)=="LIV.jaccard"]<-"LIV"
rownames(gen.spec)[rownames(gen.spec)=="MAG.jaccard"]<-"MAG"
rownames(gen.spec)[rownames(gen.spec)=="OLI.jaccard"]<-"OLI"
rownames(gen.spec)[rownames(gen.spec)=="PAU.jaccard"]<-"PAU"
rownames(gen.spec)[rownames(gen.spec)=="RAR.jaccard"]<-"RAR"
rownames(gen.spec)[rownames(gen.spec)=="ROS.jaccard"]<-"ROS"
rownames(gen.spec)[rownames(gen.spec)=="SAX.jaccard"]<-"SAX"
rownames(gen.spec)[rownames(gen.spec)=="SCI.jaccard"]<-"SCI"
rownames(gen.spec)[rownames(gen.spec)=="STY.jaccard"]<-"STY"
rownames(gen.spec)[rownames(gen.spec)=="TEN.jaccard"]<-"TEN"
rownames(gen.spec)[rownames(gen.spec)=="TRI.jaccard"]<-"TRI"
rownames(gen.spec)[rownames(gen.spec)=="UTR.jaccard"]<-"UTR"
rownames(gen.spec)[rownames(gen.spec)=="VAG.jaccard"]<-"VAG"
rownames(gen.spec)[rownames(gen.spec)=="VES.jaccard"]<-"VES"
rownames(gen.spec)[rownames(gen.spec)=="VIR.jaccard"]<-"VIR"

gen.spec.sp<-rownames(gen.spec)



gen.spec.with.spec<-cbind(gen.spec.sp, gen.spec)
colnames(gen.spec.with.spec)<-c("Species", "gen.spec")
gen.sp<-gen.spec.with.spec[order(gen.spec.with.spec$Species),]
sp.h.inv<-gen.sp[,2,drop=TRUE]    #jaccard without species names
h.inv<-gen.sp[,2,drop=FALSE] #with species names

final.jaccard<-matrix(nrow=34,ncol=34)
#this is for jaccard's, even though states h.inv.
for (i in 1:nrow(final.jaccard)){
    final.jaccard[i,]<-abs(sp.h.inv[i]-sp.h.inv)
}

#diag(final.jaccard)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#write.csv(final.jaccard, "Sedges2_final.jaccard.nodef.matrix.nonames.csv")
final.jaccard.names<-read.csv("Sedges2_final.jaccard.nodef.matrix.nonames.csv", row.names=1, head=TRUE)
#remove cases when col1 name = col2 name
#h.rows = apply(final.jaccard.names[, c(1:2)], 1, function(i) length(unique(i)) > 1)

jaccard.ind <- which( upper.tri(final.jaccard.names,diag=F) , arr.ind = TRUE )
jaccard.df<-data.frame( col = dimnames(final.jaccard.names)[[2]][jaccard.ind[,2]] ,
            row = dimnames(final.jaccard.names)[[1]][jaccard.ind[,1]] ,
            val = final.jaccard.names[ jaccard.ind ] )

#write.csv(jaccard.df, "Sedges2_final.jaccard.nodef.matrix.nonames.noselfs.csv")
final.jaccard.names.noself<-read.csv("Sedges2_final.jaccard.nodef.matrix.nonames.noselfs.csv", row.names=1, head=TRUE)

final.jaccard<-final.jaccard.names.noself

Data frame for analyzes

data.cooc<-cbind(final.jaccard, phy.dist.df, cooc.z)
data.cooc<-data.cooc[,c(3,6,9)]
colnames(data.cooc)<-c("gen.spec", "phy.dist", "cooc.z")
head(data.cooc)

##    gen.spec   phy.dist      cooc.z
## 1 0.3932909  78.891213 -2.39264565
## 2 0.1458232 121.972198  5.85930884
## 3 0.2474678 121.972198 -0.06566243
## 4 0.2162027 121.972198 -2.71153372
## 5 0.6094937 121.972198 -0.70770244
## 6 0.3620259   9.008272 -4.68947236

OLS analyzes

OLS of co-occurrence and phylogenetic distance

#lm of cooc.z and phy.dist
cooc.z.phy.dist.lm<-lm(data.cooc$cooc.z~sqrt(data.cooc$phy.dist))
summary(cooc.z.phy.dist.lm)

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ sqrt(data.cooc$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.7487 -1.7057 -0.8179  0.6467 12.4636 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               -1.0583     0.3881  -2.727   0.0066 **
## sqrt(data.cooc$phy.dist)   0.1119     0.0458   2.443   0.0149 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.81 on 559 degrees of freedom
## Multiple R-squared:  0.01056,    Adjusted R-squared:  0.00879 
## F-statistic: 5.966 on 1 and 559 DF,  p-value: 0.01489

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.lm)

par(opar)
# What do the residuals suggest?
par(mfrow=c(2,2))
hist(resid(cooc.z.phy.dist.lm)) #this is a little right skewed
#Are the data normally distributed
hist(sqrt(data.cooc$phy.dist),col="grey50", main="Untransformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc$cooc.z, col="grey50", main="Untransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(sqrt(data.cooc$phy.dist))   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

#variance is good; just need to fix normality
plot(sqrt(data.cooc$phy.dist), data.cooc$cooc.z)
abline(cooc.z.phy.dist.lm)

OLS of co-occurrence and niche width

#do lm of cooc.z by niche width (gen/spec) for entire data set
cooc.z.gen.spec.lm<-lm(data.cooc$cooc.z~data.cooc$gen.spec)
summary(cooc.z.gen.spec.lm)

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3436 -1.7282 -0.7946  0.9305 12.1811 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.5830     0.1926   3.027  0.00258 ** 
## data.cooc$gen.spec  -2.7070     0.5611  -4.824 1.81e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.767 on 559 degrees of freedom
## Multiple R-squared:  0.03997,    Adjusted R-squared:  0.03826 
## F-statistic: 23.28 on 1 and 559 DF,  p-value: 1.813e-06

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc$cooc.z, col="grey50", main="Untransformed data", xlab="Standardized Co-occurrence")

# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.9266, p-value = 6.224e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

#variance is alright; just need to fix normality
plot(data.cooc$gen.spec, data.cooc$cooc.z)
abline(cooc.z.gen.spec.lm)

OLS of niche width and phylogenetic distance

#do lm of genspec by phy.dist for entire data set
phydist.gen.spec.lm<-lm(data.cooc$gen.spec~sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.lm)

## 
## Call:
## lm(formula = data.cooc$gen.spec ~ sqrt(data.cooc$phy.dist))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27232 -0.16916 -0.03801  0.12382  0.72903 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.277961   0.028816   9.646   <2e-16 ***
## sqrt(data.cooc$phy.dist) -0.000633   0.003400  -0.186    0.852    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2086 on 559 degrees of freedom
## Multiple R-squared:  6.198e-05,  Adjusted R-squared:  -0.001727 
## F-statistic: 0.03465 on 1 and 559 DF,  p-value: 0.8524

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc$phy.dist), col="grey50", main="Transformed data", xlab="Phylogenetic Distance")

# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.9266, p-value = 6.224e-16

shapiro.test(sqrt(data.cooc$phy.dist))    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16

#variance is alright

OLS of niche width, phylogenetic distance, and co-occurrence

#do lm of genspec by phy.dist and gen spec for entire data set
phydist.gen.spec.cooc.z.lm<-lm(data.cooc$cooc.z ~ sqrt(data.cooc$gen.spec)+data.cooc$phy.dist)
summary(phydist.gen.spec.cooc.z.lm) #looks like

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ sqrt(data.cooc$gen.spec) + data.cooc$phy.dist)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0846 -1.6438 -0.7493  0.8658 12.5353 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.829475   0.361203   2.296    0.022 *  
## sqrt(data.cooc$gen.spec) -3.053729   0.554789  -5.504 5.66e-08 ***
## data.cooc$phy.dist        0.006646   0.002957   2.248    0.025 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.741 on 558 degrees of freedom
## Multiple R-squared:  0.06007,    Adjusted R-squared:  0.05671 
## F-statistic: 17.83 on 2 and 558 DF,  p-value: 3.113e-08

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.cooc.z.lm)   #Q-Q plot is like a big snake;  variance is highest for middle values

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.cooc.z.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc$phy.dist, col="grey50", main="Transformed data", xlab="Phylogenetic Distance")

# Let's verify using the Shapiro test:
shapiro.test(asin(sqrt(data.cooc$gen.spec)))   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  asin(sqrt(data.cooc$gen.spec))
## W = 0.9759, p-value = 5.509e-08

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

shapiro.test(data.cooc$phy.dist)  #not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$phy.dist
## W = 0.8671, p-value < 2.2e-16

#normality and variance are not good

OLS Interaction of phylogenetic distance, niche width, and co-occurrence

#check to see if there is an interaction - yes there is between phy.dist and gen.spec.

phydist.gen.spec.cooc.z.lm.int<-lm(data.cooc$cooc.z ~ data.cooc$gen.spec*sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.cooc.z.lm.int)

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec * sqrt(data.cooc$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0488 -1.6599 -0.7253  0.8056 12.4128 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                 -0.91748    0.63212  -1.451
## data.cooc$gen.spec                          -0.51581    1.80931  -0.285
## sqrt(data.cooc$phy.dist)                     0.18505    0.07423   2.493
## data.cooc$gen.spec:sqrt(data.cooc$phy.dist) -0.26877    0.21214  -1.267
##                                             Pr(>|t|)  
## (Intercept)                                    0.147  
## data.cooc$gen.spec                             0.776  
## sqrt(data.cooc$phy.dist)                       0.013 *
## data.cooc$gen.spec:sqrt(data.cooc$phy.dist)    0.206  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.754 on 557 degrees of freedom
## Multiple R-squared:  0.05294,    Adjusted R-squared:  0.04784 
## F-statistic: 10.38 on 3 and 557 DF,  p-value: 1.179e-06

Darwin Hutchinson Zone

#####################do for same dataset, but for phy.dist <30
data.cooc.dh<-subset(data.cooc[data.cooc$phy.dist<30,])
head(data.cooc.dh)

##       gen.spec  phy.dist     cooc.z
## 6  0.362025922  9.008272 -4.6894724
## 15 0.401400769 14.509728  4.6917372
## 33 0.041772872 24.481046 -1.4696399
## 34 0.359627897 24.481046 -2.4803428
## 50 0.392969434 24.481046 -0.9417983
## 51 0.008431335 24.481046  0.3893929

OLS of cooccurence and phylogenetic distance for species pairs <30 million years of separation

#lm of cooc.z and phy.dist
cooc.z.phy.dist.dh.lm<-lm(data.cooc.dh$cooc.z~sqrt(data.cooc.dh$phy.dist))
summary(cooc.z.phy.dist.dh.lm)

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1446 -1.3318 -0.4673  0.5589 12.1081 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                  -2.2158     1.2186  -1.818   0.0715 .
## sqrt(data.cooc.dh$phy.dist)   0.3722     0.2893   1.287   0.2007  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.61 on 120 degrees of freedom
## Multiple R-squared:  0.01361,    Adjusted R-squared:  0.005389 
## F-statistic: 1.656 on 1 and 120 DF,  p-value: 0.2007

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.dh.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.phy.dist.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(sqrt(data.cooc.dh$phy.dist),col="grey50", main="Trransformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc.dh$cooc.z, col="grey50", main="Urtransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(sqrt(data.cooc.dh$phy.dist))   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10

shapiro.test(data.cooc.dh$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$cooc.z
## W = 0.8228, p-value = 8.239e-11

#variance is good; just need to fix normality
plot(sqrt(data.cooc.dh$phy.dist), data.cooc.dh$cooc.z)

OLS of niche width and cooccurrence for species pairs <30 million years of separation

#do lm of cooc.z by phy.dist for entire data set
cooc.z.gen.spec.dh.lm<-lm(data.cooc.dh$cooc.z~data.cooc.dh$gen.spec)
summary(cooc.z.gen.spec.dh.lm)

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3668 -1.3034 -0.6618  0.5584 11.8953 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            -0.1227     0.3967  -0.309   0.7577  
## data.cooc.dh$gen.spec  -1.8919     1.0899  -1.736   0.0852 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.595 on 120 degrees of freedom
## Multiple R-squared:  0.02449,    Adjusted R-squared:  0.01636 
## F-statistic: 3.013 on 1 and 120 DF,  p-value: 0.08516

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.dh.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc.dh$cooc.z, col="grey50", main="Urtransformed data", xlab="Standardized Co-occurrence")

# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.9266, p-value = 6.224e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

#variance and normality are not good
plot(data.cooc.dh$gen.spec, data.cooc.dh$cooc.z)
abline(cooc.z.gen.spec.dh.lm)

OLS of niche width and phylogenetic distance for species pairs <30 million years of separation

phydist.gen.spec.dh.lm<-lm(data.cooc.dh$gen.spec~sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.dh.lm)

## 
## Call:
## lm(formula = data.cooc.dh$gen.spec ~ sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28474 -0.16667 -0.02732  0.10217  0.66031 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  0.313748   0.101485   3.092  0.00248 **
## sqrt(data.cooc.dh$phy.dist) -0.004952   0.024092  -0.206  0.83749   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2173 on 120 degrees of freedom
## Multiple R-squared:  0.000352,   Adjusted R-squared:  -0.007978 
## F-statistic: 0.04225 on 1 and 120 DF,  p-value: 0.8375

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.dh.lm)

par(opar)
# What do the residuals suggest?

par(mfrow=c(2,2))
hist(resid(phydist.gen.spec.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")  # a little right skewed
hist(sqrt(data.cooc.dh$phy.dist), col="grey50", main="Transformed data", xlab="Phylogenetic Distance")           #majorly left skewed

# Let's verify using the Shapiro test:
shapiro.test(data.cooc.dh$gen.spec) #good

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$gen.spec
## W = 0.9193, p-value = 1.845e-06

shapiro.test(sqrt(data.cooc.dh$phy.dist))    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10

OLS of niche width, co-occurrence and phylogenetic distance for species pairs <30 million years of separation

#do lm of genspec by phy.dist and gen spec for entire data set
phydist.gen.spec.cooc.z.dh.lm<-lm(data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec+sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.cooc.z.dh.lm) #looks like

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec + sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4737 -1.4519 -0.4975  0.6221 11.6494 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                   -1.630      1.256  -1.298   0.1968  
## data.cooc.dh$gen.spec         -1.866      1.087  -1.716   0.0887 .
## sqrt(data.cooc.dh$phy.dist)    0.363      0.287   1.265   0.2085  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.589 on 119 degrees of freedom
## Multiple R-squared:  0.03743,    Adjusted R-squared:  0.02125 
## F-statistic: 2.314 on 2 and 119 DF,  p-value: 0.1033

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.cooc.z.dh.lm)   #Q-Q plot is like a big snake;  variance is highest for middle values  #variance goes downwards

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.cooc.z.dh.lm)) #good
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc.dh$phy.dist), col="grey50", main="Transformed data", xlab="Phylogenetic Distance")    #majorily left skewed

# Let's verify using the Shapiro test:
shapiro.test(asin(sqrt(data.cooc.dh$gen.spec)))  #normal

## 
##  Shapiro-Wilk normality test
## 
## data:  asin(sqrt(data.cooc.dh$gen.spec))
## W = 0.9639, p-value = 0.002354

shapiro.test(data.cooc.dh$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$cooc.z
## W = 0.8228, p-value = 8.239e-11

shapiro.test(sqrt(data.cooc.dh$phy.dist))  #not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10

#normality and variance are not good

OLS of interaction of niche width, phylogenetic distance, and co-occurrence for species pairs <30 million years of separation

#check to see if there is an interaction - yes there is between phy.dist and gen.spec.
phydist.gen.spec.cooc.z.dh.lm.int<-lm(data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec*sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.cooc.z.dh.lm.int)

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec * sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5894 -1.4963 -0.3537  0.7141 11.2719 
## 
## Coefficients:
##                                                   Estimate Std. Error
## (Intercept)                                        -4.4131     2.0383
## data.cooc.dh$gen.spec                               6.1409     4.7655
## sqrt(data.cooc.dh$phy.dist)                         1.0291     0.4797
## data.cooc.dh$gen.spec:sqrt(data.cooc.dh$phy.dist)  -1.9178     1.1118
##                                                   t value Pr(>|t|)  
## (Intercept)                                        -2.165   0.0324 *
## data.cooc.dh$gen.spec                               1.289   0.2001  
## sqrt(data.cooc.dh$phy.dist)                         2.145   0.0340 *
## data.cooc.dh$gen.spec:sqrt(data.cooc.dh$phy.dist)  -1.725   0.0872 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.568 on 118 degrees of freedom
## Multiple R-squared:  0.06111,    Adjusted R-squared:  0.03724 
## F-statistic:  2.56 on 3 and 118 DF,  p-value: 0.0583

GAM models with tensor product smoothing and square root transformation of phylogenetic distance

GAM with interaction term of two predictors

Set up GAM formula with interaction between two predictor variables
I am using tensor product smoothing because predictors not of same unit
Do same GAM model with tensor product smoothing because predictors not of same unit; the second analysis includes a square root transformation of phy.dist. GAM.check
Indicates that k value too log for .log model so used k=6 instead (selected by a trial-and-error process, seeing when p-value of gam.check greater than p<0.05
For te smooths, the upper limit of degrees of freedom is given by product of the K values provided by each marginal smooth less than one, for the construction.
Exact choice is not generally ciritical, but should be large enough so you are reasonable sure of having enough degrees of frreedom to represent the ‘truth’ reasonable well, but small enough for comuptational efficiency

cooc.gam.sqrt.te<-gam(cooc.z~te(phy.dist.sqrt,gen.spec), dat=data.cooc)
summary(cooc.gam.sqrt.te)

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z ~ te(phy.dist.sqrt, gen.spec)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1557     0.1152  -1.351    0.177
## 
## Approximate significance of smooth terms:
##                              edf Ref.df     F  p-value    
## te(phy.dist.sqrt,gen.spec) 4.495  5.196 7.892 2.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0652   Deviance explained = 7.27%
## GCV = 7.5182  Scale est. = 7.4445    n = 561

gam.check(cooc.gam.sqrt.te)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 15 iterations.
## The RMS GCV score gradiant at convergence was 7.917218e-07 .
## The Hessian was positive definite.
## The estimated model rank was 25 (maximum possible: 25)
## Model rank =  25 / 25 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'  edf k-index p-value
## te(phy.dist.sqrt,gen.spec) 24.0  4.5     1.0    0.55

#####################do for same dataset, but remove phy.dist of 30 my
data.cooc.30<-subset(data.cooc[data.cooc$phy.dist<30,])
phy.dist.30.sqrt<-sqrt(data.cooc.30$phy.dist)
gen.spec.30<-data.cooc.30$gen.spec
cooc.z.30<-data.cooc.30$cooc.z

cooc.gam.30<-gam(cooc.z.30~te(phy.dist.30.sqrt,gen.spec.30), dat=data.cooc.30)
summary(cooc.gam.30)

## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## cooc.z.30 ~ te(phy.dist.30.sqrt, gen.spec.30)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.6775     0.2303  -2.942  0.00393 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                   edf Ref.df     F p-value  
## te(phy.dist.30.sqrt,gen.spec.30) 3.79  4.279 2.293  0.0594 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0552   Deviance explained = 8.48%
## GCV = 6.7343  Scale est. = 6.4699    n = 122

gam.check(cooc.gam.30, k.rep = 1000)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 15 iterations.
## The RMS GCV score gradiant at convergence was 4.859982e-07 .
## The Hessian was positive definite.
## The estimated model rank was 25 (maximum possible: 25)
## Model rank =  25 / 25 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                     k'   edf k-index p-value
## te(phy.dist.30.sqrt,gen.spec.30) 24.00  3.79    1.03    0.63

#other diagnostic checks that should be done
par(mfrow=c(1,2))
plot(fitted(cooc.gam.30),residuals(cooc.gam.30))
plot(data.cooc.30$cooc.z,residuals(cooc.gam.30))

Plot GAMS for Jaccard specialisation index

#dev.new(width=7.4, height=4)
layout(matrix(1:2,ncol=2), width = c(1,1),height = c(1,1))
par(mar=c(4,2.5,2.75,0.75), mgp=c(1.5,0.5,0), las=0)
my.vis.gam(cooc.gam.sqrt.te, type = "response",color="gray",plot.type = "contour",
 ylab="Difference in niche width",main="Predicted co-occurrence based on \ninteracting predictor variables",
xlab="Phylogenetic distance (square root)", cex.lab=1, cex.axis=0.75, cex.main=1, lwd=2, cex=2)
my.vis.gam(cooc.gam.30, type = "response",color="gray",plot.type = "contour",
 ylab="Difference in niche width",main="Predicted co-occurrence based on \ninteracting predictor variables",
xlab="Phylogenetic distance (square root)", cex.lab=1, cex.axis=0.75, cex.main=1, lwd=2, cex=2)

#textClick("(a)", cex=1)
#textClick("(b)", cex=1)