Schefferville - generalists/specialists; pairwise co-occurrence; modelling

Includes OLS analyses with 34 focal species (including T. alpinum)

Author: Tammy L. Elliott

Date: February 20, 2015

R version 3.1.0

Prepare data for analyzes

#Read in vegetation matrix
#Read in vegetation matrix
veg<-read.csv("Carex_2013_stats_TE.csv", head=TRUE, row.names=1)
veg<-veg[c(1:201,222:701), c(1:3,5:7, 10:12,14:22,24:39)]  #Remove all columns that do not respresent focal species so that every species has at least 20 representatives; also remove DEF and ALP
veg[veg>0]<-1
veg<-veg[, order(names(veg))]

##Step 1: Calculating the actual co-occurrence prevelences for the data set; this creates a species*species co-occurrence matrix
##import occurrence data (species as columns and site as rows, with 1=presence and 0=absence)
veg.m<-as.matrix(veg)##convert to matrix
veg.out<-t(veg.m) %*% veg.m##multiply the occurrence matrix by the transposed matrix
diag(veg.out)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#SUM<-colSums (veg.out, na.rm = FALSE, dims = 1)##calculate the total number of occurrences per species #this was in Liam's code, instead divide by number of plots included
#get upper.triangular matrix with names; then get a dataframe showing presence and absence of a particular combination;this is one way, but doesn't account for 0-0s
veg.out.ind <- which( upper.tri(veg.out,diag=F) , arr.ind = TRUE )
veg.out.df<-data.frame( col = dimnames(veg.out)[[2]][veg.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][veg.out.ind[,1]] ,
            val = veg.out[ veg.out.ind ] )
veg.out.df.neg<-cbind(veg.out.df,661-veg.out.df$val)
colSums(veg)

## ALP ANG AQU BIG BRU CAN CAP CES CHO DIS ECH EVA EXI GYN HEL LEN LEP LIM 
##  76  57 218  51  28  74  27 189  46  31  56  25  29  77  26  29  79 167 
## LIV MAG OLI PAU RAR ROS RUS SAX SCI STY TEN TRI UTR VAG VES VIR 
##  77 201  40  75  60  66  62  33  39  30  61  60  66 143  39  38

veg.out.df.neg<-cbind(veg.out.df,681-veg.out.df$val)

#make upper triangular co-occurrence matrix; include 1:1 presence; 1:0 and 0:1 presence; and 0:0 presence
veg.out.df.neg.col<-revalue(veg.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: ALP

veg.out.df.neg.row<-revalue(veg.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: VIR

veg.out.df.neg.col.num<-as.numeric(as.character(veg.out.df.neg.col))
veg.out.df.neg.row.num<-as.numeric(as.character(veg.out.df.neg.row))
#calculate presence-absence/absence-presence; without absence-absence and presence-presence for each species pair
veg.out.df.neg.row.col<-(veg.out.df.neg.col.num-veg.out.df[,3])+(veg.out.df.neg.row.num-veg.out.df[,3])

#calculate presence-absence/absence-presence; without absence-absence and presence-presence for each species pair
veg.out.df.neg.row.col<-(veg.out.df.neg.col.num-veg.out.df[,3])+(veg.out.df.neg.row.num-veg.out.df[,3])


 #use permiswap from Vegan package for randomization and creation of null model
set.seed(4)## set the seed for randomization
veg.rand<-permatswap(veg, method="quasiswap", fixedmar="both",times = 999, burnin = 20000, thin = 500, mtype = "prab") ##shuffle the occurrence matrix keeping both row and colsums equal

#get 999 transposed matrices from veg.rand$perm[[i]]
#create 999 empty matrices; fill them all with i veg.rand$perm transposed;  get sd for each orf the 528 entries

veg.rand.perm<-veg.rand$perm
veg.rand<-lapply(1:999, function(x) matrix(0, 34,34))

for (i in 1:length(veg.rand)) { #loop over 999 matrices in veg.rand.perm
    for (j in 1:length(veg.rand.perm)){
    veg.rand[[j]]<-t(veg.rand.perm[[j]]) %*% veg.rand.perm[[j]]
   }
}

#calculate mean across 999 different matrices  for each indexed position
fulldim<-34*34
meanmat<-matrix(nrow=34,ncol=34)
meanmat[1:1156]<-

apply(as.matrix(1:fulldim), 1,function(y) mean(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (33*33 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
head(meanmat)

##           [,1]      [,2]       [,3]      [,4]      [,5]      [,6]     [,7]
## [1,] 76.000000  6.471471  24.158158  5.749750  3.110110  8.319319 3.079079
## [2,]  6.471471 57.000000  18.222222  4.282282  2.360360  6.166166 2.348348
## [3,] 24.158158 18.222222 218.000000 16.260260  8.958959 23.624625 8.658659
## [4,]  5.749750  4.282282  16.260260 51.000000  2.074074  5.519520 1.986987
## [5,]  3.110110  2.360360   8.958959  2.074074 28.000000  2.983984 1.187187
## [6,]  8.319319  6.166166  23.624625  5.519520  2.983984 74.000000 3.049049
##           [,8]      [,9]    [,10]     [,11]    [,12]    [,13]     [,14]
## [1,] 21.035035  5.328328 3.511512  6.246246 2.792793 3.196196  8.697698
## [2,] 15.686687  3.931932 2.702703  4.744745 2.105105 2.483483  6.537538
## [3,] 60.200200 14.657658 9.838839 17.801802 8.019019 9.158158 24.721722
## [4,] 14.289289  3.437437 2.377377  4.176176 1.902903 2.136136  5.798799
## [5,]  7.686687  1.913914 1.287287  2.310310 0.969970 1.185185  3.129129
## [6,] 20.523524  4.897898 3.419419  6.141141 2.800801 3.110110  8.382382
##         [,15]    [,16]     [,17]     [,18]     [,19]     [,20]     [,21]
## [1,] 2.872873 3.351351  8.924925 18.640641  8.392392 22.304304  4.354354
## [2,] 2.199199 2.489489  6.579580 13.875876  6.438438 16.806807  3.309309
## [3,] 8.315315 9.307307 25.105105 53.127127 24.732733 63.631632 12.803804
## [4,] 1.938939 2.164164  5.992993 12.388388  5.829830 15.144144  2.919920
## [5,] 1.081081 1.218218  3.211211  6.831832  3.125125  8.291291  1.608609
## [6,] 2.744745 3.260260  8.653654 17.952953  8.295295 21.718719  4.377377
##          [,22]     [,23]     [,24]     [,25]     [,26]     [,27]    [,28]
## [1,]  8.445445  6.707708  7.430430  6.897898  3.622623  4.343343 3.429429
## [2,]  6.148148  5.059059  5.554555  5.149149  2.802803  3.201201 2.558559
## [3,] 23.933934 19.029029 21.163163 19.597598 10.540541 12.522523 9.637638
## [4,]  5.673674  4.476476  4.940941  4.604605  2.521522  2.912913 2.288288
## [5,]  3.085085  2.537538  2.674675  2.581582  1.408408  1.708709 1.234234
## [6,]  8.195195  6.613614  7.168168  6.795796  3.599600  4.222222 3.211211
##          [,29]     [,30]     [,31]     [,32]     [,33]     [,34]
## [1,]  6.818819  6.927928  7.409409 16.076076  4.285285  4.329329
## [2,]  4.995996  5.132132  5.580581 12.123123  3.180180  3.213213
## [3,] 19.467467 19.114114 20.976977 45.343343 12.271271 12.265265
## [4,]  4.657658  4.529530  5.013013 10.615616  2.877878  2.890891
## [5,]  2.573574  2.449449  2.682683  5.932933  1.599600  1.622623
## [6,]  6.531532  6.592593  7.220220 15.411411  4.248248  4.097097

#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
meanmat.out.ind <- which( upper.tri(meanmat,diag=F) , arr.ind = TRUE )
meanmat.out.df<-data.frame( col = dimnames(veg.out)[[2]][meanmat.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][meanmat.out.ind[,1]] ,
            val = meanmat[ meanmat.out.ind ] )
#Calculate plots that mean species data aren't in
meanmat.out.df.neg<-cbind(meanmat.out.df,681-meanmat.out.df$val)

#find how many plots either one oor the other species in, but not both
meanmat.out.df.neg.col<-revalue(meanmat.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: ALP

meanmat.out.df.neg.row<-revalue(meanmat.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))

## The following `from` values were not present in `x`: VIR

meanmat.out.df.neg.col.num<-as.numeric(as.character(meanmat.out.df.neg.col))
meanmat.out.df.neg.row.num<-as.numeric(as.character(meanmat.out.df.neg.row))
meanmat.out.df.neg.row.col<-(meanmat.out.df.neg.col.num+meanmat.out.df.neg.row.num)-meanmat.out.df[,3]

#calculate sd across 999 different matrices for each position
fulldim<-34*34
sdmat<-matrix(nrow=34,ncol=34)
sdmat[1:1156]<-

apply(as.matrix(1:fulldim), 1,function(y) sd(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (34*34 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
head(sdmat)

##          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]
## [1,] 0.000000 2.286787 3.898385 2.120479 1.632550 2.592794 1.662010
## [2,] 2.286787 0.000000 3.384313 1.811895 1.420671 2.261115 1.432088
## [3,] 3.898385 3.384313 0.000000 3.254153 2.435813 3.801201 2.359605
## [4,] 2.120479 1.811895 3.254153 0.000000 1.363906 2.220638 1.320727
## [5,] 1.632550 1.420671 2.435813 1.363906 0.000000 1.495398 1.047947
## [6,] 2.592794 2.261115 3.801201 2.220638 1.495398 0.000000 1.616733
##          [,8]     [,9]    [,10]    [,11]     [,12]    [,13]    [,14]
## [1,] 3.842304 2.039762 1.789979 2.199796 1.5273420 1.599582 2.613113
## [2,] 3.302840 1.814927 1.565760 2.080039 1.3256994 1.475748 2.305857
## [3,] 5.434983 2.987435 2.631521 3.288523 2.3562135 2.357143 3.797299
## [4,] 3.063685 1.727950 1.441139 1.827477 1.2468725 1.404076 2.213223
## [5,] 2.301636 1.302301 1.076507 1.391285 0.9596567 1.026083 1.619750
## [6,] 3.681073 1.974686 1.719645 2.205636 1.5856822 1.618371 2.641189
##          [,15]    [,16]    [,17]    [,18]    [,19]    [,20]    [,21]
## [1,] 1.5117625 1.725001 2.711101 3.551509 2.545022 3.639778 2.029975
## [2,] 1.4287905 1.518635 2.260895 3.061594 2.239194 3.332555 1.702710
## [3,] 2.2520700 2.496622 3.686074 5.222394 3.858941 5.484213 2.925177
## [4,] 1.2831868 1.359697 2.255466 2.945957 2.161114 3.182373 1.571156
## [5,] 0.9871075 1.060508 1.664324 2.212586 1.558916 2.438879 1.208672
## [6,] 1.4974628 1.688797 2.580597 3.603159 2.520435 3.686617 1.907355
##         [,22]    [,23]    [,24]    [,25]    [,26]    [,27]    [,28]
## [1,] 2.607753 2.324103 2.390050 2.367868 1.780187 1.934611 1.763234
## [2,] 2.353115 2.066411 2.141483 2.050655 1.632631 1.717107 1.450010
## [3,] 3.828440 3.369996 3.567613 3.427788 2.643878 2.738291 2.486602
## [4,] 2.172800 1.927337 2.049858 2.103728 1.497924 1.547324 1.448160
## [5,] 1.620191 1.494258 1.435057 1.554972 1.138403 1.255143 1.076805
## [6,] 2.580747 2.262098 2.409015 2.397047 1.770341 1.873416 1.662517
##         [,29]    [,30]    [,31]    [,32]    [,33]    [,34]
## [1,] 2.345038 2.413807 2.464081 3.240249 1.913902 1.868055
## [2,] 2.077165 2.101283 2.150934 2.959285 1.670673 1.673076
## [3,] 3.440402 3.404728 3.612557 4.897674 2.882725 2.868652
## [4,] 2.007730 1.928813 2.169850 2.755646 1.516476 1.595053
## [5,] 1.462402 1.432935 1.563746 2.120259 1.216882 1.265675
## [6,] 2.357089 2.400147 2.471390 3.158776 1.884552 1.797789

#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
sd.out.ind <- which( upper.tri(sdmat,diag=F) , arr.ind = TRUE )
sd.out.df<-data.frame( col = dimnames(veg.out)[[2]][sd.out.ind[,2]] ,
            row = dimnames(veg.out)[[1]][sd.out.ind[,1]] ,
            val = sdmat[ sd.out.ind ] )

#Calculate z cooccurrence scores for each species pair

#1. make a data frame with appropriate data
cooc.z.info<-cbind(veg.out.df, meanmat.out.df,sd.out.df)
cooc.z.comb<-(cooc.z.info[,3]-cooc.z.info[,6])/cooc.z.info[,9]

#get the cooccurrence.z list with names
cooc.z<-cbind(cooc.z.info[,c(1:2)], cooc.z.comb)

#Cyperaceae.trees<-read.nexus("Sedges_april30.nex")
#Cyperaceae.samp<-sample(Cyperaceae.trees, 100, replace = FALSE, prob = NULL)
Cyperaceae.tree<-read.nexus("Sedges_april30_comb.nex")

#calculate phy.dist for all 100 random trees
#phy.dist<- lapply(Cyperaceae.samp, cophenetic)
#drop non-carex species from tree and those not focal species: exclude C. deflexa, but include T. alpinum
pruned.cyp<-drop.tip(Cyperaceae.tree,c( "DEF", "JARC", "JTRI", "ACI", "ARC","ATR", "BRA","BUX","CAT", "CVI", "DIA", "FOE","GAR","GLA","LAC","MED","NAR","NIT","WIL"))
pruned.cyp

## 
## Phylogenetic tree with 34 tips and 33 internal nodes.
## 
## Tip labels:
##  ALP, ANG, AQU, BIG, BRU, CAN, ...
## 
## Rooted; includes branch lengths.

phy.dist.cyp<-cophenetic(pruned.cyp)

phy.dist.ind <- which( upper.tri(phy.dist.cyp,diag=F) , arr.ind = TRUE )
phy.dist.df<-data.frame( col = dimnames(phy.dist.cyp)[[2]][phy.dist.ind[,2]] ,
            row = dimnames(phy.dist.cyp)[[1]][phy.dist.ind[,1]] ,
            val = phy.dist.cyp[ phy.dist.ind ] )


#load inverse of habitat generalist/specialist data
gen.spec<-read.csv("H.inv.rescale.no.def.allplots.csv", row.names=1, head=TRUE)
gen.spec.sp<-rownames(gen.spec)


#diag(final.h.inv)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#write.csv(final.h.inv, "Sedges2_final.h.inv.nodef.matrix.nonames.csv")
final.h.inv.names<-read.csv("Sedges2_final.h.inv.nodef.matrix.nonames.csv", row.names=1, head=TRUE)
#remove cases when col1 name = col2 name
 h.rows = apply(final.h.inv.names[, c(1:2)], 1, function(i) length(unique(i)) > 1)

h.inv.ind <- which( upper.tri(final.h.inv.names,diag=F) , arr.ind = TRUE )
h.inv.df<-data.frame( col = dimnames(final.h.inv.names)[[2]][h.inv.ind[,2]] ,
            row = dimnames(final.h.inv.names)[[1]][h.inv.ind[,1]] ,
            val = final.h.inv.names[ h.inv.ind ] )

#write.csv(h.inv.df, "Sedges2_final.h.inv.nodef.matrix.nonames.noselfs.csv")
final.h.inv.names.noself<-read.csv("Sedges2_final.h.inv.nodef.matrix.nonames.noselfs.csv", row.names=1, head=TRUE)


final.h.inv<-final.h.inv.names.noself

data.cooc<-cbind(final.h.inv, phy.dist.df, cooc.z)
data.cooc<-data.cooc[,c(3,6,9)]
colnames(data.cooc)<-c("gen.spec", "phy.dist", "cooc.z")

OLS of standardized co-occurrence and phylogenetic distance

#lm of cooc.z and phy.dist
cooc.z.phy.dist.lm<-lm(data.cooc$cooc.z~sqrt(data.cooc$phy.dist))
summary(cooc.z.phy.dist.lm)

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ sqrt(data.cooc$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.7487 -1.7057 -0.8179  0.6467 12.4636 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               -1.0583     0.3881  -2.727   0.0066 **
## sqrt(data.cooc$phy.dist)   0.1119     0.0458   2.443   0.0149 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.81 on 559 degrees of freedom
## Multiple R-squared:  0.01056,    Adjusted R-squared:  0.00879 
## F-statistic: 5.966 on 1 and 559 DF,  p-value: 0.01489

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.lm)

par(opar)
# What do the residuals suggest
par(mfrow=c(2,2))
hist(resid(cooc.z.phy.dist.lm), col="grey50") #this is a little right skewed
#Are the data normally distributed
hist(sqrt(data.cooc$phy.dist),col="grey50", main="Transformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(sqrt(data.cooc$phy.dist))   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

#variance is good; just need to fix normality
plot(sqrt(data.cooc$phy.dist), data.cooc$cooc.z)
abline(cooc.z.phy.dist.lm)

***

OLS of niche width by co-occurrence

#do lm of gen.spec by phy.dist for entire data set

cooc.z.gen.spec.lm<-lm(data.cooc$cooc.z~data.cooc$gen.spec)
summary(cooc.z.gen.spec.lm)

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3892 -1.6934 -0.8110  0.8703 12.0012 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)          0.3329     0.1918   1.736   0.0832 . 
## data.cooc$gen.spec  -1.6000     0.4949  -3.233   0.0013 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.798 on 559 degrees of freedom
## Multiple R-squared:  0.01836,    Adjusted R-squared:  0.0166 
## F-statistic: 10.45 on 1 and 559 DF,  p-value: 0.001296

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")

# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

#variance is alright; just need to fix normality
plot(data.cooc$gen.spec, data.cooc$cooc.z)
abline(cooc.z.gen.spec.lm)

***

OLS of niche width and phylogenetic distance

#do lm of genspec by phy.dist for entire data set
phydist.gen.spec.lm<-lm(data.cooc$gen.spec~sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.lm)

## 
## Call:
## lm(formula = data.cooc$gen.spec ~ sqrt(data.cooc$phy.dist))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36172 -0.18243 -0.04931  0.15247  0.69933 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.378491   0.032880  11.511   <2e-16 ***
## sqrt(data.cooc$phy.dist) -0.009062   0.003880  -2.336   0.0199 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.238 on 559 degrees of freedom
## Multiple R-squared:  0.009664,   Adjusted R-squared:  0.007893 
## F-statistic: 5.455 on 1 and 559 DF,  p-value: 0.01986

#diagnostic plots for cooc.z.phy.dist.lm
par(mfrow=c(2,2))
plot(phydist.gen.spec.lm)

# What do the residuals suggest?
hist(resid(phydist.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc$phy.dist), col="grey50", main="Uransformed data", xlab="Phylogenetic Distance")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16

shapiro.test(sqrt(data.cooc$phy.dist))    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16

#variance is alright

***

OLS of phylogenetic distance, niche width and co-occurrence

 #do lm of genspec by phy.dist and gen spec for entire data set

phydist.gen.spec.cooc.z.lm<-lm(data.cooc$cooc.z ~ data.cooc$gen.spec+sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.cooc.z.lm) #looks like

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec + sqrt(data.cooc$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2355 -1.6869 -0.7745  0.8120 12.2087 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              -0.49237    0.42856  -1.149  0.25109   
## data.cooc$gen.spec       -1.49514    0.49566  -3.016  0.00267 **
## sqrt(data.cooc$phy.dist)  0.09832    0.04569   2.152  0.03184 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.789 on 558 degrees of freedom
## Multiple R-squared:  0.02644,    Adjusted R-squared:  0.02295 
## F-statistic: 7.576 on 2 and 558 DF,  p-value: 0.0005672

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.cooc.z.lm)   #Q-Q plot is like a big snake;  variance is highest for middle values

par(opar)
# What do the residuals suggest?
hist(resid(phydist.gen.spec.cooc.z.lm)) #this is a little right skewed

#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")

hist(sqrt(data.cooc$phy.dist), col="grey50", main="Transformed data", xlab="Phylogenetic Distance")

# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

shapiro.test(sqrt(data.cooc$phy.dist))  #not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16

#normality and variance are not good

#check to see if there is an interaction - yes there is between phy.dist and gen.spec.

OLS of phylogenetic distance, niche width and co-occurrence with interaction

phydist.gen.spec.cooc.z.lm.int<-lm(data.cooc$cooc.z ~ data.cooc$gen.spec*sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.cooc.z.lm.int)

## 
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec * sqrt(data.cooc$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8298 -1.7764 -0.7972  0.9147 11.9889 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                                  1.29160    0.62080   2.081
## data.cooc$gen.spec                          -7.35617    1.57059  -4.684
## sqrt(data.cooc$phy.dist)                    -0.12690    0.07296  -1.739
## data.cooc$gen.spec:sqrt(data.cooc$phy.dist)  0.75860    0.19316   3.927
##                                             Pr(>|t|)    
## (Intercept)                                   0.0379 *  
## data.cooc$gen.spec                          3.54e-06 ***
## sqrt(data.cooc$phy.dist)                      0.0825 .  
## data.cooc$gen.spec:sqrt(data.cooc$phy.dist) 9.67e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.754 on 557 degrees of freedom
## Multiple R-squared:  0.05267,    Adjusted R-squared:  0.04757 
## F-statistic: 10.32 on 3 and 557 DF,  p-value: 1.275e-06

Darwin-Hutchinson zone

#####################do for same dataset, but for phy.dist <30
data.cooc.dh<-subset(data.cooc[data.cooc$phy.dist<30,])
head(data.cooc.dh)

##      gen.spec  phy.dist     cooc.z
## 6  0.27427996  9.008272 -4.6894724
## 15 0.06309177 14.509728  4.6917372
## 33 0.53791685 24.481046 -1.4696399
## 34 0.60100862 24.481046 -2.4803428
## 50 0.16159843 24.481046 -0.9417983
## 51 0.22469020 24.481046  0.3893929

OLS of cooccurrece and phylogenetic distance for phylogenetic distance <30 my

#lm of cooc.z and phy.dist
cooc.z.phy.dist.dh.lm<-lm(data.cooc.dh$cooc.z~sqrt(data.cooc.dh$phy.dist))
summary(cooc.z.phy.dist.dh.lm)

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1446 -1.3318 -0.4673  0.5589 12.1081 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                  -2.2158     1.2186  -1.818   0.0715 .
## sqrt(data.cooc.dh$phy.dist)   0.3722     0.2893   1.287   0.2007  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.61 on 120 degrees of freedom
## Multiple R-squared:  0.01361,    Adjusted R-squared:  0.005389 
## F-statistic: 1.656 on 1 and 120 DF,  p-value: 0.2007

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.dh.lm)

par(opar)
# What do the residuals suggest?
par(mfrow=c(2,2))
hist(resid(cooc.z.phy.dist.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(sqrt(data.cooc.dh$phy.dist),col="grey50", main="Transformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc.dh$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(sqrt(data.cooc.dh$phy.dist))   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10

shapiro.test(data.cooc.dh$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$cooc.z
## W = 0.8228, p-value = 8.239e-11

#variance is good; just need to fix normality
plot(sqrt(data.cooc.dh$phy.dist), data.cooc.dh$cooc.z)

***

OLS of co-occurrence and niche width for phylogenetic distance <30 my

#do lm of cooc.z by phy.dist for entire data set
cooc.z.gen.spec.dh.lm<-lm(data.cooc.dh$cooc.z~data.cooc.dh$gen.spec)
summary(cooc.z.gen.spec.dh.lm)

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1571 -1.5938 -0.4869  0.7882 11.6722 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             0.1875     0.3715   0.505  0.61461   
## data.cooc.dh$gen.spec  -2.6245     0.8858  -2.963  0.00368 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.537 on 120 degrees of freedom
## Multiple R-squared:  0.06817,    Adjusted R-squared:  0.0604 
## F-statistic: 8.778 on 1 and 120 DF,  p-value: 0.003677

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.dh.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc.dh$cooc.z, col="grey50", main="Untransformed data", xlab="Standardized Co-occurrence")

# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec)   #definiately not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16

shapiro.test(data.cooc$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16

#variance and normality are not good
plot(data.cooc.dh$gen.spec, data.cooc.dh$cooc.z)

***

OLS of Phydistance and niche width for phylogenetic distance less than 30 my

#do lm of genspec by phy.dist for entire data set
phydist.gen.spec.dh.lm<-lm(data.cooc.dh$gen.spec~sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.dh.lm)

## 
## Call:
## lm(formula = data.cooc.dh$gen.spec ~ sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33589 -0.23074 -0.03184  0.13845  0.66675 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                  0.28343    0.12198   2.324   0.0218 *
## sqrt(data.cooc.dh$phy.dist)  0.01117    0.02896   0.386   0.7003  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2612 on 120 degrees of freedom
## Multiple R-squared:  0.001239,   Adjusted R-squared:  -0.007084 
## F-statistic: 0.1489 on 1 and 120 DF,  p-value: 0.7003

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.dh.lm)

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")  # a little right skewed
hist(sqrt(data.cooc.dh$phy.dist), col="grey50", main="Untransformed data", xlab="Phylogenetic Distance")           #majorly left skewed

# Let's verify using the Shapiro test:
shapiro.test(data.cooc.dh$gen.spec) #good

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$gen.spec
## W = 0.9192, p-value = 1.827e-06

shapiro.test(sqrt(data.cooc.dh$phy.dist))    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10

#variance is not good

***

OLS of Phylogenetic distance, niche width, and co-occurrence for phylogenetic distance less than 30 my

 #do lm of genspec by phy.dist and gen spec for entire data set
phydist.gen.spec.cooc.z.dh.lm<-lm(data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec+sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.cooc.z.dh.lm) #looks like

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec + sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8256 -1.5670 -0.4213  0.7196 11.3810 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -1.4593     1.2055  -1.211  0.22848   
## data.cooc.dh$gen.spec        -2.6691     0.8825  -3.025  0.00305 **
## sqrt(data.cooc.dh$phy.dist)   0.4021     0.2801   1.435  0.15382   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.526 on 119 degrees of freedom
## Multiple R-squared:  0.08402,    Adjusted R-squared:  0.06863 
## F-statistic: 5.458 on 2 and 119 DF,  p-value: 0.005397

#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.cooc.z.dh.lm)   #Q-Q plot is like a big snake;  variance is highest for middle values  #variance goes downwards

par(opar)

par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.cooc.z.dh.lm)) #good
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc.dh$phy.dist), col="grey50", main="Uransformed data", xlab="Phylogenetic Distance")    #majorily left skewed

# Let's verify using the Shapiro test:
shapiro.test(data.cooc.dh$gen.spec)  #normal

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$gen.spec
## W = 0.9192, p-value = 1.827e-06

shapiro.test(data.cooc.dh$cooc.z)    #not normal either; however residuals no longer  a negative binomial or binomial relation

## 
##  Shapiro-Wilk normality test
## 
## data:  data.cooc.dh$cooc.z
## W = 0.8228, p-value = 8.239e-11

shapiro.test(sqrt(data.cooc.dh$phy.dist))  #not normal

## 
##  Shapiro-Wilk normality test
## 
## data:  sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10

#normality and variance are not good

***

OLS of interaction among phylogenetic distance, niche width, and co-occurrence for phylogenetic distance <30 my

phydist.gen.spec.cooc.z.dh.lm.int<-lm(data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec*sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.cooc.z.dh.lm.int)

## 
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec * sqrt(data.cooc.dh$phy.dist))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8669 -1.5452 -0.3620  0.6783 11.2057 
## 
## Coefficients:
##                                                   Estimate Std. Error
## (Intercept)                                        -2.8530     1.9211
## data.cooc.dh$gen.spec                               1.3289     4.3792
## sqrt(data.cooc.dh$phy.dist)                         0.7349     0.4540
## data.cooc.dh$gen.spec:sqrt(data.cooc.dh$phy.dist)  -0.9489     1.0180
##                                                   t value Pr(>|t|)
## (Intercept)                                        -1.485    0.140
## data.cooc.dh$gen.spec                               0.303    0.762
## sqrt(data.cooc.dh$phy.dist)                         1.619    0.108
## data.cooc.dh$gen.spec:sqrt(data.cooc.dh$phy.dist)  -0.932    0.353
## 
## Residual standard error: 2.527 on 118 degrees of freedom
## Multiple R-squared:  0.09072,    Adjusted R-squared:  0.0676 
## F-statistic: 3.924 on 3 and 118 DF,  p-value: 0.01036