#Read in vegetation matrix
#Read in vegetation matrix
veg<-read.csv("Carex_2013_stats_TE.csv", head=TRUE, row.names=1)
veg<-veg[c(1:201,222:701), c(1:3,5:7, 10:12,14:22,24:39)] #Remove all columns that do not respresent focal species so that every species has at least 20 representatives; also remove DEF and ALP
veg[veg>0]<-1
veg<-veg[, order(names(veg))]
##Step 1: Calculating the actual co-occurrence prevelences for the data set; this creates a species*species co-occurrence matrix
##import occurrence data (species as columns and site as rows, with 1=presence and 0=absence)
veg.m<-as.matrix(veg)##convert to matrix
veg.out<-t(veg.m) %*% veg.m##multiply the occurrence matrix by the transposed matrix
diag(veg.out)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#SUM<-colSums (veg.out, na.rm = FALSE, dims = 1)##calculate the total number of occurrences per species #this was in Liam's code, instead divide by number of plots included
#get upper.triangular matrix with names; then get a dataframe showing presence and absence of a particular combination;this is one way, but doesn't account for 0-0s
veg.out.ind <- which( upper.tri(veg.out,diag=F) , arr.ind = TRUE )
veg.out.df<-data.frame( col = dimnames(veg.out)[[2]][veg.out.ind[,2]] ,
row = dimnames(veg.out)[[1]][veg.out.ind[,1]] ,
val = veg.out[ veg.out.ind ] )
veg.out.df.neg<-cbind(veg.out.df,661-veg.out.df$val)
colSums(veg)
## ALP ANG AQU BIG BRU CAN CAP CES CHO DIS ECH EVA EXI GYN HEL LEN LEP LIM
## 76 57 218 51 28 74 27 189 46 31 56 25 29 77 26 29 79 167
## LIV MAG OLI PAU RAR ROS RUS SAX SCI STY TEN TRI UTR VAG VES VIR
## 77 201 40 75 60 66 62 33 39 30 61 60 66 143 39 38
veg.out.df.neg<-cbind(veg.out.df,681-veg.out.df$val)
#make upper triangular co-occurrence matrix; include 1:1 presence; 1:0 and 0:1 presence; and 0:0 presence
veg.out.df.neg.col<-revalue(veg.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))
## The following `from` values were not present in `x`: ALP
veg.out.df.neg.row<-revalue(veg.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))
## The following `from` values were not present in `x`: VIR
veg.out.df.neg.col.num<-as.numeric(as.character(veg.out.df.neg.col))
veg.out.df.neg.row.num<-as.numeric(as.character(veg.out.df.neg.row))
#calculate presence-absence/absence-presence; without absence-absence and presence-presence for each species pair
veg.out.df.neg.row.col<-(veg.out.df.neg.col.num-veg.out.df[,3])+(veg.out.df.neg.row.num-veg.out.df[,3])
#calculate presence-absence/absence-presence; without absence-absence and presence-presence for each species pair
veg.out.df.neg.row.col<-(veg.out.df.neg.col.num-veg.out.df[,3])+(veg.out.df.neg.row.num-veg.out.df[,3])
#use permiswap from Vegan package for randomization and creation of null model
set.seed(4)## set the seed for randomization
veg.rand<-permatswap(veg, method="quasiswap", fixedmar="both",times = 999, burnin = 20000, thin = 500, mtype = "prab") ##shuffle the occurrence matrix keeping both row and colsums equal
#get 999 transposed matrices from veg.rand$perm[[i]]
#create 999 empty matrices; fill them all with i veg.rand$perm transposed; get sd for each orf the 528 entries
veg.rand.perm<-veg.rand$perm
veg.rand<-lapply(1:999, function(x) matrix(0, 34,34))
for (i in 1:length(veg.rand)) { #loop over 999 matrices in veg.rand.perm
for (j in 1:length(veg.rand.perm)){
veg.rand[[j]]<-t(veg.rand.perm[[j]]) %*% veg.rand.perm[[j]]
}
}
#calculate mean across 999 different matrices for each indexed position
fulldim<-34*34
meanmat<-matrix(nrow=34,ncol=34)
meanmat[1:1156]<-
apply(as.matrix(1:fulldim), 1,function(y) mean(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (33*33 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
head(meanmat)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 76.000000 6.471471 24.158158 5.749750 3.110110 8.319319 3.079079
## [2,] 6.471471 57.000000 18.222222 4.282282 2.360360 6.166166 2.348348
## [3,] 24.158158 18.222222 218.000000 16.260260 8.958959 23.624625 8.658659
## [4,] 5.749750 4.282282 16.260260 51.000000 2.074074 5.519520 1.986987
## [5,] 3.110110 2.360360 8.958959 2.074074 28.000000 2.983984 1.187187
## [6,] 8.319319 6.166166 23.624625 5.519520 2.983984 74.000000 3.049049
## [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 21.035035 5.328328 3.511512 6.246246 2.792793 3.196196 8.697698
## [2,] 15.686687 3.931932 2.702703 4.744745 2.105105 2.483483 6.537538
## [3,] 60.200200 14.657658 9.838839 17.801802 8.019019 9.158158 24.721722
## [4,] 14.289289 3.437437 2.377377 4.176176 1.902903 2.136136 5.798799
## [5,] 7.686687 1.913914 1.287287 2.310310 0.969970 1.185185 3.129129
## [6,] 20.523524 4.897898 3.419419 6.141141 2.800801 3.110110 8.382382
## [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## [1,] 2.872873 3.351351 8.924925 18.640641 8.392392 22.304304 4.354354
## [2,] 2.199199 2.489489 6.579580 13.875876 6.438438 16.806807 3.309309
## [3,] 8.315315 9.307307 25.105105 53.127127 24.732733 63.631632 12.803804
## [4,] 1.938939 2.164164 5.992993 12.388388 5.829830 15.144144 2.919920
## [5,] 1.081081 1.218218 3.211211 6.831832 3.125125 8.291291 1.608609
## [6,] 2.744745 3.260260 8.653654 17.952953 8.295295 21.718719 4.377377
## [,22] [,23] [,24] [,25] [,26] [,27] [,28]
## [1,] 8.445445 6.707708 7.430430 6.897898 3.622623 4.343343 3.429429
## [2,] 6.148148 5.059059 5.554555 5.149149 2.802803 3.201201 2.558559
## [3,] 23.933934 19.029029 21.163163 19.597598 10.540541 12.522523 9.637638
## [4,] 5.673674 4.476476 4.940941 4.604605 2.521522 2.912913 2.288288
## [5,] 3.085085 2.537538 2.674675 2.581582 1.408408 1.708709 1.234234
## [6,] 8.195195 6.613614 7.168168 6.795796 3.599600 4.222222 3.211211
## [,29] [,30] [,31] [,32] [,33] [,34]
## [1,] 6.818819 6.927928 7.409409 16.076076 4.285285 4.329329
## [2,] 4.995996 5.132132 5.580581 12.123123 3.180180 3.213213
## [3,] 19.467467 19.114114 20.976977 45.343343 12.271271 12.265265
## [4,] 4.657658 4.529530 5.013013 10.615616 2.877878 2.890891
## [5,] 2.573574 2.449449 2.682683 5.932933 1.599600 1.622623
## [6,] 6.531532 6.592593 7.220220 15.411411 4.248248 4.097097
#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
meanmat.out.ind <- which( upper.tri(meanmat,diag=F) , arr.ind = TRUE )
meanmat.out.df<-data.frame( col = dimnames(veg.out)[[2]][meanmat.out.ind[,2]] ,
row = dimnames(veg.out)[[1]][meanmat.out.ind[,1]] ,
val = meanmat[ meanmat.out.ind ] )
#Calculate plots that mean species data aren't in
meanmat.out.df.neg<-cbind(meanmat.out.df,681-meanmat.out.df$val)
#find how many plots either one oor the other species in, but not both
meanmat.out.df.neg.col<-revalue(meanmat.out.df.neg$col, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))
## The following `from` values were not present in `x`: ALP
meanmat.out.df.neg.row<-revalue(meanmat.out.df.neg$row, c("ALP"=76, "AQU"=218,"BIG"=51, "BRU"=28, "CAP"=27, "CAN"=74, "CHO"=46, "DIS"=31, "ECH"=56, "EXI"=29, "GYN"=77, "HEL"=26, "LEN"=29, "LEP"=79, "LIM"=155, "LIV"=68,
"OLI"=40, "PAU"=74, "MAG"=201, "RAR"=60, "ROS"=66, "SAX"=33, "SCI"=39, "STY"=30, "TEN"=61, "TRI"=60, "UTR"=66, "VAG"=143, "VES"=39, "ANG"=57, "EVA"=25, "RUS"=62, "VIR"=38, "CES"=189))
## The following `from` values were not present in `x`: VIR
meanmat.out.df.neg.col.num<-as.numeric(as.character(meanmat.out.df.neg.col))
meanmat.out.df.neg.row.num<-as.numeric(as.character(meanmat.out.df.neg.row))
meanmat.out.df.neg.row.col<-(meanmat.out.df.neg.col.num+meanmat.out.df.neg.row.num)-meanmat.out.df[,3]
#calculate sd across 999 different matrices for each position
fulldim<-34*34
sdmat<-matrix(nrow=34,ncol=34)
sdmat[1:1156]<-
apply(as.matrix(1:fulldim), 1,function(y) sd(unlist(lapply(veg.rand,function(x) x[y]))))
#the 1:9 is gonna be 1089 (34*34 = 1089)
#y is the index (so 1:1089, a.k.a, [1,1],[1,2] etc
#x is the list index (1 to 999)
#x[y] is list x, element y
head(sdmat)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.000000 2.286787 3.898385 2.120479 1.632550 2.592794 1.662010
## [2,] 2.286787 0.000000 3.384313 1.811895 1.420671 2.261115 1.432088
## [3,] 3.898385 3.384313 0.000000 3.254153 2.435813 3.801201 2.359605
## [4,] 2.120479 1.811895 3.254153 0.000000 1.363906 2.220638 1.320727
## [5,] 1.632550 1.420671 2.435813 1.363906 0.000000 1.495398 1.047947
## [6,] 2.592794 2.261115 3.801201 2.220638 1.495398 0.000000 1.616733
## [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 3.842304 2.039762 1.789979 2.199796 1.5273420 1.599582 2.613113
## [2,] 3.302840 1.814927 1.565760 2.080039 1.3256994 1.475748 2.305857
## [3,] 5.434983 2.987435 2.631521 3.288523 2.3562135 2.357143 3.797299
## [4,] 3.063685 1.727950 1.441139 1.827477 1.2468725 1.404076 2.213223
## [5,] 2.301636 1.302301 1.076507 1.391285 0.9596567 1.026083 1.619750
## [6,] 3.681073 1.974686 1.719645 2.205636 1.5856822 1.618371 2.641189
## [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## [1,] 1.5117625 1.725001 2.711101 3.551509 2.545022 3.639778 2.029975
## [2,] 1.4287905 1.518635 2.260895 3.061594 2.239194 3.332555 1.702710
## [3,] 2.2520700 2.496622 3.686074 5.222394 3.858941 5.484213 2.925177
## [4,] 1.2831868 1.359697 2.255466 2.945957 2.161114 3.182373 1.571156
## [5,] 0.9871075 1.060508 1.664324 2.212586 1.558916 2.438879 1.208672
## [6,] 1.4974628 1.688797 2.580597 3.603159 2.520435 3.686617 1.907355
## [,22] [,23] [,24] [,25] [,26] [,27] [,28]
## [1,] 2.607753 2.324103 2.390050 2.367868 1.780187 1.934611 1.763234
## [2,] 2.353115 2.066411 2.141483 2.050655 1.632631 1.717107 1.450010
## [3,] 3.828440 3.369996 3.567613 3.427788 2.643878 2.738291 2.486602
## [4,] 2.172800 1.927337 2.049858 2.103728 1.497924 1.547324 1.448160
## [5,] 1.620191 1.494258 1.435057 1.554972 1.138403 1.255143 1.076805
## [6,] 2.580747 2.262098 2.409015 2.397047 1.770341 1.873416 1.662517
## [,29] [,30] [,31] [,32] [,33] [,34]
## [1,] 2.345038 2.413807 2.464081 3.240249 1.913902 1.868055
## [2,] 2.077165 2.101283 2.150934 2.959285 1.670673 1.673076
## [3,] 3.440402 3.404728 3.612557 4.897674 2.882725 2.868652
## [4,] 2.007730 1.928813 2.169850 2.755646 1.516476 1.595053
## [5,] 1.462402 1.432935 1.563746 2.120259 1.216882 1.265675
## [6,] 2.357089 2.400147 2.471390 3.158776 1.884552 1.797789
#get upper.triangular matrix with names for mean; then get a dataframe showing presence and absence of a particular combination;this is one way, but does not account for 0-0s
sd.out.ind <- which( upper.tri(sdmat,diag=F) , arr.ind = TRUE )
sd.out.df<-data.frame( col = dimnames(veg.out)[[2]][sd.out.ind[,2]] ,
row = dimnames(veg.out)[[1]][sd.out.ind[,1]] ,
val = sdmat[ sd.out.ind ] )
#Calculate z cooccurrence scores for each species pair
#1. make a data frame with appropriate data
cooc.z.info<-cbind(veg.out.df, meanmat.out.df,sd.out.df)
cooc.z.comb<-(cooc.z.info[,3]-cooc.z.info[,6])/cooc.z.info[,9]
#get the cooccurrence.z list with names
cooc.z<-cbind(cooc.z.info[,c(1:2)], cooc.z.comb)
#Cyperaceae.trees<-read.nexus("Sedges_april30.nex")
#Cyperaceae.samp<-sample(Cyperaceae.trees, 100, replace = FALSE, prob = NULL)
Cyperaceae.tree<-read.nexus("Sedges_april30_comb.nex")
#calculate phy.dist for all 100 random trees
#phy.dist<- lapply(Cyperaceae.samp, cophenetic)
#drop non-carex species from tree and those not focal species: exclude C. deflexa, but include T. alpinum
pruned.cyp<-drop.tip(Cyperaceae.tree,c( "DEF", "JARC", "JTRI", "ACI", "ARC","ATR", "BRA","BUX","CAT", "CVI", "DIA", "FOE","GAR","GLA","LAC","MED","NAR","NIT","WIL"))
pruned.cyp
##
## Phylogenetic tree with 34 tips and 33 internal nodes.
##
## Tip labels:
## ALP, ANG, AQU, BIG, BRU, CAN, ...
##
## Rooted; includes branch lengths.
phy.dist.cyp<-cophenetic(pruned.cyp)
phy.dist.ind <- which( upper.tri(phy.dist.cyp,diag=F) , arr.ind = TRUE )
phy.dist.df<-data.frame( col = dimnames(phy.dist.cyp)[[2]][phy.dist.ind[,2]] ,
row = dimnames(phy.dist.cyp)[[1]][phy.dist.ind[,1]] ,
val = phy.dist.cyp[ phy.dist.ind ] )
#load inverse of habitat generalist/specialist data
gen.spec<-read.csv("H.inv.rescale.no.def.allplots.csv", row.names=1, head=TRUE)
gen.spec.sp<-rownames(gen.spec)
#diag(final.h.inv)<-0##set self-co-occurrence to zero (technically NA, but this simplifies the procedure)
#write.csv(final.h.inv, "Sedges2_final.h.inv.nodef.matrix.nonames.csv")
final.h.inv.names<-read.csv("Sedges2_final.h.inv.nodef.matrix.nonames.csv", row.names=1, head=TRUE)
#remove cases when col1 name = col2 name
h.rows = apply(final.h.inv.names[, c(1:2)], 1, function(i) length(unique(i)) > 1)
h.inv.ind <- which( upper.tri(final.h.inv.names,diag=F) , arr.ind = TRUE )
h.inv.df<-data.frame( col = dimnames(final.h.inv.names)[[2]][h.inv.ind[,2]] ,
row = dimnames(final.h.inv.names)[[1]][h.inv.ind[,1]] ,
val = final.h.inv.names[ h.inv.ind ] )
#write.csv(h.inv.df, "Sedges2_final.h.inv.nodef.matrix.nonames.noselfs.csv")
final.h.inv.names.noself<-read.csv("Sedges2_final.h.inv.nodef.matrix.nonames.noselfs.csv", row.names=1, head=TRUE)
final.h.inv<-final.h.inv.names.noself
data.cooc<-cbind(final.h.inv, phy.dist.df, cooc.z)
data.cooc<-data.cooc[,c(3,6,9)]
colnames(data.cooc)<-c("gen.spec", "phy.dist", "cooc.z")
#lm of cooc.z and phy.dist
cooc.z.phy.dist.lm<-lm(data.cooc$cooc.z~sqrt(data.cooc$phy.dist))
summary(cooc.z.phy.dist.lm)
##
## Call:
## lm(formula = data.cooc$cooc.z ~ sqrt(data.cooc$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7487 -1.7057 -0.8179 0.6467 12.4636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0583 0.3881 -2.727 0.0066 **
## sqrt(data.cooc$phy.dist) 0.1119 0.0458 2.443 0.0149 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.81 on 559 degrees of freedom
## Multiple R-squared: 0.01056, Adjusted R-squared: 0.00879
## F-statistic: 5.966 on 1 and 559 DF, p-value: 0.01489
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.lm)
par(opar)
# What do the residuals suggest
par(mfrow=c(2,2))
hist(resid(cooc.z.phy.dist.lm), col="grey50") #this is a little right skewed
#Are the data normally distributed
hist(sqrt(data.cooc$phy.dist),col="grey50", main="Transformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(sqrt(data.cooc$phy.dist)) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
#variance is good; just need to fix normality
plot(sqrt(data.cooc$phy.dist), data.cooc$cooc.z)
abline(cooc.z.phy.dist.lm)
***
#do lm of gen.spec by phy.dist for entire data set
cooc.z.gen.spec.lm<-lm(data.cooc$cooc.z~data.cooc$gen.spec)
summary(cooc.z.gen.spec.lm)
##
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3892 -1.6934 -0.8110 0.8703 12.0012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3329 0.1918 1.736 0.0832 .
## data.cooc$gen.spec -1.6000 0.4949 -3.233 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.798 on 559 degrees of freedom
## Multiple R-squared: 0.01836, Adjusted R-squared: 0.0166
## F-statistic: 10.45 on 1 and 559 DF, p-value: 0.001296
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.lm)
par(opar)
par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
#variance is alright; just need to fix normality
plot(data.cooc$gen.spec, data.cooc$cooc.z)
abline(cooc.z.gen.spec.lm)
***
#do lm of genspec by phy.dist for entire data set
phydist.gen.spec.lm<-lm(data.cooc$gen.spec~sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.lm)
##
## Call:
## lm(formula = data.cooc$gen.spec ~ sqrt(data.cooc$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36172 -0.18243 -0.04931 0.15247 0.69933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.378491 0.032880 11.511 <2e-16 ***
## sqrt(data.cooc$phy.dist) -0.009062 0.003880 -2.336 0.0199 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.238 on 559 degrees of freedom
## Multiple R-squared: 0.009664, Adjusted R-squared: 0.007893
## F-statistic: 5.455 on 1 and 559 DF, p-value: 0.01986
#diagnostic plots for cooc.z.phy.dist.lm
par(mfrow=c(2,2))
plot(phydist.gen.spec.lm)
# What do the residuals suggest?
hist(resid(phydist.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc$phy.dist), col="grey50", main="Uransformed data", xlab="Phylogenetic Distance")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16
shapiro.test(sqrt(data.cooc$phy.dist)) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16
#variance is alright
***
#do lm of genspec by phy.dist and gen spec for entire data set
phydist.gen.spec.cooc.z.lm<-lm(data.cooc$cooc.z ~ data.cooc$gen.spec+sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.cooc.z.lm) #looks like
##
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec + sqrt(data.cooc$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2355 -1.6869 -0.7745 0.8120 12.2087
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.49237 0.42856 -1.149 0.25109
## data.cooc$gen.spec -1.49514 0.49566 -3.016 0.00267 **
## sqrt(data.cooc$phy.dist) 0.09832 0.04569 2.152 0.03184 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.789 on 558 degrees of freedom
## Multiple R-squared: 0.02644, Adjusted R-squared: 0.02295
## F-statistic: 7.576 on 2 and 558 DF, p-value: 0.0005672
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.cooc.z.lm) #Q-Q plot is like a big snake; variance is highest for middle values
par(opar)
# What do the residuals suggest?
hist(resid(phydist.gen.spec.cooc.z.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc$phy.dist), col="grey50", main="Transformed data", xlab="Phylogenetic Distance")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
shapiro.test(sqrt(data.cooc$phy.dist)) #not normal
##
## Shapiro-Wilk normality test
##
## data: sqrt(data.cooc$phy.dist)
## W = 0.8772, p-value < 2.2e-16
#normality and variance are not good
#check to see if there is an interaction - yes there is between phy.dist and gen.spec.
phydist.gen.spec.cooc.z.lm.int<-lm(data.cooc$cooc.z ~ data.cooc$gen.spec*sqrt(data.cooc$phy.dist))
summary(phydist.gen.spec.cooc.z.lm.int)
##
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec * sqrt(data.cooc$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8298 -1.7764 -0.7972 0.9147 11.9889
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.29160 0.62080 2.081
## data.cooc$gen.spec -7.35617 1.57059 -4.684
## sqrt(data.cooc$phy.dist) -0.12690 0.07296 -1.739
## data.cooc$gen.spec:sqrt(data.cooc$phy.dist) 0.75860 0.19316 3.927
## Pr(>|t|)
## (Intercept) 0.0379 *
## data.cooc$gen.spec 3.54e-06 ***
## sqrt(data.cooc$phy.dist) 0.0825 .
## data.cooc$gen.spec:sqrt(data.cooc$phy.dist) 9.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.754 on 557 degrees of freedom
## Multiple R-squared: 0.05267, Adjusted R-squared: 0.04757
## F-statistic: 10.32 on 3 and 557 DF, p-value: 1.275e-06
#####################do for same dataset, but for phy.dist <30
data.cooc.dh<-subset(data.cooc[data.cooc$phy.dist<30,])
head(data.cooc.dh)
## gen.spec phy.dist cooc.z
## 6 0.27427996 9.008272 -4.6894724
## 15 0.06309177 14.509728 4.6917372
## 33 0.53791685 24.481046 -1.4696399
## 34 0.60100862 24.481046 -2.4803428
## 50 0.16159843 24.481046 -0.9417983
## 51 0.22469020 24.481046 0.3893929
#lm of cooc.z and phy.dist
cooc.z.phy.dist.dh.lm<-lm(data.cooc.dh$cooc.z~sqrt(data.cooc.dh$phy.dist))
summary(cooc.z.phy.dist.dh.lm)
##
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ sqrt(data.cooc.dh$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1446 -1.3318 -0.4673 0.5589 12.1081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2158 1.2186 -1.818 0.0715 .
## sqrt(data.cooc.dh$phy.dist) 0.3722 0.2893 1.287 0.2007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.61 on 120 degrees of freedom
## Multiple R-squared: 0.01361, Adjusted R-squared: 0.005389
## F-statistic: 1.656 on 1 and 120 DF, p-value: 0.2007
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.dh.lm)
par(opar)
# What do the residuals suggest?
par(mfrow=c(2,2))
hist(resid(cooc.z.phy.dist.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(sqrt(data.cooc.dh$phy.dist),col="grey50", main="Transformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc.dh$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(sqrt(data.cooc.dh$phy.dist)) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10
shapiro.test(data.cooc.dh$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc.dh$cooc.z
## W = 0.8228, p-value = 8.239e-11
#variance is good; just need to fix normality
plot(sqrt(data.cooc.dh$phy.dist), data.cooc.dh$cooc.z)
***
#do lm of cooc.z by phy.dist for entire data set
cooc.z.gen.spec.dh.lm<-lm(data.cooc.dh$cooc.z~data.cooc.dh$gen.spec)
summary(cooc.z.gen.spec.dh.lm)
##
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1571 -1.5938 -0.4869 0.7882 11.6722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1875 0.3715 0.505 0.61461
## data.cooc.dh$gen.spec -2.6245 0.8858 -2.963 0.00368 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.537 on 120 degrees of freedom
## Multiple R-squared: 0.06817, Adjusted R-squared: 0.0604
## F-statistic: 8.778 on 1 and 120 DF, p-value: 0.003677
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.dh.lm)
par(opar)
par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc.dh$cooc.z, col="grey50", main="Untransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
#variance and normality are not good
plot(data.cooc.dh$gen.spec, data.cooc.dh$cooc.z)
***
#do lm of genspec by phy.dist for entire data set
phydist.gen.spec.dh.lm<-lm(data.cooc.dh$gen.spec~sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.dh.lm)
##
## Call:
## lm(formula = data.cooc.dh$gen.spec ~ sqrt(data.cooc.dh$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33589 -0.23074 -0.03184 0.13845 0.66675
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.28343 0.12198 2.324 0.0218 *
## sqrt(data.cooc.dh$phy.dist) 0.01117 0.02896 0.386 0.7003
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2612 on 120 degrees of freedom
## Multiple R-squared: 0.001239, Adjusted R-squared: -0.007084
## F-statistic: 0.1489 on 1 and 120 DF, p-value: 0.7003
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.dh.lm)
par(opar)
par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.dh.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism") # a little right skewed
hist(sqrt(data.cooc.dh$phy.dist), col="grey50", main="Untransformed data", xlab="Phylogenetic Distance") #majorly left skewed
# Let's verify using the Shapiro test:
shapiro.test(data.cooc.dh$gen.spec) #good
##
## Shapiro-Wilk normality test
##
## data: data.cooc.dh$gen.spec
## W = 0.9192, p-value = 1.827e-06
shapiro.test(sqrt(data.cooc.dh$phy.dist)) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10
#variance is not good
***
#do lm of genspec by phy.dist and gen spec for entire data set
phydist.gen.spec.cooc.z.dh.lm<-lm(data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec+sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.cooc.z.dh.lm) #looks like
##
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec + sqrt(data.cooc.dh$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8256 -1.5670 -0.4213 0.7196 11.3810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.4593 1.2055 -1.211 0.22848
## data.cooc.dh$gen.spec -2.6691 0.8825 -3.025 0.00305 **
## sqrt(data.cooc.dh$phy.dist) 0.4021 0.2801 1.435 0.15382
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.526 on 119 degrees of freedom
## Multiple R-squared: 0.08402, Adjusted R-squared: 0.06863
## F-statistic: 5.458 on 2 and 119 DF, p-value: 0.005397
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(phydist.gen.spec.cooc.z.dh.lm) #Q-Q plot is like a big snake; variance is highest for middle values #variance goes downwards
par(opar)
par(mfrow=c(2,2))
# What do the residuals suggest?
hist(resid(phydist.gen.spec.cooc.z.dh.lm)) #good
#Are the data normally distributed
hist(data.cooc.dh$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(sqrt(data.cooc.dh$phy.dist), col="grey50", main="Uransformed data", xlab="Phylogenetic Distance") #majorily left skewed
# Let's verify using the Shapiro test:
shapiro.test(data.cooc.dh$gen.spec) #normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc.dh$gen.spec
## W = 0.9192, p-value = 1.827e-06
shapiro.test(data.cooc.dh$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc.dh$cooc.z
## W = 0.8228, p-value = 8.239e-11
shapiro.test(sqrt(data.cooc.dh$phy.dist)) #not normal
##
## Shapiro-Wilk normality test
##
## data: sqrt(data.cooc.dh$phy.dist)
## W = 0.8502, p-value = 9.04e-10
#normality and variance are not good
***
phydist.gen.spec.cooc.z.dh.lm.int<-lm(data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec*sqrt(data.cooc.dh$phy.dist))
summary(phydist.gen.spec.cooc.z.dh.lm.int)
##
## Call:
## lm(formula = data.cooc.dh$cooc.z ~ data.cooc.dh$gen.spec * sqrt(data.cooc.dh$phy.dist))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8669 -1.5452 -0.3620 0.6783 11.2057
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.8530 1.9211
## data.cooc.dh$gen.spec 1.3289 4.3792
## sqrt(data.cooc.dh$phy.dist) 0.7349 0.4540
## data.cooc.dh$gen.spec:sqrt(data.cooc.dh$phy.dist) -0.9489 1.0180
## t value Pr(>|t|)
## (Intercept) -1.485 0.140
## data.cooc.dh$gen.spec 0.303 0.762
## sqrt(data.cooc.dh$phy.dist) 1.619 0.108
## data.cooc.dh$gen.spec:sqrt(data.cooc.dh$phy.dist) -0.932 0.353
##
## Residual standard error: 2.527 on 118 degrees of freedom
## Multiple R-squared: 0.09072, Adjusted R-squared: 0.0676
## F-statistic: 3.924 on 3 and 118 DF, p-value: 0.01036