#saveRDS(data.cooc, file="cooc.z.data.rds")
data.cooc<-readRDS("cooc.z.data.rds")
head(data.cooc)
## gen.spec phy.dist cooc.z
## 1 0.047371556 78.891213 -2.39264565
## 2 0.274031336 121.972198 5.85930884
## 3 0.226659781 121.972198 -0.06566243
## 4 0.000248625 121.972198 -2.71153372
## 5 0.047620180 121.972198 -0.70770244
## 6 0.274279961 9.008272 -4.68947236
phy.dist.sqrt<-sqrt(data.cooc$phy.dist)
head(phy.dist.sqrt)
## [1] 8.882073 11.044102 11.044102 11.044102 11.044102 3.001378
#Compare with OLS models
#do lm of cooc.z by gen.spec for entire data set
cooc.z.gen.spec.lm<-lm(data.cooc$cooc.z~data.cooc$gen.spec)
summary(cooc.z.gen.spec.lm)
##
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3892 -1.6934 -0.8110 0.8703 12.0012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3329 0.1918 1.736 0.0832 .
## data.cooc$gen.spec -1.6000 0.4949 -3.233 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.798 on 559 degrees of freedom
## Multiple R-squared: 0.01836, Adjusted R-squared: 0.0166
## F-statistic: 10.45 on 1 and 559 DF, p-value: 0.001296
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.lm)
par(opar)
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
#lm of cooc.z and phy.dist
cooc.z.phy.dist.lm<-lm(data.cooc$cooc.z~phy.dist.sqrt)
summary(cooc.z.phy.dist.lm)
##
## Call:
## lm(formula = data.cooc$cooc.z ~ phy.dist.sqrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7487 -1.7057 -0.8179 0.6467 12.4636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0583 0.3881 -2.727 0.0066 **
## phy.dist.sqrt 0.1119 0.0458 2.443 0.0149 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.81 on 559 degrees of freedom
## Multiple R-squared: 0.01056, Adjusted R-squared: 0.00879
## F-statistic: 5.966 on 1 and 559 DF, p-value: 0.01489
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.lm)
par(opar)
# What do the residuals suggest?
hist(resid(cooc.z.phy.dist.lm),col="grey50" ) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$phy.dist,col="grey50", main="Untransformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(phy.dist.sqrt) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: phy.dist.sqrt
## W = 0.8772, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
#variance is good; just need to fix normality
#OLS models with interaction term
#do lm of cooc.z by gen.spec and phy.dist in interaction term
cooc.z.gen.spec.phy.dist.lm<-lm(data.cooc$cooc.z~data.cooc$gen.spec*phy.dist.sqrt)
summary(cooc.z.gen.spec.phy.dist.lm)
##
## Call:
## lm(formula = data.cooc$cooc.z ~ data.cooc$gen.spec * phy.dist.sqrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8298 -1.7764 -0.7972 0.9147 11.9889
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29160 0.62080 2.081 0.0379 *
## data.cooc$gen.spec -7.35617 1.57059 -4.684 3.54e-06 ***
## phy.dist.sqrt -0.12690 0.07296 -1.739 0.0825 .
## data.cooc$gen.spec:phy.dist.sqrt 0.75860 0.19316 3.927 9.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.754 on 557 degrees of freedom
## Multiple R-squared: 0.05267, Adjusted R-squared: 0.04757
## F-statistic: 10.32 on 3 and 557 DF, p-value: 1.275e-06
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.phy.dist.lm)
par(opar)
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.phy.dist.lm)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
hist(phy.dist.sqrt,col="grey50", main="Uransformed data", xlab="Phylogenetic distance (sqrt)")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc$gen.spec
## W = 0.921, p-value < 2.2e-16
shapiro.test(data.cooc$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc$cooc.z
## W = 0.8524, p-value < 2.2e-16
shapiro.test(phy.dist.sqrt)
##
## Shapiro-Wilk normality test
##
## data: phy.dist.sqrt
## W = 0.8772, p-value < 2.2e-16
#####################do for same dataset, but remove phy.dist of 30 my
data.cooc.30<-subset(data.cooc[data.cooc$phy.dist<30,])
phy.dist.30.sqrt<-sqrt(data.cooc.30$phy.dist)
gen.spec.30<-data.cooc.30$gen.spec
cooc.z.30<-data.cooc.30$cooc.z
head(data.cooc.30)
## gen.spec phy.dist cooc.z
## 6 0.27427996 9.008272 -4.6894724
## 15 0.06309177 14.509728 4.6917372
## 33 0.53791685 24.481046 -1.4696399
## 34 0.60100862 24.481046 -2.4803428
## 50 0.16159843 24.481046 -0.9417983
## 51 0.22469020 24.481046 0.3893929
#Compare with OLS models
#do lm of cooc.z by gen.spec for entire data set
cooc.z.gen.spec.lm.30<-lm(data.cooc.30$cooc.z~data.cooc.30$gen.spec)
summary(cooc.z.gen.spec.lm.30)
##
## Call:
## lm(formula = data.cooc.30$cooc.z ~ data.cooc.30$gen.spec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1571 -1.5938 -0.4869 0.7882 11.6722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1875 0.3715 0.505 0.61461
## data.cooc.30$gen.spec -2.6245 0.8858 -2.963 0.00368 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.537 on 120 degrees of freedom
## Multiple R-squared: 0.06817, Adjusted R-squared: 0.0604
## F-statistic: 8.778 on 1 and 120 DF, p-value: 0.003677
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.lm.30)
par(opar)
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.lm.30)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.30$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc.30$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc.30$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc.30$gen.spec
## W = 0.9192, p-value = 1.827e-06
shapiro.test(data.cooc.30$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc.30$cooc.z
## W = 0.8228, p-value = 8.239e-11
#lm of cooc.z and phy.dist
cooc.z.phy.dist.lm.30<-lm(data.cooc.30$cooc.z~phy.dist.30.sqrt)
summary(cooc.z.phy.dist.lm.30)
##
## Call:
## lm(formula = data.cooc.30$cooc.z ~ phy.dist.30.sqrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1446 -1.3318 -0.4673 0.5589 12.1081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2158 1.2186 -1.818 0.0715 .
## phy.dist.30.sqrt 0.3722 0.2893 1.287 0.2007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.61 on 120 degrees of freedom
## Multiple R-squared: 0.01361, Adjusted R-squared: 0.005389
## F-statistic: 1.656 on 1 and 120 DF, p-value: 0.2007
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.phy.dist.lm.30)
par(opar)
# What do the residuals suggest?
hist(resid(cooc.z.phy.dist.lm.30),col="grey50" ) #this is a little right skewed
#Are the data normally distributed
hist(phy.dist.30.sqrt,col="grey50", main="Untransformed data", xlab="Pairwise phylogenetic distance")
hist(data.cooc.30$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
# Let's verify using the Shapiro test:
shapiro.test(phy.dist.30.sqrt) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: phy.dist.30.sqrt
## W = 0.8502, p-value = 9.04e-10
shapiro.test(data.cooc.30$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc.30$cooc.z
## W = 0.8228, p-value = 8.239e-11
#variance is good; just need to fix normality
#do lm of cooc.z by gen.spec*phy.dist.sqrt for entire data set
cooc.z.gen.spec.phy.dist.lm.30<-lm(data.cooc.30$cooc.z~data.cooc.30$gen.spec*phy.dist.30.sqrt)
summary(cooc.z.gen.spec.phy.dist.lm.30)
##
## Call:
## lm(formula = data.cooc.30$cooc.z ~ data.cooc.30$gen.spec * phy.dist.30.sqrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8669 -1.5452 -0.3620 0.6783 11.2057
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -2.8530 1.9211 -1.485
## data.cooc.30$gen.spec 1.3289 4.3792 0.303
## phy.dist.30.sqrt 0.7349 0.4540 1.619
## data.cooc.30$gen.spec:phy.dist.30.sqrt -0.9489 1.0180 -0.932
## Pr(>|t|)
## (Intercept) 0.140
## data.cooc.30$gen.spec 0.762
## phy.dist.30.sqrt 0.108
## data.cooc.30$gen.spec:phy.dist.30.sqrt 0.353
##
## Residual standard error: 2.527 on 118 degrees of freedom
## Multiple R-squared: 0.09072, Adjusted R-squared: 0.0676
## F-statistic: 3.924 on 3 and 118 DF, p-value: 0.01036
#diagnostic plots for cooc.z.phy.dist.lm
opar <- par(mfrow=c(2,2))
plot(cooc.z.gen.spec.phy.dist.lm.30)
par(opar)
# What do the residuals suggest?
hist(resid(cooc.z.gen.spec.phy.dist.lm.30)) #this is a little right skewed
#Are the data normally distributed
hist(data.cooc.30$gen.spec,col="grey50", main="Untransformed data", xlab="Generalism-Specialism")
hist(data.cooc.30$cooc.z, col="grey50", main="Uransformed data", xlab="Standardized Co-occurrence")
hist(phy.dist.30.sqrt,col="grey50", main="Transformed data", xlab="Phylogenetic distance (square root)")
# Let's verify using the Shapiro test:
shapiro.test(data.cooc.30$gen.spec) #definiately not normal
##
## Shapiro-Wilk normality test
##
## data: data.cooc.30$gen.spec
## W = 0.9192, p-value = 1.827e-06
shapiro.test(data.cooc.30$cooc.z) #not normal either; however residuals no longer a negative binomial or binomial relation
##
## Shapiro-Wilk normality test
##
## data: data.cooc.30$cooc.z
## W = 0.8228, p-value = 8.239e-11
shapiro.test(phy.dist.30.sqrt)
##
## Shapiro-Wilk normality test
##
## data: phy.dist.30.sqrt
## W = 0.8502, p-value = 9.04e-10
Top plots include entire dataset, while those on the bottom include only those plots separated by less than 30 million years.
x-axis for plots to the left is “Difference between specialists and generalists”, while x-axis for plots to the right is “Phylogenetic distance (square root transformed)”
#dev.new(width=2.95, height=2.95)
par(mfrow=c(2,2))
par(mar=c(3,2.5,0.75,0.75), mgp=c(1,0.25,0), las=0)
plot(data.cooc$gen.spec,data.cooc$cooc.z, xlab="", ylab="Co-occurrence", lwd=2, pch=16, col="black", cex.axis=0.75, cex.lab=0.85,bty="n", cex=0.5,tck=-0.05)
abline(cooc.z.gen.spec.lm, lwd=2)
box(bty="l", lwd=2)
plot(sqrt(data.cooc$phy.dist),data.cooc$cooc.z, xlab="", ylab="Co-occurrence", lwd=2, pch=16, col="black", cex.axis=0.75, cex.lab=0.85,bty="n", cex=0.5,tck=-0.05 )
abline(cooc.z.phy.dist.lm, lwd=2)
box(bty="l", lwd=2)
plot((data.cooc.30$gen.spec),data.cooc.30$cooc.z, xlab="", ylab="Co-occurrence", lwd=2, pch=16, col="black", cex.axis=0.75, cex.lab=0.85,bty="n", cex=0.5,tck=-0.05,)
abline(cooc.z.gen.spec.lm.30, lwd=2)
box(bty="l", lwd=2)
plot(sqrt(data.cooc.30$phy.dist),data.cooc.30$cooc.z, xlab="", ylab="Co-occurrence", lwd=2, pch=16, col="black", cex.axis=0.75, cex.lab=0.85,bty="n", cex=0.5,tck=-0.05 )
box(bty="l", lwd=2)
#textClick("(a)",cex=c(size=1, font=2) )
#textClick("(b)", cex=c(size=1, font=2))
#textClick("(c)", cex=c(size=1, font=2))
#textClick("(d)", cex=c(size=1, font=2))
#textClick("Difference in \nniche width",cex=c(size=1, font=2))
#textClick("Phylogenetic \ndistance", cex=c(size=1, font=2))
#textClick("Difference in \nniche width", cex=c(size=1, font=2))
#textClick("Phylogenetic \ndistance", cex=c(size=1, font=2))
#set up GAM formula with interaction between two predictor variables####################################################################
#I am using tensor product smoothing because predictors not of same unit
#Do same GAM model with tensor product smoothing because predictors not of same unit; the second analysis includes a square root transformation of phy.dist. GAM.check
#indicates that k value too log for .log model so used k=6 instead (selected by a trial-and-error process, seeing when p-value of gam.check greater than p<0.05
#For te smooths, the upper limit of degrees of freedom is given by product of the K values provided by each marginal smooth less than one, for the construction.
#Exact choice is not generally ciritical, but should be large enough so you are reasonable sure of having enough degrees of frreedom to represent the 'truth' reasonable well, but small
#enough for comuptational efficiency
cooc.gam.sqrt.te<-gam(cooc.z~te(phy.dist.sqrt,gen.spec), dat=data.cooc)
summary(cooc.gam.sqrt.te)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## cooc.z ~ te(phy.dist.sqrt, gen.spec)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1557 0.1148 -1.356 0.176
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(phy.dist.sqrt,gen.spec) 5.353 6.084 7.505 8.19e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0717 Deviance explained = 8.06%
## GCV = 7.4771 Scale est. = 7.3924 n = 561
gam.check(cooc.gam.sqrt.te)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 15 iterations.
## The RMS GCV score gradiant at convergence was 4.960206e-07 .
## The Hessian was positive definite.
## The estimated model rank was 25 (maximum possible: 25)
## Model rank = 25 / 25
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(phy.dist.sqrt,gen.spec) 24.000 5.353 0.979 0.25
#other diagnostic checks that should be done
par(mfrow=c(1,2))
plot(fitted(cooc.gam.sqrt.te),residuals(cooc.gam.sqrt.te))
plot(data.cooc$cooc.z,residuals(cooc.gam.sqrt.te))
#Calculate GAM for only those species pairs with less than 30 million years of separation
cooc.gam.30<-gam(cooc.z.30~te(phy.dist.30.sqrt,gen.spec.30), dat=data.cooc.30)
summary(cooc.gam.30)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## cooc.z.30 ~ te(phy.dist.30.sqrt, gen.spec.30)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6775 0.2288 -2.962 0.0037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(phy.dist.30.sqrt,gen.spec.30) 3 3 3.924 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0676 Deviance explained = 9.07%
## GCV = 6.6017 Scale est. = 6.3853 n = 122
gam.check(cooc.gam.30, k.rep = 1000)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 16 iterations.
## The RMS GCV score gradiant at convergence was 3.426105e-07 .
## The Hessian was positive definite.
## The estimated model rank was 25 (maximum possible: 25)
## Model rank = 25 / 25
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(phy.dist.30.sqrt,gen.spec.30) 24.000 3.000 0.948 0.23
#other diagnostic checks that should be done
par(mfrow=c(1,2))
plot(fitted(cooc.gam.30),residuals(cooc.gam.30))
plot(data.cooc.30$cooc.z,residuals(cooc.gam.30))
x-axis should be “Phylogenetic distance (square root transformed)”. Plot for all species pairs is to the left, whereas plot for only those species pairs separated by 30 million years is on the right.
#Plot two GAMs side-by-side
#dev.new(width=5.95, height=4)
layout(matrix(1:2,ncol=2), width = c(1,1),height = c(1,1))
par(mar=c(4,2.5,2.75,0.75), mgp=c(1.5,0.5,0), las=0)
#par(mgp=c(axis.title.position, axis.label.position, axis.line.position))
my.vis.gam(cooc.gam.sqrt.te, type="response", plot.type = "contour", color="gray", ylab="Difference in niche width", main="",
xlab="", cex.lab=1.1, cex.axis=0.9, cex.main=0.1, lwd=2, cex=2)
box(bty="o", lwd=2)
my.vis.gam(cooc.gam.30,type="response", plot.type = "contour", color="gray", ylab="Difference in niche width", main="",
xlab="", cex.lab=1.1, cex.axis=0.9, cex.main=0.1, lwd=2, cex=2)
box(bty="o", lwd=2)
#textClick("(a)", cex=c(size=1, font=2))
#textClick("(b)", cex=1)
#textClick("Phylogenetic distance", cex=1)
#textClick("Phylogenetic distance", cex=1)