cabral <- read_csv("cabral_island_data.csv",
col_types = cols(...1 = col_skip()))
## New names:
## • `` -> `...1`
arch <-read_csv("C:/Users/sarah/Desktop/Masters program/Classes_spring_25/homework/cabral_arch_data.csv",
col_types = cols(...1 = col_skip()))
## New names:
## • `` -> `...1`
arch <- arch %>% filter(Archipelago != "NA")
#Make a model with species richness as the response, and with a random effect for Archipelago.
cabral.rand = lmer(log(Species+1) ~ (1|Archipelago), data= cabral)
summary(cabral.rand)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Species + 1) ~ (1 | Archipelago)
## Data: cabral
##
## REML criterion at convergence: 501.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.1507 -0.5357 0.0796 0.5790 1.9455
##
## Random effects:
## Groups Name Variance Std.Dev.
## Archipelago (Intercept) 0.6408 0.8005
## Residual 0.8243 0.9079
## Number of obs: 174, groups: Archipelago, 23
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.7685 0.1865 18.9104 25.57 3.94e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#What proportion of the variation in species richness occurs at the archipelago scale, and what proportion occurs within archipelagos?
###The archipelago scale variation is 0.64/(0.64+0.82)= ~44%. The variation within archipelagos is ~56%.
#Which archipelagos are particularly diverse, and which are depauperate?
ranef(cabral.rand)
## $Archipelago
## (Intercept)
## Aldabra 0.28121897
## Azores 0.36773210
## Balearic Islands 1.18183176
## Canaries 1.45904002
## Cape Verde -0.22621387
## Cook islands -0.22591288
## Dutch Caribbean 0.91678059
## Galapagos 0.09980580
## Hawaii -0.09227461
## Iles Crozet -1.16604580
## Inner Seychelles 0.06058546
## Juan Fernandez 0.06298814
## Kuril Islands 0.12453644
## Madeira 0.74721555
## Marianas -0.11819580
## Marquesas 0.10645794
## North. Cal. Channel Islands 0.67822271
## Phoenix Islands -1.35368520
## Pitcairn Islands -0.94759395
## Prince Edward Islands -0.90859247
## Revillagigedo Islands -0.71128117
## Society Islands -0.02387580
## Tristan da Cunha -0.31274390
##
## with conditional variances for "Archipelago"
plot_model(cabral.rand, type ="re") + ylim(-2.2, 2.2)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
### The Balearic and azores islands are particularyl diverse, and the Pheonix and Iles Crozet are depauperate.
#Merge datasets
cabral2 <- cabral_merged <- left_join(cabral, arch, by = "Archipelago")
#Make some exploratory plots of the effect of each variable on richness, plotted on the appropriate scale.
plot(Species ~ Area, cabral2)
plot(Species ~ Elev, cabral2)
plot(Species ~ Temp, cabral2)
plot(Species ~ number.islands , cabral2)
plot(Species ~ distance, cabral2)
plot(Species ~ age, cabral2)
###Area
par(mfrow = c(1,3))
plot(Species ~ Area, cabral2)
plot(Species~ sqrt(Area), data= cabral2)
plot(Species~ log(Area), data= cabral2) ##Best
###Elev
par(mfrow = c(1,3))
plot(Species ~ Elev, cabral2)
plot(Species~ sqrt(Elev), data= cabral2) ##Best
plot(Species~ log(Elev), data= cabral2)
##distance, seems normally distributed but the scale is way larger than the other variables
cabral2$distance2 <- cabral2$distance/100
#all variables
all.model <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + number.islands + distance2 + age + (1|Archipelago), data= cabral2)
summary(all.model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + age + (1 | Archipelago)
## Data: cabral2
##
## REML criterion at convergence: 353.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6564 -0.5285 0.0589 0.5260 2.3593
##
## Random effects:
## Groups Name Variance Std.Dev.
## Archipelago (Intercept) 0.5029 0.7091
## Residual 0.2589 0.5088
## Number of obs: 174, groups: Archipelago, 23
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.896027 0.525115 24.088340 3.611 0.00139 **
## log(Area) 0.374711 0.034667 156.315351 10.809 < 2e-16 ***
## sqrt(Elev) 0.004292 0.005589 157.148523 0.768 0.44376
## Temp 0.071684 0.021981 24.145787 3.261 0.00329 **
## number.islands 0.009385 0.030570 15.835542 0.307 0.76284
## distance2 -0.019620 0.009791 16.641159 -2.004 0.06165 .
## age 0.005203 0.007136 16.684364 0.729 0.47607
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) lg(Ar) sqr(E) Temp nmbr.s dstnc2
## log(Area) 0.013
## sqrt(Elev) -0.160 -0.770
## Temp -0.742 -0.168 0.187
## numbr.slnds -0.395 0.002 -0.032 0.171
## distance2 -0.110 0.094 -0.038 -0.354 -0.180
## age -0.087 -0.008 0.056 -0.202 -0.444 0.399
#plot fixed
fixef(all.model)
## (Intercept) log(Area) sqrt(Elev) Temp number.islands
## 1.896027368 0.374710813 0.004291531 0.071683703 0.009384910
## distance2 age
## -0.019619716 0.005202597
plot_model(all.model, type = "eff", terms = "Area")
plot_model(all.model, type = "eff", terms = "Elev")
plot_model(all.model, type = "eff", terms = "Temp")
plot_model(all.model, type = "eff", terms = "number.islands")
plot_model(all.model, type = "eff", terms = "distance2")
plot_model(all.model, type = "eff", terms = "age")
#plot random
plot_model(all.model, type="re")
#model diagnostics
plot_model(all.model, type="diag")[[1]]
## `geom_smooth()` using formula = 'y ~ x'
###We have some low value outliers, otherwise, the data fits the line fairly well.
plot_model(all.model, type="diag")[[2]]
## $Archipelago
## `geom_smooth()` using formula = 'y ~ x'
###random effects seem normally distributed
#How much variation do the predictors explain? At the two different scales?
VarCorr(all.model)
## Groups Name Std.Dev.
## Archipelago (Intercept) 0.70912
## Residual 0.50879
#R^2 = 1 - variance of full model/ variance of model with not predictors
1-(VarCorr(all.model)$Archipelago[1]/VarCorr(cabral.rand)$Archipelago[1])
## [1] 0.2152449
### 22% of the variation between Archipelagos is explained by the predictors
1- ((sigma(all.model)^2)/(sigma(cabral.rand)^2))
## [1] 0.6859342
### 69% of the variation within islands in an Archipelago is explained by the predictors
#Also, how much of the total variation have they explained, according to R2GLMM(m)?
r.squaredGLMM(all.model)
## R2m R2c
## [1,] 0.5449462 0.8453507
###The fixed effects explained 55% of the total variation. With the random effects, 85% of the variation is explained.
#####Use appropriate hypothesis tests (as described in lecture) to test the role of the different predictors. How do you interpret the results of these tests, and the effects plots, in light of hypotheses for what controls species richness in islands and archipelagos? What are the denominator degrees of freedom for each predictor? This is essentially telling you how much replication there is for that predictor, minus the number of parameters for that predictor. Do the denominator df make sense? Why or why not?
#Hypothesis tests from the lecture:
library(lmerTest)
anova(all.model, ddf = "Satterthwaite", type = 2)
## Type II Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## log(Area) 30.2446 30.2446 1 156.315 116.8333 < 2.2e-16 ***
## sqrt(Elev) 0.1526 0.1526 1 157.149 0.5895 0.443761
## Temp 2.7530 2.7530 1 24.146 10.6348 0.003294 **
## number.islands 0.0244 0.0244 1 15.836 0.0942 0.762845
## distance2 1.0395 1.0395 1 16.641 4.0154 0.061645 .
## age 0.1376 0.1376 1 16.684 0.5315 0.476067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###p< 0.01 for Area and Temperature
#parametric bootstrapping:
library(pbkrtest)
noarea <- lmer(log(Species+1) ~ sqrt(Elev)+ Temp + number.islands + distance2 + age + (1|Archipelago), data= cabral2)
PBmodcomp(all.model, noarea, nsim = 10000)
## Bootstrap test; time: 137.39 sec; samples: 10000; extremes: 0;
## large : log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + age + (1 | Archipelago)
## log(Species + 1) ~ sqrt(Elev) + Temp + number.islands + distance2 +
## age + (1 | Archipelago)
## stat df p.value
## LRT 89.571 1 < 2.2e-16 ***
## PBtest 89.571 9.999e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
noelev <-lmer(log(Species+1) ~ log(Area) + Temp + number.islands + distance2 + age + (1|Archipelago), data= cabral2)
PBmodcomp(all.model, noelev, nsim = 10000)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00301308 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00204709 (tol = 0.002, component 1)
## Bootstrap test; time: 137.21 sec; samples: 10000; extremes: 4628;
## large : log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + age + (1 | Archipelago)
## log(Species + 1) ~ log(Area) + Temp + number.islands + distance2 +
## age + (1 | Archipelago)
## stat df p.value
## LRT 0.5659 1 0.4519
## PBtest 0.5659 0.4629
notemp <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ number.islands + distance2 + age + (1|Archipelago), data= cabral2)
PBmodcomp(all.model, notemp, nsim = 10000)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00237747 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00313266 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00305003 (tol = 0.002, component 1)
## Bootstrap test; time: 143.08 sec; samples: 10000; extremes: 32;
## large : log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + age + (1 | Archipelago)
## log(Species + 1) ~ log(Area) + sqrt(Elev) + number.islands +
## distance2 + age + (1 | Archipelago)
## stat df p.value
## LRT 10.982 1 0.0009201 ***
## PBtest 10.982 0.0032997 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
noislands <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + distance2 + age + (1|Archipelago), data= cabral2)
PBmodcomp(all.model, noislands, nsim = 10000)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00331035 (tol = 0.002, component 1)
## Bootstrap test; time: 2271.21 sec; samples: 10000; extremes: 7967;
## large : log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + age + (1 | Archipelago)
## log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + distance2 +
## age + (1 | Archipelago)
## stat df p.value
## LRT 0.0851 1 0.7705
## PBtest 0.0851 0.7967
noage <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + number.islands + distance2 + (1|Archipelago), data= cabral2)
PBmodcomp(all.model, noage, nsim = 10000)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00283181 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00300656 (tol = 0.002, component 1)
## Bootstrap test; time: 139.75 sec; samples: 10000; extremes: 4461;
## large : log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + age + (1 | Archipelago)
## log(Species + 1) ~ log(Area) + sqrt(Elev) + Temp + number.islands +
## distance2 + (1 | Archipelago)
## stat df p.value
## LRT 0.7763 1 0.3783
## PBtest 0.7763 0.4462
#How do you interpret the results of these tests, and the effects plots, in light of hypotheses for what controls species richness in islands and archipelagos?
###The larger the Area, the Higher the temperature, and the lower the distance from a continent, the greater species richness of an island.
# What are the denominator degrees of freedom for each predictor? This is essentially telling you how much replication there is for that predictor, minus the number of parameters for that predictor. Do the denominator df make sense? Why or why not?
# DenDF
#log(Area) 156.315
#sqrt(Elev) 157.149
#Temp 24.146
#number.islands 15.836
#distance2 16.641
#age 16.684
###There are 174 rows in the dataset, 5 fixed effects and 1 random effect, so maximum DenDF is 168. The Area DenDF is 156, and it is measured at the individual island level, so a DenDF of 156 makes sense. The Temp DenDF is 24, but it measured at the Archipeligo scale, and there are 24 different Archipelagos in the data so that makes sense also. All the DenDFs make sense because of the number of replications for each.
all.model <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + number.islands + distance2 + age + (1|Archipelago), data= cabral2,REML = FALSE)
noarea <- lmer(log(Species+1) ~ sqrt(Elev)+ Temp + number.islands +distance2 + age + (1|Archipelago), data= cabral2,REML = FALSE)
noelev <-lmer(log(Species+1) ~ log(Area) + Temp + number.islands + distance2 + age + (1|Archipelago), data= cabral2,REML = FALSE)
notemp <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ number.islands + distance2 + age + (1|Archipelago), data= cabral2,REML = FALSE)
noislands <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + distance2 + age + (1|Archipelago), data= cabral2,REML = FALSE)
nodistance <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + number.islands + age + (1|Archipelago), data= cabral2,REML = FALSE)
noage <- lmer(log(Species+1) ~ log(Area) + sqrt(Elev)+ Temp + number.islands + distance2 + (1|Archipelago), data= cabral2,REML = FALSE)
AIC <- AICc(all.model, noarea, noelev, notemp, noislands, nodistance, noage, cabral.rand)
AIC$delta <- AIC$AICc - 326.5073
###The model that includes all the predictors except number of islands fit the best. However, the models that didn't include elevation, island age, and the full model were close to the best model in fit. The predictors island area, temperature, and distance from a continent were most important for predicting species richness.