Use the ozone.txt data to model the ozone concentration as a linear function of wind speed, air temperature and the intensity of solar radiation. Assume that the requirements to perform a linear regression are met.
install.packages(“caTools”) # For Linear regression install.packages(‘car’) # To check multicollinearity install.packages(“quantmod”) install.packages(“MASS”) install.packages(“corrplot”) # plot correlation plot
library(caTools) library(car) library(quantmod) library(MASS) library(corrplot)
ozone <- read.table("ozone.txt", sep = "\t", header = T)
head(ozone)
## rad temp wind ozone
## 1 190 67 7.4 41
## 2 118 72 8.0 36
## 3 149 74 12.6 12
## 4 313 62 11.5 18
## 5 299 65 8.6 23
## 6 99 59 13.8 19
pairs(ozone)
model_ozone <- lm(ozone ~ rad + temp + wind, data = ozone)
summary(model_ozone)
##
## Call:
## lm(formula = ozone ~ rad + temp + wind, data = ozone)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.485 -14.210 -3.556 10.124 95.600
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -64.23208 23.04204 -2.788 0.00628 **
## rad 0.05980 0.02318 2.580 0.01124 *
## temp 1.65121 0.25341 6.516 2.43e-09 ***
## wind -3.33760 0.65384 -5.105 1.45e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.17 on 107 degrees of freedom
## Multiple R-squared: 0.6062, Adjusted R-squared: 0.5952
## F-statistic: 54.91 on 3 and 107 DF, p-value: < 2.2e-16
The R^2 is 0.6062, showing a moderate relationship between variables.
library(car)
## Warning: package 'car' was built under R version 4.3.2
## Loading required package: carData
vif(model_ozone)
## rad temp wind
## 1.095241 1.431201 1.328979
There is minimal collinearity. VIF of 1 indicates complete absense of collinearity, with increasing values of increasing concern.
library(ggplot2) library(performance)
library(performance)
## Warning: package 'performance' was built under R version 4.3.2
check_model(model_ozone)
Use the diminish.txt data (xv is explanatory, yv is response variable) to:
diminish <- read.table("diminish.txt", sep = "\t", header = T)
head(diminish)
## xv yv
## 1 5 26
## 2 10 29
## 3 15 31
## 4 20 30
## 5 25 35
## 6 30 37
diminish_LM <- lm(yv ~ xv, data = diminish)
summary(diminish_LM)
##
## Call:
## lm(formula = yv ~ xv, data = diminish)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3584 -2.1206 -0.3218 1.8763 4.1782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.61438 1.17315 23.54 7.66e-14 ***
## xv 0.22683 0.02168 10.46 1.45e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.386 on 16 degrees of freedom
## Multiple R-squared: 0.8725, Adjusted R-squared: 0.8646
## F-statistic: 109.5 on 1 and 16 DF, p-value: 1.455e-08
diminish_P <- diminish.poly <- lm(yv ~ xv + I(xv^2), data=diminish)
summary(diminish_P)
##
## Call:
## lm(formula = yv ~ xv + I(xv^2), data = diminish)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0490 -1.2890 0.6018 1.1829 2.9610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.6323529 1.6916139 14.561 2.95e-10 ***
## xv 0.4057534 0.0819860 4.949 0.000175 ***
## I(xv^2) -0.0018834 0.0008386 -2.246 0.040203 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.131 on 15 degrees of freedom
## Multiple R-squared: 0.9046, Adjusted R-squared: 0.8919
## F-statistic: 71.12 on 2 and 15 DF, p-value: 2.222e-08
AIC(diminish_LM, diminish_P)
## df AIC
## diminish_LM 3 86.26193
## diminish_P 4 83.04403
Poly is better, the lower the AIC value the better.
plot(diminish$xv, diminish$yv, main = "Diminish", xlab = "xv", ylab = "yv") + abline(diminish_LM, col = "blue", lwd = 3) + lines(diminish$xv, predict(diminish_P), col = "darkred", lwd = 3)
## integer(0)
The data in stork.txt display the stress-induced corticosterone levels circulating in the blood of European white storks and their survival over the subsequent five years of study.
storka <- read.table('stork.txt', header = 1)
plot(storka)
A binomial regression or logistic regression would be sufficient, as the response variable is binary.
storka_glm <- glm(Survival ~ Corticosterone, data = storka, family = binomial(logit))
summary(storka_glm)
##
## Call:
## glm(formula = Survival ~ Corticosterone, family = binomial(logit),
## data = storka)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.70304 1.74725 1.547 0.1219
## Corticosterone -0.07980 0.04368 -1.827 0.0677 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.234 on 33 degrees of freedom
## Residual deviance: 41.396 on 32 degrees of freedom
## AIC: 45.396
##
## Number of Fisher Scoring iterations: 4
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.3.2
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:car':
##
## Recode
install.packages("DescTools")
## Warning: package 'DescTools' is in use and will not be installed
1-(storka_glm$deviance / storka_glm$null.deviance)
## [1] 0.084852
PseudoR2(storka_glm)
## McFadden
## 0.084852
anova(storka_glm, test = 'Chisq')
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Survival
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 33 45.234
## Corticosterone 1 3.8382 32 41.396 0.0501 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
P-Value is 0.0501
library(ggplot2)
library(ggplot2)
ggplot(aes(x = Corticosterone, y = Survival), data = storka_glm) +
geom_point() +
geom_smooth(method = glm, method.args = list(family = "binomial"), se = T)
## `geom_smooth()` using formula = 'y ~ x'
library(“performance”)
check_model(storka_glm)
The clusters.txt dataset contains the response variable Cancers (the number of reported cancer cases per year per clinic) and the explanatory variable Distance (the distance from a nuclear plant to the clinic in kilometers).
clusters <- read.table("clusters.txt", sep = "\t", header = T)
plot(clusters)
Poisson regression; “Count data. It is useful to describe rare events in large population”
clusters_glm <- glm(Cancers ~ Distance, poisson, clusters)
summary(clusters_glm)
##
## Call:
## glm(formula = Cancers ~ Distance, family = poisson, data = clusters)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.186865 0.188728 0.990 0.3221
## Distance -0.006138 0.003667 -1.674 0.0941 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 149.48 on 93 degrees of freedom
## Residual deviance: 146.64 on 92 degrees of freedom
## AIC: 262.41
##
## Number of Fisher Scoring iterations: 5
There is no significant negative trend.
1-(clusters_glm$deviance/clusters_glm$null.deviance)
## [1] 0.01900423
Pseudo-R2 is 0.019
anova(clusters_glm, test = "Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: Cancers
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 93 149.48
## Distance 1 2.8408 92 146.64 0.0919 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
P-value is 0.0919
ggplot(aes(x = Distance, y = Cancers), data = clusters_glm) + geom_point() + geom_smooth(method = "glm", method.args = list(family = "poisson"), se = T) + ggtitle("Cancer Incidence and Distance from Nuclear Plant") + xlab("Distance from Nuclear Plant (km)") + ylab("Cancer Incidence Annually (Count)")
## `geom_smooth()` using formula = 'y ~ x'
check_overdispersion(clusters_glm)
## # Overdispersion test
##
## dispersion ratio = 1.555
## Pearson's Chi-Squared = 143.071
## p-value = 0.001
## Overdispersion detected.
Overdispersion detected.
library(MASS)
## Warning: package 'MASS' was built under R version 4.3.2
clusters_glm_nb <- glm.nb(Cancers ~ Distance, data = clusters)
summary(clusters_glm_nb)
##
## Call:
## glm.nb(formula = Cancers ~ Distance, data = clusters, init.theta = 1.359466981,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.182490 0.252434 0.723 0.470
## Distance -0.006041 0.004727 -1.278 0.201
##
## (Dispersion parameter for Negative Binomial(1.3595) family taken to be 1)
##
## Null deviance: 96.647 on 93 degrees of freedom
## Residual deviance: 94.973 on 92 degrees of freedom
## AIC: 253.19
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.359
## Std. Err.: 0.612
##
## 2 x log-likelihood: -247.191
check_model(clusters_glm_nb)
Use the jaws.txt data to:
jaws <- read.table('jaws.txt', sep = "\t", header = T)
head(jaws)
## age bone
## 1 0.000000 0.00000
## 2 5.112000 20.22000
## 3 1.320000 11.11130
## 4 35.240000 140.65000
## 5 1.632931 26.15218
## 6 2.297635 10.00100
plot(bone ~ age, data = jaws, main = "Age vs Bone Response", xlab = "Age", ylab = "Bone")
jaws_nl <- nls(bone ~ (a * (1 - exp(-1 * c * age))), start = list(a = 100, c = 0.1), jaws)
summary(jaws_nl)
##
## Formula: bone ~ (a * (1 - exp(-1 * c * age)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 115.58059 2.84365 40.645 < 2e-16 ***
## c 0.11882 0.01233 9.635 3.69e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.1 on 52 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 4.035e-06
jaws_mm <- nls(bone ~ (a * age / (1 + b * age)), start = list(a = 100, b = 0.1), jaws)
summary(jaws_mm)
##
## Formula: bone ~ (a * age/(1 + b * age))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 18.72524 2.52585 7.413 1.09e-09 ***
## b 0.13596 0.02339 5.814 3.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.77 on 52 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 7.763e-06
rss_1 <- 13.1^2 * 52
rss_2 <- 13.77^2 * 52
null <- lm(bone ~ 1, data = jaws)
summary(null)
##
## Call:
## lm(formula = bone ~ 1, data = jaws)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.979 -8.844 9.872 21.775 48.021
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 93.979 4.541 20.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.37 on 53 degrees of freedom
tss <- 33.37^2 * 53
100 * (tss-rss_1)/tss
## [1] 84.8798
100 * (tss-rss_2)/tss
## [1] 83.2936
This took me a little more time than I anticipated, and I had to revisit the class materials and look something up to be confident about it but… I believe this should be right! residual sum of squares for the first model is rss_1 (asymptotic exponential relationship), and rss_2 is for the michaelis-menten modele
AIC(nls(bone ~ (a * age) / (1 + b * age), start = list(a = 100, b = 0.1), jaws))
## [1] 440.4066
plot(jaws$age, jaws$bone)
lines(seq(0, 50, 0.1), predict(jaws_nl, list(age = seq(0, 50, 0.1)),
type = "response"), col = "blue", lwd = 3)
lines(seq(0, 50, 0.1), predict(jaws_mm, list(age = seq(0, 50, 0.1)),
type = "response"), col = "darkred", lwd = 3)
legend("topleft", legend = c("Asymptotic", "MM Model"), col = c("blue", "darkred"), lwd = 3)
In a recent paper we read: “Linear mixed effects modelling fit by restricted maximum likelihood was used to explain the variations in growth. The linear mixed effects model was generated using the lmer function in the R package lme4, with turbidity, temperature, tide, and wave action set as fixed factors and site and date set as random effects”. Write down the R code to recreate their model.
library(lme4)
LinMix <- lmer(growth ~ turbidity + temperature + tide + wave_action + (1|site) + (1|date), data = growth_data) anova(LinMix)
Needed to revisit module 7 readings but I believe this is correct.
Researchers at the University of Arizona want to assess the germination rate of saguaros using a factorial design, with 3 levels of soil type (remnant, cultivated and restored) and 2 levels of sterilization (yes or no). The same experimental design was deployed in 4 different greenhouses. Each of the unique treatments was replicated in 5 pots. 6 seeds were planted in each pot. a) How many fixed treatments (unique combinations) exist?
Combinations of 3 soil types, 2 levels of sterilization
combo <- 3 * 2
combo
## [1] 6
(Combinations of soil types and sterilization levels) * number of pots each
pots <- (3 * 2) * 5
pots
## [1] 30
((Combinations of soil types and sterilization levels) * number of pots each) * number of greenhouses replicating
plants <- (combo * pots) * 4
plants
## [1] 720
library(lme4)
glmer(germination ~ soil * sterilization + (1|Greenhouse) + (1|Greenhouse:Pot), family = binomial, data = dataset)
I had to compare my own with that of others, having made a couple small changes to my original answer. The overall model was the same, though, as logistic glmm seemed to be the best fit granted the parameters, 2 nested random effects, and germination being a proportion.
Check these hypothetical data from 4 subjects relating number of previous performances to negative affect a) What does the thick dashed black line represent?
The thicker dashed black line is a linear regression trendline for the data.
The solid black line is the average of the random variables. The average of the four lighter dashed lines.
The four thin dashed lines are the random-intercept levels for a mixed effects model.
Load dragons.RData
load("dragons.RData")
head(dragons)
## testScore bodyLength mountainRange X site
## 1 16.147309 165.5485 Bavarian NA a
## 2 33.886183 167.5593 Bavarian NA a
## 3 6.038333 165.8830 Bavarian NA a
## 4 18.838821 167.6855 Bavarian NA a
## 5 33.862328 169.9597 Bavarian NA a
## 6 47.043246 168.6887 Bavarian NA a
dragon_lm <- lm(testScore ~ bodyLength, data = dragons)
summary(dragon_lm)
##
## Call:
## lm(formula = testScore ~ bodyLength, data = dragons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.962 -16.411 -0.783 15.193 55.200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -61.31783 12.06694 -5.081 5.38e-07 ***
## bodyLength 0.55487 0.05975 9.287 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.2 on 478 degrees of freedom
## Multiple R-squared: 0.1529, Adjusted R-squared: 0.1511
## F-statistic: 86.25 on 1 and 478 DF, p-value: < 2.2e-16
library(ggplot2)
ggplot(aes(y = testScore, x = bodyLength), data = dragons) + geom_point() + geom_smooth(method = "lm", se = T)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = dragons, mapping = aes(y = testScore, x = mountainRange, fill = mountainRange)) + geom_boxplot() + labs(title = "Dragon Test Score and Mountain Range")
ggplot(aes(y = testScore, x = bodyLength, color = mountainRange), data = dragons) + geom_point() + labs(title = "Dragon Test Score and Body Length by Mountain Range", color = "Mountain Range")
ggplot(aes(y = testScore, x = bodyLength, color = mountainRange), data = dragons) + geom_point() + labs(title = "Dragon Test Score and Body Length by Mountain Range", color = "Mountain Range") + facet_wrap(. ~ mountainRange)
range <- lm(testScore ~ bodyLength + mountainRange, data = dragons)
summary(range)
##
## Call:
## lm(formula = testScore ~ bodyLength + mountainRange, data = dragons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.263 -9.926 0.361 9.994 44.488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.83051 14.47218 1.439 0.15072
## bodyLength 0.01267 0.07974 0.159 0.87379
## mountainRangeCentral 36.58277 3.59929 10.164 < 2e-16 ***
## mountainRangeEmmental 16.20923 3.69665 4.385 1.43e-05 ***
## mountainRangeJulian 45.11469 4.19012 10.767 < 2e-16 ***
## mountainRangeLigurian 17.74779 3.67363 4.831 1.84e-06 ***
## mountainRangeMaritime 49.88133 3.13924 15.890 < 2e-16 ***
## mountainRangeSarntal 41.97841 3.19717 13.130 < 2e-16 ***
## mountainRangeSouthern 8.51961 2.73128 3.119 0.00192 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.96 on 471 degrees of freedom
## Multiple R-squared: 0.5843, Adjusted R-squared: 0.5773
## F-statistic: 82.76 on 8 and 471 DF, p-value: < 2.2e-16
library(lme4)
## Loading required package: Matrix
range2 <- lmer(testScore ~ bodyLength + (1|mountainRange), data = dragons)
summary(range2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: testScore ~ bodyLength + (1 | mountainRange)
## Data: dragons
##
## REML criterion at convergence: 3991.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4815 -0.6513 0.0066 0.6685 2.9583
##
## Random effects:
## Groups Name Variance Std.Dev.
## mountainRange (Intercept) 339.7 18.43
## Residual 223.8 14.96
## Number of obs: 480, groups: mountainRange, 8
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 43.70938 17.13489 2.551
## bodyLength 0.03316 0.07865 0.422
##
## Correlation of Fixed Effects:
## (Intr)
## bodyLength -0.924
100*(339.7/(339.7+223.8))
## [1] 60.28394
100 * (random mr intercept variance) / (random mr intercept variance + random residual intercept variance) 60.284%
r2(range2)
## # R2 for Mixed Models
##
## Conditional R2: 0.603
## Marginal R2: 0.001
library(performance)
check_model(range2)
library(ggplot2)
fitted_values <- function(dragons) {
predicted <- predict(range2, newdata = data.frame(bodyLength = dragons$bodyLength, mountainRange = dragons$mountainRange))
return(predicted)
}
dragons$predicted <- fitted_values(dragons)
ggplot(aes(y = testScore, x = bodyLength, color = mountainRange), data = dragons) +
geom_point() +
geom_line(aes(y = predicted), color = "black") +
theme_bw() +
labs(title = "Dragon Test Score and Body Length by Mountain Range",
color = "Mountain Range") +
theme(panel.grid = element_blank(),
legend.position = "bottom") +
facet_wrap(~mountainRange)
Data in Estuaries.csv correspond to counts of invertebrates at 3-4 sites in each of 7 (randomly chosen) estuaries.
library(ggplot2) library(lme4) library(performance) library(DHARMa) install.packages(“DHARMa”)
estu <- read.csv("Estuaries.csv", header = T)
head(estu)
## X Modification Estuary Site Hydroid Total Schizoporella.errata
## 1 1 Modified JAK 1 0 44 15
## 2 2 Modified JAK 1 0 42 8
## 3 3 Modified JAK 2 0 32 9
## 4 4 Modified JAK 2 0 44 14
## 5 5 Modified JAK 3 1 42 6
## 6 6 Modified JAK 3 1 48 12
estulmer <- lmer(Total ~ Modification + (1|Estuary), data = estu)
summary(estulmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Total ~ Modification + (1 | Estuary)
## Data: estu
##
## REML criterion at convergence: 394.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3859 -0.7142 0.2766 0.5240 2.0456
##
## Random effects:
## Groups Name Variance Std.Dev.
## Estuary (Intercept) 55.12 7.424
## Residual 86.07 9.277
## Number of obs: 54, groups: Estuary, 7
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 40.973 4.727 8.668
## ModificationPristine -14.473 6.230 -2.323
##
## Correlation of Fixed Effects:
## (Intr)
## MdfctnPrstn -0.759
r2(estulmer)
## # R2 for Mixed Models
##
## Conditional R2: 0.553
## Marginal R2: 0.267
Conditional: 0.552, Marginal: 0.267
ggplot(estu, aes(x = Modification, y = Total)) + geom_boxplot() + geom_jitter(aes(color = Estuary)) + facet_wrap(~Estuary)
estulmer_siter <- lmer(Total ~ Modification + (1|Estuary/Site), data = estu)
summary(estulmer_siter)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Total ~ Modification + (1 | Estuary/Site)
## Data: estu
##
## REML criterion at convergence: 386.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8686 -0.6687 0.1504 0.6505 1.9816
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site:Estuary (Intercept) 49.85 7.061
## Estuary (Intercept) 47.59 6.899
## Residual 43.65 6.607
## Number of obs: 54, groups: Site:Estuary, 27; Estuary, 7
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 41.053 4.739 8.662
## ModificationPristine -14.553 6.232 -2.335
##
## Correlation of Fixed Effects:
## (Intr)
## MdfctnPrstn -0.760
r2(estulmer_siter)
## # R2 for Mixed Models
##
## Conditional R2: 0.774
## Marginal R2: 0.270
Conditional: 0.774, Marginal: 0.270
check_model(estulmer_siter)
ggplot(estu, aes(x = Modification, y = Total)) + geom_boxplot()+
geom_jitter(aes(color = as.factor(Site))) +
facet_wrap(~Estuary)
estu$HydroidP <- estu$Hydroid > 0
hydroidglmsite <- glmer(HydroidP ~ Modification + (1|Estuary/Site), family = binomial, data = estu)
summary(hydroidglmsite)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: HydroidP ~ Modification + (1 | Estuary/Site)
## Data: estu
##
## AIC BIC logLik deviance df.resid
## 57.1 65.0 -24.5 49.1 50
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.06063 -0.25028 -0.05443 0.25475 0.98719
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site:Estuary (Intercept) 14.45 3.802
## Estuary (Intercept) 1.13 1.063
## Number of obs: 54, groups: Site:Estuary, 27; Estuary, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.710 2.419 -2.360 0.0183 *
## ModificationPristine 6.534 3.140 2.081 0.0375 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## MdfctnPrstn -0.888
check_model(hydroidglmsite)
Write a function to calculate the 3-point moving average for an input vector. The formula is: y_i = (y_(i-1)+y_i+y_(i+1))/3
tpmavg <- function(x) {
y <- numeric(length(x))
for (i in 2:(length(x) - 1)) {
y[i] <- (x[i - 1] + x[i] + x[i + 1]) / 3
}
return(y)
}
Read the temp.txt file. The data correspond to monthly average temperatures.
temp <- read.table("temp.txt", header = T)
tempts <- ts(temp$temps, frequency = 12)
plot(tempts, main = "Temperature by Month", ylab = "Temperature", xlab = "Month")
library(TTR)
## Warning: package 'TTR' was built under R version 4.3.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
tempma <- SMA(tempts, n = 5)
plot(tempma, col = "darkred", lwd = 3)
tempdec <- decompose(tempts)
plot(tempdec)
acf(tempts)
acf(tempdec$random[!is.na(tempdec$random)])
pacf(tempts)
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:DescTools':
##
## BoxCox
tempfit <- auto.arima(tempts)
summary(tempfit)
## Series: tempts
## ARIMA(0,0,1)(2,1,0)[12]
##
## Coefficients:
## ma1 sar1 sar2
## 0.2456 -0.5212 -0.3233
## s.e. 0.0774 0.0823 0.0827
##
## sigma^2 = 3.772: log likelihood = -300.76
## AIC=609.52 AICc=609.81 BIC=621.4
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2384692 1.846384 1.498262 0.2682136 11.90101 0.8327783
## ACF1
## Training set 0.005413047
tempfore <- forecast(tempfit)
checkresiduals(tempfit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1)(2,1,0)[12]
## Q* = 38.688, df = 21, p-value = 0.01069
##
## Model df: 3. Total lags used: 24
plot(tempfore)
Read the bac_dust.txt dataset. The data corresponds to proteobacterial abundance and bacterial species in dust samples collected around the globe.
library(maps) library(ggplot2)
bacd <- read.table("bac_dust.txt", header = T, sep = "\t")
mapcon <- map_data("world")
ggplot(mapcon) + geom_polygon(aes(y = lat, x = long, group = group), fill = "gray", color = "black", size = 0.1) +
geom_point(data = bacd, aes(y = Latitude, x = Longitude, color = Continent), size = 2) +
theme_bw(base_size = 15) +
theme(legend.title = element_blank(), axis.title.x = element_blank(),
axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.y = element_blank(), axis.text.y = element_blank(),
axis.ticks.y = element_blank())
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(mapcon) +
geom_polygon(aes(y = lat, x = long, group = group), fill = "gray", color = "black", size = 0.1) +
geom_point(data = bacd, aes(y = Latitude, x = Longitude, color = Continent, size = Richness), alpha = 0.6) +
theme_bw(base_size = 15) +
theme(legend.title = element_blank(), axis.title.x = element_blank(),
axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.y = element_blank(), axis.text.y = element_blank(),
axis.ticks.y = element_blank())
library(ape)
## Warning: package 'ape' was built under R version 4.3.2
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
bacdist <- as.matrix(dist(cbind(bacd$Latitude, bacd$Longitude)))
bacdistinv <- 1/bacdist
diag(bacdistinv) <- 0
Moran.I(bacd$Richness, bacdistinv)
## $observed
## [1] 0.1159867
##
## $expected
## [1] -0.003225806
##
## $sd
## [1] 0.03860909
##
## $p.value
## [1] 0.002017263
Moran.I(bacd$Proteobacteria, bacdistinv)
## $observed
## [1] 0.216501
##
## $expected
## [1] -0.003225806
##
## $sd
## [1] 0.03858986
##
## $p.value
## [1] 1.241692e-08
anova(lm(Richness ~ Proteobacteria, data = bacd))
## Analysis of Variance Table
##
## Response: Richness
## Df Sum Sq Mean Sq F value Pr(>F)
## Proteobacteria 1 38901 38901 0.5349 0.4651
## Residuals 309 22471204 72722
There appears to be no significance
library(spaMM)
## Warning: package 'spaMM' was built under R version 4.3.2
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
## spaMM (Rousset & Ferdy, 2014, version 4.4.0) is loaded.
## Type 'help(spaMM)' for a short introduction,
## 'news(package='spaMM')' for news,
## and 'citation('spaMM')' for proper citation.
## Further infos, slides, etc. at https://gitlab.mbb.univ-montp2.fr/francois/spamm-ref.
fitme(Richness ~ Proteobacteria + Matern(1|Latitude + Longitude), data = bacd)
## If the 'RSpectra' package were installed, an extreme eigenvalue computation could be faster.
## formula: Richness ~ Proteobacteria + Matern(1 | Latitude + Longitude)
## ML: Estimation of corrPars, lambda and phi by ML.
## Estimation of fixed effects by ML.
## Estimation of lambda and phi by 'outer' ML, maximizing logL.
## family: gaussian( link = identity )
## ------------ Fixed effects (beta) ------------
## Estimate Cond. SE t-value
## (Intercept) 411.58 48.47 8.4910
## Proteobacteria 37.66 112.90 0.3336
## --------------- Random effects ---------------
## Family: gaussian( link = identity )
## --- Correlation parameters:
## 1.nu 1.rho
## 0.2041736 0.3145350
## --- Variance parameters ('lambda'):
## lambda = var(u) for u ~ Gaussian;
## Latitude . : 19190
## # of obs: 311; # of groups: Latitude ., 311
## -------------- Residual variance ------------
## phi estimate was 54713
## ------------- Likelihood values -------------
## logLik
## logL (p_v(h)): -2170.262
dist <- dist(bacd[,c("Longitude", "Latitude")])
matern <- MaternCorr(dist, nu = 0.2041723, rho = 0.3145336)
plot(as.numeric(dist), as.numeric(matern), xlab = "Distance", ylab = "Correlation")
The estimated correlation is greater across froups than outside of grouping.
Download the map of Spain.
library(maps)
## Warning: package 'maps' was built under R version 4.3.2
##
## Attaching package: 'maps'
## The following object is masked _by_ '.GlobalEnv':
##
## ozone
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:maps':
##
## unemp
spain <- map_data("world", region = "spain")
ggplot(spain) +
geom_polygon(aes(y = lat, x = long, group = group), fill = "gray", color = "black", size = 0.1) +
scale_size_continuous(range = c(1, 12))
city <- get('world.cities')
head(city)
## name country.etc pop lat long capital
## 1 'Abasan al-Jadidah Palestine 5629 31.31 34.34 0
## 2 'Abasan al-Kabirah Palestine 18999 31.32 34.35 0
## 3 'Abdul Hakim Pakistan 47788 30.55 72.11 0
## 4 'Abdullah-as-Salam Kuwait 21817 29.36 47.98 0
## 5 'Abud Palestine 2456 32.03 35.07 0
## 6 'Abwein Palestine 3434 32.03 35.20 0
spaincit <- city[city$country.etc == 'Spain',]
head(spaincit)
## name country.etc pop lat long capital
## 128 A Coruna Spain 243088 43.33 -8.42 0
## 129 A Estrada Spain 21997 42.70 -8.50 0
## 130 A Laracha Spain 10856 43.25 -8.59 0
## 131 A Pobra do Caraminal Spain 9955 42.61 -8.94 0
## 183 Abanto Zierbena Spain 9505 43.32 -3.08 0
## 186 Abaran Spain 12890 38.20 -1.40 0
ggplot(spain) +
geom_polygon(aes(y = lat, x = long, group = group), fill = "gray", color = "black", size = 0.1) +
geom_point(data = spaincit, aes(y = lat, x = long, size = pop, color = "darkred"), alpha = 0.5) +
scale_size_continuous(range = c(1, 12)) +
theme_void()
I took some design tips from an online tutorial, but I think this map should suffice.
Download GBIF georeferenced occurrence data of the Pyrenean desman (Galemys pyrenaicus) and make a map of its geographical distribution (Hint: it is only present in Spain, Andorra, Portugal and France).
library(rgbif)
## Warning: package 'rgbif' was built under R version 4.3.2
galpyr <- occ_search(scientificName = "Galemys pyrenaicus", hasCoordinate = T)
galp <- as.data.frame(galpyr$data[,c("decimalLatitude", "decimalLongitude")])
world <- map_data("world")
europe <- map_data("world", region = c("Spain", "Portugal", "France", "Andorra"))
ggplot(europe, aes(y = lat, x = long)) +
geom_polygon(aes(group = group, fill = region)) +
geom_point(data = galp, aes(y = decimalLatitude, x = decimalLongitude), color = "black", alpha = 0.2, size= 2) +
theme_bw()
I took some design tips from a tutorial I watched… The map looks decent though!
Load the dune_bio.txt dataset. Species should be in columns and sites in rows.
dunebio <- read.table("dune_bio.txt", header = T, sep = "\t", row.names = 1)
head(dunebio)
## Belper Empnig Junbuf Junart Airpra Elepal Rumace Viclat Brarut Ranfla Cirarv
## 2 3 0 0 0 0 0 0 0 0 0 0
## 13 0 0 3 0 0 0 0 0 0 2 0
## 4 2 0 0 0 0 0 0 0 2 0 2
## 16 0 0 0 3 0 8 0 0 4 2 0
## 6 0 0 0 0 0 0 6 0 6 0 0
## 1 0 0 0 0 0 0 0 0 0 0 0
## Hyprad Leoaut Potpal Poapra Calcus Tripra Trirep Antodo Salrep Achmil Poatri
## 2 0 5 0 4 0 0 5 0 0 3 7
## 13 0 2 0 2 0 0 2 0 0 0 9
## 4 0 2 0 4 0 0 1 0 0 0 5
## 16 0 0 0 0 3 0 0 0 0 0 2
## 6 0 3 0 3 0 5 5 3 0 2 4
## 1 0 0 0 4 0 0 0 0 0 1 2
## Chealb Elyrep Sagpro Plalan Agrsto Lolper Alogen Brohor
## 2 0 4 0 0 0 5 2 4
## 13 1 0 2 0 5 0 5 0
## 4 0 4 5 0 8 5 2 3
## 16 0 0 0 0 7 0 4 0
## 6 0 0 0 5 0 6 0 0
## 1 0 4 0 0 0 7 0 0
sum(dunebio)
## [1] 685
colSums(dunebio)
## Belper Empnig Junbuf Junart Airpra Elepal Rumace Viclat Brarut Ranfla Cirarv
## 13 2 13 18 5 25 18 4 49 14 2
## Hyprad Leoaut Potpal Poapra Calcus Tripra Trirep Antodo Salrep Achmil Poatri
## 9 54 4 48 10 9 47 21 11 16 63
## Chealb Elyrep Sagpro Plalan Agrsto Lolper Alogen Brohor
## 1 26 20 26 48 58 36 15
colMeans(dunebio)
## Belper Empnig Junbuf Junart Airpra Elepal Rumace Viclat Brarut Ranfla Cirarv
## 0.65 0.10 0.65 0.90 0.25 1.25 0.90 0.20 2.45 0.70 0.10
## Hyprad Leoaut Potpal Poapra Calcus Tripra Trirep Antodo Salrep Achmil Poatri
## 0.45 2.70 0.20 2.40 0.50 0.45 2.35 1.05 0.55 0.80 3.15
## Chealb Elyrep Sagpro Plalan Agrsto Lolper Alogen Brohor
## 0.05 1.30 1.00 1.30 2.40 2.90 1.80 0.75
rowSums(dunebio)
## 2 13 4 16 6 1 8 5 17 15 10 11 9 18 3 20 14 19 12 7
## 42 33 45 33 48 18 40 43 15 23 43 32 42 27 40 31 24 31 35 40
rowMeans(dunebio)
## 2 13 4 16 6 1 8 5
## 1.4000000 1.1000000 1.5000000 1.1000000 1.6000000 0.6000000 1.3333333 1.4333333
## 17 15 10 11 9 18 3 20
## 0.5000000 0.7666667 1.4333333 1.0666667 1.4000000 0.9000000 1.3333333 1.0333333
## 14 19 12 7
## 0.8000000 1.0333333 1.1666667 1.3333333
mediansite <- function(x){
cat("The median number of species is", apply(x, 2, median), "and for sites is", apply(x, 1, median))
}
mediansite(dunebio)
## The median number of species is 0 0 0 0 0 0 0 0 2 0 0 0 2 0 3 0 0 2 0 0 0 4 0 0 0 0 1.5 2 0 0 and for sites is 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
library(vegan)
## Warning: package 'vegan' was built under R version 4.3.2
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.3.2
##
## Attaching package: 'permute'
## The following object is masked from 'package:spaMM':
##
## how
## Loading required package: lattice
## This is vegan 2.6-4
dunebior <- decostand(dunebio, method = "total")
rowSums(dunebior)
## 2 13 4 16 6 1 8 5 17 15 10 11 9 18 3 20 14 19 12 7
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
summary(dunebior)
## Belper Empnig Junbuf Junart
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.01665 Mean :0.003226 Mean :0.01752 Mean :0.02728
## 3rd Qu.:0.04496 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.02273
## Max. :0.07407 Max. :0.064516 Max. :0.11429 Max. :0.13043
## Airpra Elepal Rumace Viclat
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01151 Mean :0.04278 Mean :0.02105 Mean :0.00614
## 3rd Qu.:0.00000 3rd Qu.:0.02500 3rd Qu.:0.01190 3rd Qu.:0.00000
## Max. :0.13333 Max. :0.24242 Max. :0.12500 Max. :0.06250
## Brarut Ranfla Cirarv Hyprad
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.03333 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.05000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.07213 Mean :0.02353 Mean :0.002222 Mean :0.01786
## 3rd Qu.:0.12216 3rd Qu.:0.05265 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.22222 Max. :0.12903 Max. :0.044444 Max. :0.16129
## Leoaut Potpal Poapra Calcus
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.05536 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.06977 Median :0.000000 Median :0.07778 Median :0.00000
## Mean :0.08170 Mean :0.008514 Mean :0.06960 Mean :0.01772
## 3rd Qu.:0.09498 3rd Qu.:0.000000 3rd Qu.:0.10000 3rd Qu.:0.00000
## Max. :0.19355 Max. :0.086957 Max. :0.22222 Max. :0.16667
## Tripra Trirep Antodo Salrep
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.03816 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.05530 Median :0.00000 Median :0.00000
## Mean :0.01003 Mean :0.06625 Mean :0.03471 Mean :0.01846
## 3rd Qu.:0.00000 3rd Qu.:0.08772 3rd Qu.:0.05312 3rd Qu.:0.00000
## Max. :0.10417 Max. :0.25000 Max. :0.26667 Max. :0.16129
## Achmil Poatri Chealb Elyrep
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.09651 Median :0.000000 Median :0.00000
## Mean :0.02458 Mean :0.08232 Mean :0.001515 Mean :0.03711
## 3rd Qu.:0.04738 3rd Qu.:0.12054 3rd Qu.:0.000000 3rd Qu.:0.08992
## Max. :0.13333 Max. :0.27273 Max. :0.030303 Max. :0.22222
## Sagpro Plalan Agrsto Lolper
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.03571 Median :0.06085
## Mean :0.02714 Mean :0.03767 Mean :0.07145 Mean :0.08353
## 3rd Qu.:0.05265 3rd Qu.:0.09635 3rd Qu.:0.15396 3rd Qu.:0.12863
## Max. :0.11429 Max. :0.13333 Max. :0.21212 Max. :0.38889
## Alogen Brohor
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000
## Mean :0.04824 Mean :0.01757
## 3rd Qu.:0.08387 3rd Qu.:0.01163
## Max. :0.22857 Max. :0.09524
dunebio_range <- decostand(dunebio, method = "range")
dunebio_stand <- decostand(dunebio, method = "standardize")
summary(dunebior)
## Belper Empnig Junbuf Junart
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.01665 Mean :0.003226 Mean :0.01752 Mean :0.02728
## 3rd Qu.:0.04496 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.02273
## Max. :0.07407 Max. :0.064516 Max. :0.11429 Max. :0.13043
## Airpra Elepal Rumace Viclat
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01151 Mean :0.04278 Mean :0.02105 Mean :0.00614
## 3rd Qu.:0.00000 3rd Qu.:0.02500 3rd Qu.:0.01190 3rd Qu.:0.00000
## Max. :0.13333 Max. :0.24242 Max. :0.12500 Max. :0.06250
## Brarut Ranfla Cirarv Hyprad
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.03333 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.05000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.07213 Mean :0.02353 Mean :0.002222 Mean :0.01786
## 3rd Qu.:0.12216 3rd Qu.:0.05265 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.22222 Max. :0.12903 Max. :0.044444 Max. :0.16129
## Leoaut Potpal Poapra Calcus
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.05536 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.06977 Median :0.000000 Median :0.07778 Median :0.00000
## Mean :0.08170 Mean :0.008514 Mean :0.06960 Mean :0.01772
## 3rd Qu.:0.09498 3rd Qu.:0.000000 3rd Qu.:0.10000 3rd Qu.:0.00000
## Max. :0.19355 Max. :0.086957 Max. :0.22222 Max. :0.16667
## Tripra Trirep Antodo Salrep
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.03816 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.05530 Median :0.00000 Median :0.00000
## Mean :0.01003 Mean :0.06625 Mean :0.03471 Mean :0.01846
## 3rd Qu.:0.00000 3rd Qu.:0.08772 3rd Qu.:0.05312 3rd Qu.:0.00000
## Max. :0.10417 Max. :0.25000 Max. :0.26667 Max. :0.16129
## Achmil Poatri Chealb Elyrep
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.09651 Median :0.000000 Median :0.00000
## Mean :0.02458 Mean :0.08232 Mean :0.001515 Mean :0.03711
## 3rd Qu.:0.04738 3rd Qu.:0.12054 3rd Qu.:0.000000 3rd Qu.:0.08992
## Max. :0.13333 Max. :0.27273 Max. :0.030303 Max. :0.22222
## Sagpro Plalan Agrsto Lolper
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.03571 Median :0.06085
## Mean :0.02714 Mean :0.03767 Mean :0.07145 Mean :0.08353
## 3rd Qu.:0.05265 3rd Qu.:0.09635 3rd Qu.:0.15396 3rd Qu.:0.12863
## Max. :0.11429 Max. :0.13333 Max. :0.21212 Max. :0.38889
## Alogen Brohor
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000
## Mean :0.04824 Mean :0.01757
## 3rd Qu.:0.08387 3rd Qu.:0.01163
## Max. :0.22857 Max. :0.09524
rowSums(dunebior)
## 2 13 4 16 6 1 8 5 17 15 10 11 9 18 3 20 14 19 12 7
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Write a function to calculate the observed richness of a vector representing the abundances of different species in a community.
richness <- function(x){
return(specnumber(decostand(x, method = "total")))
}
Write a function to calculate the Shannon-Wiener diversity index of a vector representing the abundances of different species in a community.
sdiv <- function(x){
return(diversity(decostant(x, method = "total"), index = "shannon"))
}
Write a function that calculates both the observed richness and the Shannon-Wiener diversity index for each community in a matrix, and writes an output table with the results. You can use the vegan built-in functions. Test your function with the dune_bio.txt dataset.
richdiv <- function (x){
rich <- specnumber(decostand(x, method = "total"))
sdiv <- diversity(decostand(x, method = "total"), index = "shannon")
resul <- data.frame(rich = rich, sdiv = sdiv)
return(resul)
}
resul <- richdiv(dunebio)
resul
## rich sdiv
## 2 10 2.252516
## 13 10 2.099638
## 4 13 2.426779
## 16 8 1.959795
## 6 11 2.345946
## 1 5 1.440482
## 8 12 2.434898
## 5 14 2.544421
## 17 7 1.876274
## 15 8 1.979309
## 10 12 2.398613
## 11 9 2.106065
## 9 13 2.493568
## 18 9 2.079387
## 3 10 2.193749
## 20 8 2.048270
## 14 7 1.863680
## 19 9 2.134024
## 12 9 2.114495
## 7 13 2.471733
write.table(resul, file = "richnessdivresults.txt", sep = "\t", quote = FALSE, row.names = TRUE)
Make a rank-abundance curve using the second site (13) of the dune_bio.txt dataset. Fit a lognormal model to the data.
duneb <- read.table("dune_bio.txt", sep = "\t", header = T, row.names = 1)
plot(radfit(duneb[2,]))
radfit(duneb[2,])
##
## RAD models, family poisson
## No. of species 10, total abundance 33
##
## par1 par2 par3 Deviance AIC BIC
## Null 3.91659 32.99179 32.99179
## Preemption 0.20971 2.02192 33.09712 33.39970
## Lognormal 0.99822 0.71061 1.13244 34.20764 34.81281
## Zipf 0.27866 -0.79375 0.79058 33.86578 34.47095
## Mandelbrot 0.40348 -0.95679 0.51608 0.75881 35.83401 36.74176
plot(rad.lognormal(duneb[2,]), lty=2, lwd=3)
rad.lognormal(duneb[2,])
##
## RAD model: Log-Normal
## Family: poisson
## No. of species: 10
## Total abundance: 33
##
## log.mu log.sigma Deviance AIC BIC
## 0.9982177 0.7106063 1.1324449 34.2076395 34.8128097
I followed a similar format of this post to a similar one I’ve seen when checking my answers…
Create a Euclidean distance matrix after standardization using the first (A1), second (Moisture) and fifth (Manure) variables of dune_env.txt. Then create a Bray-Curtis distance matrix using dune_bio.txt. Perform a Mantel test on both distance matrices and plot the relationship.
dunebio <- read.table("dune_bio.txt", sep = "\t", header = T, row.names = 1)
duneenv <- read.table("dune_env.txt", sep = "\t", header = T, row.names = 1)
duneenv_euc <- vegdist(decostand(duneenv[, c(1, 2, 5)], method = "standardize"), method = "euclidean")
dunebio_bray <- vegdist(dunebio, method = "bray")
mantel(duneenv_euc, dunebio_bray)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = duneenv_euc, ydis = dunebio_bray)
##
## Mantel statistic r: 0.5496
## Significance: 0.001
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.136 0.179 0.218 0.259
## Permutation: free
## Number of permutations: 999
plot(duneenv_euc, dunebio_bray)
Make dendrograms of the results of hierarchical clustering using the single, average and complete methods. Use the dune_bio.txt dataset after creating a Bray-Curtis distance matrix.
dunebio <- read.table("dune_bio.txt", sep = "\t", header = T, row.names = 1)
dunebio_veg <- vegdist(dunebio, method = "bray")
Single Method:
singledunebio_veg <- hclust(dunebio_veg, method = "single")
plot(singledunebio_veg, main = "Single")
Average Method:
averagedunebio_veg <- hclust(dunebio_veg, method = "average")
plot(averagedunebio_veg, main = "Average")
Complete Method:
completedunebio_veg <- hclust(dunebio_veg, method = "complete")
plot(completedunebio_veg, main = "Complete")
Perform k-means partitions from 2 to 6 on the dune_bio.txt dataset and select the best partition using the Calinski criterion. Visualize the results.
dunebio <- read.table("dune_bio.txt", sep = "\t", header = T, row.names = 1)
dunebio_kmeans <- cascadeKM(dunebio, inf.gr = 2, sup.gr = 6)
dunebio_kmeans$results
## 2 groups 3 groups 4 groups 5 groups 6 groups
## SSE 1218.500000 950.516667 777.333333 676.500000 593.750000
## calinski 5.611243 5.793253 5.633047 5.110033 4.737482
Calinski max value is best at 3 groups, at 5.79
plot(dunebio_kmeans, sortg = T)
Load the varechem dataset within the R package vegan. This data frame collects soil characteristics. Given the nature of this dataset perform a PCA or a CA ordination analysis.
library(vegan)
data(varechem)
head(varechem)
## N P K Ca Mg S Al Fe Mn Zn Mo Baresoil Humdepth
## 18 19.8 42.1 139.9 519.4 90.0 32.3 39.0 40.9 58.1 4.5 0.3 43.9 2.2
## 15 13.4 39.1 167.3 356.7 70.7 35.2 88.1 39.0 52.4 5.4 0.3 23.6 2.2
## 24 20.2 67.7 207.1 973.3 209.1 58.1 138.0 35.4 32.1 16.8 0.8 21.2 2.0
## 27 20.6 60.8 233.7 834.0 127.2 40.7 15.4 4.4 132.0 10.7 0.2 18.7 2.9
## 23 23.8 54.5 180.6 777.0 125.8 39.5 24.2 3.0 50.1 6.6 0.3 46.0 3.0
## 19 22.8 40.9 171.4 691.8 151.4 40.8 104.8 17.6 43.6 9.1 0.4 40.5 3.8
## pH
## 18 2.7
## 15 2.8
## 24 3.0
## 27 2.8
## 23 2.7
## 19 2.7
varechemp <- rda(varechem, scale = T)
summary(varechemp)
##
## Call:
## rda(X = varechem, scale = T)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 14 1
## Unconstrained 14 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 5.1916 3.1928 1.6855 1.06899 0.81599 0.70581 0.43641
## Proportion Explained 0.3708 0.2281 0.1204 0.07636 0.05829 0.05042 0.03117
## Cumulative Proportion 0.3708 0.5989 0.7193 0.79564 0.85392 0.90434 0.93551
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Eigenvalue 0.36884 0.1707 0.14953 0.08526 0.06986 0.035057 0.023615
## Proportion Explained 0.02635 0.0122 0.01068 0.00609 0.00499 0.002504 0.001687
## Cumulative Proportion 0.96185 0.9740 0.98473 0.99082 0.99581 0.998313 1.000000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 4.236078
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## N 0.2441 0.2516 -0.23818 0.92846 -0.359554 0.30319
## P -0.9181 -0.4194 0.13356 0.09649 0.224909 0.07938
## K -0.9369 -0.3272 -0.06261 0.25051 0.180899 -0.28683
## Ca -0.9280 -0.1711 0.47982 -0.02138 -0.241479 -0.09536
## Mg -0.9087 -0.1978 0.12446 -0.04430 -0.497428 -0.10781
## S -0.8576 -0.6002 -0.31620 -0.01652 0.071629 -0.08779
## Al 0.2980 -0.9720 -0.39009 0.09092 0.005386 -0.19467
## Fe 0.5236 -0.7748 -0.23063 0.37187 -0.023377 -0.32292
## Mn -0.7418 0.3908 0.13114 0.44346 0.501064 0.06776
## Zn -0.8926 -0.3277 0.06142 -0.06121 -0.153677 0.51653
## Mo -0.1072 -0.4625 -0.85720 -0.27258 -0.030063 0.39721
## Baresoil -0.4218 0.6387 -0.40089 -0.07389 -0.416610 -0.31060
## Humdepth -0.6070 0.6825 -0.41674 0.02975 -0.047738 -0.15923
## pH 0.4286 -0.6517 0.66384 0.07599 -0.265374 0.05070
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## 18 0.05126 0.84085 -0.190142 -0.40431 0.101518 -1.01997
## 15 0.20452 0.37397 0.002978 -1.06030 1.175734 -0.92257
## 24 -1.53647 -1.25830 -0.012170 -0.86690 -1.854372 1.74375
## 27 -1.25643 0.51345 0.743928 0.63835 1.097881 0.15631
## 23 -0.71203 0.86247 -0.203389 -0.05971 -0.786196 -0.80080
## 19 -0.76179 0.73785 -0.761645 -0.31728 -1.303297 -0.62264
## 22 -0.30340 0.86304 0.188484 0.54758 -0.078064 0.24567
## 16 0.62797 0.76436 -0.124301 -0.26047 0.009044 -0.02807
## 28 -1.13923 0.44909 0.229104 1.61582 0.842871 0.60852
## 13 -0.62483 -0.85407 -1.259755 1.38195 -0.157593 -1.04794
## 14 -0.04206 0.60916 -0.719743 -0.73109 -0.230147 0.25505
## 20 -0.93747 0.20753 -0.689713 0.62198 -0.282188 0.33232
## 25 -0.34451 0.90128 0.710714 0.64127 1.214897 0.48173
## 7 1.50502 -0.22114 -0.495604 0.87068 -0.769266 -0.25733
## 5 1.40889 0.60035 0.911463 0.71133 -0.488111 2.45195
## 6 1.30776 0.03604 -0.243884 -0.87792 0.543259 0.46876
## 3 1.64967 -0.82755 -0.343406 1.40028 -0.546374 -0.19580
## 4 -0.26711 -1.65198 -1.744251 -0.60763 0.947492 0.46203
## 2 0.59653 -1.21610 -0.200030 0.72815 0.799208 -0.76013
## 9 0.03271 -0.69445 -0.350028 -1.35247 0.717140 0.82965
## 12 0.47944 0.26377 0.184677 -1.27744 0.573929 0.07253
## 10 -0.32993 -0.95483 1.670469 -0.51343 0.819138 -0.48682
## 11 -0.09921 -1.52318 2.493913 -0.09044 -1.092296 -1.10654
## 21 0.49069 1.17838 0.202333 -0.73801 -1.254205 -0.85966
Variation/Proportion explained by first two axes: 0.3708 + 0.2281 = 0.5989 (about 60%)
library(stats)
library(vegan)
screeplot(varechemp)
plot(varechemp, display = "sites", xlab = "PC1 (37%", ylab = "PC2 (23%)")
biplot(varechemp, scaling = 1, xlab="PC1 (37%)", ylab="PC2 (23%)")
The dune_bio.txt dataset corresponds to species abundances. Given the nature of this dataset perform a PCA or a CA ordination analysis.
dunebio <- read.table("dune_bio.txt", sep = "\t", header = T, row.names = 1)
dunebioca <- cca(dunebio)
dunebioca
## Call: cca(X = dunebio)
##
## Inertia Rank
## Total 2.115
## Unconstrained 2.115 19
## Inertia is scaled Chi-square
##
## Eigenvalues for unconstrained axes:
## CA1 CA2 CA3 CA4 CA5 CA6 CA7 CA8
## 0.5360 0.4001 0.2598 0.1760 0.1448 0.1079 0.0925 0.0809
## (Showing 8 of 19 unconstrained eigenvalues)
summary(dunebioca)
##
## Call:
## cca(X = dunebio)
##
## Partitioning of scaled Chi-square:
## Inertia Proportion
## Total 2.115 1
## Unconstrained 2.115 1
##
## Eigenvalues, and their contribution to the scaled Chi-square
##
## Importance of components:
## CA1 CA2 CA3 CA4 CA5 CA6 CA7
## Eigenvalue 0.5360 0.4001 0.2598 0.17598 0.14476 0.10791 0.09247
## Proportion Explained 0.2534 0.1892 0.1228 0.08319 0.06844 0.05102 0.04372
## Cumulative Proportion 0.2534 0.4426 0.5654 0.64858 0.71702 0.76804 0.81175
## CA8 CA9 CA10 CA11 CA12 CA13 CA14
## Eigenvalue 0.08091 0.07332 0.05630 0.04826 0.04125 0.03523 0.020529
## Proportion Explained 0.03825 0.03466 0.02661 0.02282 0.01950 0.01665 0.009705
## Cumulative Proportion 0.85000 0.88467 0.91128 0.93410 0.95360 0.97025 0.979955
## CA15 CA16 CA17 CA18 CA19
## Eigenvalue 0.014911 0.009074 0.007938 0.007002 0.003477
## Proportion Explained 0.007049 0.004290 0.003753 0.003310 0.001644
## Cumulative Proportion 0.987004 0.991293 0.995046 0.998356 1.000000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
##
##
## Species scores
##
## CA1 CA2 CA3 CA4 CA5 CA6
## Belper 0.50018 -0.35503 0.15239 -0.704153 -0.058546 -0.07308
## Empnig 0.69027 3.26420 -1.95716 -0.176936 -0.073518 0.16083
## Junbuf -0.08157 -0.68074 -1.00545 1.078390 0.268360 -0.24168
## Junart -1.27580 0.09963 0.09320 0.005536 0.289410 0.78247
## Airpra 1.00434 3.06749 -1.33773 0.194305 -1.081813 0.53699
## Elepal -1.76383 0.34562 0.57336 -0.002976 -0.332396 0.14688
## Rumace 0.65289 -0.25525 0.59728 1.160164 0.255849 0.32730
## Viclat 0.61893 0.37140 0.46057 -1.000375 1.162652 -1.44971
## Brarut -0.18222 0.26477 0.16606 0.064009 0.576334 -0.07741
## Ranfla -1.55886 0.30700 0.29765 0.046974 -0.008747 0.14744
## Cirarv 0.05647 -0.76398 -0.91793 -1.175919 -0.384024 0.13985
## Hyprad 0.85408 2.52821 -1.13951 -0.175115 -0.311874 -0.11177
## Leoaut 0.19566 0.38884 -0.03975 -0.130392 0.141232 -0.23717
## Potpal -1.91690 0.52150 1.18215 -0.021738 -1.359988 -1.31207
## Poapra 0.38919 -0.32999 0.02015 -0.358371 0.079296 0.05165
## Calcus -1.95199 0.56743 0.85948 -0.098969 -0.556737 -0.23282
## Tripra 0.88116 -0.09792 1.18172 1.282429 0.325706 0.33388
## Trirep 0.07666 -0.02032 0.20594 0.026462 -0.186748 -0.53957
## Antodo 0.96676 1.08361 0.17188 0.459788 -0.607533 0.30425
## Salrep -0.61035 1.54868 -0.04970 -0.607136 1.429729 0.55183
## Achmil 0.90859 0.08461 0.58636 -0.008919 -0.660183 0.18877
## Poatri 0.18185 -0.53997 -0.23388 0.178834 -0.155342 0.07584
## Chealb -0.42445 -0.84402 -1.59029 1.248755 -0.207480 -0.87566
## Elyrep 0.37074 -0.74148 -0.26238 -0.566308 -0.270122 0.72624
## Sagpro -0.00364 0.01719 -1.11570 0.066981 0.186654 -0.32463
## Plalan 0.84058 0.24886 0.78066 0.371149 0.271377 -0.11989
## Agrsto -0.93378 -0.20651 -0.28165 0.024293 -0.139326 0.02256
## Lolper 0.50272 -0.35955 0.21821 -0.474727 0.101494 0.01594
## Alogen -0.40088 -0.61839 -0.85013 0.346740 0.016509 -0.10169
## Brohor 0.65762 -0.40634 0.30685 -0.496751 -0.561358 -0.07004
##
##
## Site scores (weighted averages of species scores)
##
## CA1 CA2 CA3 CA4 CA5 CA6
## 2 0.63268 -0.6958357 0.09708 -1.18695 -0.97686 -0.06575
## 13 -0.42445 -0.8440195 -1.59029 1.24876 -0.20748 -0.87566
## 4 0.05647 -0.7639784 -0.91793 -1.17592 -0.38402 0.13985
## 16 -2.00229 0.1090627 0.33414 0.33760 -0.50097 0.76159
## 6 0.85633 -0.0005408 1.39735 1.59909 0.65494 0.19386
## 1 0.81167 -1.0826714 0.14479 -2.10665 -0.39287 1.83462
## 8 -0.76268 -0.2968459 -0.35648 -0.10772 0.17507 0.36444
## 5 0.95293 -0.1846015 0.95609 0.86853 -0.34552 0.98333
## 17 1.47545 2.7724102 -0.40859 0.75117 -2.59425 1.10122
## 15 -1.91384 0.5079036 0.96567 0.04227 -0.50681 -0.19370
## 10 0.87885 -0.0353136 0.82987 -0.68053 -0.75438 -0.81070
## 11 0.64223 0.4440332 0.17371 -1.09684 1.37462 -2.00626
## 9 -0.09693 -0.7864314 -0.86492 0.40090 0.28704 1.02783
## 18 0.31241 0.6328355 0.66501 -1.12728 2.65575 -0.97565
## 3 0.10148 -0.9128732 -0.68815 -0.68137 -0.08709 0.28678
## 20 -1.94438 1.0688809 0.66595 -0.55317 1.59606 1.70292
## 14 -1.91996 0.5351062 1.39863 -0.08575 -2.21317 -2.43044
## 19 0.69027 3.2642026 -1.95716 -0.17694 -0.07352 0.16083
## 12 -0.28557 -0.6656161 -1.64423 1.71496 0.65381 -1.17376
## 7 0.87149 -0.2547040 0.86830 0.90468 0.17385 0.03446
Variation/proportion explained by the first two axes: 0.2534 + 0.1892 = 0.4426 (44%)
screeplot(dunebioca)
plot(dunebioca, display = "sites", xlab="CA1 (25%)", ylab="CA2 (19%)")
Perform a nonmetric multidimensional scaling (NMDS) on the dune_bio.txt dataset after calculating Bray-Curtis distances among sites.
library(vegan)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks maps::map()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ dplyr::recode() masks car::recode()
## ✖ dplyr::select() masks MASS::select()
## ✖ purrr::some() masks car::some()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ✖ ape::where() masks dplyr::where()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
dunebio <- read.table("dune_bio.txt", sep = "\t", header = T, row.names = 1)
dunebio_nmds <- metaMDS(dunebio, distance = "bray", k = 2)
## Run 0 stress 0.1192678
## Run 1 stress 0.1192678
## ... Procrustes: rmse 5.738836e-05 max resid 0.0001745784
## ... Similar to previous best
## Run 2 stress 0.1183186
## ... New best solution
## ... Procrustes: rmse 0.02027036 max resid 0.06496194
## Run 3 stress 0.1808911
## Run 4 stress 0.1183186
## ... Procrustes: rmse 2.629844e-05 max resid 8.013126e-05
## ... Similar to previous best
## Run 5 stress 0.1192679
## Run 6 stress 0.1808912
## Run 7 stress 0.1192678
## Run 8 stress 0.1192678
## Run 9 stress 0.1980521
## Run 10 stress 0.1183186
## ... Procrustes: rmse 1.537974e-06 max resid 3.395164e-06
## ... Similar to previous best
## Run 11 stress 0.1183186
## ... Procrustes: rmse 5.750454e-06 max resid 1.834448e-05
## ... Similar to previous best
## Run 12 stress 0.1192678
## Run 13 stress 0.1192678
## Run 14 stress 0.1939202
## Run 15 stress 0.19015
## Run 16 stress 0.2075713
## Run 17 stress 0.1183186
## ... Procrustes: rmse 8.37902e-06 max resid 2.586816e-05
## ... Similar to previous best
## Run 18 stress 0.1192678
## Run 19 stress 0.1886532
## Run 20 stress 0.1192679
## *** Best solution repeated 4 times
dunebio_nmds$stress
## [1] 0.1183186
Stress: 0.1183186
stressplot(dunebio_nmds)
plot(dunebio_nmds, type = "t", display = "sites")
duneenv <- read.table("dune_env.txt", sep = "\t", header = T, row.names = 1)
row.names(duneenv) == rownames(dunebio)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE
duneenv$Axis01 <- dunebio_nmds$points[, 1]
duneenv$Axis02 <- dunebio_nmds$points[, 2]
duneenv$Richness <- specnumber(dunebio)
ggplot(duneenv, aes(y = Axis02, x = Axis01)) +
geom_point(aes(fill = Management, size = Richness), alpha = 0.6, pch = 21)
Perform a constrained correspondence analysis (CCA) on the dune_bio.txt dataset using the first (A1), second (Moisture) and fifth (Manure) variables of dune_env.txt as explanatory.
dunebio <- read.table("dune_bio.txt", header = T, sep = "\t", row.names = 1)
duneenv <- read.table("dune_env.txt", header = T, sep = "\t", row.names = 1)
dunecca <- cca(dunebio ~ A1 + Moisture + Manure, data = duneenv)
summary(dunecca)
##
## Call:
## cca(formula = dunebio ~ A1 + Moisture + Manure, data = duneenv)
##
## Partitioning of scaled Chi-square:
## Inertia Proportion
## Total 2.1153 1.0000
## Constrained 0.7692 0.3637
## Unconstrained 1.3460 0.6363
##
## Eigenvalues, and their contribution to the scaled Chi-square
##
## Importance of components:
## CCA1 CCA2 CCA3 CA1 CA2 CA3 CA4
## Eigenvalue 0.4264 0.2347 0.10812 0.3124 0.2101 0.17518 0.13748
## Proportion Explained 0.2016 0.1110 0.05112 0.1477 0.0993 0.08282 0.06499
## Cumulative Proportion 0.2016 0.3125 0.36366 0.5114 0.6107 0.69348 0.75847
## CA5 CA6 CA7 CA8 CA9 CA10 CA11
## Eigenvalue 0.09442 0.09295 0.07380 0.06360 0.05541 0.04663 0.021117
## Proportion Explained 0.04464 0.04394 0.03489 0.03007 0.02620 0.02204 0.009983
## Cumulative Proportion 0.80311 0.84705 0.88194 0.91201 0.93820 0.96025 0.970230
## CA12 CA13 CA14 CA15 CA16
## Eigenvalue 0.019730 0.016183 0.012531 0.009960 0.004566
## Proportion Explained 0.009327 0.007651 0.005924 0.004709 0.002159
## Cumulative Proportion 0.979558 0.987208 0.993132 0.997841 1.000000
##
## Accumulated constrained eigenvalues
## Importance of components:
## CCA1 CCA2 CCA3
## Eigenvalue 0.4264 0.2347 0.1081
## Proportion Explained 0.5543 0.3051 0.1406
## Cumulative Proportion 0.5543 0.8594 1.0000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
##
##
## Species scores
##
## CCA1 CCA2 CCA3 CA1 CA2 CA3
## Belper 0.72508 0.09015 -0.28678 0.14997 0.08941 -0.64602
## Empnig -0.92386 -1.49359 1.29239 -2.87128 -1.31651 -0.74936
## Junbuf -0.50547 0.17496 0.35707 -0.33578 1.58298 0.40514
## Junart -1.08445 -0.22093 0.33048 0.84568 -0.15861 0.23372
## Airpra -0.32923 -1.52131 0.75777 -2.80560 -1.49278 -0.23227
## Elepal -1.39255 0.01967 -0.39847 0.90296 -0.89564 0.18234
## Rumace 0.56966 0.07756 -0.44317 -0.23147 0.33959 1.25112
## Viclat 0.95631 -1.05874 -0.13224 0.42277 0.16218 -0.84521
## Brarut -0.05861 -0.29093 -0.14295 0.17325 -0.10170 0.08514
## Ranfla -1.30695 -0.17793 -0.06749 0.84091 -0.45935 0.18497
## Cirarv 0.40316 1.49565 0.09657 -0.17279 -0.57950 -1.51909
## Hyprad -0.14257 -1.37899 0.68907 -2.13762 -1.11990 -0.54595
## Leoaut 0.10805 -0.39424 -0.03343 -0.21126 -0.05153 -0.18568
## Potpal -1.82080 -0.59463 -2.50587 0.53778 -0.46936 -0.51885
## Poapra 0.47298 0.19818 0.15806 0.08959 0.09919 -0.21588
## Calcus -1.32589 -0.43845 -0.22643 1.29121 -0.99188 0.18948
## Tripra 0.94282 0.14003 -0.52488 -0.16097 -0.18159 1.70499
## Trirep 0.03691 -0.24820 -0.22610 0.03138 0.23464 -0.04178
## Antodo 0.42848 -0.66783 -0.01590 -1.13407 -0.58011 0.56583
## Salrep -0.38937 -1.51269 0.78060 0.47447 -0.69625 -0.26139
## Achmil 0.84528 -0.28199 -0.08034 -0.23663 -0.15351 0.33457
## Poatri 0.09591 0.48672 0.08195 -0.10052 0.39496 0.08346
## Chealb -1.33134 1.08879 0.17910 -0.92504 1.53277 0.26877
## Elyrep 0.45995 0.49110 0.07018 0.09270 0.32202 -0.53794
## Sagpro -0.36673 0.22891 0.41200 -0.63169 0.40176 -0.48274
## Plalan 0.89463 -0.37036 -0.37142 -0.16434 -0.15598 0.65298
## Agrsto -0.80663 0.42851 -0.03513 0.31737 -0.09257 -0.16535
## Lolper 0.68266 0.25866 0.13366 0.17559 -0.06653 -0.20307
## Alogen -0.52884 0.74865 0.28721 -0.11872 0.57814 -0.07754
## Brohor 0.77674 0.11771 -0.03195 0.07993 0.06235 -0.29678
##
##
## Site scores (weighted averages of species scores)
##
## CCA1 CCA2 CCA3 CA1 CA2 CA3
## 2 0.8544 0.57195 0.04460 0.44645 0.473383 -1.10419
## 13 -0.7656 1.43234 1.04660 -0.92504 1.532773 0.26877
## 4 0.1565 1.36916 0.67602 -0.17279 -0.579503 -1.51909
## 16 -2.0459 0.46834 -0.70504 0.99503 -1.669766 0.75493
## 6 1.0571 -0.29557 -1.50936 -0.06506 -0.216688 2.09457
## 1 1.2439 1.24492 0.99278 0.48730 -0.708935 -1.13256
## 8 -0.8113 0.66762 0.54772 0.28201 -0.298269 0.31177
## 5 1.1247 -0.04619 -1.17587 -0.61920 -0.008872 0.95018
## 17 0.7722 -2.94478 1.24411 -2.70708 -1.757175 0.54337
## 15 -2.0065 -0.48090 -2.87633 0.26585 -0.401900 -0.57133
## 10 1.0958 -0.42412 -0.49762 0.26374 0.332785 -0.08742
## 11 0.7816 -0.90608 0.28150 0.26599 0.008900 -1.12676
## 9 -0.1817 0.86595 0.91242 0.34851 2.059004 0.10377
## 18 0.6053 -1.51952 -0.07324 0.89534 0.298138 -1.03991
## 3 0.2093 1.35237 0.66038 -0.06196 -0.171454 -0.84659
## 20 -1.8996 -1.40266 0.55715 2.22940 -0.920734 0.49851
## 14 -1.9462 -0.67177 -3.54931 0.80971 -0.536813 -0.46637
## 19 -0.2690 -3.39538 3.20303 -2.87128 -1.316513 -0.74936
## 12 -0.6252 1.06204 0.88731 -0.77476 2.069361 0.26842
## 7 1.0498 -0.01449 -0.64118 0.05748 -0.266550 1.48584
##
##
## Site constraints (linear combinations of constraining variables)
##
## CCA1 CCA2 CCA3 CA1 CA2 CA3
## 2 1.0722 -0.15065 -0.02248 0.44645 0.473383 -1.10419
## 13 -1.3313 1.08879 0.17910 -0.92504 1.532773 0.26877
## 4 0.4032 1.49565 0.09657 -0.17279 -0.579503 -1.51909
## 16 -1.2912 1.04854 0.34917 0.99503 -1.669766 0.75493
## 6 0.9651 -0.04331 -0.47601 -0.06506 -0.216688 2.09457
## 1 1.0995 1.27129 0.50141 0.48730 -0.708935 -1.13256
## 8 -1.0904 0.84728 1.19953 0.28201 -0.298269 0.31177
## 5 0.6973 0.22504 -1.60982 -0.61920 -0.008872 0.95018
## 17 0.5627 -1.56290 -0.04417 -2.70708 -1.757175 0.54337
## 15 -1.9681 -0.44704 -3.12947 0.26585 -0.401900 -0.57133
## 10 0.6232 -0.89889 0.41620 0.26374 0.332785 -0.08742
## 11 1.1054 -0.90857 -0.08601 0.26599 0.008900 -1.12676
## 9 -0.4481 -0.77218 0.96709 0.34851 2.059004 0.10377
## 18 0.9913 -1.51891 -0.77314 0.89534 0.298138 -1.03991
## 3 0.3898 1.50907 0.03988 -0.06196 -0.171454 -0.84659
## 20 -0.8971 -1.52042 1.40577 2.22940 -0.920734 0.49851
## 14 -1.6735 -0.74222 -1.88228 0.80971 -0.536813 -0.46637
## 19 -0.9239 -1.49359 1.29239 -2.87128 -1.316513 -0.74936
## 12 -0.7625 0.26751 -0.15988 -0.77476 2.069361 0.26842
## 7 1.1327 0.51336 0.43788 0.05748 -0.266550 1.48584
##
##
## Biplot scores for constraining variables
##
## CCA1 CCA2 CCA3 CA1 CA2 CA3
## A1 -0.6048 0.04018 -0.7954 0 0 0
## Moisture -0.9746 -0.06076 0.2158 0 0 0
## Manure 0.2059 0.96205 0.1791 0 0 0
0.2016 + 0.1110
## [1] 0.3126
Proportion explained by the two first constrained axes: 0.2016 + 0.1110 = 0.3126 (31%)
RsquareAdj(dunecca)$adj.r.squared
## [1] 0.2458976
Adjusted R^2: 0.2431963
anova(dunecca)
## Permutation test for cca under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = dunebio ~ A1 + Moisture + Manure, data = duneenv)
## Df ChiSquare F Pr(>F)
## Model 3 0.76925 3.048 0.001 ***
## Residual 16 1.34602
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Number of permutations: 999
anova(dunecca, by = "margin")
## Permutation test for cca under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = dunebio ~ A1 + Moisture + Manure, data = duneenv)
## Df ChiSquare F Pr(>F)
## A1 1 0.13047 1.5508 0.113
## Moisture 1 0.30886 3.6714 0.001 ***
## Manure 1 0.23418 2.7837 0.008 **
## Residual 16 1.34602
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Manure and Moisture are significant
plot(dunecca, scaling = 2, xlab = "CCA1 (20%)", ylab = "CCA2 (11%)")
Perform a permutational multivariate analysis of variance (PERMANOVA) with 9,999 permutations using the dune_bio.txt dataset after calculating Bray-Curtis distances and the first (A1), second (Moisture) and fifth (Manure) variables of dune_env.txt as explanatory.
dunebio <- read.table("dune_bio.txt", sep = "\t", header = T)
duneenv <- read.table("dune_env.txt", sep = "\t", header = T)
adonis2(vegdist(dunebio, method = "bray") ~ A1 + Moisture + Manure, data = duneenv, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = vegdist(dunebio, method = "bray") ~ A1 + Moisture + Manure, data = duneenv, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## A1 1 0.5107 0.15417 5.8578 1e-04 ***
## Moisture 1 0.7196 0.21722 8.2537 1e-04 ***
## Manure 1 0.6874 0.20752 7.8849 1e-04 ***
## Residual 16 1.3949 0.42109
## Total 19 3.3126 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A1, Moisture, and Manure appear to be significant
A1_R2: 0.15417, Moisture_R2: 0.21722, Manure_R2: 0.20752
adonis2(vegdist(dunebio, method = "bray") ~ Use/A1, data = duneenv, permutations = 9999, strate - duneenv$Use)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = vegdist(dunebio, method = "bray") ~ Use/A1, data = duneenv, permutations = 9999, method = strate - duneenv$Use)
## Df SumOfSqs R2 F Pr(>F)
## Use 2 0.4904 0.14805 1.9394 0.0773 .
## Use:A1 3 1.0520 0.31757 2.7733 0.0012 **
## Residual 14 1.7702 0.53438
## Total 19 3.3126 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
duneenv$Axis01 <- metaMDS(vegdist(dunebio, method = "bray"))$points[, 1]
## Run 0 stress 0.09032258
## Run 1 stress 0.1468973
## Run 2 stress 0.1727643
## Run 3 stress 0.09032258
## ... Procrustes: rmse 1.227991e-05 max resid 4.744843e-05
## ... Similar to previous best
## Run 4 stress 0.1562326
## Run 5 stress 0.09032258
## ... Procrustes: rmse 1.655116e-05 max resid 6.381774e-05
## ... Similar to previous best
## Run 6 stress 0.3767481
## Run 7 stress 0.09032258
## ... New best solution
## ... Procrustes: rmse 3.793767e-06 max resid 1.448536e-05
## ... Similar to previous best
## Run 8 stress 0.09032258
## ... Procrustes: rmse 6.83464e-06 max resid 2.584741e-05
## ... Similar to previous best
## Run 9 stress 0.09032258
## ... Procrustes: rmse 6.813383e-06 max resid 2.608483e-05
## ... Similar to previous best
## Run 10 stress 0.09032258
## ... Procrustes: rmse 5.998274e-06 max resid 2.277969e-05
## ... Similar to previous best
## Run 11 stress 0.09032258
## ... Procrustes: rmse 9.493568e-06 max resid 3.656018e-05
## ... Similar to previous best
## Run 12 stress 0.09032258
## ... New best solution
## ... Procrustes: rmse 1.980322e-06 max resid 5.594786e-06
## ... Similar to previous best
## Run 13 stress 0.09032258
## ... Procrustes: rmse 4.303221e-06 max resid 1.620247e-05
## ... Similar to previous best
## Run 14 stress 0.09032258
## ... Procrustes: rmse 5.150702e-06 max resid 1.85509e-05
## ... Similar to previous best
## Run 15 stress 0.09032258
## ... Procrustes: rmse 9.746486e-06 max resid 3.690835e-05
## ... Similar to previous best
## Run 16 stress 0.09032258
## ... Procrustes: rmse 1.899404e-05 max resid 7.316031e-05
## ... Similar to previous best
## Run 17 stress 0.09032258
## ... Procrustes: rmse 2.564366e-06 max resid 8.719522e-06
## ... Similar to previous best
## Run 18 stress 0.09032258
## ... Procrustes: rmse 3.504096e-06 max resid 7.521196e-06
## ... Similar to previous best
## Run 19 stress 0.2017008
## Run 20 stress 0.09032258
## ... Procrustes: rmse 1.023503e-05 max resid 3.912565e-05
## ... Similar to previous best
## *** Best solution repeated 8 times
duneenv$Axis02 <- metaMDS(vegdist(dunebio, method = "bray"))$points[, 2]
## Run 0 stress 0.09032258
## Run 1 stress 0.1468972
## Run 2 stress 0.09032258
## ... Procrustes: rmse 2.111676e-05 max resid 8.148843e-05
## ... Similar to previous best
## Run 3 stress 0.09032258
## ... New best solution
## ... Procrustes: rmse 1.153276e-05 max resid 4.449227e-05
## ... Similar to previous best
## Run 4 stress 0.1870917
## Run 5 stress 0.3656027
## Run 6 stress 0.09032258
## ... Procrustes: rmse 1.088365e-05 max resid 4.117973e-05
## ... Similar to previous best
## Run 7 stress 0.09032258
## ... Procrustes: rmse 1.144219e-05 max resid 4.373465e-05
## ... Similar to previous best
## Run 8 stress 0.1468973
## Run 9 stress 0.09032258
## ... New best solution
## ... Procrustes: rmse 5.370808e-06 max resid 2.065136e-05
## ... Similar to previous best
## Run 10 stress 0.09032258
## ... Procrustes: rmse 3.161333e-06 max resid 1.221996e-05
## ... Similar to previous best
## Run 11 stress 0.09032258
## ... Procrustes: rmse 1.128149e-05 max resid 3.435539e-05
## ... Similar to previous best
## Run 12 stress 0.09032258
## ... Procrustes: rmse 3.709831e-06 max resid 1.43359e-05
## ... Similar to previous best
## Run 13 stress 0.09032258
## ... Procrustes: rmse 1.511781e-06 max resid 5.791099e-06
## ... Similar to previous best
## Run 14 stress 0.09032258
## ... Procrustes: rmse 2.399781e-06 max resid 3.702076e-06
## ... Similar to previous best
## Run 15 stress 0.173623
## Run 16 stress 0.1468972
## Run 17 stress 0.09032258
## ... Procrustes: rmse 3.187954e-06 max resid 1.223107e-05
## ... Similar to previous best
## Run 18 stress 0.09032258
## ... Procrustes: rmse 1.52566e-06 max resid 5.633032e-06
## ... Similar to previous best
## Run 19 stress 0.1468972
## Run 20 stress 0.09032258
## ... Procrustes: rmse 2.812532e-06 max resid 1.083621e-05
## ... Similar to previous best
## *** Best solution repeated 9 times
ggplot(duneenv, aes(x = Axis01, y = Axis02)) +
geom_point(aes(fill = Use, size = A1), alpha = 0.6, pch = 21)