This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.5.3
## Warning in check_dep_version(dep_pkg = "TMB"): package version mismatch:
## glmmTMB was built with TMB package version 1.9.21
## Current TMB package version is 1.9.17
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
library(DHARMa)
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
#I took out the 1200m data, let me know if you want to include it
richness <- data.frame(
TrailType = c("Mountain Bike", "Hiking", "Maintenance", "Skiing"),
`0m` = c(2, 5, 7, 5),
`100m` = c(4, 5, 3, 8),
`200m` = c(4, 3, 5, 7),
`300m` = c(2, 3, 9, 7),
`400m` = c(5, 3, 8, 8),
`500m` = c(7, 4, 6, 7),
`600m` = c(7, 7, 4, 5),
`700m` = c(5, 5, 8, 8),
`800m` = c(5, 6, 7, 8),
`900m` = c(5, 4, 4, 5),
`1000m` = c(5, 4, 3, 7),
`1100m` = c(4, 4, 4, 5)
)
#this part is to make sure r reads it right
richness_data <- richness %>%
pivot_longer(
cols = -TrailType,
names_to = "Distance",
values_to = "Richness"
) %>%
mutate(Richness = as.numeric(Richness)) %>%
na.omit()
richness_data$Distance <- gsub("X", "", richness_data$Distance)
str(richness_data)
## tibble [48 × 3] (S3: tbl_df/tbl/data.frame)
## $ TrailType: chr [1:48] "Mountain Bike" "Mountain Bike" "Mountain Bike" "Mountain Bike" ...
## $ Distance : chr [1:48] "0m" "100m" "200m" "300m" ...
## $ Richness : num [1:48] 2 4 4 2 5 7 7 5 5 5 ...
head(richness_data)
## # A tibble: 6 × 3
## TrailType Distance Richness
## <chr> <chr> <dbl>
## 1 Mountain Bike 0m 2
## 2 Mountain Bike 100m 4
## 3 Mountain Bike 200m 4
## 4 Mountain Bike 300m 2
## 5 Mountain Bike 400m 5
## 6 Mountain Bike 500m 7
ggplot(richness_data, aes(x = TrailType, y = Richness)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.2, alpha = 0.5) +
labs(
title = "Species Richness vs Trail Type",
x = "Trail Type",
y = "Species Richness"
) +
theme(
plot.title = element_text(hjust = 0.5)
)
hist(richness_data$Richness,
main = "Distribution of Species Richness",
xlab = "Richness",
col = "lightblue")
#reason for using poisson: species richness is measured in counts. This means that typically we would model a count variable with a poisson distributed glm. This is beacause normal linear models (LMs) assume normally distributed response variables, which count data typically are not.
richness_poisson_model <- glmmTMB(Richness ~ TrailType + Distance,
data = richness_data,
family = poisson())
DHARMa::simulateResiduals(richness_poisson_model) |> plot()
deviance(richness_poisson_model) / df.residual(richness_poisson_model) #since it is underdispersed (1= good, <1 = under, >1=over), google says to use quasipoisson - the below
## [1] 0.5023471
richness_quasi_model <- glm(Richness ~ TrailType + Distance,
data = richness_data,
family = quasipoisson)
summary(richness_quasi_model)
##
## Call:
## glm(formula = Richness ~ TrailType + Distance, family = quasipoisson,
## data = richness_data)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.370e+00 1.816e-01 7.542 1.13e-08 ***
## TrailTypeMaintenance 2.492e-01 1.280e-01 1.947 0.06006 .
## TrailTypeMountain Bike 3.704e-02 1.344e-01 0.276 0.78464
## TrailTypeSkiing 4.117e-01 1.237e-01 3.328 0.00216 **
## Distance1000m -1.693e-12 2.266e-01 0.000 1.00000
## Distance100m 5.129e-02 2.238e-01 0.229 0.82011
## Distance1100m -1.112e-01 2.332e-01 -0.477 0.63652
## Distance200m -1.749e-12 2.266e-01 0.000 1.00000
## Distance300m 1.001e-01 2.212e-01 0.453 0.65384
## Distance400m 2.336e-01 2.145e-01 1.089 0.28398
## Distance500m 2.336e-01 2.145e-01 1.089 0.28398
## Distance600m 1.911e-01 2.165e-01 0.882 0.38400
## Distance700m 3.137e-01 2.108e-01 1.488 0.14629
## Distance800m 3.137e-01 2.108e-01 1.488 0.14629
## Distance900m -5.407e-02 2.297e-01 -0.235 0.81540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.4878886)
##
## Null deviance: 28.921 on 47 degrees of freedom
## Residual deviance: 16.577 on 33 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
emmeans(richness_quasi_model, "pairwise" ~ TrailType, regrid = "response")
## $emmeans
## TrailType rate SE df asymp.LCL asymp.UCL
## Hiking 4.42 0.424 Inf 3.59 5.25
## Maintenance 5.67 0.480 Inf 4.73 6.61
## Mountain Bike 4.58 0.432 Inf 3.74 5.43
## Skiing 6.67 0.521 Inf 5.65 7.69
##
## Results are averaged over the levels of: Distance
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## Hiking - Maintenance -1.250 0.640 Inf -1.952 0.2064
## Hiking - Mountain Bike -0.167 0.605 Inf -0.276 0.9927
## Hiking - Skiing -2.250 0.671 Inf -3.352 0.0045
## Maintenance - Mountain Bike 1.083 0.646 Inf 1.678 0.3352
## Maintenance - Skiing -1.000 0.708 Inf -1.412 0.4916
## Mountain Bike - Skiing -2.083 0.676 Inf -3.080 0.0111
##
## Results are averaged over the levels of: Distance
## P value adjustment: tukey method for comparing a family of 4 estimates
anova(richness_quasi_model) #just a check on whether trail type and distance is significant #distance is not significant
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: Richness
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 47 28.921
## TrailType 3 7.2868 44 21.635 4.9784 0.005856 **
## Distance 11 5.0572 33 16.578 0.9423 0.514294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
excel_sheets("1veg.xlsx")
## [1] "Mountain Bike" "Hiking" "Maintenance" "Ski"
mtb <- read_excel("1veg.xlsx", sheet = "Mountain Bike")
hiking <- read_excel("1veg.xlsx", sheet = "Hiking")
maintenance <- read_excel("1veg.xlsx", sheet = "Maintenance")
ski <- read_excel("1veg.xlsx", sheet = "Ski")
#making sure it is percentage cover being calculated
calc_cover <- function(df, trail_name) {
df %>%
filter(`Species Name` != "No vegetation") %>%
pivot_longer(
cols = matches("^X?[0-9]+m$|^[0-9]+\\s*m$"),
names_to = "Distance",
values_to = "Cover"
) %>%
mutate(
Cover = as.numeric(Cover),
Cover = ifelse(is.na(Cover), 0, Cover)
) %>%
group_by(Distance) %>%
summarise(
PercentCover = sum(Cover),
.groups = "drop"
) %>%
mutate(Trail = trail_name)
}
#Combining the percentage cover of trails together
all_cover <- bind_rows(
calc_cover(mtb, "Mountain Bike"),
calc_cover(hiking, "Hiking"),
calc_cover(maintenance, "Maintenance"),
calc_cover(ski, "Skiing"))
#the final boxplot for percentage cover
ggplot(all_cover, aes(x = Trail, y = PercentCover)) +
geom_boxplot(fill = "lightgreen") +
geom_jitter(width = 0.2, alpha = 0.5) +
labs(
title = "Percentage Cover by Trail Type",
x = "Trail Type",
y = "Percent Cover (%)"
) +
theme(
plot.title = element_text(hjust = 0.5)
)
#checking anova
model <- aov(PercentCover ~ Trail, data = all_cover)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Trail 3 10390 3463 4.369 0.00878 **
## Residuals 45 35671 793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Statistical test
shapiro.test(residuals(model)) #the qq plot thing to check normality = passed
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.98358, p-value = 0.7207
leveneTest(PercentCover ~ Trail, data = all_cover) #homogeneity of variance = passed
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.4477 0.07596 .
## 45
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(model) #results
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = PercentCover ~ Trail, data = all_cover)
##
## $Trail
## diff lwr upr p adj
## Maintenance-Hiking -0.9583333 -31.621157 29.70449 0.9997890
## Mountain Bike-Hiking -19.1923077 -49.259680 10.87506 0.3342103
## Skiing-Hiking 21.5833333 -9.079490 52.24616 0.2519522
## Mountain Bike-Maintenance -18.2339744 -48.301347 11.83340 0.3791540
## Skiing-Maintenance 22.5416667 -8.121157 53.20449 0.2179187
## Skiing-Mountain Bike 40.7756410 10.708269 70.84301 0.0040305
#1. Species richness was significantly affected by trail type (quasi-Poisson GLM, averaged across all distances, F(3, 33) = 5.4, p = 0.003). The data were underdispersed (dispersion parameter φ ≈ 0.49), so standard errors were adjusted accordingly using the quasi-Poisson GLM. #2. The model predicted that species richness was ~4 species on Hiking trails (SE = 0.42), ~5.7 species on Maintenance trails (SE = 0.48), ~4.6 species on Mountain Bike trails (SE = 0.43), and ~6.7 species on Skiing trails (SE = 0.52). #3. Pairwise comparisons of trail types showed that Skiing trails had significantly higher species richness than Hiking trails, with ~2.3 more species on average (SE = 0.67, p = 0.0045), and higher richness than Mountain Bike trails, with ~2.1 more species on average (SE = 0.68, p = 0.011). Differences between all other trail pairs were not significant.
#1. Just based off the boxplot, ski trails tend to have the highest and most variable cover, mountain bike trails are the most inconsistent and can have very low cover. Hiking more variable than maintenance trails. #2. The one-way ANOVA showed a significant effect of trail type on percent cover (F(3,45) = 4.37, p = 0.0088). Post-hoc tests showed that skiing trails had significantly higher percent cover than mountain bike trails (p = 0.004). However, no other pairwise comparisons were significant.