R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

Required packages

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(glmmTMB)
## Warning: package 'glmmTMB' was built under R version 4.5.3
## Warning in check_dep_version(dep_pkg = "TMB"): package version mismatch: 
## glmmTMB was built with TMB package version 1.9.21
## Current TMB package version is 1.9.17
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
library(DHARMa)
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode

#updated!

richness <- data.frame(
  TrailType = c("Mountain Bike", "Hiking", "Maintenance", "Skiing"),
  `0m` = c(1, 2, 3, 2),
  `100m` = c(3, 4, 1, 5),
  `200m` = c(3, 1, 3, 4),
  `300m` = c(1, 1, 5, 4),
  `400m` = c(3, 1, 5, 7),
  `500m` = c(6, 2, 3, 5),
  `600m` = c(5, 5, 3, 3),
  `700m` = c(3, 3, 5, 6),
  `800m` = c(4, 3, 4, 5),
  `900m` = c(4, 2, 3, 5),
  `1000m` = c(5, 3, 2, 6),
  `1100m` = c(3, 3, 3, 4)
)

#this part is to make sure r reads it right

richness_data <- richness %>%
  pivot_longer(
    cols = -TrailType,
    names_to = "Distance",
    values_to = "Richness"
  ) %>%
  mutate(Richness = as.numeric(Richness)) %>%
  na.omit()
richness_data$Distance <- gsub("X", "", richness_data$Distance)
str(richness_data)
## tibble [48 × 3] (S3: tbl_df/tbl/data.frame)
##  $ TrailType: chr [1:48] "Mountain Bike" "Mountain Bike" "Mountain Bike" "Mountain Bike" ...
##  $ Distance : chr [1:48] "0m" "100m" "200m" "300m" ...
##  $ Richness : num [1:48] 1 3 3 1 3 6 5 3 4 4 ...
head(richness_data)
## # A tibble: 6 × 3
##   TrailType     Distance Richness
##   <chr>         <chr>       <dbl>
## 1 Mountain Bike 0m              1
## 2 Mountain Bike 100m            3
## 3 Mountain Bike 200m            3
## 4 Mountain Bike 300m            1
## 5 Mountain Bike 400m            3
## 6 Mountain Bike 500m            6

Boxplot for species richness

ggplot(richness_data, aes(x = TrailType, y = Richness)) +
  geom_boxplot(fill = "lightblue") +
  geom_jitter(width = 0.2, alpha = 0.5) +
  labs(
    title = "Species Richness vs Trail Type",
    x = "Trail Type",
    y = "Species Richness"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5)
  )

Histogram to check distribution

hist(richness_data$Richness,
     main = "Distribution of Species Richness",
     xlab = "Richness",
     col = "lightblue")

#reason for using poisson: species richness is measured in counts. This means that typically we would model a count variable with a poisson distributed glm. This is beacause normal linear models (LMs) assume normally distributed response variables, which count data typically are not.

richness_poisson_model <- glmmTMB(Richness ~ TrailType + Distance,
                          data = richness_data,
                          family = poisson())
DHARMa::simulateResiduals(richness_poisson_model) |> plot()

deviance(richness_poisson_model) / df.residual(richness_poisson_model)  #since it is underdispersed (1= good, <1 = under, >1=over), google says to use quasipoisson - the below
## [1] 0.5616557
richness_quasi_model <- glm(Richness ~ TrailType + Distance,
                      data = richness_data,
                      family = quasipoisson)
summary(richness_quasi_model)
## 
## Call:
## glm(formula = Richness ~ TrailType + Distance, family = quasipoisson, 
##     data = richness_data)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.3626     0.2865   1.266 0.214533    
## TrailTypeMaintenance     0.2877     0.1773   1.622 0.114224    
## TrailTypeMountain Bike   0.3124     0.1764   1.771 0.085800 .  
## TrailTypeSkiing          0.6242     0.1661   3.758 0.000666 ***
## Distance1000m            0.6931     0.3179   2.180 0.036460 *  
## Distance100m             0.4855     0.3299   1.472 0.150575    
## Distance1100m            0.4855     0.3299   1.472 0.150576    
## Distance200m             0.3185     0.3411   0.934 0.357330    
## Distance300m             0.3185     0.3411   0.934 0.357323    
## Distance400m             0.6931     0.3179   2.180 0.036460 *  
## Distance500m             0.6931     0.3179   2.180 0.036460 *  
## Distance600m             0.6931     0.3179   2.180 0.036460 *  
## Distance700m             0.7538     0.3148   2.395 0.022469 *  
## Distance800m             0.6931     0.3179   2.180 0.036460 *  
## Distance900m             0.5596     0.3254   1.720 0.094818 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.5389797)
## 
##     Null deviance: 33.223  on 47  degrees of freedom
## Residual deviance: 18.535  on 33  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4
emmeans(richness_quasi_model, "pairwise" ~ TrailType, regrid = "response")
## $emmeans
##  TrailType     rate    SE  df asymp.LCL asymp.UCL
##  Hiking        2.50 0.335 Inf      1.84      3.16
##  Maintenance   3.33 0.387 Inf      2.57      4.09
##  Mountain Bike 3.42 0.392 Inf      2.65      4.18
##  Skiing        4.67 0.458 Inf      3.77      5.56
## 
## Results are averaged over the levels of: Distance 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                    estimate    SE  df z.ratio p.value
##  Hiking - Maintenance         -0.8333 0.512 Inf  -1.628  0.3627
##  Hiking - Mountain Bike       -0.9167 0.516 Inf  -1.778  0.2838
##  Hiking - Skiing              -2.1667 0.567 Inf  -3.819  0.0008
##  Maintenance - Mountain Bike  -0.0833 0.551 Inf  -0.151  0.9988
##  Maintenance - Skiing         -1.3333 0.599 Inf  -2.224  0.1166
##  Mountain Bike - Skiing       -1.2500 0.603 Inf  -2.075  0.1614
## 
## Results are averaged over the levels of: Distance 
## P value adjustment: tukey method for comparing a family of 4 estimates
anova(richness_quasi_model) #just a check on whether trail type and distance is significant #distance is not significant
## Analysis of Deviance Table
## 
## Model: quasipoisson, link: log
## 
## Response: Richness
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev      F   Pr(>F)   
## NULL                         47     33.223                   
## TrailType  3   8.1469        44     25.076 5.0385 0.005523 **
## Distance  11   6.5418        33     18.535 1.1034 0.389153   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

This part is for the percentage cover boxplot

excel_sheets("1veg.xlsx")
## [1] "Mountain Bike" "Hiking"        "Maintenance"   "Ski"
mtb <- read_excel("1veg.xlsx", sheet = "Mountain Bike")
hiking <- read_excel("1veg.xlsx", sheet = "Hiking")
maintenance <- read_excel("1veg.xlsx", sheet = "Maintenance")
ski <- read_excel("1veg.xlsx", sheet = "Ski")
#making sure it is percentage cover being calculated
calc_cover <- function(df, trail_name) {
  df %>%
    filter(`Species Name` != "No vegetation") %>%
    pivot_longer(
      cols = matches("^X?[0-9]+m$|^[0-9]+\\s*m$"),
      names_to = "Distance",
      values_to = "Cover"
    ) %>%
    mutate(
      Cover = as.numeric(Cover),
      Cover = ifelse(is.na(Cover), 0, Cover)
    ) %>%
    group_by(Distance) %>%
    summarise(
      PercentCover = sum(Cover),
      .groups = "drop"
    ) %>%
    mutate(Trail = trail_name) 
} 
  
#Combining the percentage cover of trails together
all_cover <- bind_rows(
  calc_cover(mtb, "Mountain Bike"),
  calc_cover(hiking, "Hiking"),
  calc_cover(maintenance, "Maintenance"),
  calc_cover(ski, "Skiing"))
#the final boxplot for percentage cover
ggplot(all_cover, aes(x = Trail, y = PercentCover)) +
  geom_boxplot(fill = "lightgreen") +
  geom_jitter(width = 0.2, alpha = 0.5) +
  labs(
    title = "Percentage Cover by Trail Type",
    x = "Trail Type",
    y = "Percent Cover (%)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5)
  )

#checking anova
model <- aov(PercentCover ~ Trail, data = all_cover)
summary(model)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Trail        3   3313    1104   1.198  0.322
## Residuals   44  40569     922
# Statistical test
shapiro.test(residuals(model)) #the qq plot thing to check normality = passed
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.99004, p-value = 0.9536
leveneTest(PercentCover ~ Trail, data = all_cover) #homogeneity of variance = passed
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.5543  0.648
##       44
TukeyHSD(model) #results
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = PercentCover ~ Trail, data = all_cover)
## 
## $Trail
##                                diff       lwr      upr     p adj
## Maintenance-Hiking         6.541667 -26.55681 39.64015 0.9519111
## Mountain Bike-Hiking      -1.875000 -34.97348 31.22348 0.9987476
## Skiing-Hiking             19.333333 -13.76515 52.43181 0.4117823
## Mountain Bike-Maintenance -8.416667 -41.51515 24.68181 0.9045267
## Skiing-Maintenance        12.791667 -20.30681 45.89015 0.7318013
## Skiing-Mountain Bike      21.208333 -11.89015 54.30681 0.3303960

So what does the data mean?

For species richness

#1. Species richness was significantly affected by trail type (quasi-Poisson GLM, averaged across all distances, F(3, 33) = 5.04, p = 0.0055). The data were underdispersed (dispersion parameter φ ≈ 0.539), so standard errors were adjusted accordingly using the quasi-Poisson GLM. Note: Distance did not have a significant effect (F(11,33) = 1.10, p = 0.389). #2. The model predicted that species richness was ~2.50 species on Hiking trails (SE = 0.34), ~3.33 species on Maintenance trails (SE = 0.39), ~3.42 species on Mountain Bike trails (SE = 0.39), and ~4.67 species on Skiing trails (SE = 0.46). #3. Pairwise comparisons of trail types showed that Skiing trails had significantly higher species richness than Hiking trails, with ~2.17 more species on average (SE = 0.57, p = 0.0008). Differences between all other trail pairs were not significant (p > 0.1).

For the percentage cover

#1. Just based off the boxplot, ski trails tend to have the highest and most variable cover, mountain bike trails are the most inconsistent and can have very low cover. Hiking more variable than maintenance trails. #2. The one-way ANOVA showed no significant effect of trail type on percent cover (F(3,45) = 1.50, p = 0.226).