library(piecewiseSEM)
## 
##   This is piecewiseSEM version 2.3.0.1.
## 
## 
##   Questions or bugs can be addressed to <jslefche@gmail.com>.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(lmerTest) 
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(multcompView) 
library(knitr)

Question 1 - Elevation and Species Distribution

1. Load in the data and look at summary

alpine<- read.csv("raw/AlpineMammals_SEM_data.csv") 
summary(alpine)
##    Transect          Elevation_ft     PatchSize       PercentGrass 
##  Length:192         Min.   : 6040   Min.   :  2.00   Min.   : 0.1  
##  Class :character   1st Qu.: 8970   1st Qu.:  6.00   1st Qu.: 2.0  
##  Mode  :character   Median : 9822   Median : 12.00   Median : 8.0  
##                     Mean   : 9925   Mean   : 25.74   Mean   :17.2  
##                     3rd Qu.:11050   3rd Qu.: 30.00   3rd Qu.:27.0  
##                     Max.   :13168   Max.   :219.00   Max.   :78.0  
##   PercentForbs       Pikas           Woodrats         Marmots      
##  Min.   : 0.10   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 4.00   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :10.50   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :15.36   Mean   :0.5052   Mean   :0.3698   Mean   :0.3333  
##  3rd Qu.:19.50   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :79.00   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    Road_50m          Water_50m         RocksDeeper2m      Tave_wt       
##  Length:192         Length:192         Min.   :0.000   Min.   :-11.950  
##  Class :character   Class :character   1st Qu.:0.000   1st Qu.: -7.225  
##  Mode  :character   Mode  :character   Median :0.000   Median : -6.463  
##                                        Mean   :0.474   Mean   : -6.164  
##                                        3rd Qu.:1.000   3rd Qu.: -5.287  
##                                        Max.   :1.000   Max.   :  0.450  
##     Tave_sp           Tave_sm          Tave_at           PPT_wt      
##  Min.   :-4.5250   Min.   : 8.825   Min.   :-1.350   Min.   : 13.25  
##  1st Qu.: 0.2687   1st Qu.:11.775   1st Qu.: 2.913   1st Qu.:144.88  
##  Median : 1.8250   Median :13.425   Median : 4.300   Median :194.25  
##  Mean   : 1.8617   Mean   :13.486   Mean   : 4.273   Mean   :209.10  
##  3rd Qu.: 3.1063   3rd Qu.:14.800   3rd Qu.: 5.412   3rd Qu.:269.94  
##  Max.   : 8.3500   Max.   :21.175   Max.   :10.850   Max.   :610.25  
##      PPT_sp          PPT_sm          PPT_at           PAS_wt      
##  Min.   : 50.5   Min.   : 95.0   Min.   : 42.75   Min.   :  7.75  
##  1st Qu.:164.2   1st Qu.:120.8   1st Qu.:111.12   1st Qu.:113.94  
##  Median :207.8   Median :146.4   Median :148.12   Median :162.88  
##  Mean   :211.9   Mean   :154.3   Mean   :152.99   Mean   :177.67  
##  3rd Qu.:255.1   3rd Qu.:172.6   3rd Qu.:181.81   3rd Qu.:228.12  
##  Max.   :446.2   Max.   :321.2   Max.   :318.25   Max.   :567.50  
##      PAS_sp           PAS_sm           PAS_at      
##  Min.   :  6.25   Min.   : 0.000   Min.   :  3.75  
##  1st Qu.: 74.69   1st Qu.: 0.750   1st Qu.: 23.12  
##  Median :124.62   Median : 1.625   Median : 39.12  
##  Mean   :133.22   Mean   : 2.659   Mean   : 48.02  
##  3rd Qu.:183.12   3rd Qu.: 3.750   3rd Qu.: 64.12  
##  Max.   :409.25   Max.   :16.000   Max.   :197.00

2. What type of regression model would you run to look at effects on species absence/presence?

To look at species absence or presence, I would use a logistic regression model, specifically a binary logistic regression because the data for the response variables of pika, woodrats, and marmots is binary (1 = species present, 0 = species absent).

3. Create 3 univariate models - one for each species using Elevation_ft as the predictor variable

pika<- glm(Pikas~Elevation_ft, data = alpine, family = binomial)
summary(pika) # pika GLM
## 
## Call:
## glm(formula = Pikas ~ Elevation_ft, family = binomial, data = alpine)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.051e+01  1.632e+00  -6.439 1.20e-10 ***
## Elevation_ft  1.060e-03  1.635e-04   6.484 8.96e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 266.15  on 191  degrees of freedom
## Residual deviance: 201.42  on 190  degrees of freedom
## AIC: 205.42
## 
## Number of Fisher Scoring iterations: 4
woodrat<- glm(Woodrats~Elevation_ft, data = alpine, family = binomial)
summary(woodrat) # woodrat GLM
## 
## Call:
## glm(formula = Woodrats ~ Elevation_ft, family = binomial, data = alpine)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  12.3765120  1.9359883   6.393 1.63e-10 ***
## Elevation_ft -0.0013318  0.0002019  -6.596 4.22e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 253.00  on 191  degrees of freedom
## Residual deviance: 173.13  on 190  degrees of freedom
## AIC: 177.13
## 
## Number of Fisher Scoring iterations: 5
marmot<- glm(Marmots~Elevation_ft, data = alpine, family = binomial)
summary(marmot)
## 
## Call:
## glm(formula = Marmots ~ Elevation_ft, family = binomial, data = alpine)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.185e+01  1.810e+00  -6.548 5.84e-11 ***
## Elevation_ft  1.092e-03  1.729e-04   6.315 2.69e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 244.42  on 191  degrees of freedom
## Residual deviance: 183.81  on 190  degrees of freedom
## AIC: 187.81
## 
## Number of Fisher Scoring iterations: 5

4. Which species would you find hiking from low to high elevation?

At lower elevation, woodrats would be seen then start to disappear as elevation is gained. Pikas and marmots will appear at higher elevations with marmots being present in particularly high elevations

5. Create SEM!

SEM1<- psem(glm(Pikas~Elevation_ft, family = binomial, data = alpine),
            glm(Woodrats~Elevation_ft, family = binomial, data = alpine),
            glm(Marmots~Elevation_ft, family = binomial, data = alpine)) 
summary(SEM1, conserve=TRUE)
##   |                                                                              |                                                                      |   0%  |                                                                              |============                                                          |  17%  |                                                                              |=======================                                               |  33%  |                                                                              |===================================                                   |  50%  |                                                                              |===============================================                       |  67%  |                                                                              |==========================================================            |  83%  |                                                                              |======================================================================| 100%
## 
## Structural Equation Model of SEM1 
## 
## Call:
##   Pikas ~ Elevation_ft
##   Woodrats ~ Elevation_ft
##   Marmots ~ Elevation_ft
## 
##     AIC
##  570.367
## 
## ---
## Tests of directed separation:
## 
##             Independ.Claim Test.Type  DF Crit.Value P.Value    
##     Pikas ~ Woodrats + ...      coef 189    -4.8759  0.0000 ***
##      Pikas ~ Marmots + ...      coef 189     2.3652  0.0180   *
##   Woodrats ~ Marmots + ...      coef 189    -0.8986  0.3689    
## 
## --
## Global goodness-of-fit:
## 
## Chi-Squared = 0 with P-value = 1 and on 6 degrees of freedom
## Fisher's C = 37.499 with P-value = 0 and on 6 degrees of freedom
## 
## ---
## Coefficients:
## 
##   Response    Predictor Estimate Std.Error  DF Crit.Value P.Value Std.Estimate
##      Pikas Elevation_ft   0.0011     2e-04 190     6.4836       0       0.6343
##   Woodrats Elevation_ft  -0.0013     2e-04 190    -6.5961       0      -0.7177
##    Marmots Elevation_ft   0.0011     2e-04 190     6.3155       0       0.6454
##      
##   ***
##   ***
##   ***
## 
##   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05
## 
## ---
## Individual R-squared:
## 
##   Response     method R.squared
##      Pikas nagelkerke      0.38
##   Woodrats nagelkerke      0.46
##    Marmots nagelkerke      0.38

6. Plot model and interpret

plot(SEM1)

Elevation effects all three species. Elevation negatively effects woodrats (-0.718), and positively effects both pikas (0.634) and marmots. (0.645). It has the highest positive effect on marmots, meaning that marmots will be found in higher elevations (and pikas), and woodrats will not be found at higher elevations.

Question 2 - Abiotic and Biotic Drivers of Distributions

1. SEM including each of the species - climate predictors only - 2-4 per species

SEM2<- psem(glm(Pikas~ PAS_wt + Tave_wt, family = binomial, data = alpine),
            glm(Woodrats~ PAS_wt + Tave_wt, family = binomial, data = alpine),
            glm(Marmots~ PAS_wt + Tave_wt, family = binomial, data = alpine)) 
plot(SEM2)

2. SEMs - biotic predictors

SEM3<- psem(glm(Pikas~ PercentGrass + PercentForbs +PatchSize, family = binomial, data = alpine),
            glm(Woodrats~ PercentGrass + PercentForbs +PatchSize, family = binomial, data = alpine),
            glm(Marmots~ PercentGrass + PercentForbs +PatchSize, family = binomial, data = alpine)) 

plot(SEM3)

3. SEM - biotic + abiotic factors

SEM4 <- psem(glm(Pikas ~ Elevation_ft*PPT_wt+ RocksDeeper2m + Woodrats, family = binomial, data = alpine), glm(Marmots ~  Elevation_ft* PPT_wt +RocksDeeper2m +Pikas, family = binomial, data = alpine),
glm(Woodrats ~ Elevation_ft* PPT_wt + RocksDeeper2m, family = binomial, data = alpine)) 

plot(SEM4)
fishertest1<- fisherC(SEM4, conserve = TRUE) 
fishertest1

4. Moving forward use only this model - adjust

Adjusted relationships until p-value in fisher c test was appropriate

5. Explain variables

Elevation and PPT in winter both effect pikas as they are found at higher elevations and precipitation (snow) effects their distribution. Deeper rocks also provide habitat underneath snow cover for pikas as well. There also may be competition between pikas and woodrats despite not sharing habitat in higher elevations. Marmots have a similar relationship, however increases snow fall is not necessarily beneficial for them in the same way it is for pikas. Pikas may be competition for marmots or may also be an indication of favorable habitat for marmots. Conversely, woodrats prefer lower elevations with potentially higher precip allowing for more support of vegetation for food sources.

6. R-squared values for each 3 sub-models

pikas_model <- glm(Pikas ~ Elevation_ft*PPT_wt+ RocksDeeper2m + Woodrats, family = binomial, data = alpine)

marmots_model <- glm(Marmots ~  Elevation_ft* PPT_wt +RocksDeeper2m +Pikas, family = binomial, data = alpine)

woodrats_model <- glm(Woodrats ~ Elevation_ft* PPT_wt + RocksDeeper2m, family = binomial, data = alpine)
mcfadden_r2 <- function(model) {
  1 - (logLik(model) / logLik(update(model, . ~ 1)))  # llf / llnull
}
pikas_r2 <- mcfadden_r2(pikas_model)
marmots_r2 <- mcfadden_r2(marmots_model)
woodrats_r2 <- mcfadden_r2(woodrats_model)
pikas_r2
## 'log Lik.' 0.5141576 (df=6)
marmots_r2
## 'log Lik.' 0.3130416 (df=6)
woodrats_r2
## 'log Lik.' 0.4224071 (df=5)

I believe the differnce between the r-squared values is reflective of the relationships seen in the model due to difference in species habitat vs. shared species habitat.

7. What is your SEMs Fisher C value?

The fisher c value for the SEM used was 0.369 which indicated a good fit of the model and the relationships between the predictors and other variables.

8. Miss any important pathways?

dSep(SEM4)
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%

9. Insignificant pathways?

The pathway found by dSEP is insignificant (p-value = 0.831). This is not suprising as marmots and woodrats do not overlap in habitat and therefore would not affect each other in any way.

10. How many predictors across all models? - minimum number of data points to not overfit?

The total number of predictors for the final model was 11 including duplicates. The minimum number of data points with 11 predictors would be 110 (want at least 10 data points per predictor).

11. Check correlations

cor(alpine$Elevation_ft, alpine$PatchSize)
## [1] 0.2454946
cor(alpine$Elevation_ft, alpine$RocksDeeper2m)
## [1] -0.03259664
cor(alpine$Elevation_ft, alpine$Woodrats)
## [1] -0.5866755
cor(alpine$PatchSize, alpine$RocksDeeper2m)
## [1] 0.2700521
cor(alpine$PatchSize, alpine$Woodrats)
## [1] -0.1669178
cor(alpine$RocksDeeper2m, alpine$Woodrats)
## [1] 0.1587902
cor(alpine$Elevation_ft, alpine$PatchSize)
## [1] 0.2454946
cor(alpine$Elevation_ft, alpine$RocksDeeper2m)
## [1] -0.03259664
cor(alpine$Elevation_ft, alpine$Pikas)
## [1] 0.5378511
cor(alpine$PatchSize, alpine$RocksDeeper2m)
## [1] 0.2700521
cor(alpine$PatchSize, alpine$Pikas)
## [1] 0.2781928
cor(alpine$RocksDeeper2m, alpine$Pikas)
## [1] 0.1465828
cor(alpine$Elevation_ft, alpine$PatchSize)
## [1] 0.2454946
cor(alpine$Elevation_ft, alpine$RocksDeeper2m)
## [1] -0.03259664
cor(alpine$Elevation_ft, alpine$Woodrats)
## [1] -0.5866755
cor(alpine$PatchSize, alpine$RocksDeeper2m)
## [1] 0.2700521
cor(alpine$PatchSize, alpine$Woodrats)
## [1] -0.1669178
cor(alpine$RocksDeeper2m, alpine$Woodrats)
## [1] 0.1587902

None of the correlations exceed 0.7!

12. Add in Transect

SEM5 <- psem(
  lmer(Pikas ~ Elevation_ft + PatchSize + RocksDeeper2m + (1|Transect), data = alpine),
  lmer(Marmots ~ Elevation_ft + PatchSize + RocksDeeper2m + (1|Transect), data = alpine),
  lmer(Woodrats ~ Elevation_ft + PatchSize + RocksDeeper2m + (1|Transect), data = alpine)
)
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
summary(SEM5)
##   |                                                                              |                                                                      |   0%  |                                                                              |=======================                                               |  33%  |                                                                              |===============================================                       |  67%  |                                                                              |======================================================================| 100%
## 
## Structural Equation Model of SEM5 
## 
## Call:
##   Pikas ~ Elevation_ft + PatchSize + RocksDeeper2m
##   Marmots ~ Elevation_ft + PatchSize + RocksDeeper2m
##   Woodrats ~ Elevation_ft + PatchSize + RocksDeeper2m
## 
##     AIC
##  678.696
## 
## ---
## Tests of directed separation:
## 
##             Independ.Claim Test.Type       DF Crit.Value P.Value    
##      Marmots ~ Pikas + ...      coef 141.2619     1.8669  0.0640    
##     Woodrats ~ Pikas + ...      coef 129.2276    -6.9075  0.0000 ***
##   Woodrats ~ Marmots + ...      coef 184.1114    -0.6687  0.5045    
## 
## --
## Global goodness-of-fit:
## 
## Chi-Squared = 29.567 with P-value = 0 and on 3 degrees of freedom
## Fisher's C = 51.521 with P-value = 0 and on 6 degrees of freedom
## 
## ---
## Coefficients:
## 
##   Response     Predictor Estimate Std.Error       DF Crit.Value P.Value
##      Pikas  Elevation_ft   0.0002    0.0000 186.3854    10.1663  0.0000
##      Pikas     PatchSize   0.0009    0.0008 178.0892     1.0838  0.2799
##      Pikas RocksDeeper2m   0.1199    0.0580 182.4374     2.0657  0.0403
##    Marmots  Elevation_ft   0.0002    0.0000 160.7556     7.5963  0.0000
##    Marmots     PatchSize   0.0027    0.0009 187.9936     3.1467  0.0019
##    Marmots RocksDeeper2m   0.0498    0.0602 180.7934     0.8277  0.4089
##   Woodrats  Elevation_ft  -0.0002    0.0000 183.5661    -9.6075  0.0000
##   Woodrats     PatchSize  -0.0005    0.0008 185.2569    -0.6662  0.5061
##   Woodrats RocksDeeper2m   0.1322    0.0586 187.9979     2.2538  0.0254
##   Std.Estimate    
##         0.6098 ***
##         0.0624    
##         0.1198   *
##         0.4876 ***
##         0.2040  **
##         0.0528    
##        -0.5949 ***
##        -0.0406    
##         0.1367   *
## 
##   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05
## 
## ---
## Individual R-squared:
## 
##   Response method Marginal Conditional
##      Pikas   none     0.36        0.58
##    Marmots   none     0.32        0.36
##   Woodrats   none     0.37        0.47

The Fishers C value changed significantly, there was a huge increase (Fisher’s C now at 51.521). This shows the model including transect is a poor fit. Biologically, the random transects account for spatial variation between species habitat assuming the transects may have different conditions.