Chapter 1 Analysis
Reading in data
## Load packages
library(dplyr) # data wrangling
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(lme4) # glmer
## Warning: package 'lme4' was built under R version 3.6.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.6.2
library(MuMIn) # r.squaredGLMM
## Warning: package 'MuMIn' was built under R version 3.6.2
library(emmeans) #pairwise
## Warning: package 'emmeans' was built under R version 3.6.2
# Read the data
# all lidar only has primary unit
nahmint<- read.csv("/Users/NonisA/Documents/Feb 18th - finalizing stats/Chapter.2/data/all.lidar.edited.csv")
# new file with corrected headache creek to match lidar
# All fish for LiDAR
all<-nahmint%>% # remove outlier values
filter(stream != "gray.creek")
test<-subset(all,is.na(mean.slope)) # checking na's
all<-all[!is.na(all$mean.slope), ] # removing 12 na values
all$cover.dom<-as.character(all$cover.dom)
all$cover.dom[is.na(all$cover.dom)] <- "none"
all$cover.dom<-as.factor(all$cover.dom)
all$level<-as.factor(all$level)
all$stream<-as.factor(all$stream)
all$location<-as.factor(all$location)
all$sample<-as.factor(all$sample)
#### ordering of the categorical factors; how do i choose to order them? Order does not matter.
all$sub.dom <- factor(all$sub.dom, levels = c( "R", "G", "B", "C"))
all$cover.dom <- factor(all$cover.dom, levels = c("none", "l.wood", "s.wood", "OV", "UCB", "B"))
all$stream <- factor(all$stream, levels = c("rainbow.creek", "head.water.creek", "elk.creek", "view.creek", "headache.creek"))
all$habitat <- factor(all$habitat, levels = c("STEP", "C", "RI", "G", "POOL"))
all<-all%>%
dplyr::mutate(als.habitat = recode_factor(als.habitat, "CB" = "C", "CR" = "C", "RP" = "RI"))
str(all)
## 'data.frame': 372 obs. of 37 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ stream : Factor w/ 5 levels "rainbow.creek",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ location : Factor w/ 31 levels "1","2","3","4",..: 12 13 15 19 2 3 5 8 12 13 ...
## $ level : Factor w/ 1 level "P": 1 1 1 1 1 1 1 1 1 1 ...
## $ plot : Factor w/ 118 levels "elk.creek-1",..: 90 91 93 97 98 99 101 104 39 40 ...
## $ sample : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ als.length : int 3 8 9 5 4 3 5 4 5 10 ...
## $ total.per.habitat: int 2 1 1 1 1 2 3 1 3 3 ...
## $ density.m2 : num 0.0275 0.0245 0.085 0.0608 0.0433 ...
## $ fish.p.a : int 1 1 1 1 1 1 1 1 1 1 ...
## $ species : Factor w/ 1 level "ALL": 1 1 1 1 1 1 1 1 1 1 ...
## $ habitat : Factor w/ 5 levels "STEP","C","RI",..: 4 3 5 5 5 4 5 5 5 4 ...
## $ gradient : num 0.5 1 0 0 0 1 0 0 0 1 ...
## $ bfw : num 8 8 4.2 4.7 6.6 2.9 4.4 6.9 7.3 5.6 ...
## $ sub.dom : Factor w/ 4 levels "R","G","B","C": 2 4 3 1 3 4 4 3 4 2 ...
## $ cover.dom : Factor w/ 6 levels "none","l.wood",..: 4 4 4 6 4 4 4 2 2 4 ...
## $ pool.area : num NA NA 11.8 12.6 23.1 ...
## $ wetted.width.m : num 3.3 2.1 3.7 3.6 6.5 1.9 3.2 3.7 4.6 3.1 ...
## $ mean.depth.m : num 0.263 0.143 0.367 0.36 0.34 ...
## $ total.wood : int 0 0 0 0 2 0 0 1 4 1 ...
## $ small : int 0 0 0 0 0 0 0 0 0 0 ...
## $ medium : int 0 0 0 0 0 0 0 0 2 0 ...
## $ large : int 0 0 0 0 2 0 0 1 2 1 ...
## $ wood.m2 : num 0 0 0 0 0.0866 ...
## $ wood.p.a : int 0 0 0 0 1 0 0 1 1 1 ...
## $ mean.slope : num 3.42 1.74 6.67 13.54 7.68 ...
## $ mean.roughness : num 0.2479 0.0702 0.4826 0.6556 0.6149 ...
## $ als.bfw : num 6.25 8.75 6.9 2.83 4.4 ...
## $ als.max.bfw : int 7 9 10 4 5 5 7 5 8 8 ...
## $ als.min.bfw : int 6 8 5 1 4 3 5 5 6 5 ...
## $ als.gradient : num -3.77 -1.11 -3.74 -20.24 -17.65 ...
## $ als.habitat : Factor w/ 4 levels "C","RI","G","POOL": 2 2 4 4 1 3 4 4 4 3 ...
## $ IntClass : int 2 2 3 4 2 3 4 3 3 3 ...
## $ MeanCHM : num 10.55 10.43 9.31 3.75 5.44 ...
## $ ikurt : num 2.37 2.51 2.96 2.55 2.9 ...
## $ zkurt : num 4 4.4 2.58 2.53 3.15 ...
## $ zq15 : num 4.71 6.88 2.6 1.46 1.15 ...
head(all)
## X stream location level plot sample als.length
## 1 1 rainbow.creek 12 P rainbow.creek-12 1 3
## 2 2 rainbow.creek 13 P rainbow.creek-13 1 8
## 3 3 rainbow.creek 15 P rainbow.creek-15 1 9
## 4 4 rainbow.creek 19 P rainbow.creek-19 1 5
## 5 5 rainbow.creek 2 P rainbow.creek-2 1 4
## 6 6 rainbow.creek 3 P rainbow.creek-3 1 3
## total.per.habitat density.m2 fish.p.a species habitat gradient bfw sub.dom
## 1 2 0.02747253 1 ALL G 0.5 8.0 G
## 2 1 0.02450980 1 ALL RI 1.0 8.0 C
## 3 1 0.08503401 1 ALL POOL 0.0 4.2 B
## 4 1 0.06079027 1 ALL POOL 0.0 4.7 R
## 5 1 0.04329004 1 ALL POOL 0.0 6.6 B
## 6 2 0.09447331 1 ALL G 1.0 2.9 C
## cover.dom pool.area wetted.width.m mean.depth.m total.wood small medium large
## 1 OV NA 3.3 0.2633 0 0 0 0
## 2 OV NA 2.1 0.1433 0 0 0 0
## 3 OV 11.76 3.7 0.3667 0 0 0 0
## 4 B 12.60 3.6 0.3600 0 0 0 0
## 5 OV 23.10 6.5 0.3400 2 0 0 2
## 6 OV NA 1.9 0.3400 0 0 0 0
## wood.m2 wood.p.a mean.slope mean.roughness als.bfw als.max.bfw
## 1 0.00000000 0 3.418388 0.24793388 6.250000 7
## 2 0.00000000 0 1.740133 0.07024793 8.750000 9
## 3 0.00000000 0 6.672169 0.48264463 6.900000 10
## 4 0.00000000 0 13.542637 0.65564739 2.833333 4
## 5 0.08658009 1 7.683977 0.61487603 4.400000 5
## 6 0.00000000 0 7.726148 0.74173553 3.750000 5
## als.min.bfw als.gradient als.habitat IntClass MeanCHM ikurt zkurt
## 1 6 -3.766378 RI 2 10.545442 2.366944 3.996058
## 2 8 -1.112556 RI 2 10.432096 2.510605 4.398169
## 3 5 -3.744507 POOL 3 9.314879 2.963565 2.576385
## 4 1 -20.240173 POOL 4 3.748570 2.548174 2.525043
## 5 4 -17.649841 C 2 5.438025 2.900789 3.150615
## 6 3 -8.833313 G 3 4.270184 2.863221 3.369249
## zq15
## 1 4.706800
## 2 6.880472
## 3 2.600435
## 4 1.458125
## 5 1.147700
## 6 1.114375
# Need to calculate pool area
all<-all%>%
mutate(als.pool.area = (als.length*als.bfw))%>%
mutate(als.gradient.poss = als.gradient*-1)
###########
# LiDAR fish GLMM w/ field gradient
###########
### Response 0.001 has better residuals than 0.01; better results with 0.01
all.c <- all%>%
mutate(density.m2=if_else(total.per.habitat==0, 0.01, density.m2))
#explore the data a bit: general shape per stream and sample
ggplot(data = all.c) +
geom_density(aes(x = density.m2), col = "darkblue", fill = "lightblue") +
theme_bw()+
facet_wrap(~stream)
all.lm<- glmer(density.m2 ~ gradient+ # significant
+ (1|stream/plot), data=all.c,family=Gamma(link=log))
summary(all.lm) # singular fit issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ gradient + +(1 | stream/plot)
## Data: all.c
##
## AIC BIC logLik deviance df.resid
## -2243.4 -2223.8 1126.7 -2253.4 367
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4715 -0.4959 -0.1944 0.2957 3.8634
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.312383 0.55891
## stream (Intercept) 0.005774 0.07599
## Residual 0.369358 0.60775
## Number of obs: 372, groups: plot:stream, 93; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -3.68225 0.12095 -30.445 < 2e-16 ***
## gradient -0.13688 0.03566 -3.839 0.000124 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## gradient -0.538
#pseudo R^2
r.squaredGLMM(all.lm) #MuMIN package
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.
## R2m R2c
## delta 0.1486670 0.5426332
## lognormal 0.1595353 0.5823025
## trigamma 0.1358179 0.4957342
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 50% of variation in density
# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02626246
## [1] 0.02626246
### something funky with the estimate values with the poss/neg gradient
# Intercept
#exp(-3.68225)
# X variables
#(exp(-0.13688)-1)*100 # --> incorrect way of calculating this? value seems off
# or XX% decrease in density per one unit increase in gradient
#Check the Model Assumptions
# Linearity
### Residuals
plot(fitted(all.lm),residuals(all.lm))
residuals <- residuals(all.lm, type = "response", retype="normalized")
plot(residuals)
par(mfrow=c(2,2), cex=1.0, mai=c(1.0,1.0,0.6,0.6))
plot(all.lm)
par(mfrow=c(1,1), cex=1.0, mai=c(1.0,1.0,1.0,1.0))
## Colinearity - no relationship
#ggplot(all.c,aes(x=bfw, y=gradient))+
#geom_point() + geom_smooth(method = "lm")+ theme_bw()+
# facet_wrap(~stream)
#test<-lm(bfw ~ gradient, data =all.c) # Not significantly related
#summary(test)
## Heteroskedasticity
plot(fitted(all.lm),residuals(all.lm))
# Normality
hist(residuals(all.lm))
# QQ Plot
dres <- resid(all.lm)
qqnorm(dres, main="Deviance residuals", las=1); qqline(dres)
## Partial F-test to test significance of terms; do you need to do this for glm?
#gradient
all.null<- glmer(density.m2 ~ 1 +
+ (1|stream/plot), data=all.c,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(all.null) #singularity issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ 1 + +(1 | stream/plot)
## Data: all.c
##
## AIC BIC logLik deviance df.resid
## -2231.3 -2215.6 1119.7 -2239.3 368
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4360 -0.5076 -0.2046 0.2798 3.7907
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.3943 0.6279
## stream (Intercept) 0.0000 0.0000
## Residual 0.3869 0.6220
## Number of obs: 372, groups: plot:stream, 93; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -3.95032 0.09324 -42.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(all.lm, all.null)
## Data: all.c
## Models:
## all.null: density.m2 ~ 1 + +(1 | stream/plot)
## all.lm: density.m2 ~ gradient + +(1 | stream/plot)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## all.null 4 -2231.3 -2215.7 1119.7 -2239.3
## all.lm 5 -2243.4 -2223.8 1126.7 -2253.4 14.038 1 0.0001792 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Wood investigation
## Wood presence/absence having issues with finding significance
## no significance with removing or keeping zeros
#all.c$wood.p.a <- factor(all.c$wood.p.a, levels = c("0", "1"))
#all.c<-all.c%>%
#mutate(wood.p.a = recode_factor(wood.p.a, "0" ="Absent"))%>%
#mutate(wood.p.a = recode_factor(wood.p.a, "1" ="Present"))
#################
## Wood per m2 vs. total.per.unit (CPUE) --> no significance
##################3
no.zeros<-all.c%>%
filter(total.per.habitat !="0")
ggplot(data = no.zeros) +
geom_density(aes(x = total.per.habitat), col = "darkblue", fill = "lightblue") +
theme_bw()+
facet_wrap(~stream)
wood<-lmer(total.per.habitat ~ wood.m2 + (1|stream/plot), data=no.zeros)
summary(wood)
## Linear mixed model fit by REML ['lmerMod']
## Formula: total.per.habitat ~ wood.m2 + (1 | stream/plot)
## Data: no.zeros
##
## REML criterion at convergence: 728.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4621 -0.4556 -0.1724 0.2369 6.8908
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 1.3570 1.1649
## stream (Intercept) 0.7478 0.8648
## Residual 2.9264 1.7107
## Number of obs: 173, groups: plot:stream, 66; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.1360 0.4512 4.734
## wood.m2 5.5412 4.6782 1.184
##
## Correlation of Fixed Effects:
## (Intr)
## wood.m2 -0.251
anova(wood)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## wood.m2 1 4.1057 4.1057 1.403
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
stargazer(wood, type = "text",
digits = 3,
star.cutoffs = c(0.05, 0.01, 0.001),
digit.separator = "")
##
## =================================================
## Dependent variable:
## -----------------------------
## total.per.habitat
## -------------------------------------------------
## wood.m2 5.541
## (4.678)
##
## Constant 2.136***
## (0.451)
##
## -------------------------------------------------
## Observations 173
## Log Likelihood -364.078
## Akaike Inf. Crit. 738.156
## Bayesian Inf. Crit. 753.922
## =================================================
## Note: *p<0.05; **p<0.01; ***p<0.001
wood<- glmer(total.per.habitat ~ wood.m2
+ (1|stream/plot), data=no.zeros,family=Gamma(link=log))
summary(wood)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: total.per.habitat ~ wood.m2 + (1 | stream/plot)
## Data: no.zeros
##
## AIC BIC logLik deviance df.resid
## 512.3 528.1 -251.2 502.3 168
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1492 -0.6009 -0.2645 0.5979 3.2099
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.16507 0.4063
## stream (Intercept) 0.03314 0.1821
## Residual 0.27664 0.5260
## Number of obs: 173, groups: plot:stream, 66; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) 0.4874 0.1607 3.033 0.00242 **
## wood.m2 2.2732 1.7771 1.279 0.20084
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## wood.m2 -0.279
anova(wood)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## wood.m2 1 0.6161 0.6161 2.2271
###########
# NOTE: wood should be an influential factor; how to make this work?
###########
#######################
# Field Habitat Selection
########################
all.c.hab<-all.c%>%
filter(habitat != "STEP")
habitat<- glmer(density.m2 ~ habitat +
+ (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(habitat)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ habitat + +(1 | stream/plot)
## Data: all.c.hab
##
## AIC BIC logLik deviance df.resid
## -2200.0 -2172.7 1107.0 -2214.0 357
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5030 -0.5146 -0.1832 0.1321 4.0234
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.2172 0.4661
## stream (Intercept) 0.0000 0.0000
## Residual 0.3611 0.6009
## Number of obs: 364, groups: plot:stream, 91; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -4.4457 0.1718 -25.872 < 2e-16 ***
## habitatRI 0.2220 0.2085 1.065 0.2870
## habitatG 0.5443 0.2217 2.455 0.0141 *
## habitatPOOL 1.5484 0.2393 6.471 9.76e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) hbttRI habttG
## habitatRI -0.824
## habitatG -0.775 0.639
## habitatPOOL -0.718 0.592 0.557
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
hab.null<- glmer(density.m2 ~ 1
+ (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(hab.null)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ 1 + (1 | stream/plot)
## Data: all.c.hab
##
## AIC BIC logLik deviance df.resid
## -2166.3 -2150.7 1087.2 -2174.3 360
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4255 -0.5108 -0.2087 0.3172 3.7568
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 3.977e-01 6.306e-01
## stream (Intercept) 7.516e-10 2.742e-05
## Residual 3.927e-01 6.267e-01
## Number of obs: 364, groups: plot:stream, 91; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -3.9351 0.0944 -41.69 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(habitat, hab.null) # habitat significant --> error
## Data: all.c.hab
## Models:
## hab.null: density.m2 ~ 1 + (1 | stream/plot)
## habitat: density.m2 ~ habitat + +(1 | stream/plot)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hab.null 4 -2166.3 -2150.7 1087.2 -2174.3
## habitat 7 -2200.0 -2172.7 1107.0 -2214.0 39.717 3 1.224e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pseudo R^2
r.squaredGLMM(habitat)
## R2m R2c
## delta 0.3256128 0.5789230
## lognormal 0.3469767 0.6169070
## trigamma 0.3001190 0.5335965
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 53.3% of variation in density
# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02626246
## [1] 0.02626246
##########################
## Field Pool area
#########################
pool.area.data<-all.c%>%
filter(habitat == "POOL")
foo<-pool.area.data%>%
distinct(plot, .keep_all = TRUE) # 17 distrinct pools
# GLMM poisson distribution
### Stream no plot nested
poisson<-glmer(total.per.habitat ~ pool.area + (1|stream),
family = "poisson", data = pool.area.data)
summary(poisson)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: total.per.habitat ~ pool.area + (1 | stream)
## Data: pool.area.data
##
## AIC BIC logLik deviance df.resid
## 271.2 277.9 -132.6 265.2 65
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9454 -1.1006 -0.2207 0.6029 3.8349
##
## Random effects:
## Groups Name Variance Std.Dev.
## stream (Intercept) 0.1725 0.4153
## Number of obs: 68, groups: stream, 5
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.34586 0.24974 1.385 0.166088
## pool.area 0.01314 0.00350 3.755 0.000173 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## pool.area -0.569
# Plot included, model cannot converge
poisson.1<-glmer(total.per.habitat ~ pool.area + (1|stream/plot),
family = "poisson", data = pool.area.data)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00307044 (tol = 0.002, component 1)
summary(poisson.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: total.per.habitat ~ pool.area + (1 | stream/plot)
## Data: pool.area.data
##
## AIC BIC logLik deviance df.resid
## 259.7 268.6 -125.8 251.7 64
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.96554 -0.97470 -0.07414 0.68598 3.01704
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 3.669e-01 0.6056924
## stream (Intercept) 1.934e-07 0.0004398
## Number of obs: 68, groups: plot:stream, 17; stream, 5
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.028247 0.277633 -0.102 0.9190
## pool.area 0.018178 0.006667 2.726 0.0064 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## pool.area -0.763
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00307044 (tol = 0.002, component 1)
###########
# LiDAR fish GLMM w/ als.gradient
###########
### Response 0.001 has better residuals than 0.01; better results with 0.01
all.c <- all%>%
mutate(density.m2=if_else(total.per.habitat==0, 0.01, density.m2))
#explore the data a bit: general shape per stream and sample
ggplot(data = all.c) +
geom_density(aes(x = density.m2), col = "darkblue", fill = "lightblue") +
theme_bw()+
facet_wrap(~stream)
all.lm<- glmer(density.m2 ~ als.gradient.poss+ # significant
+ (1|stream/plot), data=all.c,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(all.lm) # singular fit issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ als.gradient.poss + +(1 | stream/plot)
## Data: all.c
##
## AIC BIC logLik deviance df.resid
## -2233.5 -2213.9 1121.7 -2243.5 367
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4365 -0.4696 -0.1971 0.2330 3.8425
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.3724 0.6103
## stream (Intercept) 0.0000 0.0000
## Residual 0.3845 0.6201
## Number of obs: 372, groups: plot:stream, 93; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -4.18050 0.14372 -29.089 <2e-16 ***
## als.gradient.poss 0.04243 0.02050 2.069 0.0385 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## als.grdnt.p -0.775
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
#pseudo R^2
r.squaredGLMM(all.lm) #MuMIN package
## R2m R2c
## delta 0.04471180 0.5147338
## lognormal 0.04831954 0.5562670
## trigamma 0.04046591 0.4658540
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 46.6% of variation in density
# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02628701
## [1] 0.02628701
### something funky with the estimate values with the poss/neg gradient
# Intercept
exp(-4.18050)
## [1] 0.01529086
# X variables
(exp(0.04243)-1)*100
## [1] 4.334302
# or 4.33% decrease in density per one unit increase in gradient
#Check the Model Assumptions
# Linearity
### Residuals
plot(fitted(all.lm),residuals(all.lm))
residuals <- residuals(all.lm, type = "response", retype="normalized")
plot(residuals)
par(mfrow=c(2,2), cex=1.0, mai=c(1.0,1.0,0.6,0.6))
plot(all.lm)
par(mfrow=c(1,1), cex=1.0, mai=c(1.0,1.0,1.0,1.0))
## Colinearity - no relationship
#ggplot(all.c,aes(x=bfw, y=gradient))+
#geom_point() + geom_smooth(method = "lm")+ theme_bw()+
# facet_wrap(~stream)
#test<-lm(bfw ~ gradient, data =all.c) # Not significantly related
#summary(test)
## Heteroskedasticity
plot(fitted(all.lm),residuals(all.lm))
# Normality
hist(residuals(all.lm))
# QQ Plot
dres <- resid(all.lm)
qqnorm(dres, main="Deviance residuals", las=1); qqline(dres)
## Partial F-test to test significance of terms; do you need to do this for glm?
#gradient
all.null<- glmer(density.m2 ~ 1 +
+ (1|stream/plot), data=all.c,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(all.null) #singularity issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ 1 + +(1 | stream/plot)
## Data: all.c
##
## AIC BIC logLik deviance df.resid
## -2231.3 -2215.6 1119.7 -2239.3 368
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4360 -0.5076 -0.2046 0.2798 3.7907
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.3943 0.6279
## stream (Intercept) 0.0000 0.0000
## Residual 0.3869 0.6220
## Number of obs: 372, groups: plot:stream, 93; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -3.95032 0.09324 -42.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(all.lm, all.null)
## Data: all.c
## Models:
## all.null: density.m2 ~ 1 + +(1 | stream/plot)
## all.lm: density.m2 ~ als.gradient.poss + +(1 | stream/plot)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## all.null 4 -2231.3 -2215.7 1119.7 -2239.3
## all.lm 5 -2233.4 -2213.9 1121.7 -2243.4 4.1287 1 0.04216 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###########
# NOTE: Wood investigation --> nothing from LiDAR yet
###########
#######################
# ALS Habitat Selection
########################
all.c.hab<-all.c%>%
filter(als.habitat != "STEP")
habitat<- glmer(density.m2 ~ als.habitat +
+ (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(habitat)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ als.habitat + +(1 | stream/plot)
## Data: all.c.hab
##
## AIC BIC logLik deviance df.resid
## -2254.4 -2227.0 1134.2 -2268.4 365
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5017 -0.5620 -0.1627 0.1534 3.8626
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.2551 0.5051
## stream (Intercept) 0.0000 0.0000
## Residual 0.3598 0.5998
## Number of obs: 372, groups: plot:stream, 93; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -4.4105 0.1752 -25.181 < 2e-16 ***
## als.habitatRI 0.1850 0.2167 0.854 0.393
## als.habitatG 0.5585 0.2360 2.366 0.018 *
## als.habitatPOOL 1.2901 0.2442 5.283 1.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) als.RI als.hG
## als.habttRI -0.808
## als.habittG -0.742 0.600
## als.hbtPOOL -0.717 0.580 0.532
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
hab.null<- glmer(density.m2 ~ 1
+ (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(hab.null)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Gamma ( log )
## Formula: density.m2 ~ 1 + (1 | stream/plot)
## Data: all.c.hab
##
## AIC BIC logLik deviance df.resid
## -2231.3 -2215.6 1119.7 -2239.3 368
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4360 -0.5076 -0.2046 0.2798 3.7907
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.3943 0.6279
## stream (Intercept) 0.0000 0.0000
## Residual 0.3869 0.6220
## Number of obs: 372, groups: plot:stream, 93; stream, 5
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) -3.95032 0.09324 -42.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(habitat, hab.null) # habitat significant --> error
## Data: all.c.hab
## Models:
## hab.null: density.m2 ~ 1 + (1 | stream/plot)
## habitat: density.m2 ~ als.habitat + +(1 | stream/plot)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hab.null 4 -2231.3 -2215.7 1119.7 -2239.3
## habitat 7 -2254.4 -2227.0 1134.2 -2268.4 29.099 3 2.135e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pseudo R^2
r.squaredGLMM(habitat)
## R2m R2c
## delta 0.2566016 0.5650399
## lognormal 0.2739770 0.6033007
## trigamma 0.2359759 0.5196218
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 52% of variation in density
# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02628695
## [1] 0.02628701
##########################
## ALS Pool area
#########################
pool.area.data<-all.c%>%
filter(als.habitat == "POOL")
foo<-pool.area.data%>%
distinct(plot, .keep_all = TRUE) # 19 classified pools
# GLMM poisson distribution
### Stream without plot nested
poisson<-glmer(total.per.habitat ~ als.pool.area + (1|stream/plot),
family = "poisson", data = pool.area.data)
## boundary (singular) fit: see ?isSingular
summary(poisson)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: total.per.habitat ~ als.pool.area + (1 | stream/plot)
## Data: pool.area.data
##
## AIC BIC logLik deviance df.resid
## 263.6 273.0 -127.8 255.6 72
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0473 -0.7340 -0.1198 0.6493 2.8452
##
## Random effects:
## Groups Name Variance Std.Dev.
## plot:stream (Intercept) 0.3823 0.6183
## stream (Intercept) 0.0000 0.0000
## Number of obs: 76, groups: plot:stream, 19; stream, 5
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.778729 0.002934 -265.42 <2e-16 ***
## als.pool.area 0.030561 0.002380 12.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## als.pool.ar -0.008
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Plot not included, fixes singular fit?
poisson.1<-glmer(total.per.habitat ~ als.pool.area + (1|stream),
family = "poisson", data = pool.area.data)
summary(poisson.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: total.per.habitat ~ als.pool.area + (1 | stream)
## Data: pool.area.data
##
## AIC BIC logLik deviance df.resid
## 283.7 290.7 -138.9 277.7 73
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8987 -0.9553 -0.1415 0.4514 3.3118
##
## Random effects:
## Groups Name Variance Std.Dev.
## stream (Intercept) 0.03949 0.1987
## Number of obs: 76, groups: stream, 5
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.437169 0.222993 -1.960 0.0499 *
## als.pool.area 0.026843 0.003793 7.078 1.47e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## als.pool.ar -0.835