Chapter 1 Analysis

Reading in data

## Load packages
library(dplyr) # data wrangling
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(lme4) # glmer
## Warning: package 'lme4' was built under R version 3.6.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.6.2
library(MuMIn) # r.squaredGLMM
## Warning: package 'MuMIn' was built under R version 3.6.2
library(emmeans) #pairwise
## Warning: package 'emmeans' was built under R version 3.6.2
# Read the data
# all lidar only has primary unit
nahmint<- read.csv("/Users/NonisA/Documents/Feb 18th - finalizing stats/Chapter.2/data/all.lidar.edited.csv")

# new file with corrected headache creek to match lidar
# All fish for LiDAR
all<-nahmint%>% # remove outlier values 
  filter(stream != "gray.creek")

test<-subset(all,is.na(mean.slope)) # checking na's
all<-all[!is.na(all$mean.slope), ] # removing 12 na values
 
all$cover.dom<-as.character(all$cover.dom) 
all$cover.dom[is.na(all$cover.dom)] <- "none"
all$cover.dom<-as.factor(all$cover.dom)
all$level<-as.factor(all$level)
all$stream<-as.factor(all$stream)
all$location<-as.factor(all$location)
all$sample<-as.factor(all$sample)

#### ordering of the categorical factors; how do i choose to order them? Order does not matter. 
all$sub.dom <- factor(all$sub.dom, levels = c( "R", "G", "B", "C"))
all$cover.dom <- factor(all$cover.dom, levels = c("none", "l.wood", "s.wood", "OV", "UCB", "B"))
all$stream <- factor(all$stream, levels = c("rainbow.creek", "head.water.creek", "elk.creek", "view.creek", "headache.creek"))
all$habitat <- factor(all$habitat, levels = c("STEP", "C", "RI", "G", "POOL"))

all<-all%>%
  dplyr::mutate(als.habitat = recode_factor(als.habitat, "CB" = "C", "CR" = "C", "RP" = "RI"))

str(all)
## 'data.frame':    372 obs. of  37 variables:
##  $ X                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ stream           : Factor w/ 5 levels "rainbow.creek",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ location         : Factor w/ 31 levels "1","2","3","4",..: 12 13 15 19 2 3 5 8 12 13 ...
##  $ level            : Factor w/ 1 level "P": 1 1 1 1 1 1 1 1 1 1 ...
##  $ plot             : Factor w/ 118 levels "elk.creek-1",..: 90 91 93 97 98 99 101 104 39 40 ...
##  $ sample           : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ als.length       : int  3 8 9 5 4 3 5 4 5 10 ...
##  $ total.per.habitat: int  2 1 1 1 1 2 3 1 3 3 ...
##  $ density.m2       : num  0.0275 0.0245 0.085 0.0608 0.0433 ...
##  $ fish.p.a         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ species          : Factor w/ 1 level "ALL": 1 1 1 1 1 1 1 1 1 1 ...
##  $ habitat          : Factor w/ 5 levels "STEP","C","RI",..: 4 3 5 5 5 4 5 5 5 4 ...
##  $ gradient         : num  0.5 1 0 0 0 1 0 0 0 1 ...
##  $ bfw              : num  8 8 4.2 4.7 6.6 2.9 4.4 6.9 7.3 5.6 ...
##  $ sub.dom          : Factor w/ 4 levels "R","G","B","C": 2 4 3 1 3 4 4 3 4 2 ...
##  $ cover.dom        : Factor w/ 6 levels "none","l.wood",..: 4 4 4 6 4 4 4 2 2 4 ...
##  $ pool.area        : num  NA NA 11.8 12.6 23.1 ...
##  $ wetted.width.m   : num  3.3 2.1 3.7 3.6 6.5 1.9 3.2 3.7 4.6 3.1 ...
##  $ mean.depth.m     : num  0.263 0.143 0.367 0.36 0.34 ...
##  $ total.wood       : int  0 0 0 0 2 0 0 1 4 1 ...
##  $ small            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ medium           : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ large            : int  0 0 0 0 2 0 0 1 2 1 ...
##  $ wood.m2          : num  0 0 0 0 0.0866 ...
##  $ wood.p.a         : int  0 0 0 0 1 0 0 1 1 1 ...
##  $ mean.slope       : num  3.42 1.74 6.67 13.54 7.68 ...
##  $ mean.roughness   : num  0.2479 0.0702 0.4826 0.6556 0.6149 ...
##  $ als.bfw          : num  6.25 8.75 6.9 2.83 4.4 ...
##  $ als.max.bfw      : int  7 9 10 4 5 5 7 5 8 8 ...
##  $ als.min.bfw      : int  6 8 5 1 4 3 5 5 6 5 ...
##  $ als.gradient     : num  -3.77 -1.11 -3.74 -20.24 -17.65 ...
##  $ als.habitat      : Factor w/ 4 levels "C","RI","G","POOL": 2 2 4 4 1 3 4 4 4 3 ...
##  $ IntClass         : int  2 2 3 4 2 3 4 3 3 3 ...
##  $ MeanCHM          : num  10.55 10.43 9.31 3.75 5.44 ...
##  $ ikurt            : num  2.37 2.51 2.96 2.55 2.9 ...
##  $ zkurt            : num  4 4.4 2.58 2.53 3.15 ...
##  $ zq15             : num  4.71 6.88 2.6 1.46 1.15 ...
head(all)
##   X        stream location level             plot sample als.length
## 1 1 rainbow.creek       12     P rainbow.creek-12      1          3
## 2 2 rainbow.creek       13     P rainbow.creek-13      1          8
## 3 3 rainbow.creek       15     P rainbow.creek-15      1          9
## 4 4 rainbow.creek       19     P rainbow.creek-19      1          5
## 5 5 rainbow.creek        2     P  rainbow.creek-2      1          4
## 6 6 rainbow.creek        3     P  rainbow.creek-3      1          3
##   total.per.habitat density.m2 fish.p.a species habitat gradient bfw sub.dom
## 1                 2 0.02747253        1     ALL       G      0.5 8.0       G
## 2                 1 0.02450980        1     ALL      RI      1.0 8.0       C
## 3                 1 0.08503401        1     ALL    POOL      0.0 4.2       B
## 4                 1 0.06079027        1     ALL    POOL      0.0 4.7       R
## 5                 1 0.04329004        1     ALL    POOL      0.0 6.6       B
## 6                 2 0.09447331        1     ALL       G      1.0 2.9       C
##   cover.dom pool.area wetted.width.m mean.depth.m total.wood small medium large
## 1        OV        NA            3.3       0.2633          0     0      0     0
## 2        OV        NA            2.1       0.1433          0     0      0     0
## 3        OV     11.76            3.7       0.3667          0     0      0     0
## 4         B     12.60            3.6       0.3600          0     0      0     0
## 5        OV     23.10            6.5       0.3400          2     0      0     2
## 6        OV        NA            1.9       0.3400          0     0      0     0
##      wood.m2 wood.p.a mean.slope mean.roughness  als.bfw als.max.bfw
## 1 0.00000000        0   3.418388     0.24793388 6.250000           7
## 2 0.00000000        0   1.740133     0.07024793 8.750000           9
## 3 0.00000000        0   6.672169     0.48264463 6.900000          10
## 4 0.00000000        0  13.542637     0.65564739 2.833333           4
## 5 0.08658009        1   7.683977     0.61487603 4.400000           5
## 6 0.00000000        0   7.726148     0.74173553 3.750000           5
##   als.min.bfw als.gradient als.habitat IntClass   MeanCHM    ikurt    zkurt
## 1           6    -3.766378          RI        2 10.545442 2.366944 3.996058
## 2           8    -1.112556          RI        2 10.432096 2.510605 4.398169
## 3           5    -3.744507        POOL        3  9.314879 2.963565 2.576385
## 4           1   -20.240173        POOL        4  3.748570 2.548174 2.525043
## 5           4   -17.649841           C        2  5.438025 2.900789 3.150615
## 6           3    -8.833313           G        3  4.270184 2.863221 3.369249
##       zq15
## 1 4.706800
## 2 6.880472
## 3 2.600435
## 4 1.458125
## 5 1.147700
## 6 1.114375
# Need to calculate pool area
all<-all%>%
  mutate(als.pool.area = (als.length*als.bfw))%>%
  mutate(als.gradient.poss = als.gradient*-1)

TEST

###########
# LiDAR fish GLMM  w/ field gradient
###########
### Response 0.001 has better residuals than 0.01; better results with 0.01
all.c <- all%>% 
  mutate(density.m2=if_else(total.per.habitat==0, 0.01, density.m2))

#explore the data a bit: general shape per stream and sample
ggplot(data = all.c) + 
  geom_density(aes(x = density.m2), col = "darkblue", fill = "lightblue") + 
  theme_bw()+
  facet_wrap(~stream)

all.lm<- glmer(density.m2 ~ gradient+ # significant
                 + (1|stream/plot), data=all.c,family=Gamma(link=log)) 
summary(all.lm) # singular fit issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ gradient + +(1 | stream/plot)
##    Data: all.c
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2243.4  -2223.8   1126.7  -2253.4      367 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4715 -0.4959 -0.1944  0.2957  3.8634 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.312383 0.55891 
##  stream      (Intercept) 0.005774 0.07599 
##  Residual                0.369358 0.60775 
## Number of obs: 372, groups:  plot:stream, 93; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept) -3.68225    0.12095 -30.445  < 2e-16 ***
## gradient    -0.13688    0.03566  -3.839 0.000124 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##          (Intr)
## gradient -0.538
#pseudo R^2
r.squaredGLMM(all.lm) #MuMIN package
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.
##                 R2m       R2c
## delta     0.1486670 0.5426332
## lognormal 0.1595353 0.5823025
## trigamma  0.1358179 0.4957342
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 50% of variation in density

# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02626246
## [1] 0.02626246
### something funky with the estimate values with the poss/neg gradient
# Intercept
#exp(-3.68225) 
# X variables 
#(exp(-0.13688)-1)*100 # --> incorrect way of calculating this? value seems off
# or XX% decrease in density per one unit increase in gradient

#Check the Model Assumptions
# Linearity
### Residuals
plot(fitted(all.lm),residuals(all.lm))

residuals <- residuals(all.lm, type = "response", retype="normalized")
plot(residuals)

par(mfrow=c(2,2), cex=1.0, mai=c(1.0,1.0,0.6,0.6))
plot(all.lm)

par(mfrow=c(1,1), cex=1.0, mai=c(1.0,1.0,1.0,1.0))

## Colinearity - no relationship 
#ggplot(all.c,aes(x=bfw, y=gradient))+
#geom_point() + geom_smooth(method = "lm")+ theme_bw()+
# facet_wrap(~stream)

#test<-lm(bfw ~ gradient, data =all.c) # Not significantly related
#summary(test)

## Heteroskedasticity
plot(fitted(all.lm),residuals(all.lm))

# Normality
hist(residuals(all.lm))

# QQ Plot
dres <- resid(all.lm)
qqnorm(dres, main="Deviance residuals", las=1); qqline(dres)

## Partial F-test to test significance of terms; do you need to do this for glm?
#gradient
all.null<- glmer(density.m2 ~  1 +
                   + (1|stream/plot), data=all.c,family=Gamma(link=log)) 
## boundary (singular) fit: see ?isSingular
summary(all.null) #singularity issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ 1 + +(1 | stream/plot)
##    Data: all.c
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2231.3  -2215.6   1119.7  -2239.3      368 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4360 -0.5076 -0.2046  0.2798  3.7907 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.3943   0.6279  
##  stream      (Intercept) 0.0000   0.0000  
##  Residual                0.3869   0.6220  
## Number of obs: 372, groups:  plot:stream, 93; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept) -3.95032    0.09324  -42.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(all.lm, all.null)
## Data: all.c
## Models:
## all.null: density.m2 ~ 1 + +(1 | stream/plot)
## all.lm: density.m2 ~ gradient + +(1 | stream/plot)
##          npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)    
## all.null    4 -2231.3 -2215.7 1119.7  -2239.3                         
## all.lm      5 -2243.4 -2223.8 1126.7  -2253.4 14.038  1  0.0001792 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Wood investigation
## Wood presence/absence having issues with finding significance
## no significance with removing or keeping zeros
#all.c$wood.p.a <- factor(all.c$wood.p.a, levels = c("0", "1"))
#all.c<-all.c%>%  
#mutate(wood.p.a = recode_factor(wood.p.a, "0" ="Absent"))%>%
#mutate(wood.p.a = recode_factor(wood.p.a, "1" ="Present"))

#################
## Wood per m2 vs. total.per.unit (CPUE) --> no significance
##################3
no.zeros<-all.c%>%
  filter(total.per.habitat !="0")

ggplot(data = no.zeros) + 
  geom_density(aes(x = total.per.habitat), col = "darkblue", fill = "lightblue") + 
  theme_bw()+
  facet_wrap(~stream)

wood<-lmer(total.per.habitat ~ wood.m2 + (1|stream/plot), data=no.zeros)
summary(wood)
## Linear mixed model fit by REML ['lmerMod']
## Formula: total.per.habitat ~ wood.m2 + (1 | stream/plot)
##    Data: no.zeros
## 
## REML criterion at convergence: 728.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4621 -0.4556 -0.1724  0.2369  6.8908 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 1.3570   1.1649  
##  stream      (Intercept) 0.7478   0.8648  
##  Residual                2.9264   1.7107  
## Number of obs: 173, groups:  plot:stream, 66; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   2.1360     0.4512   4.734
## wood.m2       5.5412     4.6782   1.184
## 
## Correlation of Fixed Effects:
##         (Intr)
## wood.m2 -0.251
anova(wood)
## Analysis of Variance Table
##         npar Sum Sq Mean Sq F value
## wood.m2    1 4.1057  4.1057   1.403
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
stargazer(wood, type = "text",
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          digit.separator = "")
## 
## =================================================
##                          Dependent variable:     
##                     -----------------------------
##                           total.per.habitat      
## -------------------------------------------------
## wood.m2                         5.541            
##                                (4.678)           
##                                                  
## Constant                      2.136***           
##                                (0.451)           
##                                                  
## -------------------------------------------------
## Observations                     173             
## Log Likelihood                -364.078           
## Akaike Inf. Crit.              738.156           
## Bayesian Inf. Crit.            753.922           
## =================================================
## Note:               *p<0.05; **p<0.01; ***p<0.001
wood<- glmer(total.per.habitat ~ wood.m2
             + (1|stream/plot), data=no.zeros,family=Gamma(link=log)) 
summary(wood)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: total.per.habitat ~ wood.m2 + (1 | stream/plot)
##    Data: no.zeros
## 
##      AIC      BIC   logLik deviance df.resid 
##    512.3    528.1   -251.2    502.3      168 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1492 -0.6009 -0.2645  0.5979  3.2099 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.16507  0.4063  
##  stream      (Intercept) 0.03314  0.1821  
##  Residual                0.27664  0.5260  
## Number of obs: 173, groups:  plot:stream, 66; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)   
## (Intercept)   0.4874     0.1607   3.033  0.00242 **
## wood.m2       2.2732     1.7771   1.279  0.20084   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr)
## wood.m2 -0.279
anova(wood)
## Analysis of Variance Table
##         npar Sum Sq Mean Sq F value
## wood.m2    1 0.6161  0.6161  2.2271
###########
# NOTE: wood should be an influential factor; how to make this work?
###########

#######################
# Field Habitat Selection 
########################
all.c.hab<-all.c%>%
  filter(habitat != "STEP")

habitat<- glmer(density.m2 ~ habitat + 
                  + (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(habitat)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ habitat + +(1 | stream/plot)
##    Data: all.c.hab
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2200.0  -2172.7   1107.0  -2214.0      357 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5030 -0.5146 -0.1832  0.1321  4.0234 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.2172   0.4661  
##  stream      (Intercept) 0.0000   0.0000  
##  Residual                0.3611   0.6009  
## Number of obs: 364, groups:  plot:stream, 91; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept)  -4.4457     0.1718 -25.872  < 2e-16 ***
## habitatRI     0.2220     0.2085   1.065   0.2870    
## habitatG      0.5443     0.2217   2.455   0.0141 *  
## habitatPOOL   1.5484     0.2393   6.471 9.76e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) hbttRI habttG
## habitatRI   -0.824              
## habitatG    -0.775  0.639       
## habitatPOOL -0.718  0.592  0.557
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
hab.null<- glmer(density.m2 ~ 1
                 + (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(hab.null)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ 1 + (1 | stream/plot)
##    Data: all.c.hab
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2166.3  -2150.7   1087.2  -2174.3      360 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4255 -0.5108 -0.2087  0.3172  3.7568 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  plot:stream (Intercept) 3.977e-01 6.306e-01
##  stream      (Intercept) 7.516e-10 2.742e-05
##  Residual                3.927e-01 6.267e-01
## Number of obs: 364, groups:  plot:stream, 91; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept)  -3.9351     0.0944  -41.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(habitat, hab.null) # habitat significant --> error
## Data: all.c.hab
## Models:
## hab.null: density.m2 ~ 1 + (1 | stream/plot)
## habitat: density.m2 ~ habitat + +(1 | stream/plot)
##          npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)    
## hab.null    4 -2166.3 -2150.7 1087.2  -2174.3                         
## habitat     7 -2200.0 -2172.7 1107.0  -2214.0 39.717  3  1.224e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pseudo R^2
r.squaredGLMM(habitat)
##                 R2m       R2c
## delta     0.3256128 0.5789230
## lognormal 0.3469767 0.6169070
## trigamma  0.3001190 0.5335965
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 53.3% of variation in density

# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02626246
## [1] 0.02626246
##########################
## Field Pool area
#########################
pool.area.data<-all.c%>%
  filter(habitat == "POOL")
foo<-pool.area.data%>%
  distinct(plot, .keep_all = TRUE) # 17 distrinct pools 

# GLMM poisson distribution 
### Stream no plot nested
poisson<-glmer(total.per.habitat ~ pool.area + (1|stream), 
               family = "poisson", data = pool.area.data)
summary(poisson)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: total.per.habitat ~ pool.area + (1 | stream)
##    Data: pool.area.data
## 
##      AIC      BIC   logLik deviance df.resid 
##    271.2    277.9   -132.6    265.2       65 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9454 -1.1006 -0.2207  0.6029  3.8349 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  stream (Intercept) 0.1725   0.4153  
## Number of obs: 68, groups:  stream, 5
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.34586    0.24974   1.385 0.166088    
## pool.area    0.01314    0.00350   3.755 0.000173 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## pool.area -0.569
# Plot included, model cannot converge
poisson.1<-glmer(total.per.habitat ~ pool.area + (1|stream/plot), 
               family = "poisson", data = pool.area.data)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00307044 (tol = 0.002, component 1)
summary(poisson.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: total.per.habitat ~ pool.area + (1 | stream/plot)
##    Data: pool.area.data
## 
##      AIC      BIC   logLik deviance df.resid 
##    259.7    268.6   -125.8    251.7       64 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.96554 -0.97470 -0.07414  0.68598  3.01704 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. 
##  plot:stream (Intercept) 3.669e-01 0.6056924
##  stream      (Intercept) 1.934e-07 0.0004398
## Number of obs: 68, groups:  plot:stream, 17; stream, 5
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.028247   0.277633  -0.102   0.9190   
## pool.area    0.018178   0.006667   2.726   0.0064 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## pool.area -0.763
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00307044 (tol = 0.002, component 1)
###########
# LiDAR fish GLMM  w/ als.gradient
###########
### Response 0.001 has better residuals than 0.01; better results with 0.01
all.c <- all%>% 
  mutate(density.m2=if_else(total.per.habitat==0, 0.01, density.m2))

#explore the data a bit: general shape per stream and sample
ggplot(data = all.c) + 
  geom_density(aes(x = density.m2), col = "darkblue", fill = "lightblue") + 
  theme_bw()+
  facet_wrap(~stream)

all.lm<- glmer(density.m2 ~ als.gradient.poss+ # significant
                   + (1|stream/plot), data=all.c,family=Gamma(link=log)) 
## boundary (singular) fit: see ?isSingular
summary(all.lm) # singular fit issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ als.gradient.poss + +(1 | stream/plot)
##    Data: all.c
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2233.5  -2213.9   1121.7  -2243.5      367 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4365 -0.4696 -0.1971  0.2330  3.8425 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.3724   0.6103  
##  stream      (Intercept) 0.0000   0.0000  
##  Residual                0.3845   0.6201  
## Number of obs: 372, groups:  plot:stream, 93; stream, 5
## 
## Fixed effects:
##                   Estimate Std. Error t value Pr(>|z|)    
## (Intercept)       -4.18050    0.14372 -29.089   <2e-16 ***
## als.gradient.poss  0.04243    0.02050   2.069   0.0385 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## als.grdnt.p -0.775
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
#pseudo R^2
r.squaredGLMM(all.lm) #MuMIN package
##                  R2m       R2c
## delta     0.04471180 0.5147338
## lognormal 0.04831954 0.5562670
## trigamma  0.04046591 0.4658540
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 46.6% of variation in density

# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02628701
## [1] 0.02628701
### something funky with the estimate values with the poss/neg gradient
# Intercept
exp(-4.18050) 
## [1] 0.01529086
# X variables 
(exp(0.04243)-1)*100
## [1] 4.334302
# or 4.33% decrease in density per one unit increase in gradient

#Check the Model Assumptions
# Linearity
### Residuals
plot(fitted(all.lm),residuals(all.lm))

residuals <- residuals(all.lm, type = "response", retype="normalized")
plot(residuals)

par(mfrow=c(2,2), cex=1.0, mai=c(1.0,1.0,0.6,0.6))
plot(all.lm)

par(mfrow=c(1,1), cex=1.0, mai=c(1.0,1.0,1.0,1.0))

## Colinearity - no relationship 
#ggplot(all.c,aes(x=bfw, y=gradient))+
  #geom_point() + geom_smooth(method = "lm")+ theme_bw()+
 # facet_wrap(~stream)

#test<-lm(bfw ~ gradient, data =all.c) # Not significantly related
#summary(test)

## Heteroskedasticity
plot(fitted(all.lm),residuals(all.lm))

# Normality
hist(residuals(all.lm))

# QQ Plot
dres <- resid(all.lm)
qqnorm(dres, main="Deviance residuals", las=1); qqline(dres)

## Partial F-test to test significance of terms; do you need to do this for glm?
#gradient
all.null<- glmer(density.m2 ~  1 +
               + (1|stream/plot), data=all.c,family=Gamma(link=log)) 
## boundary (singular) fit: see ?isSingular
summary(all.null) #singularity issue
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ 1 + +(1 | stream/plot)
##    Data: all.c
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2231.3  -2215.6   1119.7  -2239.3      368 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4360 -0.5076 -0.2046  0.2798  3.7907 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.3943   0.6279  
##  stream      (Intercept) 0.0000   0.0000  
##  Residual                0.3869   0.6220  
## Number of obs: 372, groups:  plot:stream, 93; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept) -3.95032    0.09324  -42.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(all.lm, all.null)
## Data: all.c
## Models:
## all.null: density.m2 ~ 1 + +(1 | stream/plot)
## all.lm: density.m2 ~ als.gradient.poss + +(1 | stream/plot)
##          npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)  
## all.null    4 -2231.3 -2215.7 1119.7  -2239.3                       
## all.lm      5 -2233.4 -2213.9 1121.7  -2243.4 4.1287  1    0.04216 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###########
# NOTE: Wood investigation --> nothing from LiDAR yet
###########

#######################
# ALS Habitat Selection 
########################
all.c.hab<-all.c%>%
  filter(als.habitat != "STEP")

habitat<- glmer(density.m2 ~ als.habitat + 
                  + (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(habitat)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ als.habitat + +(1 | stream/plot)
##    Data: all.c.hab
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2254.4  -2227.0   1134.2  -2268.4      365 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5017 -0.5620 -0.1627  0.1534  3.8626 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.2551   0.5051  
##  stream      (Intercept) 0.0000   0.0000  
##  Residual                0.3598   0.5998  
## Number of obs: 372, groups:  plot:stream, 93; stream, 5
## 
## Fixed effects:
##                 Estimate Std. Error t value Pr(>|z|)    
## (Intercept)      -4.4105     0.1752 -25.181  < 2e-16 ***
## als.habitatRI     0.1850     0.2167   0.854    0.393    
## als.habitatG      0.5585     0.2360   2.366    0.018 *  
## als.habitatPOOL   1.2901     0.2442   5.283 1.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) als.RI als.hG
## als.habttRI -0.808              
## als.habittG -0.742  0.600       
## als.hbtPOOL -0.717  0.580  0.532
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
hab.null<- glmer(density.m2 ~ 1
                 + (1|stream/plot), data=all.c.hab,family=Gamma(link=log))
## boundary (singular) fit: see ?isSingular
summary(hab.null)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( log )
## Formula: density.m2 ~ 1 + (1 | stream/plot)
##    Data: all.c.hab
## 
##      AIC      BIC   logLik deviance df.resid 
##  -2231.3  -2215.6   1119.7  -2239.3      368 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4360 -0.5076 -0.2046  0.2798  3.7907 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.3943   0.6279  
##  stream      (Intercept) 0.0000   0.0000  
##  Residual                0.3869   0.6220  
## Number of obs: 372, groups:  plot:stream, 93; stream, 5
## 
## Fixed effects:
##             Estimate Std. Error t value Pr(>|z|)    
## (Intercept) -3.95032    0.09324  -42.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(habitat, hab.null) # habitat significant --> error
## Data: all.c.hab
## Models:
## hab.null: density.m2 ~ 1 + (1 | stream/plot)
## habitat: density.m2 ~ als.habitat + +(1 | stream/plot)
##          npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)    
## hab.null    4 -2231.3 -2215.7 1119.7  -2239.3                         
## habitat     7 -2254.4 -2227.0 1134.2  -2268.4 29.099  3  2.135e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pseudo R^2
r.squaredGLMM(habitat)
##                 R2m       R2c
## delta     0.2566016 0.5650399
## lognormal 0.2739770 0.6033007
## trigamma  0.2359759 0.5196218
#100*(null_model$deviance - all.lm$deviance)/null_model$deviance
# model explains 52% of variation in density

# RMSE
all.c$density.m2.pred <- predict(all.lm, all.c, type = "response")
sqrt(mean((all.c$density.m2 - all.c$density.m2.pred)^2))# 0.02628695
## [1] 0.02628701
##########################
## ALS Pool area
#########################
pool.area.data<-all.c%>%
  filter(als.habitat == "POOL")
foo<-pool.area.data%>%
  distinct(plot, .keep_all = TRUE) # 19 classified pools

# GLMM poisson distribution 
### Stream without plot nested
poisson<-glmer(total.per.habitat ~ als.pool.area + (1|stream/plot), 
               family = "poisson", data = pool.area.data)
## boundary (singular) fit: see ?isSingular
summary(poisson)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: total.per.habitat ~ als.pool.area + (1 | stream/plot)
##    Data: pool.area.data
## 
##      AIC      BIC   logLik deviance df.resid 
##    263.6    273.0   -127.8    255.6       72 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0473 -0.7340 -0.1198  0.6493  2.8452 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  plot:stream (Intercept) 0.3823   0.6183  
##  stream      (Intercept) 0.0000   0.0000  
## Number of obs: 76, groups:  plot:stream, 19; stream, 5
## 
## Fixed effects:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.778729   0.002934 -265.42   <2e-16 ***
## als.pool.area  0.030561   0.002380   12.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## als.pool.ar -0.008
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Plot not included, fixes singular fit?
poisson.1<-glmer(total.per.habitat ~ als.pool.area + (1|stream), 
                 family = "poisson", data = pool.area.data)
summary(poisson.1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: total.per.habitat ~ als.pool.area + (1 | stream)
##    Data: pool.area.data
## 
##      AIC      BIC   logLik deviance df.resid 
##    283.7    290.7   -138.9    277.7       73 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8987 -0.9553 -0.1415  0.4514  3.3118 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  stream (Intercept) 0.03949  0.1987  
## Number of obs: 76, groups:  stream, 5
## 
## Fixed effects:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.437169   0.222993  -1.960   0.0499 *  
## als.pool.area  0.026843   0.003793   7.078 1.47e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## als.pool.ar -0.835