Bat data models

Set up stuff

rm(list=ls())
pacman::p_load(plyr,dplyr,ggplot2,lme4,car,multcomp,tidyr,reshape2,AICcmodavg)
setwd("C:/Users/Katy/Desktop/thesisr")

Data organization

# Read in data, add column calculating Calls Per Night
data.allparks <- read.csv("./MixedModel_allparks.csv")
data.allparks$Year <- as.factor(data.allparks$Year)
data.allparks$CallsPN <- round(data.allparks$Calls/data.allparks$Nights, 3)
str(data.allparks)

## 'data.frame':    2460 obs. of  7 variables:
##  $ Park   : Factor w/ 8 levels "APIS","GRPO",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Site   : Factor w/ 185 levels "APIS001A","APIS002A",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ Year   : Factor w/ 2 levels "2016","2017": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Nights : int  12 12 12 12 12 12 9 9 9 9 ...
##  $ Species: Factor w/ 9 levels "EPFU","LABO",..: 1 2 3 4 5 6 1 2 3 4 ...
##  $ Calls  : int  130 92 95 1496 2311 1 8 2 7 47 ...
##  $ CallsPN: num  10.83 7.67 7.92 124.67 192.58 ...

head(data.allparks)

##   Park     Site Year Nights Species Calls CallsPN
## 1 APIS APIS001A 2017     12    EPFU   130  10.833
## 2 APIS APIS001A 2017     12    LABO    92   7.667
## 3 APIS APIS001A 2017     12    LACI    95   7.917
## 4 APIS APIS001A 2017     12    LANO  1496 124.667
## 5 APIS APIS001A 2017     12    MYLU  2311 192.583
## 6 APIS APIS001A 2017     12    MYSE     1   0.083

# Temporarily subset the data by year. Rename columns to include year information.
# and remove the actual "Year" column. Then merge all data back to one wide
# data frame, with rows for each combination of Park, Site, and Species. Add a new
# column showing difference between Calls Per Night in 2017 vs 2016. 
data.allparks.2016 <- data.allparks %>% 
                      subset(Year == "2016") %>%
                      rename(Nights.2016 = Nights, Calls.2016 = Calls, 
                             CallsPN.2016 = CallsPN) %>%
                      subset(select = -c(Year))

data.allparks.2017 <- data.allparks %>% 
                      subset(Year == "2017") %>%
                      rename(Nights.2017 = Nights, Calls.2017 = Calls, 
                           CallsPN.2017 = CallsPN) %>%
                      subset(select = -c(Year))

data.allparks.wide <- merge(data.allparks.2016, data.allparks.2017, 
                            by = c("Park","Site","Species")) %>%
                      mutate(Diff = CallsPN.2017 - CallsPN.2016)
str(data.allparks.wide)

## 'data.frame':    1230 obs. of  10 variables:
##  $ Park        : Factor w/ 8 levels "APIS","GRPO",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Site        : Factor w/ 185 levels "APIS001A","APIS002A",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ Species     : Factor w/ 9 levels "EPFU","LABO",..: 1 2 3 4 5 6 1 2 3 4 ...
##  $ Nights.2016 : int  7 7 7 7 7 7 10 10 10 10 ...
##  $ Calls.2016  : int  17 91 35 144 2380 16 4 1 4 33 ...
##  $ CallsPN.2016: num  2.43 13 5 20.57 340 ...
##  $ Nights.2017 : int  12 12 12 12 12 12 9 9 9 9 ...
##  $ Calls.2017  : int  130 92 95 1496 2311 1 8 2 7 47 ...
##  $ CallsPN.2017: num  10.83 7.67 7.92 124.67 192.58 ...
##  $ Diff        : num  8.4 -5.33 2.92 104.1 -147.42 ...

head(data.allparks.wide)

##   Park     Site Species Nights.2016 Calls.2016 CallsPN.2016 Nights.2017
## 1 APIS APIS001A    EPFU           7         17        2.429          12
## 2 APIS APIS001A    LABO           7         91       13.000          12
## 3 APIS APIS001A    LACI           7         35        5.000          12
## 4 APIS APIS001A    LANO           7        144       20.571          12
## 5 APIS APIS001A    MYLU           7       2380      340.000          12
## 6 APIS APIS001A    MYSE           7         16        2.286          12
##   Calls.2017 CallsPN.2017     Diff
## 1        130       10.833    8.404
## 2         92        7.667   -5.333
## 3         95        7.917    2.917
## 4       1496      124.667  104.096
## 5       2311      192.583 -147.417
## 6          1        0.083   -2.203

Plot 1 - histogram of counts

gg.diff1 <- ggplot(data.allparks.wide, aes(data.allparks.wide$Diff)) + 
            geom_histogram(binwidth = 20, colour="black", fill="gray") + 
            theme_bw()
gg.diff1

Plot 2 - histogram of densities, density estimate, and probability density function

gg.diff2 <- ggplot(data.allparks.wide, aes(x=Diff)) + 
            geom_histogram(aes(y=..density..), binwidth = 20, 
                           colour="black", fill="gray") + 
            geom_density(alpha=0.4, color= "coral2", size=.7, fill= "coral2") + 
            theme_bw() 

mean(data.allparks.wide$Diff)

## [1] -6.78478

sd(data.allparks.wide$Diff)

## [1] 60.77969

gg.diff3 <- gg.diff2 + 
            stat_function(fun = dnorm, colour="blue", size=1.2,
                    args = list(mean = -6.78478, sd = 60.77969))
gg.diff3

# normal distribution seems to fit okay

Test significance of “Park” variable

# Using a linear model
mod.null <- lm(data.allparks.wide$Diff ~ 1 , data= data.allparks.wide)
mod.park <- lm(data.allparks.wide$Diff ~ Park, data= data.allparks.wide)
anova(mod.null, mod.park)

## Analysis of Variance Table
## 
## Model 1: data.allparks.wide$Diff ~ 1
## Model 2: data.allparks.wide$Diff ~ Park
##   Res.Df     RSS Df Sum of Sq      F   Pr(>F)   
## 1   1229 4540136                                
## 2   1222 4469108  7     71028 2.7745 0.007275 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Using a linear mixed model
mod.null2 <- lmer(data.allparks.wide$Diff ~ 1 + (1|Site), data= data.allparks.wide)
mod.park2 <- lmer(data.allparks.wide$Diff ~ Park + (1|Site), data= data.allparks.wide)
anova(mod.null2, mod.park2)

## refitting model(s) with ML (instead of REML)

## Data: data.allparks.wide
## Models:
## mod.null2: data.allparks.wide$Diff ~ 1 + (1 | Site)
## mod.park2: data.allparks.wide$Diff ~ Park + (1 | Site)
##           Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## mod.null2  3 13597 13612 -6795.6    13591                           
## mod.park2 10 13594 13645 -6786.9    13574 17.368      7    0.01517 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# I wasn't sure whether to used the regular linear vs. mixed model for this test? Ran both and both gave a significant p-value. Therefore, conclude that there are significant differences by park, so I will run separate models for each park.

Plot 3 - boxplots of Call Differences by park and species

# Plot Calls Per Night Differences (y axis) by park (x axis) for each species. Except leave out the two species MYSO and NYHU since they're only present at one park. 
diff.box <- ggplot(subset(data.allparks.wide, Species %in% c("EPFU","LABO","LACI","LANO", "MYLU", "MYSE", "PESU")), 
            aes(x=Park, y=Diff, color=Park)) + 
            geom_boxplot() + 
            scale_color_brewer(palette = "Dark2") +
            theme_bw() +
            facet_grid(cols = vars(Species)) + 
            theme(axis.text.x.bottom = element_text(angle=90)) +
            theme(legend.position = "bottom")
diff.box

Subset data by parks

park.list <- list("APIS", "GRPO", "INDU", "ISRO","MISS", "SACN", "SLBE", "VOYA")
for (i in park.list) {
  temp <- subset(data.allparks.wide, Park == i)
  temp <- droplevels(temp)
  assign(paste("data",i,sep="."), temp)
        }

Run model and hypothesis tests for each of the 8 parks

Apostle Islands

# This model yields a "singular fit" and lots of warnings when I get the confidence intervals. So still need to figure out what the problem is and how to resolve it.
model.APIS <- lmer(Diff ~ 0 + Species + (1|Site), data=data.APIS)
summary(model.APIS)
car::Anova(model.APIS)

hyptest.APIS <- glht(model.APIS, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.APIS)
confint(model.APIS, oldNames=FALSE)

Grand Portage

model.GRPO <- lmer(Diff ~ 0 + Species + (1|Site), data=data.GRPO)
summary(model.GRPO)
car::Anova(model.GRPO)

hyptest.GRPO <- glht(model.GRPO, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.GRPO)
confint(model.GRPO, oldNames=FALSE)

Indiana Dunes

model.INDU <- lmer(Diff ~ 0 + Species + (1|Site), data=data.INDU)
summary(model.INDU)
car::Anova(model.INDU)

hyptest.INDU <- glht(model.INDU, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesMYSO = 0", "SpeciesNYHU = 0", "SpeciesPESU = 0")))
summary(hyptest.INDU)
confint(model.INDU, oldNames=FALSE)

Isle Royale

model.ISRO <- lmer(Diff ~ 0 + Species + (1|Site), data=data.ISRO)
summary(model.ISRO)
car::Anova(model.ISRO)

hyptest.ISRO <- glht(model.ISRO, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.ISRO)
confint(model.ISRO, oldNames=FALSE)

Mississippi River

model.MISS <- lmer(Diff ~ 0 + Species + (1|Site), data=data.MISS)
summary(model.MISS)
car::Anova(model.MISS)

hyptest.MISS <- glht(model.MISS, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesPESU = 0")))
summary(hyptest.MISS)
confint(model.MISS, oldNames=FALSE)

Saint Croix

model.SACN <- lmer(Diff ~ 0 + Species + (1|Site), data=data.SACN)
summary(model.SACN)
car::Anova(model.SACN)

hyptest.SACN <- glht(model.SACN, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesPESU = 0")))
summary(hyptest.SACN)
confint(model.SACN, oldNames=FALSE)

Sleeping Bear Dunes

model.SLBE <- lmer(Diff ~ 0 + Species + (1|Site), data=data.SLBE)
summary(model.SLBE)
car::Anova(model.SLBE)

hyptest.SLBE <- glht(model.SLBE, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.SLBE)
confint(model.SLBE, oldNames=FALSE)

Voyageurs

model.VOYA <- lmer(Diff ~ 0 + Species + (1|Site), data=data.VOYA)
summary(model.VOYA)
car::Anova(model.VOYA)

hyptest.VOYA <- glht(model.VOYA, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.VOYA)
confint(model.VOYA, oldNames=FALSE)

Candidate Models with all parks together

# Change of strategy. DO NOT separate by park. Leave all parks together. 
# Now try these 5 candidate models. Set REML = FALSE so that log-likelihood is used, 
# so that we can compare the models with AIC. 

# NULL MODEL
model.null <- lmer(data.allparks.wide$Diff ~ 1 + (1|Site), 
                   data= data.allparks.wide, REML = FALSE)
summary(model.null)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 1 + (1 | Site)
##    Data: data.allparks.wide
## 
##      AIC      BIC   logLik deviance df.resid 
##  13597.1  13612.5  -6795.6  13591.1     1227 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.7505  -0.0026   0.0964   0.1553   7.1903 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept)   99.19   9.959  
##  Residual             3592.17  59.935  
## Number of obs: 1230, groups:  Site, 185
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   -6.933      1.862  -3.723

# MODEL 1, FIXED EFFECTS = 0 + PARK
model.parkonly <- lmer(data.allparks.wide$Diff ~ 0 + Park + (1|Site), 
                     data= data.allparks.wide, REML = FALSE)
summary(model.parkonly)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Park + (1 | Site)
##    Data: data.allparks.wide
## 
##      AIC      BIC   logLik deviance df.resid 
##  13593.8  13644.9  -6786.9  13573.8     1220 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.7864  -0.0844   0.1029   0.2235   7.3730 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept)   33.88   5.821  
##  Residual             3599.59  59.997  
## Number of obs: 1230, groups:  Site, 185
## 
## Fixed effects:
##          Estimate Std. Error t value
## ParkAPIS   -9.760      5.139  -1.899
## ParkGRPO  -19.999      6.106  -3.275
## ParkINDU    5.327      4.085   1.304
## ParkISRO  -14.417      5.139  -2.805
## ParkMISS   -5.838      4.779  -1.222
## ParkSACN  -11.086      5.518  -2.009
## ParkSLBE   -2.189      4.318  -0.507
## ParkVOYA  -11.539      5.934  -1.945
## 
## Correlation of Fixed Effects:
##          PrAPIS PrGRPO PrINDU PrISRO PrMISS PrSACN PrSLBE
## ParkGRPO 0.000                                           
## ParkINDU 0.000  0.000                                    
## ParkISRO 0.000  0.000  0.000                             
## ParkMISS 0.000  0.000  0.000  0.000                      
## ParkSACN 0.000  0.000  0.000  0.000  0.000               
## ParkSLBE 0.000  0.000  0.000  0.000  0.000  0.000        
## ParkVOYA 0.000  0.000  0.000  0.000  0.000  0.000  0.000

car::Anova(model.parkonly)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: data.allparks.wide$Diff
##       Chisq Df Pr(>Chisq)    
## Park 33.472  8  5.063e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

hyptest.parkonly <- glht(model.parkonly, linfct= mcp(Park = "Tukey"))
# Only see one significant difference among all of the Tukey pairwise comparisons
summary(hyptest.parkonly)

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lmer(formula = data.allparks.wide$Diff ~ 0 + Park + (1 | Site), 
##     data = data.allparks.wide, REML = FALSE)
## 
## Linear Hypotheses:
##                  Estimate Std. Error z value Pr(>|z|)  
## GRPO - APIS == 0 -10.2383     7.9807  -1.283   0.9038  
## INDU - APIS == 0  15.0872     6.5647   2.298   0.2910  
## ISRO - APIS == 0  -4.6562     7.2676  -0.641   0.9983  
## MISS - APIS == 0   3.9219     7.0176   0.559   0.9993  
## SACN - APIS == 0  -1.3254     7.5405  -0.176   1.0000  
## SLBE - APIS == 0   7.5711     6.7120   1.128   0.9499  
## VOYA - APIS == 0  -1.7785     7.8499  -0.227   1.0000  
## INDU - GRPO == 0  25.3254     7.3464   3.447   0.0131 *
## ISRO - GRPO == 0   5.5821     7.9807   0.699   0.9970  
## MISS - GRPO == 0  14.1601     7.7538   1.826   0.5981  
## SACN - GRPO == 0   8.9129     8.2300   1.083   0.9597  
## SLBE - GRPO == 0  17.8094     7.4783   2.381   0.2473  
## VOYA - GRPO == 0   8.4598     8.5144   0.994   0.9750  
## ISRO - INDU == 0 -19.7434     6.5647  -3.008   0.0525 .
## MISS - INDU == 0 -11.1653     6.2868  -1.776   0.6325  
## SACN - INDU == 0 -16.4126     6.8656  -2.391   0.2432  
## SLBE - INDU == 0  -7.5161     5.9437  -1.265   0.9104  
## VOYA - INDU == 0 -16.8656     7.2040  -2.341   0.2676  
## MISS - ISRO == 0   8.5781     7.0176   1.222   0.9242  
## SACN - ISRO == 0   3.3308     7.5405   0.442   0.9998  
## SLBE - ISRO == 0  12.2273     6.7120   1.822   0.6010  
## VOYA - ISRO == 0   2.8777     7.8499   0.367   1.0000  
## SACN - MISS == 0  -5.2472     7.2999  -0.719   0.9964  
## SLBE - MISS == 0   3.6493     6.4405   0.567   0.9992  
## VOYA - MISS == 0  -5.7003     7.6190  -0.748   0.9954  
## SLBE - SACN == 0   8.8965     7.0066   1.270   0.9086  
## VOYA - SACN == 0  -0.4531     8.1032  -0.056   1.0000  
## VOYA - SLBE == 0  -9.3496     7.3385  -1.274   0.9070  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

# MODEL 2, FIXED EFFECTS = 0 + SPECIES
model.speciesonly <- lmer(data.allparks.wide$Diff ~ 0 + Species + (1|Site), 
                       data= data.allparks.wide, REML = FALSE)
summary(model.speciesonly)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Species + (1 | Site)
##    Data: data.allparks.wide
## 
##      AIC      BIC   logLik deviance df.resid 
##  13523.9  13580.2  -6750.9  13501.9     1219 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -11.2826  -0.1235   0.0171   0.1812   8.0180 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept)  138.5   11.77   
##  Residual             3302.3   57.47   
## Number of obs: 1230, groups:  Site, 185
## 
## Fixed effects:
##             Estimate Std. Error t value
## SpeciesEPFU   8.9134     4.3126   2.067
## SpeciesLABO -11.6879     4.3126  -2.710
## SpeciesLACI   0.6645     4.3126   0.154
## SpeciesLANO   3.9405     4.3126   0.914
## SpeciesMYLU -41.5046     4.3126  -9.624
## SpeciesMYSE  -4.4617     4.3126  -1.035
## SpeciesMYSO  -3.3118    11.4600  -0.289
## SpeciesNYHU  -6.9980    11.4600  -0.611
## SpeciesPESU  -2.1511     7.0951  -0.303
## 
## Correlation of Fixed Effects:
##             SpEPFU SpLABO SpLACI SpLANO SpMYLU SpMYSE SpMYSO SpNYHU
## SpeciesLABO 0.040                                                  
## SpeciesLACI 0.040  0.040                                           
## SpeciesLANO 0.040  0.040  0.040                                    
## SpeciesMYLU 0.040  0.040  0.040  0.040                             
## SpeciesMYSE 0.040  0.040  0.040  0.040  0.040                      
## SpeciesMYSO 0.015  0.015  0.015  0.015  0.015  0.015               
## SpeciesNYHU 0.015  0.015  0.015  0.015  0.015  0.015  0.033        
## SpeciesPESU 0.024  0.024  0.024  0.024  0.024  0.024  0.022  0.022

car::Anova(model.speciesonly)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: data.allparks.wide$Diff
##          Chisq Df Pr(>Chisq)    
## Species 107.22  9  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

hyptest.speciesonly <- glht(model.speciesonly, 
                       linfct= (Species = c("SpeciesEPFU = 0",  "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesMYSO = 0", "SpeciesNYHU = 0", "SpeciesPESU = 0")))
# Only see one species with estimate significantly different than zero (MYLU)
summary(hyptest.speciesonly)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lmer(formula = data.allparks.wide$Diff ~ 0 + Species + (1 | Site), 
##     data = data.allparks.wide, REML = FALSE)
## 
## Linear Hypotheses:
##                  Estimate Std. Error z value Pr(>|z|)    
## SpeciesEPFU == 0   8.9134     4.3126   2.067   0.2989    
## SpeciesLABO == 0 -11.6879     4.3126  -2.710   0.0589 .  
## SpeciesLACI == 0   0.6645     4.3126   0.154   1.0000    
## SpeciesLANO == 0   3.9405     4.3126   0.914   0.9821    
## SpeciesMYLU == 0 -41.5046     4.3126  -9.624   <1e-05 ***
## SpeciesMYSE == 0  -4.4617     4.3126  -1.035   0.9598    
## SpeciesMYSO == 0  -3.3118    11.4600  -0.289   1.0000    
## SpeciesNYHU == 0  -6.9980    11.4600  -0.611   0.9991    
## SpeciesPESU == 0  -2.1511     7.0951  -0.303   1.0000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

# MODEL 3, FIXED EFFECTS = 0 + PARK + SPECIES
model.parkspecies <- lmer(data.allparks.wide$Diff ~ 0 + Park + Species + (1|Site), 
                       data= data.allparks.wide, REML = FALSE)
summary(model.parkspecies)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Park + Species + (1 | Site)
##    Data: data.allparks.wide
## 
##      AIC      BIC   logLik deviance df.resid 
##  13520.1  13612.2  -6742.1  13484.1     1212 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -11.3252  -0.1366   0.0341   0.2155   8.1842 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept)   80.12   8.951  
##  Residual             3302.34  57.466  
## Number of obs: 1230, groups:  Site, 185
## 
## Fixed effects:
##             Estimate Std. Error t value
## ParkAPIS       6.509      6.415   1.015
## ParkGRPO      -3.729      7.209  -0.517
## ParkINDU      23.512      6.104   3.852
## ParkISRO       1.853      6.415   0.289
## ParkMISS      10.261      6.247   1.642
## ParkSACN       5.014      6.833   0.734
## ParkSLBE      14.080      5.781   2.436
## ParkVOYA       4.731      7.064   0.670
## SpeciesLABO  -20.601      5.975  -3.448
## SpeciesLACI   -8.249      5.975  -1.381
## SpeciesLANO   -4.973      5.975  -0.832
## SpeciesMYLU  -50.418      5.975  -8.438
## SpeciesMYSE  -13.375      5.975  -2.239
## SpeciesMYSO  -23.641     12.696  -1.862
## SpeciesNYHU  -27.327     12.696  -2.152
## SpeciesPESU  -15.079      8.458  -1.783

## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

car::Anova(model.parkspecies)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: data.allparks.wide$Diff
##          Chisq Df Pr(>Chisq)    
## Park    22.939  8   0.003444 ** 
## Species 93.586  8  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# MODEL 4, FIXED EFFECTS = 0 + PARK*SPECIES
model.parkspeciesInt <- lmer(data.allparks.wide$Diff ~ 0 + Park*Species + (1|Site), 
                          data= data.allparks.wide, REML = FALSE)

## fixed-effect model matrix is rank deficient so dropping 19 columns / coefficients

# I think the "rank-deficient" error is ok because not all parks have all species.
summary(model.parkspeciesInt)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Park * Species + (1 | Site)
##    Data: data.allparks.wide
## 
##      AIC      BIC   logLik deviance df.resid 
##  13530.1  13811.4  -6710.0  13420.1     1175 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -11.2223  -0.0978   0.0088   0.1878   8.7411 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept)  110.9   10.53   
##  Residual             3105.0   55.72   
## Number of obs: 1230, groups:  Site, 185
## 
## Fixed effects:
##                      Estimate Std. Error t value
## ParkAPIS               0.4025    11.5758   0.035
## ParkGRPO               0.3791    13.7541   0.028
## ParkINDU              57.1914    11.1216   5.142
## ParkISRO              -1.3425    11.5758  -0.116
## ParkMISS              12.5875    11.5758   1.087
## ParkSACN             -15.8639    13.3665  -1.187
## ParkSLBE               5.0721     9.7256   0.522
## ParkVOYA              -0.6048    13.3665  -0.045
## SpeciesLABO           -2.8538    16.0858  -0.177
## SpeciesLACI          -24.6926    16.0858  -1.535
## SpeciesLANO            3.7357    16.0858   0.232
## SpeciesMYLU          -35.4823    16.0858  -2.206
## SpeciesMYSE           -1.6836    16.0858  -0.105
## SpeciesMYSO          -57.3206    15.4547  -3.709
## SpeciesNYHU          -61.0068    15.4547  -3.947
## SpeciesPESU           15.8167    18.5742   0.852
## ParkGRPO:SpeciesLABO -14.2325    24.9810  -0.570
## ParkINDU:SpeciesLABO -77.9067    22.3069  -3.492
## ParkISRO:SpeciesLABO  -1.7180    22.7487  -0.076
## ParkMISS:SpeciesLABO -14.6591    22.7487  -0.644
## ParkSACN:SpeciesLABO   6.8546    24.5714   0.279
## ParkSLBE:SpeciesLABO -21.2129    21.0095  -1.010
## ParkVOYA:SpeciesLABO  -1.3801    24.5714  -0.056
## ParkGRPO:SpeciesLACI  25.9395    24.9810   1.038
## ParkINDU:SpeciesLACI -25.8330    22.3069  -1.158
## ParkISRO:SpeciesLACI  24.8234    22.7487   1.091
## ParkMISS:SpeciesLACI  16.8395    22.7487   0.740
## ParkSACN:SpeciesLACI  36.6199    24.5714   1.490
## ParkSLBE:SpeciesLACI  26.4855    21.0095   1.261
## ParkVOYA:SpeciesLACI  39.6226    24.5714   1.613
## ParkGRPO:SpeciesLANO   8.8626    24.9810   0.355
## ParkINDU:SpeciesLANO -46.1231    22.3069  -2.068
## ParkISRO:SpeciesLANO  -4.5815    22.7487  -0.201
## ParkMISS:SpeciesLANO -22.6870    22.7487  -0.997
## ParkSACN:SpeciesLANO  12.9587    24.5714   0.527
## ParkSLBE:SpeciesLANO  -0.7748    21.0095  -0.037
## ParkVOYA:SpeciesLANO  -6.3904    24.5714  -0.260
## ParkGRPO:SpeciesMYLU -56.7317    24.9810  -2.271
## ParkINDU:SpeciesMYLU -23.8204    22.3069  -1.068
## ParkISRO:SpeciesMYLU -35.4577    22.7487  -1.559
## ParkMISS:SpeciesMYLU -21.7059    22.7487  -0.954
## ParkSACN:SpeciesMYLU  10.2769    24.5714   0.418
## ParkSLBE:SpeciesMYLU  16.3836    21.0095   0.780
## ParkVOYA:SpeciesMYLU -30.5234    24.5714  -1.242
## ParkGRPO:SpeciesMYSE -25.1271    24.9810  -1.006
## ParkINDU:SpeciesMYSE -55.5230    22.3069  -2.489
## ParkISRO:SpeciesMYSE  -0.5339    22.7487  -0.023
## ParkMISS:SpeciesMYSE -11.1767    22.7487  -0.491
## ParkSACN:SpeciesMYSE  11.8976    24.5714   0.484
## ParkSLBE:SpeciesMYSE  -3.4723    21.0095  -0.165
## ParkVOYA:SpeciesMYSE  -5.9560    24.5714  -0.242
## ParkINDU:SpeciesPESU -74.0871    24.1630  -3.066
## ParkMISS:SpeciesPESU -30.4328    24.5714  -1.239

## 
## Correlation matrix not shown by default, as p = 53 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 19 columns / coefficients

car::Anova(model.parkspeciesInt)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: data.allparks.wide$Diff
##               Chisq Df Pr(>Chisq)    
## Park         23.164  8   0.003160 ** 
## Species      99.533  8  < 2.2e-16 ***
## Park:Species 66.070 37   0.002303 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

AIC Comparison Tables

# Standard AIC (using stats pkg) -> Model 3 has lowest AIC
AIC(model.null, model.parkonly, model.speciesonly, model.parkspecies, model.parkspeciesInt)

##                      df      AIC
## model.null            3 13597.13
## model.parkonly       10 13593.76
## model.speciesonly    11 13523.89
## model.parkspecies    18 13520.15
## model.parkspeciesInt 55 13530.09

# Small sample AICc (using AICcmodavg pkg) -> Model 3 has lowest AIC
# (Note: aictab function automatically selected AICc instead of AIC, even when I did not include the argument "second.ord = TRUE".)
model.list <- list(model.null, model.parkonly, model.speciesonly, model.parkspecies, model.parkspeciesInt)
model.names <- c("model.null", "model.parkonly", "model.speciesonly", "model.parkspecies", "model.parkspeciesInt")
aictab(cand.set = model.list, modnames = model.names, sort = TRUE, second.ord = TRUE)

## 
## Model selection based on AICc:
## 
##                       K     AICc Delta_AICc AICcWt Cum.Wt       LL
## model.parkspecies    18 13520.71       0.00   0.84   0.84 -6742.07
## model.speciesonly    11 13524.11       3.39   0.15   1.00 -6750.94
## model.parkspeciesInt 55 13535.34      14.63   0.00   1.00 -6710.05
## model.parkonly       10 13593.94      73.23   0.00   1.00 -6786.88
## model.null            3 13597.15      76.43   0.00   1.00 -6795.56

Bat data models

Katy Goodwin

February 6, 2019

Set up stuff

Data organization

Plot 1 - histogram of counts

Plot 2 - histogram of densities, density estimate, and probability density function

Test significance of “Park” variable

Plot 3 - boxplots of Call Differences by park and species

Subset data by parks

Run model and hypothesis tests for each of the 8 parks

Apostle Islands

Grand Portage

Indiana Dunes

Isle Royale

Mississippi River

Saint Croix

Sleeping Bear Dunes

Voyageurs

Candidate Models with all parks together

AIC Comparison Tables