Set up stuff
rm(list=ls())
pacman::p_load(plyr,dplyr,ggplot2,lme4,car,multcomp,tidyr,reshape2,AICcmodavg)
setwd("C:/Users/Katy/Desktop/thesisr")
Data organization
# Read in data, add column calculating Calls Per Night
data.allparks <- read.csv("./MixedModel_allparks.csv")
data.allparks$Year <- as.factor(data.allparks$Year)
data.allparks$CallsPN <- round(data.allparks$Calls/data.allparks$Nights, 3)
str(data.allparks)
## 'data.frame': 2460 obs. of 7 variables:
## $ Park : Factor w/ 8 levels "APIS","GRPO",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Site : Factor w/ 185 levels "APIS001A","APIS002A",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Year : Factor w/ 2 levels "2016","2017": 2 2 2 2 2 2 2 2 2 2 ...
## $ Nights : int 12 12 12 12 12 12 9 9 9 9 ...
## $ Species: Factor w/ 9 levels "EPFU","LABO",..: 1 2 3 4 5 6 1 2 3 4 ...
## $ Calls : int 130 92 95 1496 2311 1 8 2 7 47 ...
## $ CallsPN: num 10.83 7.67 7.92 124.67 192.58 ...
head(data.allparks)
## Park Site Year Nights Species Calls CallsPN
## 1 APIS APIS001A 2017 12 EPFU 130 10.833
## 2 APIS APIS001A 2017 12 LABO 92 7.667
## 3 APIS APIS001A 2017 12 LACI 95 7.917
## 4 APIS APIS001A 2017 12 LANO 1496 124.667
## 5 APIS APIS001A 2017 12 MYLU 2311 192.583
## 6 APIS APIS001A 2017 12 MYSE 1 0.083
# Temporarily subset the data by year. Rename columns to include year information.
# and remove the actual "Year" column. Then merge all data back to one wide
# data frame, with rows for each combination of Park, Site, and Species. Add a new
# column showing difference between Calls Per Night in 2017 vs 2016.
data.allparks.2016 <- data.allparks %>%
subset(Year == "2016") %>%
rename(Nights.2016 = Nights, Calls.2016 = Calls,
CallsPN.2016 = CallsPN) %>%
subset(select = -c(Year))
data.allparks.2017 <- data.allparks %>%
subset(Year == "2017") %>%
rename(Nights.2017 = Nights, Calls.2017 = Calls,
CallsPN.2017 = CallsPN) %>%
subset(select = -c(Year))
data.allparks.wide <- merge(data.allparks.2016, data.allparks.2017,
by = c("Park","Site","Species")) %>%
mutate(Diff = CallsPN.2017 - CallsPN.2016)
str(data.allparks.wide)
## 'data.frame': 1230 obs. of 10 variables:
## $ Park : Factor w/ 8 levels "APIS","GRPO",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Site : Factor w/ 185 levels "APIS001A","APIS002A",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Species : Factor w/ 9 levels "EPFU","LABO",..: 1 2 3 4 5 6 1 2 3 4 ...
## $ Nights.2016 : int 7 7 7 7 7 7 10 10 10 10 ...
## $ Calls.2016 : int 17 91 35 144 2380 16 4 1 4 33 ...
## $ CallsPN.2016: num 2.43 13 5 20.57 340 ...
## $ Nights.2017 : int 12 12 12 12 12 12 9 9 9 9 ...
## $ Calls.2017 : int 130 92 95 1496 2311 1 8 2 7 47 ...
## $ CallsPN.2017: num 10.83 7.67 7.92 124.67 192.58 ...
## $ Diff : num 8.4 -5.33 2.92 104.1 -147.42 ...
head(data.allparks.wide)
## Park Site Species Nights.2016 Calls.2016 CallsPN.2016 Nights.2017
## 1 APIS APIS001A EPFU 7 17 2.429 12
## 2 APIS APIS001A LABO 7 91 13.000 12
## 3 APIS APIS001A LACI 7 35 5.000 12
## 4 APIS APIS001A LANO 7 144 20.571 12
## 5 APIS APIS001A MYLU 7 2380 340.000 12
## 6 APIS APIS001A MYSE 7 16 2.286 12
## Calls.2017 CallsPN.2017 Diff
## 1 130 10.833 8.404
## 2 92 7.667 -5.333
## 3 95 7.917 2.917
## 4 1496 124.667 104.096
## 5 2311 192.583 -147.417
## 6 1 0.083 -2.203
Plot 1 - histogram of counts
gg.diff1 <- ggplot(data.allparks.wide, aes(data.allparks.wide$Diff)) +
geom_histogram(binwidth = 20, colour="black", fill="gray") +
theme_bw()
gg.diff1

Plot 2 - histogram of densities, density estimate, and probability density function
gg.diff2 <- ggplot(data.allparks.wide, aes(x=Diff)) +
geom_histogram(aes(y=..density..), binwidth = 20,
colour="black", fill="gray") +
geom_density(alpha=0.4, color= "coral2", size=.7, fill= "coral2") +
theme_bw()
mean(data.allparks.wide$Diff)
## [1] -6.78478
sd(data.allparks.wide$Diff)
## [1] 60.77969
gg.diff3 <- gg.diff2 +
stat_function(fun = dnorm, colour="blue", size=1.2,
args = list(mean = -6.78478, sd = 60.77969))
gg.diff3

# normal distribution seems to fit okay
Test significance of “Park” variable
# Using a linear model
mod.null <- lm(data.allparks.wide$Diff ~ 1 , data= data.allparks.wide)
mod.park <- lm(data.allparks.wide$Diff ~ Park, data= data.allparks.wide)
anova(mod.null, mod.park)
## Analysis of Variance Table
##
## Model 1: data.allparks.wide$Diff ~ 1
## Model 2: data.allparks.wide$Diff ~ Park
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 1229 4540136
## 2 1222 4469108 7 71028 2.7745 0.007275 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Using a linear mixed model
mod.null2 <- lmer(data.allparks.wide$Diff ~ 1 + (1|Site), data= data.allparks.wide)
mod.park2 <- lmer(data.allparks.wide$Diff ~ Park + (1|Site), data= data.allparks.wide)
anova(mod.null2, mod.park2)
## refitting model(s) with ML (instead of REML)
## Data: data.allparks.wide
## Models:
## mod.null2: data.allparks.wide$Diff ~ 1 + (1 | Site)
## mod.park2: data.allparks.wide$Diff ~ Park + (1 | Site)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## mod.null2 3 13597 13612 -6795.6 13591
## mod.park2 10 13594 13645 -6786.9 13574 17.368 7 0.01517 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I wasn't sure whether to used the regular linear vs. mixed model for this test? Ran both and both gave a significant p-value. Therefore, conclude that there are significant differences by park, so I will run separate models for each park.
Plot 3 - boxplots of Call Differences by park and species
# Plot Calls Per Night Differences (y axis) by park (x axis) for each species. Except leave out the two species MYSO and NYHU since they're only present at one park.
diff.box <- ggplot(subset(data.allparks.wide, Species %in% c("EPFU","LABO","LACI","LANO", "MYLU", "MYSE", "PESU")),
aes(x=Park, y=Diff, color=Park)) +
geom_boxplot() +
scale_color_brewer(palette = "Dark2") +
theme_bw() +
facet_grid(cols = vars(Species)) +
theme(axis.text.x.bottom = element_text(angle=90)) +
theme(legend.position = "bottom")
diff.box

Subset data by parks
park.list <- list("APIS", "GRPO", "INDU", "ISRO","MISS", "SACN", "SLBE", "VOYA")
for (i in park.list) {
temp <- subset(data.allparks.wide, Park == i)
temp <- droplevels(temp)
assign(paste("data",i,sep="."), temp)
}
Run model and hypothesis tests for each of the 8 parks
Apostle Islands
# This model yields a "singular fit" and lots of warnings when I get the confidence intervals. So still need to figure out what the problem is and how to resolve it.
model.APIS <- lmer(Diff ~ 0 + Species + (1|Site), data=data.APIS)
summary(model.APIS)
car::Anova(model.APIS)
hyptest.APIS <- glht(model.APIS, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.APIS)
confint(model.APIS, oldNames=FALSE)
Grand Portage
model.GRPO <- lmer(Diff ~ 0 + Species + (1|Site), data=data.GRPO)
summary(model.GRPO)
car::Anova(model.GRPO)
hyptest.GRPO <- glht(model.GRPO, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.GRPO)
confint(model.GRPO, oldNames=FALSE)
Indiana Dunes
model.INDU <- lmer(Diff ~ 0 + Species + (1|Site), data=data.INDU)
summary(model.INDU)
car::Anova(model.INDU)
hyptest.INDU <- glht(model.INDU, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesMYSO = 0", "SpeciesNYHU = 0", "SpeciesPESU = 0")))
summary(hyptest.INDU)
confint(model.INDU, oldNames=FALSE)
Isle Royale
model.ISRO <- lmer(Diff ~ 0 + Species + (1|Site), data=data.ISRO)
summary(model.ISRO)
car::Anova(model.ISRO)
hyptest.ISRO <- glht(model.ISRO, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.ISRO)
confint(model.ISRO, oldNames=FALSE)
Mississippi River
model.MISS <- lmer(Diff ~ 0 + Species + (1|Site), data=data.MISS)
summary(model.MISS)
car::Anova(model.MISS)
hyptest.MISS <- glht(model.MISS, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesPESU = 0")))
summary(hyptest.MISS)
confint(model.MISS, oldNames=FALSE)
Saint Croix
model.SACN <- lmer(Diff ~ 0 + Species + (1|Site), data=data.SACN)
summary(model.SACN)
car::Anova(model.SACN)
hyptest.SACN <- glht(model.SACN, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesPESU = 0")))
summary(hyptest.SACN)
confint(model.SACN, oldNames=FALSE)
Sleeping Bear Dunes
model.SLBE <- lmer(Diff ~ 0 + Species + (1|Site), data=data.SLBE)
summary(model.SLBE)
car::Anova(model.SLBE)
hyptest.SLBE <- glht(model.SLBE, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.SLBE)
confint(model.SLBE, oldNames=FALSE)
Voyageurs
model.VOYA <- lmer(Diff ~ 0 + Species + (1|Site), data=data.VOYA)
summary(model.VOYA)
car::Anova(model.VOYA)
hyptest.VOYA <- glht(model.VOYA, linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0")))
summary(hyptest.VOYA)
confint(model.VOYA, oldNames=FALSE)
Candidate Models with all parks together
# Change of strategy. DO NOT separate by park. Leave all parks together.
# Now try these 5 candidate models. Set REML = FALSE so that log-likelihood is used,
# so that we can compare the models with AIC.
# NULL MODEL
model.null <- lmer(data.allparks.wide$Diff ~ 1 + (1|Site),
data= data.allparks.wide, REML = FALSE)
summary(model.null)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 1 + (1 | Site)
## Data: data.allparks.wide
##
## AIC BIC logLik deviance df.resid
## 13597.1 13612.5 -6795.6 13591.1 1227
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -10.7505 -0.0026 0.0964 0.1553 7.1903
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 99.19 9.959
## Residual 3592.17 59.935
## Number of obs: 1230, groups: Site, 185
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -6.933 1.862 -3.723
# MODEL 1, FIXED EFFECTS = 0 + PARK
model.parkonly <- lmer(data.allparks.wide$Diff ~ 0 + Park + (1|Site),
data= data.allparks.wide, REML = FALSE)
summary(model.parkonly)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Park + (1 | Site)
## Data: data.allparks.wide
##
## AIC BIC logLik deviance df.resid
## 13593.8 13644.9 -6786.9 13573.8 1220
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -10.7864 -0.0844 0.1029 0.2235 7.3730
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 33.88 5.821
## Residual 3599.59 59.997
## Number of obs: 1230, groups: Site, 185
##
## Fixed effects:
## Estimate Std. Error t value
## ParkAPIS -9.760 5.139 -1.899
## ParkGRPO -19.999 6.106 -3.275
## ParkINDU 5.327 4.085 1.304
## ParkISRO -14.417 5.139 -2.805
## ParkMISS -5.838 4.779 -1.222
## ParkSACN -11.086 5.518 -2.009
## ParkSLBE -2.189 4.318 -0.507
## ParkVOYA -11.539 5.934 -1.945
##
## Correlation of Fixed Effects:
## PrAPIS PrGRPO PrINDU PrISRO PrMISS PrSACN PrSLBE
## ParkGRPO 0.000
## ParkINDU 0.000 0.000
## ParkISRO 0.000 0.000 0.000
## ParkMISS 0.000 0.000 0.000 0.000
## ParkSACN 0.000 0.000 0.000 0.000 0.000
## ParkSLBE 0.000 0.000 0.000 0.000 0.000 0.000
## ParkVOYA 0.000 0.000 0.000 0.000 0.000 0.000 0.000
car::Anova(model.parkonly)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: data.allparks.wide$Diff
## Chisq Df Pr(>Chisq)
## Park 33.472 8 5.063e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hyptest.parkonly <- glht(model.parkonly, linfct= mcp(Park = "Tukey"))
# Only see one significant difference among all of the Tukey pairwise comparisons
summary(hyptest.parkonly)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lmer(formula = data.allparks.wide$Diff ~ 0 + Park + (1 | Site),
## data = data.allparks.wide, REML = FALSE)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## GRPO - APIS == 0 -10.2383 7.9807 -1.283 0.9038
## INDU - APIS == 0 15.0872 6.5647 2.298 0.2910
## ISRO - APIS == 0 -4.6562 7.2676 -0.641 0.9983
## MISS - APIS == 0 3.9219 7.0176 0.559 0.9993
## SACN - APIS == 0 -1.3254 7.5405 -0.176 1.0000
## SLBE - APIS == 0 7.5711 6.7120 1.128 0.9499
## VOYA - APIS == 0 -1.7785 7.8499 -0.227 1.0000
## INDU - GRPO == 0 25.3254 7.3464 3.447 0.0131 *
## ISRO - GRPO == 0 5.5821 7.9807 0.699 0.9970
## MISS - GRPO == 0 14.1601 7.7538 1.826 0.5981
## SACN - GRPO == 0 8.9129 8.2300 1.083 0.9597
## SLBE - GRPO == 0 17.8094 7.4783 2.381 0.2473
## VOYA - GRPO == 0 8.4598 8.5144 0.994 0.9750
## ISRO - INDU == 0 -19.7434 6.5647 -3.008 0.0525 .
## MISS - INDU == 0 -11.1653 6.2868 -1.776 0.6325
## SACN - INDU == 0 -16.4126 6.8656 -2.391 0.2432
## SLBE - INDU == 0 -7.5161 5.9437 -1.265 0.9104
## VOYA - INDU == 0 -16.8656 7.2040 -2.341 0.2676
## MISS - ISRO == 0 8.5781 7.0176 1.222 0.9242
## SACN - ISRO == 0 3.3308 7.5405 0.442 0.9998
## SLBE - ISRO == 0 12.2273 6.7120 1.822 0.6010
## VOYA - ISRO == 0 2.8777 7.8499 0.367 1.0000
## SACN - MISS == 0 -5.2472 7.2999 -0.719 0.9964
## SLBE - MISS == 0 3.6493 6.4405 0.567 0.9992
## VOYA - MISS == 0 -5.7003 7.6190 -0.748 0.9954
## SLBE - SACN == 0 8.8965 7.0066 1.270 0.9086
## VOYA - SACN == 0 -0.4531 8.1032 -0.056 1.0000
## VOYA - SLBE == 0 -9.3496 7.3385 -1.274 0.9070
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# MODEL 2, FIXED EFFECTS = 0 + SPECIES
model.speciesonly <- lmer(data.allparks.wide$Diff ~ 0 + Species + (1|Site),
data= data.allparks.wide, REML = FALSE)
summary(model.speciesonly)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Species + (1 | Site)
## Data: data.allparks.wide
##
## AIC BIC logLik deviance df.resid
## 13523.9 13580.2 -6750.9 13501.9 1219
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.2826 -0.1235 0.0171 0.1812 8.0180
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 138.5 11.77
## Residual 3302.3 57.47
## Number of obs: 1230, groups: Site, 185
##
## Fixed effects:
## Estimate Std. Error t value
## SpeciesEPFU 8.9134 4.3126 2.067
## SpeciesLABO -11.6879 4.3126 -2.710
## SpeciesLACI 0.6645 4.3126 0.154
## SpeciesLANO 3.9405 4.3126 0.914
## SpeciesMYLU -41.5046 4.3126 -9.624
## SpeciesMYSE -4.4617 4.3126 -1.035
## SpeciesMYSO -3.3118 11.4600 -0.289
## SpeciesNYHU -6.9980 11.4600 -0.611
## SpeciesPESU -2.1511 7.0951 -0.303
##
## Correlation of Fixed Effects:
## SpEPFU SpLABO SpLACI SpLANO SpMYLU SpMYSE SpMYSO SpNYHU
## SpeciesLABO 0.040
## SpeciesLACI 0.040 0.040
## SpeciesLANO 0.040 0.040 0.040
## SpeciesMYLU 0.040 0.040 0.040 0.040
## SpeciesMYSE 0.040 0.040 0.040 0.040 0.040
## SpeciesMYSO 0.015 0.015 0.015 0.015 0.015 0.015
## SpeciesNYHU 0.015 0.015 0.015 0.015 0.015 0.015 0.033
## SpeciesPESU 0.024 0.024 0.024 0.024 0.024 0.024 0.022 0.022
car::Anova(model.speciesonly)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: data.allparks.wide$Diff
## Chisq Df Pr(>Chisq)
## Species 107.22 9 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hyptest.speciesonly <- glht(model.speciesonly,
linfct= (Species = c("SpeciesEPFU = 0", "SpeciesLABO = 0", "SpeciesLACI = 0", "SpeciesLANO = 0", "SpeciesMYLU = 0", "SpeciesMYSE = 0", "SpeciesMYSO = 0", "SpeciesNYHU = 0", "SpeciesPESU = 0")))
# Only see one species with estimate significantly different than zero (MYLU)
summary(hyptest.speciesonly)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Fit: lmer(formula = data.allparks.wide$Diff ~ 0 + Species + (1 | Site),
## data = data.allparks.wide, REML = FALSE)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## SpeciesEPFU == 0 8.9134 4.3126 2.067 0.2989
## SpeciesLABO == 0 -11.6879 4.3126 -2.710 0.0589 .
## SpeciesLACI == 0 0.6645 4.3126 0.154 1.0000
## SpeciesLANO == 0 3.9405 4.3126 0.914 0.9821
## SpeciesMYLU == 0 -41.5046 4.3126 -9.624 <1e-05 ***
## SpeciesMYSE == 0 -4.4617 4.3126 -1.035 0.9598
## SpeciesMYSO == 0 -3.3118 11.4600 -0.289 1.0000
## SpeciesNYHU == 0 -6.9980 11.4600 -0.611 0.9991
## SpeciesPESU == 0 -2.1511 7.0951 -0.303 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# MODEL 3, FIXED EFFECTS = 0 + PARK + SPECIES
model.parkspecies <- lmer(data.allparks.wide$Diff ~ 0 + Park + Species + (1|Site),
data= data.allparks.wide, REML = FALSE)
summary(model.parkspecies)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Park + Species + (1 | Site)
## Data: data.allparks.wide
##
## AIC BIC logLik deviance df.resid
## 13520.1 13612.2 -6742.1 13484.1 1212
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.3252 -0.1366 0.0341 0.2155 8.1842
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 80.12 8.951
## Residual 3302.34 57.466
## Number of obs: 1230, groups: Site, 185
##
## Fixed effects:
## Estimate Std. Error t value
## ParkAPIS 6.509 6.415 1.015
## ParkGRPO -3.729 7.209 -0.517
## ParkINDU 23.512 6.104 3.852
## ParkISRO 1.853 6.415 0.289
## ParkMISS 10.261 6.247 1.642
## ParkSACN 5.014 6.833 0.734
## ParkSLBE 14.080 5.781 2.436
## ParkVOYA 4.731 7.064 0.670
## SpeciesLABO -20.601 5.975 -3.448
## SpeciesLACI -8.249 5.975 -1.381
## SpeciesLANO -4.973 5.975 -0.832
## SpeciesMYLU -50.418 5.975 -8.438
## SpeciesMYSE -13.375 5.975 -2.239
## SpeciesMYSO -23.641 12.696 -1.862
## SpeciesNYHU -27.327 12.696 -2.152
## SpeciesPESU -15.079 8.458 -1.783
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
car::Anova(model.parkspecies)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: data.allparks.wide$Diff
## Chisq Df Pr(>Chisq)
## Park 22.939 8 0.003444 **
## Species 93.586 8 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# MODEL 4, FIXED EFFECTS = 0 + PARK*SPECIES
model.parkspeciesInt <- lmer(data.allparks.wide$Diff ~ 0 + Park*Species + (1|Site),
data= data.allparks.wide, REML = FALSE)
## fixed-effect model matrix is rank deficient so dropping 19 columns / coefficients
# I think the "rank-deficient" error is ok because not all parks have all species.
summary(model.parkspeciesInt)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: data.allparks.wide$Diff ~ 0 + Park * Species + (1 | Site)
## Data: data.allparks.wide
##
## AIC BIC logLik deviance df.resid
## 13530.1 13811.4 -6710.0 13420.1 1175
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.2223 -0.0978 0.0088 0.1878 8.7411
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 110.9 10.53
## Residual 3105.0 55.72
## Number of obs: 1230, groups: Site, 185
##
## Fixed effects:
## Estimate Std. Error t value
## ParkAPIS 0.4025 11.5758 0.035
## ParkGRPO 0.3791 13.7541 0.028
## ParkINDU 57.1914 11.1216 5.142
## ParkISRO -1.3425 11.5758 -0.116
## ParkMISS 12.5875 11.5758 1.087
## ParkSACN -15.8639 13.3665 -1.187
## ParkSLBE 5.0721 9.7256 0.522
## ParkVOYA -0.6048 13.3665 -0.045
## SpeciesLABO -2.8538 16.0858 -0.177
## SpeciesLACI -24.6926 16.0858 -1.535
## SpeciesLANO 3.7357 16.0858 0.232
## SpeciesMYLU -35.4823 16.0858 -2.206
## SpeciesMYSE -1.6836 16.0858 -0.105
## SpeciesMYSO -57.3206 15.4547 -3.709
## SpeciesNYHU -61.0068 15.4547 -3.947
## SpeciesPESU 15.8167 18.5742 0.852
## ParkGRPO:SpeciesLABO -14.2325 24.9810 -0.570
## ParkINDU:SpeciesLABO -77.9067 22.3069 -3.492
## ParkISRO:SpeciesLABO -1.7180 22.7487 -0.076
## ParkMISS:SpeciesLABO -14.6591 22.7487 -0.644
## ParkSACN:SpeciesLABO 6.8546 24.5714 0.279
## ParkSLBE:SpeciesLABO -21.2129 21.0095 -1.010
## ParkVOYA:SpeciesLABO -1.3801 24.5714 -0.056
## ParkGRPO:SpeciesLACI 25.9395 24.9810 1.038
## ParkINDU:SpeciesLACI -25.8330 22.3069 -1.158
## ParkISRO:SpeciesLACI 24.8234 22.7487 1.091
## ParkMISS:SpeciesLACI 16.8395 22.7487 0.740
## ParkSACN:SpeciesLACI 36.6199 24.5714 1.490
## ParkSLBE:SpeciesLACI 26.4855 21.0095 1.261
## ParkVOYA:SpeciesLACI 39.6226 24.5714 1.613
## ParkGRPO:SpeciesLANO 8.8626 24.9810 0.355
## ParkINDU:SpeciesLANO -46.1231 22.3069 -2.068
## ParkISRO:SpeciesLANO -4.5815 22.7487 -0.201
## ParkMISS:SpeciesLANO -22.6870 22.7487 -0.997
## ParkSACN:SpeciesLANO 12.9587 24.5714 0.527
## ParkSLBE:SpeciesLANO -0.7748 21.0095 -0.037
## ParkVOYA:SpeciesLANO -6.3904 24.5714 -0.260
## ParkGRPO:SpeciesMYLU -56.7317 24.9810 -2.271
## ParkINDU:SpeciesMYLU -23.8204 22.3069 -1.068
## ParkISRO:SpeciesMYLU -35.4577 22.7487 -1.559
## ParkMISS:SpeciesMYLU -21.7059 22.7487 -0.954
## ParkSACN:SpeciesMYLU 10.2769 24.5714 0.418
## ParkSLBE:SpeciesMYLU 16.3836 21.0095 0.780
## ParkVOYA:SpeciesMYLU -30.5234 24.5714 -1.242
## ParkGRPO:SpeciesMYSE -25.1271 24.9810 -1.006
## ParkINDU:SpeciesMYSE -55.5230 22.3069 -2.489
## ParkISRO:SpeciesMYSE -0.5339 22.7487 -0.023
## ParkMISS:SpeciesMYSE -11.1767 22.7487 -0.491
## ParkSACN:SpeciesMYSE 11.8976 24.5714 0.484
## ParkSLBE:SpeciesMYSE -3.4723 21.0095 -0.165
## ParkVOYA:SpeciesMYSE -5.9560 24.5714 -0.242
## ParkINDU:SpeciesPESU -74.0871 24.1630 -3.066
## ParkMISS:SpeciesPESU -30.4328 24.5714 -1.239
##
## Correlation matrix not shown by default, as p = 53 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 19 columns / coefficients
car::Anova(model.parkspeciesInt)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: data.allparks.wide$Diff
## Chisq Df Pr(>Chisq)
## Park 23.164 8 0.003160 **
## Species 99.533 8 < 2.2e-16 ***
## Park:Species 66.070 37 0.002303 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC Comparison Tables
# Standard AIC (using stats pkg) -> Model 3 has lowest AIC
AIC(model.null, model.parkonly, model.speciesonly, model.parkspecies, model.parkspeciesInt)
## df AIC
## model.null 3 13597.13
## model.parkonly 10 13593.76
## model.speciesonly 11 13523.89
## model.parkspecies 18 13520.15
## model.parkspeciesInt 55 13530.09
# Small sample AICc (using AICcmodavg pkg) -> Model 3 has lowest AIC
# (Note: aictab function automatically selected AICc instead of AIC, even when I did not include the argument "second.ord = TRUE".)
model.list <- list(model.null, model.parkonly, model.speciesonly, model.parkspecies, model.parkspeciesInt)
model.names <- c("model.null", "model.parkonly", "model.speciesonly", "model.parkspecies", "model.parkspeciesInt")
aictab(cand.set = model.list, modnames = model.names, sort = TRUE, second.ord = TRUE)
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## model.parkspecies 18 13520.71 0.00 0.84 0.84 -6742.07
## model.speciesonly 11 13524.11 3.39 0.15 1.00 -6750.94
## model.parkspeciesInt 55 13535.34 14.63 0.00 1.00 -6710.05
## model.parkonly 10 13593.94 73.23 0.00 1.00 -6786.88
## model.null 3 13597.15 76.43 0.00 1.00 -6795.56