rikzdata <- read_delim("http://www.rossmanchance.com/stat414/data/RIKZ.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 45 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (5): Sample, Richness, Exposure, NAP, Beach
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
There appear to be differences in species richness across the beaches.
plot(rikzdata$Richness ~ rikzdata$Beach)
iscamsummary(rikzdata$Richness, rikzdata$Beach)
Fitting the null model that allows for the intercepts to vary by beach.
#lmList gives us a list of the intercepts for the different beaches
model0 = lmer(Richness ~ 1 + (1 | Beach), data = rikzdata)
# (1 | Beach) means we want to intercept to vary beach by beach
summary(model0, corr=F)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Richness ~ 1 + (1 | Beach)
## Data: rikzdata
##
## REML criterion at convergence: 261.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7797 -0.5070 -0.0980 0.2547 3.8063
##
## Random effects:
## Groups Name Variance Std.Dev.
## Beach (Intercept) 10.48 3.237
## Residual 15.51 3.938
## Number of obs: 45, groups: Beach, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.689 1.228 4.631
rand_ints <- as.data.frame(ranef(model0, condVar = TRUE))
# plot
ggplot(rand_ints, aes(y = condval, x = grp)) +
geom_point() +
geom_errorbar(aes(ymin = condval - 1.96*condsd,
max = condval + 1.96*condsd), width = 0) +
labs(title = "EB est. of random component of intercept",
x = "Beach",
y = "Estimate and 95% CI") +
theme(axis.text.x = element_text(angle = 90)) +
theme_bw()
AIC and BIC are smaller the better
AIC(model0)
## [1] 267.1142
Does there appear to be a relationship between species richness and NAP?
modelb = lm(Richness ~ NAP, data = rikzdata)
summary(modelb)
##
## Call:
## lm(formula = Richness ~ NAP, data = rikzdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0675 -2.7607 -0.8029 1.3534 13.8723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.6857 0.6578 10.164 0.000000000000525 ***
## NAP -2.8669 0.6307 -4.545 0.000044175212096 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.16 on 43 degrees of freedom
## Multiple R-squared: 0.3245, Adjusted R-squared: 0.3088
## F-statistic: 20.66 on 1 and 43 DF, p-value: 0.00004418
plot(rikzdata$Richness ~ rikzdata$NAP)
abline(modelb)
model1 = lmer(Richness ~ NAP + (1 | Beach), data = rikzdata)
summary(model1, corr=F)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Richness ~ NAP + (1 | Beach)
## Data: rikzdata
##
## REML criterion at convergence: 239.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4227 -0.4848 -0.1576 0.2519 3.9794
##
## Random effects:
## Groups Name Variance Std.Dev.
## Beach (Intercept) 8.668 2.944
## Residual 9.362 3.060
## Number of obs: 45, groups: Beach, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.5819 1.0958 6.007
## NAP -2.5684 0.4947 -5.192
AIC(model1)
## [1] 247.4802
#Our predicted models
preds = predict(model1, newdata = rikzdata)
ggplot(rikzdata, aes(x = NAP , y = preds , group = Beach, color = factor(rikzdata$Beach) )) +
geom_smooth(method = "lm", alpha = .5, se = FALSE) +
geom_abline(intercept = 6.686, slope = -2.867) +
geom_point(data = rikzdata, aes(y = Richness, color=factor(rikzdata$Beach)), alpha = .5) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
plot(rikzdata$NAP~ rikzdata$Beach)
abline(lm(rikzdata$NAP~rikzdata$Beach))
One could argue that there are runs of positive/negative residuals for different beaches. This might indicate that our model would be improved with an interaction. In particular, we could ask whether the slopes of NAP on Richness differ across the beaches. First look at the data
ggplot(rikzdata) +
aes(x = NAP, y = Richness) +
stat_smooth(method = "lm", se = FALSE) +
geom_point() +
geom_abline(intercept = 6.582, slope = -2.568) + #fixed part of model1
facet_wrap(~Beach) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
So one option is to fit these separate regression lines for each beach
Intercepts = vector(length=9)
Slopes = vector(length = 9)
for (j in 1:9){
modelj = summary(lm(Richness ~ NAP,
subset = (Beach==j), data=rikzdata))
Intercepts[j] = modelj$coefficients[1,1]
Slopes[j] = modelj$coefficients[2, 1]
}
hist(Intercepts); iscamsummary(Intercepts)
## missing n Min Q1 Median Q3 Max Mean SD
## 0 9 3.088 3.521 4.951 10.822 13.346 6.948 4.191
hist(Slopes); iscamsummary(Slopes)
## missing n Min Q1 Median Q3 Max Mean SD
## 0 9 -8.900 -2.968 -1.755 -1.389 -0.372 -2.691 2.571
Here is the syntax for lmer to fit a random slopes model
model2 = lmer(Richness ~ NAP + (1 + NAP | Beach), data = rikzdata)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00267151 (tol = 0.002, component 1)
# First NAP = fixed slope, (1 + NAP | Beach) = random slopes for each Beaches
summary(model2, corr=F)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Richness ~ NAP + (1 + NAP | Beach)
## Data: rikzdata
##
## REML criterion at convergence: 232.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8212 -0.3410 -0.1674 0.1925 3.0397
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Beach (Intercept) 12.600 3.550
## NAP 2.942 1.715 -0.99
## Residual 7.307 2.703
## Number of obs: 45, groups: Beach, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.5884 1.2649 5.208
## NAP -2.8301 0.7231 -3.914
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00267151 (tol = 0.002, component 1)
AIC(model2)
## [1] 244.3839
#Our predicted models
BeachAsFactor = factor(rikzdata$Beach)
preds = predict(model2, newdata = rikzdata)
ggplot(rikzdata, aes(x = NAP , y = preds , group = Beach, color = factor(rikzdata$Beach) )) +
geom_smooth(method = "lm", alpha = .5, se = FALSE) +
geom_abline(intercept = 6.582, slope = -2.568) +
geom_point(data = rikzdata, aes(y = Richness, color=factor(rikzdata$Beach)), alpha = .5) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
Compare the slopes and intercepts from “no pooling” to “partial pooling.”
s.estimates = fixef(model2)+ranef(model2)$Beach[,]
s.intercepts = fixef(model2)[1] + ranef(model2)$Beach[,1]
s.slopes = fixef(model2)[2] + ranef(model2)$Beach[,2]
par(mfrow=c(2,2))
hist(Intercepts); hist(s.intercepts)
hist(Slopes); hist(s.slopes)
par(mfrow=c(1,1))
Lesson 19 Random intercept model \[y_{ij} = \beta_{0j} + \beta_1x_{ij} + \epsilon_{ij}\] where \(\beta_{0j} = \beta_{00}+u_j\) and \(u_j-N(0, \tau^2)\)
Random intercept and slope model \[y_{ij} = \beta_{0j} + \beta_1x_{1ij} + \epsilon_{ij}\] where \(\beta_{0j} = \beta_{00}+u_{0j}\) and \(u_{0j}-N(0, \tau^2_0)\) and \(\beta_{1j} = \beta_{10}+u_{1j}\) and \(u_{1j}-N(0, \tau^2_1)\)
OR \[y_{ij} = (\beta_{00}+u_{0j}) + (\beta_{10}+u_{1}j)x_{ij} + \epsilon_{ij}\]
model1 = lmer(Richness ~ NAP + (1 + NAP | Beach), data = rikzdata, REML = FALSE)
## boundary (singular) fit: see help('isSingular')
summary(model1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: Richness ~ NAP + (1 + NAP | Beach)
## Data: rikzdata
##
## AIC BIC logLik deviance df.resid
## 246.7 257.5 -117.3 234.7 39
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7985 -0.3418 -0.1827 0.1749 3.1389
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Beach (Intercept) 10.949 3.309
## NAP 2.502 1.582 -1.00
## Residual 7.174 2.678
## Number of obs: 45, groups: Beach, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.5818 1.1883 5.539
## NAP -2.8293 0.6849 -4.131
##
## Correlation of Fixed Effects:
## (Intr)
## NAP -0.810
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
fixef(model1)
## (Intercept) NAP
## 6.581817 -2.829255
ranef(model1)
## $Beach
## (Intercept) NAP
## 1 1.7986325 -0.8597860
## 2 5.6926249 -2.7212003
## 3 -2.7426699 1.3110567
## 4 -2.9682494 1.4188887
## 5 4.5044936 -2.1532473
## 6 -2.1372277 1.0216420
## 7 -2.4398536 1.1663039
## 8 -1.4646228 0.7001220
## 9 -0.2431276 0.1162204
##
## with conditional variances for "Beach"
Equation 1: y-hat = (6.582 + 1.799) - (2.829 + .8598) NAP Equation 9: y-hat = (6.582 - 0.2431) - (2.829 + .1162) NAP