Multilevel Model

rikzdata <- read_delim("http://www.rossmanchance.com/stat414/data/RIKZ.txt", 
                       "\t", escape_double = FALSE, trim_ws = TRUE)

## Rows: 45 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (5): Sample, Richness, Exposure, NAP, Beach
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

There appear to be differences in species richness across the beaches.

plot(rikzdata$Richness ~ rikzdata$Beach)

iscamsummary(rikzdata$Richness, rikzdata$Beach)

Fitting the null model that allows for the intercepts to vary by beach.

#lmList gives us a list of the intercepts for the different beaches
model0 = lmer(Richness ~ 1 + (1 | Beach), data = rikzdata)
# (1 | Beach) means we want to intercept to vary beach by beach 
summary(model0, corr=F)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Richness ~ 1 + (1 | Beach)
##    Data: rikzdata
## 
## REML criterion at convergence: 261.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7797 -0.5070 -0.0980  0.2547  3.8063 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Beach    (Intercept) 10.48    3.237   
##  Residual             15.51    3.938   
## Number of obs: 45, groups:  Beach, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    5.689      1.228   4.631

rand_ints <- as.data.frame(ranef(model0, condVar = TRUE))
# plot
ggplot(rand_ints, aes(y = condval, x = grp)) +
geom_point() +
geom_errorbar(aes(ymin = condval - 1.96*condsd,
max = condval + 1.96*condsd), width = 0) +
labs(title = "EB est. of random component of intercept",
x = "Beach",
y = "Estimate and 95% CI") +
theme(axis.text.x = element_text(angle = 90)) +
theme_bw()

AIC and BIC are smaller the better

AIC(model0)

## [1] 267.1142

Does there appear to be a relationship between species richness and NAP?

modelb = lm(Richness ~ NAP, data = rikzdata)
summary(modelb)

## 
## Call:
## lm(formula = Richness ~ NAP, data = rikzdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0675 -2.7607 -0.8029  1.3534 13.8723 
## 
## Coefficients:
##             Estimate Std. Error t value          Pr(>|t|)    
## (Intercept)   6.6857     0.6578  10.164 0.000000000000525 ***
## NAP          -2.8669     0.6307  -4.545 0.000044175212096 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.16 on 43 degrees of freedom
## Multiple R-squared:  0.3245, Adjusted R-squared:  0.3088 
## F-statistic: 20.66 on 1 and 43 DF,  p-value: 0.00004418

plot(rikzdata$Richness ~ rikzdata$NAP)
abline(modelb)

model1 = lmer(Richness ~ NAP + (1 | Beach), data  = rikzdata)
summary(model1, corr=F)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Richness ~ NAP + (1 | Beach)
##    Data: rikzdata
## 
## REML criterion at convergence: 239.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4227 -0.4848 -0.1576  0.2519  3.9794 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Beach    (Intercept) 8.668    2.944   
##  Residual             9.362    3.060   
## Number of obs: 45, groups:  Beach, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   6.5819     1.0958   6.007
## NAP          -2.5684     0.4947  -5.192

AIC(model1)

## [1] 247.4802

#Our predicted models
preds = predict(model1, newdata = rikzdata)
ggplot(rikzdata, aes(x = NAP , y = preds , group = Beach, color = factor(rikzdata$Beach) )) +
geom_smooth(method = "lm", alpha = .5, se = FALSE) +
geom_abline(intercept = 6.686, slope = -2.867) +
geom_point(data = rikzdata, aes(y = Richness, color=factor(rikzdata$Beach)), alpha = .5) +
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

plot(rikzdata$NAP~ rikzdata$Beach)
abline(lm(rikzdata$NAP~rikzdata$Beach))

One could argue that there are runs of positive/negative residuals for different beaches. This might indicate that our model would be improved with an interaction. In particular, we could ask whether the slopes of NAP on Richness differ across the beaches. First look at the data

ggplot(rikzdata) + 
  aes(x = NAP, y = Richness) + 
  stat_smooth(method = "lm", se = FALSE) +
  geom_point() +
  geom_abline(intercept = 6.582, slope = -2.568) +  #fixed part of model1
  facet_wrap(~Beach) +
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

So one option is to fit these separate regression lines for each beach

Intercepts = vector(length=9)
Slopes = vector(length = 9)
for (j in 1:9){
  modelj = summary(lm(Richness ~ NAP,
                   subset = (Beach==j), data=rikzdata))
  Intercepts[j] = modelj$coefficients[1,1]
  Slopes[j] = modelj$coefficients[2, 1]
}

hist(Intercepts); iscamsummary(Intercepts)

## missing       n    Min       Q1  Median      Q3     Max    Mean      SD 
##       0       9   3.088   3.521   4.951  10.822  13.346   6.948   4.191

hist(Slopes); iscamsummary(Slopes)

## missing       n    Min       Q1  Median      Q3     Max    Mean      SD 
##       0       9  -8.900  -2.968  -1.755  -1.389  -0.372  -2.691   2.571

Here is the syntax for lmer to fit a random slopes model

model2 = lmer(Richness ~ NAP + (1 + NAP | Beach), data  = rikzdata)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00267151 (tol = 0.002, component 1)

# First NAP = fixed slope, (1 + NAP | Beach) = random slopes for each Beaches
summary(model2, corr=F)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Richness ~ NAP + (1 + NAP | Beach)
##    Data: rikzdata
## 
## REML criterion at convergence: 232.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8212 -0.3410 -0.1674  0.1925  3.0397 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Beach    (Intercept) 12.600   3.550         
##           NAP          2.942   1.715    -0.99
##  Residual              7.307   2.703         
## Number of obs: 45, groups:  Beach, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   6.5884     1.2649   5.208
## NAP          -2.8301     0.7231  -3.914
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00267151 (tol = 0.002, component 1)

AIC(model2)

## [1] 244.3839

#Our predicted models
BeachAsFactor = factor(rikzdata$Beach)
preds = predict(model2, newdata = rikzdata)
ggplot(rikzdata, aes(x = NAP , y = preds , group = Beach, color = factor(rikzdata$Beach) )) +
geom_smooth(method = "lm", alpha = .5, se = FALSE) +
geom_abline(intercept = 6.582, slope = -2.568) +
geom_point(data = rikzdata, aes(y = Richness, color=factor(rikzdata$Beach)), alpha = .5) +
  theme_bw()

## `geom_smooth()` using formula = 'y ~ x'

Compare the slopes and intercepts from “no pooling” to “partial pooling.”

s.estimates = fixef(model2)+ranef(model2)$Beach[,]
s.intercepts = fixef(model2)[1] + ranef(model2)$Beach[,1]
s.slopes = fixef(model2)[2] + ranef(model2)$Beach[,2]
par(mfrow=c(2,2))
hist(Intercepts); hist(s.intercepts)
hist(Slopes); hist(s.slopes)

par(mfrow=c(1,1))

Lesson 19 Random intercept model \[y_{ij} = \beta_{0j} + \beta_1x_{ij} + \epsilon_{ij}\] where \(\beta_{0j} = \beta_{00}+u_j\) and \(u_j-N(0, \tau^2)\)

Random intercept and slope model \[y_{ij} = \beta_{0j} + \beta_1x_{1ij} + \epsilon_{ij}\] where \(\beta_{0j} = \beta_{00}+u_{0j}\) and \(u_{0j}-N(0, \tau^2_0)\) and \(\beta_{1j} = \beta_{10}+u_{1j}\) and \(u_{1j}-N(0, \tau^2_1)\)

OR \[y_{ij} = (\beta_{00}+u_{0j}) + (\beta_{10}+u_{1}j)x_{ij} + \epsilon_{ij}\]

model1 = lmer(Richness ~ NAP + (1 + NAP | Beach), data = rikzdata, REML = FALSE)

## boundary (singular) fit: see help('isSingular')

summary(model1)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: Richness ~ NAP + (1 + NAP | Beach)
##    Data: rikzdata
## 
##      AIC      BIC   logLik deviance df.resid 
##    246.7    257.5   -117.3    234.7       39 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7985 -0.3418 -0.1827  0.1749  3.1389 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Beach    (Intercept) 10.949   3.309         
##           NAP          2.502   1.582    -1.00
##  Residual              7.174   2.678         
## Number of obs: 45, groups:  Beach, 9
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   6.5818     1.1883   5.539
## NAP          -2.8293     0.6849  -4.131
## 
## Correlation of Fixed Effects:
##     (Intr)
## NAP -0.810
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

fixef(model1)

## (Intercept)         NAP 
##    6.581817   -2.829255

ranef(model1)

## $Beach
##   (Intercept)        NAP
## 1   1.7986325 -0.8597860
## 2   5.6926249 -2.7212003
## 3  -2.7426699  1.3110567
## 4  -2.9682494  1.4188887
## 5   4.5044936 -2.1532473
## 6  -2.1372277  1.0216420
## 7  -2.4398536  1.1663039
## 8  -1.4646228  0.7001220
## 9  -0.2431276  0.1162204
## 
## with conditional variances for "Beach"

Equation 1: y-hat = (6.582 + 1.799) - (2.829 + .8598) NAP Equation 9: y-hat = (6.582 - 0.2431) - (2.829 + .1162) NAP