#Quiz 4
#1. response: wheezing (binary), fixed: maternal smoking, random: child IDs
#GLMM: logistic regression (binary response) with a random intercept for each child
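#a rough sketch of the call, assuming a hypothetical data frame wheeze_dat with columns wheeze, smoke, childID:
#glmer(wheeze ~ smoke + (1|childID), family = binomial, data = wheeze_dat)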
#2. response: rating, fixed: temperature and recipe, random: each batch
#LMM
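#a rough sketch of the call, assuming a hypothetical data frame bake_dat with columns rating, temp, recipe, batch:
#lmer(rating ~ temp + recipe + (1|batch), data = bake_dat)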
#3. response: oz cheese, fixed: none, random: each worker and each store
#worker random effect is nested in the store random effect
#LMM with grand mean (no fixed effects)
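#a rough sketch of the call, assuming a hypothetical data frame cheese_dat with columns cheese_oz, store, worker:
#lmer(cheese_oz ~ 1 + (1|store) + (1|store:worker), data = cheese_dat)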
#4. lmer(time ~ week + (1|runner:team) + (1|team))
#runner is nested in team
#5. lmer(score ~ 1 + (1|coachID) + (1|advancedID))
#coachID and advancedID are crossed
#Orthodont LMM
library(nlme)
data(Orthodont)
library(lattice)
xyplot(distance ~ age|Subject, data = Orthodont)

#plots for each child
#this plot justifies a random intercept for each kid: it shows variation in the kids' intercepts (some kids start off with a longer distance than others)
#lattice plot helps us see variability from kid to kid
xyplot(distance ~ age|Sex, data = Orthodont)

#male vs female plot
#imagining a regression line in each panel, boys and girls look like they may have different intercepts and possibly different slopes
#should each child have a random intercept? We fit two models: one with a random intercept and one without
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
randint = lmer(distance ~ age + (1|Subject), data = Orthodont)
nullmod = lm(distance ~ age, data = Orthodont)
#then we find our likelihood ratio test statistic
(teststat = 2*as.numeric(logLik(randint)-logLik(nullmod)))
## [1] 58.57445
#the test stat is large, so our p-value will be small
pchisq(teststat, df = 1, lower.tail=FALSE)
## [1] 1.957419e-14
#this tells us that the larger model (LMM) is significantly better than the LM
#the variance of the subject specific intercepts is significantly > 0
#yes, we should have a random intercept for each child
#interpret the variance component for the random intercepts
summary(randint)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age + (1 | Subject)
## Data: Orthodont
##
## REML criterion at convergence: 447
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6645 -0.5351 -0.0129 0.4874 3.7218
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 4.472 2.115
## Residual 2.049 1.432
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 16.76111 0.80240 20.89
## age 0.66019 0.06161 10.72
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.845
#the variance for the random intercepts = 4.472
#we can interpret this with a 95% rule for normal distributions
#distance = 16.76 + 0.66(age)
#when age = 8, distance = 22.04 (predicted or average distance for age 8)
#95% of kids at age 8 will have an orthodont distance within 2*standard deviation = 2*2.115 = 4.23 mm of 22.04
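#a quick arithmetic check of that interval, plugging in the estimates from the summary above
(pred8 = 16.76111 + 0.66019*8)  #predicted distance at age 8, about 22.04 mm
pred8 + 2*2.115*c(-1,1)  #roughly 17.8 to 26.3 mm covers 95% of kids at age 8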
#should each child have a random slope?
#random slopes are only considered once we have decided that we need random intercepts
#we compare the model with the random intercept only against a model with both the random slope and random intercept
randslope = lmer(distance ~ age + (age|Subject), data = Orthodont)
#randslope adds a variance for the random slopes (plus their correlation with the random intercepts); we use df = 1 for this test
(teststat = 2*as.numeric(logLik(randslope)-logLik(randint)))
## [1] 4.36583
pchisq(teststat, df = 1, lower.tail = FALSE)
## [1] 0.03666663
#Our p-value is smaller than 0.05, so there is evidence that we do need the random slope; the random intercept isn't enough
#children vary significantly in the rates of growth of their orthodont distance
#slope represents the rate of growth
summary(randslope)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age + (age | Subject)
## Data: Orthodont
##
## REML criterion at convergence: 442.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2229 -0.4938 0.0073 0.4721 3.9161
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 5.41660 2.3274
## age 0.05128 0.2264 -0.61
## Residual 1.71616 1.3100
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 16.76111 0.77528 21.620
## age 0.66019 0.07126 9.265
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.848
#we can use a 95% rule interpretation again
#average slope is 0.66019
#the child-to-child standard deviation for this slope is 0.2264, so the 95% range is 0.66019 +/- 2*0.2264
2*0.2264*c(-1,1)+0.66019
## [1] 0.20739 1.11299
#95% of children's slopes are between 0.207 and 1.11 mm per year
#for 95% of children, when they age one year their orthodont distance increases by between 0.207 and 1.11 mm
#there is a wide variability in growth rates from child to child
#The correlation between the kid's random intercept and the kid's random slope is -0.61
#kids with larger intercepts tend to have smaller slopes and kids with a smaller intercept tend to have a larger slope
#this makes sense: kids who start with a short orthodont distance have to grow a lot to catch up, while kids who start with a larger distance don't need to grow as much
#should we add a fixed effect to distinguish between boys and girls?
#fit a model with and without sex
nullmod = lmer(distance ~ age + (age|Subject), data = Orthodont, REML = FALSE)
altmod = lmer(distance ~ age + Sex + (age|Subject), data = Orthodont, REML = FALSE)
(teststat = 2*as.numeric(logLik(altmod)-logLik(nullmod)))
## [1] 6.37644
pchisq(teststat, df = 1, lower.tail = FALSE)
## [1] 0.01156451
#df = 1 because there is 1 different parameter between the two models - Sex as a fixed effect
#the small p-value indicates that there is a difference between boys' and girls' orthodont distances at each age
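#an equivalent way to run this test is anova(nullmod, altmod), which should report the same chi-square statistic and p-value
#anova(nullmod, altmod)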
#crossed random effects, ex: ratings in speed dating
#raterID, receiverID
#lmer(y ~ x... + (1|raterID) + (1|receiverID))
#nested random effects, ex: calc exams, students nested in professors
#lmer(y ~ x + (1|prof) + (1|prof:student))