#Quiz 4
#1. response: wheezing (binary), fixed: maternal smoking, random: child IDs
#GLMM: logistic regression (binary response) with a random intercept for each child
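#a rough sketch of the call, assuming a hypothetical data frame wheeze_dat with columns wheeze, smoke, childID:
#glmer(wheeze ~ smoke + (1|childID), family = binomial, data = wheeze_dat)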
#2. response: rating, fixed: temperature and recipe, random: each batch
#LMM
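#a rough sketch of the call, assuming a hypothetical data frame bake_dat with columns rating, temp, recipe, batch:
#lmer(rating ~ temp + recipe + (1|batch), data = bake_dat)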
#3. response: oz cheese, fixed: none, random: each worker and each store
#worker random effect is nested in the store random effect
#LMM with grand mean (no fixed effects)
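#a rough sketch of the call, assuming a hypothetical data frame cheese_dat with columns cheese_oz, store, worker:
#lmer(cheese_oz ~ 1 + (1|store) + (1|store:worker), data = cheese_dat)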
#4. lmer(time ~ week + (1|runner:team) + (1|team))
#runner is nested in team
#5. lmer(score ~ 1 + (1|coachID) + (1|advancedID))
#coachID and advancedID are crossed
#Orthodont LMM
library(nlme)
data(Orthodont)
library(lattice)
xyplot(distance ~ age|Subject, data = Orthodont)

#plots for each child
#this plot justifies a random intercept for each kid: it shows variation in the kids' intercepts (some kids start off with a longer distance than others)
#lattice plot helps us see variability from kid to kid
xyplot(distance ~ age|Sex, data = Orthodont)

#male vs female plot
#imagining a regression line in each panel, boys and girls look like they may have different intercepts and possibly different slopes
#should each child have a random intercept? We fit two models: one with a random intercept and one without
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
randint = lmer(distance ~ age + (1|Subject), data = Orthodont)
nullmod = lm(distance ~ age, data = Orthodont)
#then we find our likelihood ratio test statistic
(teststat = 2*as.numeric(logLik(randint)-logLik(nullmod)))
## [1] 58.57445
#the test stat is large, so our p-value will be small
pchisq(teststat, df = 1, lower.tail=FALSE)
## [1] 1.957419e-14
#this tells us that the larger model (LMM) is significantly better than the LM
#the variance of the subject specific intercepts is significantly > 0
#yes, we should have a random intercept for each child
#interpret the variance component for the random intercepts
summary(randint)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age + (1 | Subject)
## Data: Orthodont
##
## REML criterion at convergence: 447
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6645 -0.5351 -0.0129 0.4874 3.7218
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 4.472 2.115
## Residual 2.049 1.432
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 16.76111 0.80240 20.89
## age 0.66019 0.06161 10.72
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.845
#the variance for the random intercepts = 4.472
#we can interpret this with a 95% rule for normal distributions
#distance = 16.76 + 0.66(age)
#when age = 8, distance = 22.04 (predicted or average distance for age 8)
#95% of kids at age 8 will have an orthodont distance within 2*standard deviation = 2*2.115 = 4.23 mm of 22.04
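#a quick arithmetic check of that interval, plugging in the estimates from the summary above
(pred8 = 16.76111 + 0.66019*8)  #predicted distance at age 8, about 22.04 mm
pred8 + 2*2.115*c(-1,1)  #roughly 17.8 to 26.3 mm covers 95% of kids at age 8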
#should each child have a random slope?
#random slopes are only considered once we have decided that we need random intercepts
#we compare the model with the random intercept only against a model with both the random slope and random intercept
randslope = lmer(distance ~ age + (age|Subject), data = Orthodont)
#randslope adds a variance for the random slopes (plus their correlation with the random intercepts); we use df = 1 for this test
(teststat = 2*as.numeric(logLik(randslope)-logLik(randint)))
## [1] 4.36583
pchisq(teststat, df = 1, lower.tail = FALSE)
## [1] 0.03666663
#Our p-value is smaller than 0.05, so there is evidence that we do need the random slope; the random intercept isn't enough
#children vary significantly in the rates of growth of their orthodont distance
#slope represents the rate of growth
summary(randslope)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age + (age | Subject)
## Data: Orthodont
##
## REML criterion at convergence: 442.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2229 -0.4938 0.0073 0.4721 3.9161
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 5.41660 2.3274
## age 0.05128 0.2264 -0.61
## Residual 1.71616 1.3100
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 16.76111 0.77528 21.620
## age 0.66019 0.07126 9.265
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.848
#we can use a 95% rule interpretation again
#average slope is 0.66019
#the child-to-child standard deviation for this slope is 0.2264, so the 95% range is 0.66019 +/- 2*0.2264
2*0.2264*c(-1,1)+0.66019
## [1] 0.20739 1.11299
#95% of children's slopes are between 0.207 and 1.11 mm per year
#for 95% of children, when they age one year their orthodont distance increases by between 0.207 and 1.11 mm
#there is a wide variability in growth rates from child to child
#The correlation between the kid's random intercept and the kid's random slope is -0.61
#kids with larger intercepts tend to have smaller slopes and kids with a smaller intercept tend to have a larger slope
#this makes sense: kids who start with a short orthodont distance have to grow a lot to catch up, while kids who start with a larger distance don't need to grow as much
#should we add a fixed effect to distinguish between boys and girls?
#fit a model with and without sex
nullmod = lmer(distance ~ age + (age|Subject), data = Orthodont, REML = FALSE)
altmod = lmer(distance ~ age + Sex + (age|Subject), data = Orthodont, REML = FALSE)
(teststat = 2*as.numeric(logLik(altmod)-logLik(nullmod)))
## [1] 6.37644
pchisq(teststat, df = 1, lower.tail = FALSE)
## [1] 0.01156451
#df = 1 because there is 1 different parameter between the two models - Sex as a fixed effect
#the small p-value indicates that there is a difference between boys' and girls' orthodont distances at each age
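#an equivalent way to run this test is anova(nullmod, altmod), which should report the same chi-square statistic and p-value
#anova(nullmod, altmod)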
#crossed random effects, ex: ratings in speed dating
#raterID, receiverID
#lmer(y ~ x... + (1|raterID) + (1|receiverID))
#nested random effects, ex: calc exams, students nested in professors
#lmer(y ~ x + (1|prof) + (1|prof:student))