Homework

In class Exercise 1

Data

The data consist of weight gains of 68 Asian children in a British community. Measurements of weight were recorded for children on up to five occasions visiting a clinic. The ages at which the measurements were taken are roughly to target examination dates of 6 weeks, then 8, 12, 27 months.

pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest)

fL <- "http://www.stata-press.com/data/mlmus3/asian.dta"
dta <- read.dta(fL)

dta$id <- factor(dta$id)
dta$gender <- factor(dta$gender, 1:2, labels = c("M", "F"))
dta$brthwt <- dta$brthwt/1000
dta$occ <- factor(dta$occ)

glimpse(dta)

## Rows: 198
## Columns: 6
## $ id     <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, 4…
## $ occ    <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, 1…
## $ age    <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.1916…
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, 1…
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.155…
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, M…

head(dta)

##    id occ       age weight brthwt gender
## 1  45   1 0.1368925  5.171  4.140      M
## 2  45   2 0.6570842 10.860  4.140      M
## 3  45   3 1.2183436 13.150  4.140      M
## 4  45   4 1.4291581 13.200  4.140      M
## 5  45   5 2.2724161 15.880  4.140      M
## 6 258   1 0.1916496  5.300  3.155      F

ggplot(dta, aes(age, weight, group = id, color=gender))+
 geom_point()+ 
 geom_line() +
 facet_wrap( ~ gender)+
 labs(x = "Age (year)", y = "Weight (kg)") +
 theme(legend.position = c(.9, .2))

ggplot(dta, aes(age, weight, color = gender))+
 geom_point()+
 stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
 labs(x = "Age (year)", y = "Weight (kg)")+
 theme(legend.position = c(.9, .2))

dta$age2 <- dta$age^2

# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age |id),
data = dta))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
##    Data: dta
## 
## REML criterion at convergence: 496.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.81353 -0.43471 -0.03652  0.45573  2.79813 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  id       (Intercept) 0.1920   0.4381       
##           age         0.2702   0.5198   0.07
##  Residual             0.3346   0.5784       
## Number of obs: 198, groups:  id, 68
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   1.02225    0.54924  57.65949   1.861  0.06782 .  
## brthwt        0.87755    0.16616  53.16001   5.281 2.43e-06 ***
## genderF      -0.51922    0.16698  52.23221  -3.109  0.00303 ** 
## age           7.73933    0.23674 135.03624  32.691  < 2e-16 ***
## age2         -1.67125    0.08762 114.78740 -19.074  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) brthwt gendrF age   
## brthwt  -0.963                     
## genderF -0.240  0.095              
## age     -0.221  0.058 -0.003       
## age2     0.199 -0.051  0.002 -0.927

# Type 3 tests, KR-method - This is better
afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta)

## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]

## Mixed Model Anova Table (Type 3 tests, KR-method)
## 
## Model: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
##   Effect        df           F p.value
## 1 brthwt  1, 62.42   26.79 ***   <.001
## 2 gender  1, 61.58     9.31 **    .003
## 3    age 1, 141.54 1046.79 ***   <.001
## 4   age2 1, 122.93  355.47 ***   <.001
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

# construct piecewise lines defined by knots
dta$d1 <- dta$age - 0.4; dta$d1[dta$d1 < 0] <- 0
dta$d2 <- dta$age - 0.9; dta$d2[dta$d2 < 0] <- 0
dta$d3 <- dta$age - 1.5; dta$d3[dta$d3 < 0] <- 0

summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
##    Data: dta
## 
## REML criterion at convergence: 465
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.81475 -0.41746 -0.01623  0.42323  2.51409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  id       (Intercept) 0.2476   0.4976        
##           age         0.3273   0.5721   -0.12
##  Residual             0.2523   0.5023        
## Number of obs: 198, groups:  id, 68
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   0.4941     0.5600 65.7487   0.882 0.380826    
## brthwt        0.8949     0.1617 51.4586   5.533 1.07e-06 ***
## genderF      -0.5459     0.1627 50.8735  -3.356 0.001504 ** 
## age          10.3595     1.0792 83.2345   9.599 4.03e-15 ***
## d1           -6.1891     1.8004 80.6638  -3.438 0.000931 ***
## d2           -1.6641     1.3071 82.8518  -1.273 0.206547    
## d3           -0.3684     0.9920 80.3757  -0.371 0.711367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) brthwt gendrF age    d1     d2    
## brthwt  -0.927                                   
## genderF -0.231  0.099                            
## age     -0.339  0.045 -0.002                     
## d1       0.313 -0.039 -0.001 -0.985              
## d2      -0.239  0.032  0.012  0.816 -0.884       
## d3       0.167 -0.031 -0.019 -0.533  0.606 -0.886

dtafm1 <- data.frame(dta, yhat=fitted(m1))

ggplot(dtafm1, aes(age, weight, group=id, color = gender))+
 geom_point()+
 geom_line(aes(age, yhat))+
 facet_wrap(~ gender)+
 labs(x = "Age (year)", y = "Weight (kg)")

quantile(dta$age)

##        0%       25%       50%       75%      100% 
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012

summary(m0 <- lm(weight ~ age, data=dta))

## 
## Call:
## lm(formula = weight ~ age, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2994 -1.0341 -0.2524  1.0947  4.2614 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.2021     0.1779   29.24   <2e-16 ***
## age           3.3640     0.1332   25.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared:  0.765,  Adjusted R-squared:  0.7638 
## F-statistic: 637.9 on 1 and 196 DF,  p-value: < 2.2e-16

summary(m0z <- segmented::segmented(m0, seg.Z= ~ age, 
                            psi=list(age=c(0.6, 1.4)),
                            control=seg.control(display=FALSE)))

## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6, 
##     1.4)), control = seg.control(display = FALSE))
## 
## Estimated Break-Point(s):
##            Est. St.Err
## psi1.age 0.317  0.138
## psi2.age 2.300  0.046
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    2.262      1.391   1.626    0.106
## age           16.425     10.311   1.593    0.113
## U1.age       -13.785     10.313  -1.337       NA
## U2.age        -8.540      3.400  -2.512       NA
## 
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528,  Adjusted R-squared: 0.849 
## 
## Convergence attained in 5 iter. (rel. change 1.5162e-06)

plot(m0z)
with(dta, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()

In class Exercise 2

#
library(lme4)

# 18 subjects (3hrs sleep) 10 observations on each, p64 of bates
# the "new" random effects (vs nlme) see Bates book 2010
# sleep deprivation 3hrs/night truckers

#
data(sleepstudy, package="lme4")

#
dim(sleepstudy)

## [1] 180   3

#
head(sleepstudy)

##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## 4 321.4398    3     308
## 5 356.8519    4     308
## 6 414.6901    5     308

#
with(sleepstudy, table(Subject))

## Subject
## 308 309 310 330 331 332 333 334 335 337 349 350 351 352 369 370 371 372 
##  10  10  10  10  10  10  10  10  10  10  10  10  10  10  10  10  10  10

#
lattice::xyplot(Reaction ~ Days | Subject, data=sleepstudy, 
       xlab="Days of sleep deprivation",
       ylab="Average reaction time (ms)",
       type=c("g","p","r"))

#
sleeplmList <- lmList(Reaction ~ Days |Subject, data = sleepstudy)

#
sleeplmList

## Call: lmList(formula = Reaction ~ Days | Subject, data = sleepstudy) 
## Coefficients:
##     (Intercept)      Days
## 308    244.1927 21.764702
## 309    205.0549  2.261785
## 310    203.4842  6.114899
## 330    289.6851  3.008073
## 331    285.7390  5.266019
## 332    264.2516  9.566768
## 333    275.0191  9.142045
## 334    240.1629 12.253141
## 335    263.0347 -2.881034
## 337    290.1041 19.025974
## 349    215.1118 13.493933
## 350    225.8346 19.504017
## 351    261.1470  6.433498
## 352    276.3721 13.566549
## 369    254.9681 11.348109
## 370    210.4491 18.056151
## 371    253.6360  9.188445
## 372    267.0448 11.298073
## 
## Degrees of freedom: 180 total; 144 residual
## Residual standard error: 25.59182

#mean int and slope match lmer Fixed effects results p.67
mean(coef(sleeplmList)[,1])

## [1] 251.4051

mean(coef(sleeplmList)[,2])

## [1] 10.46729

#these are too big as they should be, 
#compare with variance random effects p.67
# mle subtracts off wobble in estimated indiv regressions Y on t

# - SSR/SST
var(coef(sleeplmList)[,1])

## [1] 838.3423

var(coef(sleeplmList)[,2])

## [1] 43.01034

#
quantile(coef(sleeplmList)[,1])

##       0%      25%      50%      75%     100% 
## 203.4842 229.4167 258.0576 273.0255 290.1041

quantile(coef(sleeplmList)[,2])

##        0%       25%       50%       75%      100% 
## -2.881034  6.194548 10.432421 13.548395 21.764702

#     
stem(coef(sleeplmList)[,2])

## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   -0 | 3
##   0 | 23
##   0 | 56699
##   1 | 011234
##   1 | 89
##   2 | 02

#
cor(coef(sleeplmList)[,1], coef(sleeplmList)[,2])

## [1] -0.1375534

# first 2 do REML, second pair match 
# Bates p.67 in doing MLE (REML FALSE)

#
sleeplmer <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
summary(sleeplmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
##    Data: sleepstudy
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.10   24.741       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)  251.405      6.825  17.000  36.838  < 2e-16 ***
## Days          10.467      1.546  17.000   6.771 3.26e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138

#
sleeplmer2 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy, REML=FALSE)
summary(sleeplmer2)

## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
##    Data: sleepstudy
## 
##      AIC      BIC   logLik deviance df.resid 
##   1763.9   1783.1   -876.0   1751.9      174 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9416 -0.4656  0.0289  0.4636  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 565.48   23.780       
##           Days         32.68    5.717   0.08
##  Residual             654.95   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)  251.405      6.632  18.001  37.907  < 2e-16 ***
## Days          10.467      1.502  18.000   6.968 1.65e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138

# The end

In class Exercise 3

# load them
pacman::p_load(mlmRev, tidyverse, lme4, foreign, merTools, sjPlot)

# download a copy of data from the following link and save it to the data folder
fl<-"https://content.sph.harvard.edu/fitzmaur/ala2e/fat.dta"

# input data
dta <- read.dta(fl)

#重新命名變項
names(dta) <-c("ID", "Age", "Age_m", "Time", "PBF")

# show first 6 obs.
head(dta)

##   ID   Age Age_m  Time   PBF
## 1  1  9.32 13.19 -3.87  7.94
## 2  1 10.33 13.19 -2.86 15.65
## 3  1 11.24 13.19 -1.95 13.51
## 4  1 12.19 13.19 -1.00 23.23
## 5  1 13.24 13.19  0.05 10.52
## 6  1 14.24 13.19  1.05 20.45

# construct indicator variable for after menarche
dta <- dta %>% 
       mutate(ID = factor(ID),                
              Time_a = ifelse(Time > 0, Time, 0),
              Menarche = as.factor(ifelse(Time > 0, "T", "F")))

# show data structure
str(dta)

## 'data.frame':    1049 obs. of  7 variables:
##  $ ID      : Factor w/ 162 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ Age     : num  9.32 10.33 11.24 12.19 13.24 ...
##  $ Age_m   : num  13.2 13.2 13.2 13.2 13.2 ...
##  $ Time    : num  -3.87 -2.86 -1.95 -1 0.05 ...
##  $ PBF     : num  7.94 15.65 13.51 23.23 10.52 ...
##  $ Time_a  : num  0 0 0 0 0.05 ...
##  $ Menarche: Factor w/ 2 levels "F","T": 1 1 1 1 2 2 1 1 1 1 ...

# black and white theme                     
theme_set(theme_minimal())

# 畫經期沒有來族群的散佈圖及回歸線
ggplot(subset(dta, Menarche=="F"),
       aes(Time, PBF, group=ID)) +
 #geom_point(col=8)+
 geom_line(col=8)+
 stat_smooth(aes(group=1),method='lm',formula=y~x)+
 labs(x="Age (in year)",
      y="% Body fat")

# baseline model if you want one
mb_0 <- lmer(PBF ~ Time + (1 | ID), data = subset(dta, Menarche=='F'))
tab_model(mb_0)

	PBF
Predictors	Estimates	CI	p
(Intercept)	20.20	19.06 – 21.34	<0.001
Time	-0.09	-0.33 – 0.16	0.494
Random Effects
σ²	10.02
τ₀₀ _ID	43.81
ICC	0.81
N _ID	162
Observations	497
Marginal R² / Conditional R²	0.000 / 0.814

# 畫經期已經來族群的散佈圖及回歸線
ggplot(subset(dta, Menarche=="T"),
       aes(Time, PBF, group=ID)) +
# geom_point()+
 geom_line(col=8)+
 stat_smooth(aes(group=1),method='lm',formula=y~x)+
 labs(x="Age (in year)",
      y="% Body fat")

#fixed Time and random effect in Time within subjects
ma_0 <- lmer(PBF ~ Time + (Time | ID), data=subset(dta, Menarche=='T'))
tab_model(ma_0)

	PBF
Predictors	Estimates	CI	p
(Intercept)	22.59	21.42 – 23.75	<0.001
Time	1.97	1.68 – 2.25	<0.001
Random Effects
σ²	7.82
τ₀₀ _ID	46.21
τ₁₁ _ID.Time	1.42
ρ₀₁ _ID	-0.55
ICC	0.82
N _ID	160
Observations	552
Marginal R² / Conditional R²	0.123 / 0.844

betas_a <- na.omit(coef(lmList(PBF ~ Time | ID, data=subset(dta, Menarche=='T'))))
car::dataEllipse(betas_a[,1],betas_a[,2])

m4<-lmer(PBF~Time+Time_a+(Time+Time_a|ID), data=dta)
dta_full <- dta %>% mutate(yhat=fitted(m4))

# individual connected lines different color for before and after add group regression lines
ggplot(dta_full, aes(Time, yhat, color = Menarche)) +
 geom_line(aes(group = ID)) +
 geom_point(alpha = 0.5) +
 stat_smooth(method="lm", aes(group = Menarche)) +
 guides(color = F) +
 labs(x = "Age relative to menarche (in years)", 
      y = "Percent body fat")

# model with 3 random effects associated with an individual
summary(m1 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data = dta))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PBF ~ Time + Time_a + (Time + Time_a | ID)
##    Data: dta
## 
## REML criterion at convergence: 6062.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7742 -0.5900 -0.0359  0.5946  3.3798 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  ID       (Intercept) 45.942   6.778               
##           Time         1.631   1.277     0.29      
##           Time_a       2.750   1.658    -0.54 -0.83
##  Residual              9.473   3.078               
## Number of obs: 1049, groups:  ID, 162
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  21.3614     0.5646 161.5610  37.837  < 2e-16 ***
## Time          0.4171     0.1572 108.4614   2.654  0.00915 ** 
## Time_a        2.0471     0.2280 132.6773   8.980 2.32e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##        (Intr) Time  
## Time    0.351       
## Time_a -0.515 -0.872

# baseline model if you want one
summary(m0 <- lmer(PBF ~ Time + (Time | ID), data=dta))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PBF ~ Time + (Time | ID)
##    Data: dta
## 
## REML criterion at convergence: 6196.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7577 -0.5961  0.0221  0.6169  3.0981 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  ID       (Intercept) 38.2178  6.1821        
##           Time         0.6938  0.8329   -0.24
##  Residual             11.6215  3.4090        
## Number of obs: 1049, groups:  ID, 162
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  23.04978    0.49985 159.73871   46.11   <2e-16 ***
## Time          1.55480    0.08484 145.73759   18.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## Time -0.199
## convergence code: 0
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)

# estimate random effects by simulation
m1_re <- REsim(m1)

# plot for random effects
plotREsim(m1_re)

# residual plot
plot(m1, resid(., type = "pearson") ~ fitted(.),
     abline = 0, pch = 20, cex = .8, id = 0.05,
     xlab = "Ftted values", ylab = "Pearson Residuals")

Exercise 1

dta_e1<- read.table("visual_search.csv", h=T, sep = ",")
head(dta_e1)

##   Participant      Dx Set.Size      RT
## 1        9129 Control        1  786.50
## 2        9051 Control        1  935.83
## 3        9126 Control        1  750.83
## 4       9171* Control        1 1129.50
## 5        9176 Control        1 1211.33
## 6        9167 Control        1 1178.83

str(dta_e1)

## 'data.frame':    132 obs. of  4 variables:
##  $ Participant: chr  "9129" "9051" "9126" "9171*" ...
##  $ Dx         : chr  "Control" "Control" "Control" "Control" ...
##  $ Set.Size   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ RT         : num  786 936 751 1130 1211 ...

head(dta_e1)

##   Participant      Dx Set.Size      RT
## 1        9129 Control        1  786.50
## 2        9051 Control        1  935.83
## 3        9126 Control        1  750.83
## 4       9171* Control        1 1129.50
## 5        9176 Control        1 1211.33
## 6        9167 Control        1 1178.83

ggplot(dta_e1, aes(Set.Size, RT))+
  geom_line(aes(group=Participant), alpha=.5)+
  stat_smooth(aes(group=Dx), method="lm", formula=y~x)+
  geom_point(alpha=.5)+
  facet_wrap(~Dx)+
  labs(y="Response Time(ms)",
       x="Set Si")

summary(e1_m0<-lmer(RT~Set.Size*Dx+(Set.Size|Participant), data=dta_e1))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ Set.Size * Dx + (Set.Size | Participant)
##    Data: dta_e1
## 
## REML criterion at convergence: 2186.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7577 -0.3130 -0.0750  0.3117  6.1372 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev. Corr
##  Participant (Intercept) 657489.0 810.86       
##              Set.Size       407.4  20.18   1.00
##  Residual                772445.8 878.89       
## Number of obs: 132, groups:  Participant, 33
## 
## Fixed effects:
##                    Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)         2078.75     270.97    33.37   7.672 7.27e-09 ***
## Set.Size              73.49      11.40    51.35   6.446 3.97e-08 ***
## DxControl          -1106.05     366.89    33.37  -3.015  0.00489 ** 
## Set.Size:DxControl   -21.74      15.44    51.35  -1.408  0.16514    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Set.Sz DxCntr
## Set.Size    -0.071              
## DxControl   -0.739  0.053       
## St.Sz:DxCnt  0.053 -0.739 -0.071
## convergence code: 0
## boundary (singular) fit: see ?isSingular

#set dodge amount
pd<- position_dodge(width=.2)
ggplot(dta_e1, aes(Set.Size, RT, shape=Dx, linetype=Dx))+
  stat_summary(fun.y=mean, geom="line", position=pd)+
  stat_summary(fun.y = mean, geom="point", position=pd)+
  stat_summary(fun.data=mean_se, geom="errorbar",
               position=pd, linetype="solid", width=.5)+
  labs(x="Set Size", 
       y="Response Time(ms)")+
  theme(legend.justification = c(0,1), legend.position = c(0,1), legend.background = element_rect(fill="white", color="black"))

anova(e1_m0)

## Type III Analysis of Variance Table with Satterthwaite's method
##               Sum Sq  Mean Sq NumDF  DenDF F value    Pr(>F)    
## Set.Size    50844340 50844340     1 51.354 65.8225 9.214e-11 ***
## Dx           7020030  7020030     1 33.372  9.0881  0.004888 ** 
## Set.Size:Dx  1531430  1531430     1 51.354  1.9826  0.165145    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

m0_fe<-FEsim(e1_m0, 1000)
plotFEsim(m0_fe)

plot(e1_m0, resid(.,type="pearson")~fitted(.)|Dx, 
     layout=c(2,1), pch=20, cex=.8,, abline=0,
     xlab="Fitted values", ylab="Pearson residuals")

Exercise 2

Eighty-nine children’s scores on the reading subtest of the Peabody Individual Achievement Test (PIAT) were recorded. Each child was 6 years old in 1986, the first year of data collection. Source: Singer, J.D., & Willet, J.B. (2003), Applied Longitudinal Data Analysis. p. 140-141.

Column 1: Child ID Column 2: Wave (1 = 1986, 2 = 1988, 3 = 1990) Column 3: Child’s expected age on each measurement occasion Column 4: Age in years Column 5: Child’s PIAT score

dta_e2<- read.table("reading_piat.txt", h=T)
head(dta_e2)

##   ID Wave Age_G     Age PIAT
## 1  1    1   6.5  6.0000   18
## 2  1    2   8.5  8.3333   35
## 3  1    3  10.5 10.3333   59
## 4  2    1   6.5  6.0000   18
## 5  2    2   8.5  8.5000   25
## 6  2    3  10.5 10.5833   28

str(dta_e2)

## 'data.frame':    267 obs. of  5 variables:
##  $ ID   : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Wave : int  1 2 3 1 2 3 1 2 3 1 ...
##  $ Age_G: num  6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
##  $ Age  : num  6 8.33 10.33 6 8.5 ...
##  $ PIAT : int  18 35 59 18 25 28 18 23 32 18 ...

dta_e2$Wave<-factor(dta_e2$Wave)
dta_e2$Age_c <-dta_e2$Age_G-6.5
ggplot(dta_e2, aes(Age_c, PIAT, group=ID))+
  stat_smooth(method="rlm", se=F, lwd=.1)+
  stat_smooth(aes(group=1), method="lm", se=F, col="magenta")+
  geom_jitter(cex=.6,alpha=.5)+
  labs(x="Age(-6.5)", y="PIAT score")

#random intercepts, random slopes
summary(e2_m1<-lmer(PIAT ~ Age_c+ (Age_c|ID),data=dta_e2))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PIAT ~ Age_c + (Age_c | ID)
##    Data: dta_e2
## 
## REML criterion at convergence: 1819.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6539 -0.5413 -0.1497  0.3854  3.2811 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  ID       (Intercept) 11.427   3.380        
##           Age_c        4.486   2.118    0.22
##  Residual             27.043   5.200        
## Number of obs: 267, groups:  ID, 89
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)  21.1629     0.6177 87.9991   34.26   <2e-16 ***
## Age_c         5.0309     0.2973 88.0012   16.92   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr)
## Age_c -0.316

PIAT score=21.16 give or take 3.38(=sqrt(21.16)) + 5.03 give or take 2.12(=sqrt(4.486))* age_c

plot(e2_m1, pch=20, cex=.5, col="steelblue", 
     xlab = "Fitted values",
     ylab = "Pearson residuals")

Exercise 3

Each year, beginning at age 14, 82 teenagers completed a 4-item questionnaire assessing their alcohol consumption during the previous year. Using a 8-point scale (0 = “not al all, 8 =”every day") teenagers described the frequency with which they (1) drank beer or wine, (2) drank hard liquor, (3) had five or more drinks in a row, and (4) got drunk.

Two potential predictors of alcohol use are whether the teenager is a child of an alcoholic parent; and alcohol use among the teenager’s peers. The teenager used a 6-point scale to estimate the proportion of their friends who drank alcohol occasionally (item 1) or regularly (item 2). This was obtained during the first wave of data collection.

Source: Currant, P. et al. (1997). Reported in Singer, J., & Willet (2003). Applied Longitudinal Data Analysis. p. 76-77.

Column 1: Teenager ID Column 2: Whether the teenager is a child of a alcohlic parent Column 3: Sex (male = 1, female = 0) Column 4: Number of year since age 14 Column 5: Alcohol use of the teenager (sqrt-root of mean of 6 items) Column 6: Alcohol use among the teenager’s peers (sqrt-root of mean of 2 items) Column 7: Alcoholic parenet variable centered Column 8: Peer variable centered

dta_e3<- read.table("alcohol_use.txt", h=T)
head(dta_e3)

##   sid coa sex age14  alcuse    peer    cpeer  ccoa
## 1   1   1   0     0 1.73205 1.26491  0.24691 0.549
## 2   1   1   0     1 2.00000 1.26491  0.24691 0.549
## 3   1   1   0     2 2.00000 1.26491  0.24691 0.549
## 4   2   1   1     0 0.00000 0.89443 -0.12357 0.549
## 5   2   1   1     1 0.00000 0.89443 -0.12357 0.549
## 6   2   1   1     2 1.00000 0.89443 -0.12357 0.549

str(dta_e3)

## 'data.frame':    246 obs. of  8 variables:
##  $ sid   : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ coa   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ sex   : int  0 0 0 1 1 1 1 1 1 1 ...
##  $ age14 : int  0 1 2 0 1 2 0 1 2 0 ...
##  $ alcuse: num  1.73 2 2 0 0 ...
##  $ peer  : num  1.265 1.265 1.265 0.894 0.894 ...
##  $ cpeer : num  0.247 0.247 0.247 -0.124 -0.124 ...
##  $ ccoa  : num  0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...

dta_e3$sid<-factor(dta_e3$sid)
dta_e3$coa<-factor(dta_e3$coa)
dta_e3$sex<-factor(dta_e3$sex)
dta_e3$age14<-factor(dta_e3$age14)

summary(e3_m0<-lmer(alcuse~coa+peer*age14+(1|sid), data=dta_e3))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
##    Data: dta_e3
## 
## REML criterion at convergence: 622.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.46909 -0.65888  0.02567  0.51863  2.56829 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sid      (Intercept) 0.3373   0.5808  
##  Residual             0.4834   0.6952  
## Number of obs: 246, groups:  sid, 82
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)  -0.284160   0.180572 166.151585  -1.574 0.117468    
## coa1          0.565134   0.158782  79.000000   3.559 0.000633 ***
## peer          0.648244   0.139127 175.437485   4.659 6.25e-06 ***
## age141        0.341846   0.187127 160.000000   1.827 0.069592 .  
## age142        0.849373   0.187127 160.000000   4.539 1.11e-05 ***
## peer:age141  -0.008533   0.149775 160.000000  -0.057 0.954637    
## peer:age142  -0.302754   0.149775 160.000000  -2.021 0.044906 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) coa1   peer   age141 age142 pr:141
## coa1        -0.297                                   
## peer        -0.734 -0.127                            
## age141      -0.518  0.000  0.438                     
## age142      -0.518  0.000  0.438  0.500              
## peer:age141  0.422  0.000 -0.538 -0.814 -0.407       
## peer:age142  0.422  0.000 -0.538 -0.407 -0.814  0.500

summary(e3_m1<-lmer(alcuse~coa+peer*age14 +(1|age14)+(1|sid), data=dta_e3))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: alcuse ~ coa + peer * age14 + (1 | age14) + (1 | sid)
##    Data: dta_e3
## 
## REML criterion at convergence: 622.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.46909 -0.65888  0.02567  0.51863  2.56829 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sid      (Intercept) 0.33733  0.5808  
##  age14    (Intercept) 0.09478  0.3079  
##  Residual             0.48336  0.6952  
## Number of obs: 246, groups:  sid, 82; age14, 3
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)  -0.284160   0.356914 212.474160  -0.796 0.426830    
## coa1          0.565134   0.158782  79.000062   3.559 0.000633 ***
## peer          0.648244   0.139127 175.437652   4.659 6.25e-06 ***
## age141        0.341846   0.473898 159.999963   0.721 0.471747    
## age142        0.849373   0.473898 159.999963   1.792 0.074972 .  
## peer:age141  -0.008533   0.149775 159.999964  -0.057 0.954637    
## peer:age142  -0.302754   0.149775 159.999964  -2.021 0.044906 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) coa1   peer   age141 age142 pr:141
## coa1        -0.150                                   
## peer        -0.371 -0.127                            
## age141      -0.664  0.000  0.173                     
## age142      -0.664  0.000  0.173  0.500              
## peer:age141  0.214  0.000 -0.538 -0.322 -0.161       
## peer:age142  0.214  0.000 -0.538 -0.161 -0.322  0.500
## convergence code: 0
## Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

Homework_1012

yuwenchen

10/12/2020