The data consist of weight gains of 68 Asian children in a British community. Measurements of weight were recorded for children on up to five occasions visiting a clinic. The ages at which the measurements were taken are roughly to target examination dates of 6 weeks, then 8, 12, 27 months.
pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest)
fL <- "http://www.stata-press.com/data/mlmus3/asian.dta"
dta <- read.dta(fL)
dta$id <- factor(dta$id)
dta$gender <- factor(dta$gender, 1:2, labels = c("M", "F"))
dta$brthwt <- dta$brthwt/1000
dta$occ <- factor(dta$occ)
glimpse(dta)
## Rows: 198
## Columns: 6
## $ id <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, 4…
## $ occ <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, 1…
## $ age <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.1916…
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, 1…
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.155…
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, M…
head(dta)
## id occ age weight brthwt gender
## 1 45 1 0.1368925 5.171 4.140 M
## 2 45 2 0.6570842 10.860 4.140 M
## 3 45 3 1.2183436 13.150 4.140 M
## 4 45 4 1.4291581 13.200 4.140 M
## 5 45 5 2.2724161 15.880 4.140 M
## 6 258 1 0.1916496 5.300 3.155 F
ggplot(dta, aes(age, weight, group = id, color=gender))+
geom_point()+
geom_line() +
facet_wrap( ~ gender)+
labs(x = "Age (year)", y = "Weight (kg)") +
theme(legend.position = c(.9, .2))
ggplot(dta, aes(age, weight, color = gender))+
geom_point()+
stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
labs(x = "Age (year)", y = "Weight (kg)")+
theme(legend.position = c(.9, .2))
dta$age2 <- dta$age^2
# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age |id),
data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
##
## REML criterion at convergence: 496.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81353 -0.43471 -0.03652 0.45573 2.79813
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.1920 0.4381
## age 0.2702 0.5198 0.07
## Residual 0.3346 0.5784
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.02225 0.54924 57.65949 1.861 0.06782 .
## brthwt 0.87755 0.16616 53.16001 5.281 2.43e-06 ***
## genderF -0.51922 0.16698 52.23221 -3.109 0.00303 **
## age 7.73933 0.23674 135.03624 32.691 < 2e-16 ***
## age2 -1.67125 0.08762 114.78740 -19.074 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age
## brthwt -0.963
## genderF -0.240 0.095
## age -0.221 0.058 -0.003
## age2 0.199 -0.051 0.002 -0.927
# Type 3 tests, KR-method - This is better
afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
## Mixed Model Anova Table (Type 3 tests, KR-method)
##
## Model: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
## Effect df F p.value
## 1 brthwt 1, 62.42 26.79 *** <.001
## 2 gender 1, 61.58 9.31 ** .003
## 3 age 1, 141.54 1046.79 *** <.001
## 4 age2 1, 122.93 355.47 *** <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# construct piecewise lines defined by knots
dta$d1 <- dta$age - 0.4; dta$d1[dta$d1 < 0] <- 0
dta$d2 <- dta$age - 0.9; dta$d2[dta$d2 < 0] <- 0
dta$d3 <- dta$age - 1.5; dta$d3[dta$d3 < 0] <- 0
summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
## Data: dta
##
## REML criterion at convergence: 465
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81475 -0.41746 -0.01623 0.42323 2.51409
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.2476 0.4976
## age 0.3273 0.5721 -0.12
## Residual 0.2523 0.5023
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.4941 0.5600 65.7487 0.882 0.380826
## brthwt 0.8949 0.1617 51.4586 5.533 1.07e-06 ***
## genderF -0.5459 0.1627 50.8735 -3.356 0.001504 **
## age 10.3595 1.0792 83.2345 9.599 4.03e-15 ***
## d1 -6.1891 1.8004 80.6638 -3.438 0.000931 ***
## d2 -1.6641 1.3071 82.8518 -1.273 0.206547
## d3 -0.3684 0.9920 80.3757 -0.371 0.711367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age d1 d2
## brthwt -0.927
## genderF -0.231 0.099
## age -0.339 0.045 -0.002
## d1 0.313 -0.039 -0.001 -0.985
## d2 -0.239 0.032 0.012 0.816 -0.884
## d3 0.167 -0.031 -0.019 -0.533 0.606 -0.886
dtafm1 <- data.frame(dta, yhat=fitted(m1))
ggplot(dtafm1, aes(age, weight, group=id, color = gender))+
geom_point()+
geom_line(aes(age, yhat))+
facet_wrap(~ gender)+
labs(x = "Age (year)", y = "Weight (kg)")
quantile(dta$age)
## 0% 25% 50% 75% 100%
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012
summary(m0 <- lm(weight ~ age, data=dta))
##
## Call:
## lm(formula = weight ~ age, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2994 -1.0341 -0.2524 1.0947 4.2614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2021 0.1779 29.24 <2e-16 ***
## age 3.3640 0.1332 25.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared: 0.765, Adjusted R-squared: 0.7638
## F-statistic: 637.9 on 1 and 196 DF, p-value: < 2.2e-16
summary(m0z <- segmented::segmented(m0, seg.Z= ~ age,
psi=list(age=c(0.6, 1.4)),
control=seg.control(display=FALSE)))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6,
## 1.4)), control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.age 0.317 0.138
## psi2.age 2.300 0.046
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.262 1.391 1.626 0.106
## age 16.425 10.311 1.593 0.113
## U1.age -13.785 10.313 -1.337 NA
## U2.age -8.540 3.400 -2.512 NA
##
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528, Adjusted R-squared: 0.849
##
## Convergence attained in 5 iter. (rel. change 1.5162e-06)
plot(m0z)
with(dta, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()
#
library(lme4)
# 18 subjects (3hrs sleep) 10 observations on each, p64 of bates
# the "new" random effects (vs nlme) see Bates book 2010
# sleep deprivation 3hrs/night truckers
#
data(sleepstudy, package="lme4")
#
dim(sleepstudy)
## [1] 180 3
#
head(sleepstudy)
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## 6 414.6901 5 308
#
with(sleepstudy, table(Subject))
## Subject
## 308 309 310 330 331 332 333 334 335 337 349 350 351 352 369 370 371 372
## 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
#
lattice::xyplot(Reaction ~ Days | Subject, data=sleepstudy,
xlab="Days of sleep deprivation",
ylab="Average reaction time (ms)",
type=c("g","p","r"))
#
sleeplmList <- lmList(Reaction ~ Days |Subject, data = sleepstudy)
#
sleeplmList
## Call: lmList(formula = Reaction ~ Days | Subject, data = sleepstudy)
## Coefficients:
## (Intercept) Days
## 308 244.1927 21.764702
## 309 205.0549 2.261785
## 310 203.4842 6.114899
## 330 289.6851 3.008073
## 331 285.7390 5.266019
## 332 264.2516 9.566768
## 333 275.0191 9.142045
## 334 240.1629 12.253141
## 335 263.0347 -2.881034
## 337 290.1041 19.025974
## 349 215.1118 13.493933
## 350 225.8346 19.504017
## 351 261.1470 6.433498
## 352 276.3721 13.566549
## 369 254.9681 11.348109
## 370 210.4491 18.056151
## 371 253.6360 9.188445
## 372 267.0448 11.298073
##
## Degrees of freedom: 180 total; 144 residual
## Residual standard error: 25.59182
#mean int and slope match lmer Fixed effects results p.67
mean(coef(sleeplmList)[,1])
## [1] 251.4051
mean(coef(sleeplmList)[,2])
## [1] 10.46729
#these are too big as they should be,
#compare with variance random effects p.67
# mle subtracts off wobble in estimated indiv regressions Y on t
# - SSR/SST
var(coef(sleeplmList)[,1])
## [1] 838.3423
var(coef(sleeplmList)[,2])
## [1] 43.01034
#
quantile(coef(sleeplmList)[,1])
## 0% 25% 50% 75% 100%
## 203.4842 229.4167 258.0576 273.0255 290.1041
quantile(coef(sleeplmList)[,2])
## 0% 25% 50% 75% 100%
## -2.881034 6.194548 10.432421 13.548395 21.764702
#
stem(coef(sleeplmList)[,2])
##
## The decimal point is 1 digit(s) to the right of the |
##
## -0 | 3
## 0 | 23
## 0 | 56699
## 1 | 011234
## 1 | 89
## 2 | 02
#
cor(coef(sleeplmList)[,1], coef(sleeplmList)[,2])
## [1] -0.1375534
# first 2 do REML, second pair match
# Bates p.67 in doing MLE (REML FALSE)
#
sleeplmer <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
summary(sleeplmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.10 24.741
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 251.405 6.825 17.000 36.838 < 2e-16 ***
## Days 10.467 1.546 17.000 6.771 3.26e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
#
sleeplmer2 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy, REML=FALSE)
summary(sleeplmer2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## AIC BIC logLik deviance df.resid
## 1763.9 1783.1 -876.0 1751.9 174
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9416 -0.4656 0.0289 0.4636 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 565.48 23.780
## Days 32.68 5.717 0.08
## Residual 654.95 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 251.405 6.632 18.001 37.907 < 2e-16 ***
## Days 10.467 1.502 18.000 6.968 1.65e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
# The end
# load them
pacman::p_load(mlmRev, tidyverse, lme4, foreign, merTools, sjPlot)
# download a copy of data from the following link and save it to the data folder
fl<-"https://content.sph.harvard.edu/fitzmaur/ala2e/fat.dta"
# input data
dta <- read.dta(fl)
#重新命名變項
names(dta) <-c("ID", "Age", "Age_m", "Time", "PBF")
# show first 6 obs.
head(dta)
## ID Age Age_m Time PBF
## 1 1 9.32 13.19 -3.87 7.94
## 2 1 10.33 13.19 -2.86 15.65
## 3 1 11.24 13.19 -1.95 13.51
## 4 1 12.19 13.19 -1.00 23.23
## 5 1 13.24 13.19 0.05 10.52
## 6 1 14.24 13.19 1.05 20.45
# construct indicator variable for after menarche
dta <- dta %>%
mutate(ID = factor(ID),
Time_a = ifelse(Time > 0, Time, 0),
Menarche = as.factor(ifelse(Time > 0, "T", "F")))
# show data structure
str(dta)
## 'data.frame': 1049 obs. of 7 variables:
## $ ID : Factor w/ 162 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Age : num 9.32 10.33 11.24 12.19 13.24 ...
## $ Age_m : num 13.2 13.2 13.2 13.2 13.2 ...
## $ Time : num -3.87 -2.86 -1.95 -1 0.05 ...
## $ PBF : num 7.94 15.65 13.51 23.23 10.52 ...
## $ Time_a : num 0 0 0 0 0.05 ...
## $ Menarche: Factor w/ 2 levels "F","T": 1 1 1 1 2 2 1 1 1 1 ...
# black and white theme
theme_set(theme_minimal())
# 畫經期沒有來族群的散佈圖及回歸線
ggplot(subset(dta, Menarche=="F"),
aes(Time, PBF, group=ID)) +
#geom_point(col=8)+
geom_line(col=8)+
stat_smooth(aes(group=1),method='lm',formula=y~x)+
labs(x="Age (in year)",
y="% Body fat")
# baseline model if you want one
mb_0 <- lmer(PBF ~ Time + (1 | ID), data = subset(dta, Menarche=='F'))
tab_model(mb_0)
| PBF | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 20.20 | 19.06 – 21.34 | <0.001 |
| Time | -0.09 | -0.33 – 0.16 | 0.494 |
| Random Effects | |||
| σ2 | 10.02 | ||
| τ00 ID | 43.81 | ||
| ICC | 0.81 | ||
| N ID | 162 | ||
| Observations | 497 | ||
| Marginal R2 / Conditional R2 | 0.000 / 0.814 | ||
# 畫經期已經來族群的散佈圖及回歸線
ggplot(subset(dta, Menarche=="T"),
aes(Time, PBF, group=ID)) +
# geom_point()+
geom_line(col=8)+
stat_smooth(aes(group=1),method='lm',formula=y~x)+
labs(x="Age (in year)",
y="% Body fat")
#fixed Time and random effect in Time within subjects
ma_0 <- lmer(PBF ~ Time + (Time | ID), data=subset(dta, Menarche=='T'))
tab_model(ma_0)
| PBF | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 22.59 | 21.42 – 23.75 | <0.001 |
| Time | 1.97 | 1.68 – 2.25 | <0.001 |
| Random Effects | |||
| σ2 | 7.82 | ||
| τ00 ID | 46.21 | ||
| τ11 ID.Time | 1.42 | ||
| ρ01 ID | -0.55 | ||
| ICC | 0.82 | ||
| N ID | 160 | ||
| Observations | 552 | ||
| Marginal R2 / Conditional R2 | 0.123 / 0.844 | ||
betas_a <- na.omit(coef(lmList(PBF ~ Time | ID, data=subset(dta, Menarche=='T'))))
car::dataEllipse(betas_a[,1],betas_a[,2])
m4<-lmer(PBF~Time+Time_a+(Time+Time_a|ID), data=dta)
dta_full <- dta %>% mutate(yhat=fitted(m4))
# individual connected lines different color for before and after add group regression lines
ggplot(dta_full, aes(Time, yhat, color = Menarche)) +
geom_line(aes(group = ID)) +
geom_point(alpha = 0.5) +
stat_smooth(method="lm", aes(group = Menarche)) +
guides(color = F) +
labs(x = "Age relative to menarche (in years)",
y = "Percent body fat")
# model with 3 random effects associated with an individual
summary(m1 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PBF ~ Time + Time_a + (Time + Time_a | ID)
## Data: dta
##
## REML criterion at convergence: 6062.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7742 -0.5900 -0.0359 0.5946 3.3798
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 45.942 6.778
## Time 1.631 1.277 0.29
## Time_a 2.750 1.658 -0.54 -0.83
## Residual 9.473 3.078
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 21.3614 0.5646 161.5610 37.837 < 2e-16 ***
## Time 0.4171 0.1572 108.4614 2.654 0.00915 **
## Time_a 2.0471 0.2280 132.6773 8.980 2.32e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Time
## Time 0.351
## Time_a -0.515 -0.872
# baseline model if you want one
summary(m0 <- lmer(PBF ~ Time + (Time | ID), data=dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PBF ~ Time + (Time | ID)
## Data: dta
##
## REML criterion at convergence: 6196.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7577 -0.5961 0.0221 0.6169 3.0981
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 38.2178 6.1821
## Time 0.6938 0.8329 -0.24
## Residual 11.6215 3.4090
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 23.04978 0.49985 159.73871 46.11 <2e-16 ***
## Time 1.55480 0.08484 145.73759 18.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Time -0.199
## convergence code: 0
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
# estimate random effects by simulation
m1_re <- REsim(m1)
# plot for random effects
plotREsim(m1_re)
# residual plot
plot(m1, resid(., type = "pearson") ~ fitted(.),
abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")
dta_e1<- read.table("visual_search.csv", h=T, sep = ",")
head(dta_e1)
## Participant Dx Set.Size RT
## 1 9129 Control 1 786.50
## 2 9051 Control 1 935.83
## 3 9126 Control 1 750.83
## 4 9171* Control 1 1129.50
## 5 9176 Control 1 1211.33
## 6 9167 Control 1 1178.83
str(dta_e1)
## 'data.frame': 132 obs. of 4 variables:
## $ Participant: chr "9129" "9051" "9126" "9171*" ...
## $ Dx : chr "Control" "Control" "Control" "Control" ...
## $ Set.Size : int 1 1 1 1 1 1 1 1 1 1 ...
## $ RT : num 786 936 751 1130 1211 ...
head(dta_e1)
## Participant Dx Set.Size RT
## 1 9129 Control 1 786.50
## 2 9051 Control 1 935.83
## 3 9126 Control 1 750.83
## 4 9171* Control 1 1129.50
## 5 9176 Control 1 1211.33
## 6 9167 Control 1 1178.83
ggplot(dta_e1, aes(Set.Size, RT))+
geom_line(aes(group=Participant), alpha=.5)+
stat_smooth(aes(group=Dx), method="lm", formula=y~x)+
geom_point(alpha=.5)+
facet_wrap(~Dx)+
labs(y="Response Time(ms)",
x="Set Si")
summary(e1_m0<-lmer(RT~Set.Size*Dx+(Set.Size|Participant), data=dta_e1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ Set.Size * Dx + (Set.Size | Participant)
## Data: dta_e1
##
## REML criterion at convergence: 2186.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7577 -0.3130 -0.0750 0.3117 6.1372
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Participant (Intercept) 657489.0 810.86
## Set.Size 407.4 20.18 1.00
## Residual 772445.8 878.89
## Number of obs: 132, groups: Participant, 33
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2078.75 270.97 33.37 7.672 7.27e-09 ***
## Set.Size 73.49 11.40 51.35 6.446 3.97e-08 ***
## DxControl -1106.05 366.89 33.37 -3.015 0.00489 **
## Set.Size:DxControl -21.74 15.44 51.35 -1.408 0.16514
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Set.Sz DxCntr
## Set.Size -0.071
## DxControl -0.739 0.053
## St.Sz:DxCnt 0.053 -0.739 -0.071
## convergence code: 0
## boundary (singular) fit: see ?isSingular
#set dodge amount
pd<- position_dodge(width=.2)
ggplot(dta_e1, aes(Set.Size, RT, shape=Dx, linetype=Dx))+
stat_summary(fun.y=mean, geom="line", position=pd)+
stat_summary(fun.y = mean, geom="point", position=pd)+
stat_summary(fun.data=mean_se, geom="errorbar",
position=pd, linetype="solid", width=.5)+
labs(x="Set Size",
y="Response Time(ms)")+
theme(legend.justification = c(0,1), legend.position = c(0,1), legend.background = element_rect(fill="white", color="black"))
anova(e1_m0)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Set.Size 50844340 50844340 1 51.354 65.8225 9.214e-11 ***
## Dx 7020030 7020030 1 33.372 9.0881 0.004888 **
## Set.Size:Dx 1531430 1531430 1 51.354 1.9826 0.165145
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m0_fe<-FEsim(e1_m0, 1000)
plotFEsim(m0_fe)
plot(e1_m0, resid(.,type="pearson")~fitted(.)|Dx,
layout=c(2,1), pch=20, cex=.8,, abline=0,
xlab="Fitted values", ylab="Pearson residuals")
Eighty-nine children’s scores on the reading subtest of the Peabody Individual Achievement Test (PIAT) were recorded. Each child was 6 years old in 1986, the first year of data collection. Source: Singer, J.D., & Willet, J.B. (2003), Applied Longitudinal Data Analysis. p. 140-141.
Column 1: Child ID Column 2: Wave (1 = 1986, 2 = 1988, 3 = 1990) Column 3: Child’s expected age on each measurement occasion Column 4: Age in years Column 5: Child’s PIAT score
dta_e2<- read.table("reading_piat.txt", h=T)
head(dta_e2)
## ID Wave Age_G Age PIAT
## 1 1 1 6.5 6.0000 18
## 2 1 2 8.5 8.3333 35
## 3 1 3 10.5 10.3333 59
## 4 2 1 6.5 6.0000 18
## 5 2 2 8.5 8.5000 25
## 6 2 3 10.5 10.5833 28
str(dta_e2)
## 'data.frame': 267 obs. of 5 variables:
## $ ID : int 1 1 1 2 2 2 3 3 3 4 ...
## $ Wave : int 1 2 3 1 2 3 1 2 3 1 ...
## $ Age_G: num 6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
## $ Age : num 6 8.33 10.33 6 8.5 ...
## $ PIAT : int 18 35 59 18 25 28 18 23 32 18 ...
dta_e2$Wave<-factor(dta_e2$Wave)
dta_e2$Age_c <-dta_e2$Age_G-6.5
ggplot(dta_e2, aes(Age_c, PIAT, group=ID))+
stat_smooth(method="rlm", se=F, lwd=.1)+
stat_smooth(aes(group=1), method="lm", se=F, col="magenta")+
geom_jitter(cex=.6,alpha=.5)+
labs(x="Age(-6.5)", y="PIAT score")
#random intercepts, random slopes
summary(e2_m1<-lmer(PIAT ~ Age_c+ (Age_c|ID),data=dta_e2))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PIAT ~ Age_c + (Age_c | ID)
## Data: dta_e2
##
## REML criterion at convergence: 1819.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6539 -0.5413 -0.1497 0.3854 3.2811
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 11.427 3.380
## Age_c 4.486 2.118 0.22
## Residual 27.043 5.200
## Number of obs: 267, groups: ID, 89
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 21.1629 0.6177 87.9991 34.26 <2e-16 ***
## Age_c 5.0309 0.2973 88.0012 16.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Age_c -0.316
PIAT score=21.16 give or take 3.38(=sqrt(21.16)) + 5.03 give or take 2.12(=sqrt(4.486))* age_c
plot(e2_m1, pch=20, cex=.5, col="steelblue",
xlab = "Fitted values",
ylab = "Pearson residuals")
Each year, beginning at age 14, 82 teenagers completed a 4-item questionnaire assessing their alcohol consumption during the previous year. Using a 8-point scale (0 = “not al all, 8 =”every day") teenagers described the frequency with which they (1) drank beer or wine, (2) drank hard liquor, (3) had five or more drinks in a row, and (4) got drunk.
Two potential predictors of alcohol use are whether the teenager is a child of an alcoholic parent; and alcohol use among the teenager’s peers. The teenager used a 6-point scale to estimate the proportion of their friends who drank alcohol occasionally (item 1) or regularly (item 2). This was obtained during the first wave of data collection.
Source: Currant, P. et al. (1997). Reported in Singer, J., & Willet (2003). Applied Longitudinal Data Analysis. p. 76-77.
Column 1: Teenager ID Column 2: Whether the teenager is a child of a alcohlic parent Column 3: Sex (male = 1, female = 0) Column 4: Number of year since age 14 Column 5: Alcohol use of the teenager (sqrt-root of mean of 6 items) Column 6: Alcohol use among the teenager’s peers (sqrt-root of mean of 2 items) Column 7: Alcoholic parenet variable centered Column 8: Peer variable centered
dta_e3<- read.table("alcohol_use.txt", h=T)
head(dta_e3)
## sid coa sex age14 alcuse peer cpeer ccoa
## 1 1 1 0 0 1.73205 1.26491 0.24691 0.549
## 2 1 1 0 1 2.00000 1.26491 0.24691 0.549
## 3 1 1 0 2 2.00000 1.26491 0.24691 0.549
## 4 2 1 1 0 0.00000 0.89443 -0.12357 0.549
## 5 2 1 1 1 0.00000 0.89443 -0.12357 0.549
## 6 2 1 1 2 1.00000 0.89443 -0.12357 0.549
str(dta_e3)
## 'data.frame': 246 obs. of 8 variables:
## $ sid : int 1 1 1 2 2 2 3 3 3 4 ...
## $ coa : int 1 1 1 1 1 1 1 1 1 1 ...
## $ sex : int 0 0 0 1 1 1 1 1 1 1 ...
## $ age14 : int 0 1 2 0 1 2 0 1 2 0 ...
## $ alcuse: num 1.73 2 2 0 0 ...
## $ peer : num 1.265 1.265 1.265 0.894 0.894 ...
## $ cpeer : num 0.247 0.247 0.247 -0.124 -0.124 ...
## $ ccoa : num 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...
dta_e3$sid<-factor(dta_e3$sid)
dta_e3$coa<-factor(dta_e3$coa)
dta_e3$sex<-factor(dta_e3$sex)
dta_e3$age14<-factor(dta_e3$age14)
summary(e3_m0<-lmer(alcuse~coa+peer*age14+(1|sid), data=dta_e3))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
## Data: dta_e3
##
## REML criterion at convergence: 622.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.46909 -0.65888 0.02567 0.51863 2.56829
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 0.3373 0.5808
## Residual 0.4834 0.6952
## Number of obs: 246, groups: sid, 82
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.284160 0.180572 166.151585 -1.574 0.117468
## coa1 0.565134 0.158782 79.000000 3.559 0.000633 ***
## peer 0.648244 0.139127 175.437485 4.659 6.25e-06 ***
## age141 0.341846 0.187127 160.000000 1.827 0.069592 .
## age142 0.849373 0.187127 160.000000 4.539 1.11e-05 ***
## peer:age141 -0.008533 0.149775 160.000000 -0.057 0.954637
## peer:age142 -0.302754 0.149775 160.000000 -2.021 0.044906 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) coa1 peer age141 age142 pr:141
## coa1 -0.297
## peer -0.734 -0.127
## age141 -0.518 0.000 0.438
## age142 -0.518 0.000 0.438 0.500
## peer:age141 0.422 0.000 -0.538 -0.814 -0.407
## peer:age142 0.422 0.000 -0.538 -0.407 -0.814 0.500
summary(e3_m1<-lmer(alcuse~coa+peer*age14 +(1|age14)+(1|sid), data=dta_e3))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: alcuse ~ coa + peer * age14 + (1 | age14) + (1 | sid)
## Data: dta_e3
##
## REML criterion at convergence: 622.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.46909 -0.65888 0.02567 0.51863 2.56829
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 0.33733 0.5808
## age14 (Intercept) 0.09478 0.3079
## Residual 0.48336 0.6952
## Number of obs: 246, groups: sid, 82; age14, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.284160 0.356914 212.474160 -0.796 0.426830
## coa1 0.565134 0.158782 79.000062 3.559 0.000633 ***
## peer 0.648244 0.139127 175.437652 4.659 6.25e-06 ***
## age141 0.341846 0.473898 159.999963 0.721 0.471747
## age142 0.849373 0.473898 159.999963 1.792 0.074972 .
## peer:age141 -0.008533 0.149775 159.999964 -0.057 0.954637
## peer:age142 -0.302754 0.149775 159.999964 -2.021 0.044906 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) coa1 peer age141 age142 pr:141
## coa1 -0.150
## peer -0.371 -0.127
## age141 -0.664 0.000 0.173
## age142 -0.664 0.000 0.173 0.500
## peer:age141 0.214 0.000 -0.538 -0.322 -0.161
## peer:age142 0.214 0.000 -0.538 -0.161 -0.322 0.500
## convergence code: 0
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?