Column 1: Participant ID
Column 2: Group ID
Column 3: Set size
Column 4: Response time
# Loading package
pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest, ggplot2, Rmisc, car)
# data input
dta1 <- read.csv("visual_search.csv")
head(dta1)
## Participant Dx Set.Size RT
## 1 9129 Control 1 786.50
## 2 9051 Control 1 935.83
## 3 9126 Control 1 750.83
## 4 9171* Control 1 1129.50
## 5 9176 Control 1 1211.33
## 6 9167 Control 1 1178.83
dta1 <- dta1 %>%
mutate(Participant = factor(Participant),
Dx = factor(Dx))
str(dta1)
## 'data.frame': 132 obs. of 4 variables:
## $ Participant: Factor w/ 33 levels "0042","0044",..: 19 16 18 25 28 22 24 23 26 27 ...
## $ Dx : Factor w/ 2 levels "Aphasic","Control": 2 2 2 2 2 2 2 2 2 2 ...
## $ Set.Size : int 1 1 1 1 1 1 1 1 1 1 ...
## $ RT : num 786 936 751 1130 1211 ...
dta1_stat <- summarySEwithin(dta1,
measurevar="RT",
betweenvars = "Dx",
withinvars="Set.Size",
idvar="Participant",
na.rm=FALSE, conf.interval=.95)
## Automatically converting the following non-factors to factors: Set.Size
ggplot(dta1, aes(Set.Size, RT)) +
geom_line(aes(group = Participant), alpha = .2) +
stat_smooth(method = "lm",
formula = y ~ x,
se = TRUE,
lwd = rel(.5)) +
geom_point (size = rel(.5)) +
facet_wrap ( ~ Dx) +
labs(x = "Set size",
y = "Reaction time (ms)")
wrong
ggplot(dta1_stat, aes(Set.Size, RT)) +
geom_errorbar(aes(ymin=RT-ci, ymax=RT+ci), width=.1, show.legend =TRUE) +
geom_point(dta1_stat, mapping=aes(x=Set.Size, y=RT), size = rel(.1)) +
stat_smooth(aes(group=Dx,linetype=Dx),
method="lm",
formula= y ~ x,
se=FALSE,
size=rel(.5)) +
labs(x = "Set size",
y = "Reaction time (ms)")
pd <- position_dodge(width=.2)
p1 <- ggplot(dta1,
aes(Set.Size, RT,
shape=Dx, linetype=Dx)) +
geom_smooth(method='lm', formula= y~x, se=FALSE)+
stat_summary(fun="mean", geom="line", position=pd) +
stat_summary(fun="mean", geom="point", position=pd) +
stat_summary(fun.data=mean_se,
geom="errorbar",
position=pd,
linetype="solid",
width=0.5) +
scale_shape_manual(values = c(1, 2, 16)) +
labs(x="Set.Size",
y="Reaction time (ms)",
linetype="Group", shape="Group") +
scale_x_continuous(breaks=seq(0,30,by=10))+
theme_minimal() +
theme(legend.justification=c(0.1, 1),
legend.position=c(0.1, 1),
legend.background=element_rect(fill="white",
color="black"))
suppressWarnings(suppressMessages(ggplot2:::print.ggplot(p1)))
summary(m0_r <- lmer(RT ~ Set.Size | Participant, data = dta1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: RT ~ Set.Size | Participant
## Data: dta1
##
## REML criterion at convergence: 2260.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5482 -0.4199 -0.0351 0.2632 6.0022
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Participant (Intercept) 909394 953.6
## Set.Size 3982 63.1 0.61
## Residual 810733 900.4
## Number of obs: 132, groups: Participant, 33
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1301.69 200.81 31.99 6.482 2.71e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0 <- lm(RT ~ Set.Size*Dx, data = dta1))
##
## Call:
## lm(formula = RT ~ Set.Size * Dx, data = dta1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3869.8 -651.7 -198.2 300.1 9020.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2078.75 271.72 7.650 4.23e-12 ***
## Set.Size 73.49 16.02 4.588 1.05e-05 ***
## DxControl -1106.05 367.92 -3.006 0.00318 **
## Set.Size:DxControl -21.74 21.69 -1.002 0.31813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1388 on 128 degrees of freedom
## Multiple R-squared: 0.3404, Adjusted R-squared: 0.325
## F-statistic: 22.02 on 3 and 128 DF, p-value: 1.456e-11
plot(m0_r, resid(., scaled=TRUE) ~ fitted(.) | Dx,
xlab="Fitted values", ylab= "Pearson residuals",
abline=0, lty=3)
Column 1: Child ID
Column 2: Wave (1 = 1986, 2 = 1988, 3 = 1990)
Column 3: Child’s expected age on each measurement occasion
Column 4: Age in years
Column 5: Child’s PIAT score
# input
dta2 <- read.table("reading_piat.txt", header = T)
# top six row
head(dta2)
## ID Wave Age_G Age PIAT
## 1 1 1 6.5 6.0000 18
## 2 1 2 8.5 8.3333 35
## 3 1 3 10.5 10.3333 59
## 4 2 1 6.5 6.0000 18
## 5 2 2 8.5 8.5000 25
## 6 2 3 10.5 10.5833 28
str(dta2)
## 'data.frame': 267 obs. of 5 variables:
## $ ID : int 1 1 1 2 2 2 3 3 3 4 ...
## $ Wave : int 1 2 3 1 2 3 1 2 3 1 ...
## $ Age_G: num 6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
## $ Age : num 6 8.33 10.33 6 8.5 ...
## $ PIAT : int 18 35 59 18 25 28 18 23 32 18 ...
dta2 <- dta2 %>%
mutate(Age_c = Age - 6.5)
ggplot(dta2, aes(Age, PIAT, group = ID, color = Wave))+
geom_point(size = rel(1))+
geom_line() +
labs(x = "Age (year)", y = "Score") +
theme(legend.position = c(.9, .2))
Model:
Scoreij = b0i + b1i × (ageij - 6.5) + εij,
b0i = β0i + U0i, b1i = β1i + U1i, i = 1, 2, …, 89; j = 1, 2, 3,
where εij follows a standard normal distribution with mean zero and SD sigma and (U0i, U1i) is a bivariate normal distribution with a zero mean vector with an unknown covariance matrix.
summary(m1 <- lmer(PIAT ~ Age_c + (Age_c | ID), data = dta2))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PIAT ~ Age_c + (Age_c | ID)
## Data: dta2
##
## REML criterion at convergence: 1804.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0042 -0.4893 -0.1383 0.4067 3.6892
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 5.507 2.347
## Age_c 3.377 1.838 0.53
## Residual 27.400 5.235
## Number of obs: 267, groups: ID, 89
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 21.0621 0.5630 75.7425 37.41 <2e-16 ***
## Age_c 4.5399 0.2622 87.7555 17.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Age_c -0.288
Problem: Replicate the results of analysis in the alcohol use example.
Data: Each year, beginning at age 14, 82 teenagers completed a 4-item questionnaire assessing their alcohol consumption during the previous year. Using a 8-point scale (0 = “not al all, 8 =”every day") teenagers described the frequency with which they
Two potential predictors of alcohol use are whether the teenager is a child of an alcoholic parent; and alcohol use among the teenager’s peers. The teenager used a 6-point scale to estimate the proportion of their friends who drank alcohol occasionally (item 1) or regularly (item 2). This was obtained during the first wave of data collection.
Column 1: Teenager ID
Column 2: Whether the teenager is a child of a alcohlic parent
Column 3: Sex (male = 1, female = 0)
Column 4: Number of year since age 14
Column 5: Alcohol use of the teenager (sqrt-root of mean of 6 items)
Column 6: Alcohol use among the teenager’s peers (sqrt-root of mean of 2 items)
Column 7: Alcoholic parenet variable centered
Column 8: Peer variable centered
# input
dta3 <- read.table("alcohol_use.txt", header = T)
# top six row
head(dta3)
## sid coa sex age14 alcuse peer cpeer ccoa
## 1 1 1 0 0 1.73205 1.26491 0.24691 0.549
## 2 1 1 0 1 2.00000 1.26491 0.24691 0.549
## 3 1 1 0 2 2.00000 1.26491 0.24691 0.549
## 4 2 1 1 0 0.00000 0.89443 -0.12357 0.549
## 5 2 1 1 1 0.00000 0.89443 -0.12357 0.549
## 6 2 1 1 2 1.00000 0.89443 -0.12357 0.549
dta3 <- dta3 %>%
mutate(sid = factor(sid),
coa = factor(coa),
sex = factor(sex),
age14 = factor(age14))
str(dta3)
## 'data.frame': 246 obs. of 8 variables:
## $ sid : Factor w/ 82 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ coa : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ sex : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 2 2 2 ...
## $ age14 : Factor w/ 3 levels "0","1","2": 1 2 3 1 2 3 1 2 3 1 ...
## $ alcuse: num 1.73 2 2 0 0 ...
## $ peer : num 1.265 1.265 1.265 0.894 0.894 ...
## $ cpeer : num 0.247 0.247 0.247 -0.124 -0.124 ...
## $ ccoa : num 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...
ggplot(dta3, aes(age14, alcuse, color = sex))+
geom_point()+
facet_wrap( ~ coa)+
labs(x = "Age (after 14)", y = "Alcohol use") +
theme(legend.position = "bottom")
summary(m1 <- lmer(alcuse ~ coa + peer*age14 + (1 | sid), data = dta3))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
## Data: dta3
##
## REML criterion at convergence: 622.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.46909 -0.65888 0.02567 0.51863 2.56829
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 0.3373 0.5808
## Residual 0.4834 0.6952
## Number of obs: 246, groups: sid, 82
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.284160 0.180572 166.151585 -1.574 0.117468
## coa1 0.565134 0.158782 79.000000 3.559 0.000633 ***
## peer 0.648244 0.139127 175.437485 4.659 6.25e-06 ***
## age141 0.341846 0.187127 160.000000 1.827 0.069592 .
## age142 0.849373 0.187127 160.000000 4.539 1.11e-05 ***
## peer:age141 -0.008533 0.149775 160.000000 -0.057 0.954637
## peer:age142 -0.302754 0.149775 160.000000 -2.021 0.044906 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) coa1 peer age141 age142 pr:141
## coa1 -0.297
## peer -0.734 -0.127
## age141 -0.518 0.000 0.438
## age142 -0.518 0.000 0.438 0.500
## peer:age141 0.422 0.000 -0.538 -0.814 -0.407
## peer:age142 0.422 0.000 -0.538 -0.407 -0.814 0.500
summary(m2 <- lmer(alcuse ~ coa + peer*age14 + (1 | age14) + (1 | sid), data = dta3))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Warning in as_lmerModLT(model, devfun): Model may not have converged with 1
## eigenvalue close to zero: 1.9e-09
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: alcuse ~ coa + peer * age14 + (1 | age14) + (1 | sid)
## Data: dta3
##
## REML criterion at convergence: 622.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.46909 -0.65888 0.02567 0.51863 2.56829
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 0.33733 0.5808
## age14 (Intercept) 0.09478 0.3079
## Residual 0.48336 0.6952
## Number of obs: 246, groups: sid, 82; age14, 3
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.284160 0.356914 212.474160 -0.796 0.426830
## coa1 0.565134 0.158782 79.000062 3.559 0.000633 ***
## peer 0.648244 0.139127 175.437652 4.659 6.25e-06 ***
## age141 0.341846 0.473898 159.999963 0.721 0.471747
## age142 0.849373 0.473898 159.999963 1.792 0.074972 .
## peer:age141 -0.008533 0.149775 159.999964 -0.057 0.954637
## peer:age142 -0.302754 0.149775 159.999964 -2.021 0.044906 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) coa1 peer age141 age142 pr:141
## coa1 -0.150
## peer -0.371 -0.127
## age141 -0.664 0.000 0.173
## age142 -0.664 0.000 0.173 0.500
## peer:age141 0.214 0.000 -0.538 -0.322 -0.161
## peer:age142 0.214 0.000 -0.538 -0.161 -0.322 0.500
## convergence code: 0
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?