p = c("dplyr", "magrittr", "lme4", "ggplot2","segmented", "gridExtra")
lapply(p, library, character.only = TRUE)
dat = read.table("data/reading_piat.txt", header = TRUE)
str(dat)
## 'data.frame': 267 obs. of 5 variables:
## $ ID : int 1 1 1 2 2 2 3 3 3 4 ...
## $ Wave : int 1 2 3 1 2 3 1 2 3 1 ...
## $ Age_G: num 6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
## $ Age : num 6 8.33 10.33 6 8.5 ...
## $ PIAT : int 18 35 59 18 25 28 18 23 32 18 ...
head(dat,10)
## ID Wave Age_G Age PIAT
## 1 1 1 6.5 6.0000 18
## 2 1 2 8.5 8.3333 35
## 3 1 3 10.5 10.3333 59
## 4 2 1 6.5 6.0000 18
## 5 2 2 8.5 8.5000 25
## 6 2 3 10.5 10.5833 28
## 7 3 1 6.5 6.0833 18
## 8 3 2 8.5 8.4167 23
## 9 3 3 10.5 10.4167 32
## 10 4 1 6.5 6.0000 18
#Check missing data
sum(sapply(dat, is.na))
## [1] 0
#reset age variable
dat %<>% mutate(age_minus = Age_G-6.5)
dat$Wave %<>% as.factor
#Plot the raw data based on individual level
ggplot(dat, aes(x = age_minus, y = PIAT, group = ID))+
geom_point(alpha = 0.5)+
geom_line(size = 0.5)+
labs(x = "Years after 6.5 years old", y = "PIAT Score")+
theme_bw()
#According to the model, each subject has its own intercept and slope
summary(m1 <- lmer(PIAT~age_minus+(1+age_minus|ID), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: PIAT ~ age_minus + (1 + age_minus | ID)
## Data: dat
##
## REML criterion at convergence: 1819.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6539 -0.5413 -0.1497 0.3854 3.2811
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 11.427 3.380
## age_minus 4.486 2.118 0.22
## Residual 27.043 5.200
## Number of obs: 267, groups: ID, 89
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 21.1629 0.6177 34.26
## age_minus 5.0309 0.2973 16.92
##
## Correlation of Fixed Effects:
## (Intr)
## age_minus -0.316
#residuals plot
plot(m1, xlab = "Fitted values", ylab = "Standardized residuals")
data("mtcars", package = "datasets")
dat = mtcars
str(dat)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
#Take a look
ggplot(dat, aes(x = wt, y = mpg))+
geom_point()+
theme_bw()
#Overall lm
summary(m0 <- lm(mpg~wt, data = dat))
##
## Call:
## lm(formula = mpg ~ wt, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
#Try to find the possible cut points: median, mean and 3QD
summary(dat$wt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.513 2.581 3.325 3.217 3.610 5.424
#Using median as cut point
dat %<>% mutate(wt_larger = ifelse(wt<3.325,0,wt-3.325),
index = as.factor(ifelse(wt<3.325, "lighter", "heavier")))
ggplot(dat, aes(x = wt, y = mpg))+
geom_point()+
stat_smooth(aes(group = index), method = "lm")+
theme_bw()
summary(m1 <- segmented(m0, seg.Z = ~wt, psi = 3.325))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3.325)
##
## Estimated Break-Point(s):
## Est. St.Err
## 3.595 0.346
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.5479 2.4221 17.566 < 2e-16 ***
## wt -7.2840 0.8369 -8.704 1.88e-09 ***
## U1.wt 4.7917 1.5495 3.092 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.681 on 28 degrees of freedom
## Multiple R-Squared: 0.8213, Adjusted R-squared: 0.8021
##
## Convergence attained in 4 iterations with relative change 2.851926e-16
anova(m0,m1)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 278.32
## 2 28 201.27 2 77.055 5.3599 0.01069 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The performance of piecewise regression model is better than overall linear regression model
Try to use 3QD as cut point:
#75%
summary(dat$wt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.513 2.581 3.325 3.217 3.610 5.424
dat %<>% mutate(wt_75 = ifelse(wt<3.61,0,wt-3.61),
index_75 = as.factor(ifelse(wt<3.61, "lighter", "heavier")))
ggplot(dat, aes(x = wt, y = mpg))+
geom_point()+
stat_smooth(aes(group = index_75), method = "lm")+
theme_bw()
summary(m2 <- segmented(m0, seg.Z = ~wt, psi = 3.61))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3.61)
##
## Estimated Break-Point(s):
## Est. St.Err
## 3.573 0.350
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.6101 2.4194 17.612 < 2e-16 ***
## wt -7.3103 0.8359 -8.745 1.7e-09 ***
## U1.wt 4.7721 1.5477 3.083 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.678 on 28 degrees of freedom
## Multiple R-Squared: 0.8217, Adjusted R-squared: 0.8026
##
## Convergence attained in 2 iterations with relative change 1.425963e-16
anova(m0,m2)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 278.32
## 2 28 200.82 2 77.506 5.4034 0.01036 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1,m2)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt + U1.wt + psi1.wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 28 201.27
## 2 28 200.82 0 0.45183
Its performance is not better than using median as cut point.
Try to use mean as cut point:
summary(dat$wt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.513 2.581 3.325 3.217 3.610 5.424
dat %<>% mutate(wt_mean = ifelse(wt<3.217,0,wt-3.217),
index_mean = as.factor(ifelse(wt<3.217, "lighter", "heavier")))
ggplot(dat, aes(x = wt, y = mpg))+
geom_point()+
stat_smooth(aes(group = index_mean), method = "lm")+
theme_bw()
summary(m3 <- segmented(m0, seg.Z = ~wt, psi = 3.217))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3.217)
##
## Estimated Break-Point(s):
## Est. St.Err
## 3.592 0.347
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.5569 2.4217 17.573 < 2e-16 ***
## wt -7.2877 0.8367 -8.710 1.85e-09 ***
## U1.wt 4.7892 1.5492 3.091 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.681 on 28 degrees of freedom
## Multiple R-Squared: 0.8213, Adjusted R-squared: 0.8022
##
## Convergence attained in 7 iterations with relative change 1.422492e-16
anova(m3,m0)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt + U1.wt + psi1.wt
## Model 2: mpg ~ wt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 28 201.20
## 2 30 278.32 -2 -77.121 5.3663 0.01064 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m3,m1)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt + U1.wt + psi1.wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 28 201.20
## 2 28 201.27 0 -0.066935
Using median as cut point is still better than other models.
Through the results, using median as cut point to conduct piecewise regression is the best model.
dat = read.csv("data/visual_search.csv", header = TRUE)
str(dat)
## 'data.frame': 132 obs. of 4 variables:
## $ Participant: Factor w/ 33 levels "0042","0044",..: 19 16 18 25 28 22 24 23 26 27 ...
## $ Dx : Factor w/ 2 levels "Aphasic","Control": 2 2 2 2 2 2 2 2 2 2 ...
## $ Set.Size : int 1 1 1 1 1 1 1 1 1 1 ...
## $ RT : num 786 936 751 1130 1211 ...
#check if any missing data exists
sum(sapply(dat, is.na))
## [1] 0
#The first Plot
ggplot(dat, aes(x = Set.Size, y = RT, group = Participant))+
geom_point()+
geom_line()+
stat_smooth(aes(group = Dx), method = "lm", formula = y~x)+
facet_wrap(~Dx)+
theme_bw()+
labs(x = "Set Size", y = "Response Time (ms)")
#Allow each participant has an individual intercept and slope:
summary(m1 <- lmer(RT~Set.Size*Dx+(Set.Size|Participant), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: RT ~ Set.Size * Dx + (Set.Size | Participant)
## Data: dat
##
## REML criterion at convergence: 2186.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7578 -0.3130 -0.0750 0.3117 6.1372
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Participant (Intercept) 657552.9 810.90
## Set.Size 407.4 20.18 1.00
## Residual 772433.1 878.88
## Number of obs: 132, groups: Participant, 33
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2078.75 270.98 7.671
## Set.Size 73.49 11.40 6.446
## DxControl -1106.05 366.90 -3.015
## Set.Size:DxControl -21.74 15.44 -1.408
##
## Correlation of Fixed Effects:
## (Intr) Set.Sz DxCntr
## Set.Size -0.071
## DxControl -0.739 0.053
## St.Sz:DxCnt 0.053 -0.739 -0.071
#The second plot
ggplot(dat, aes(x = Set.Size, y = RT, shape = Dx, linetype = Dx))+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.data = mean_se, geom = "errorbar", linetype = "solid", width = 0.5)+
scale_shape_manual(values = c(1,5,15,30))+
labs(x = "Set Size", y = "Response Time (ms)", shape = "Group", linetype = "Group")+
theme_bw()+
theme(legend.position = c(0.18,0.8),
legend.background = element_rect(fill = "white", color = "black"))
#The third plot:pearson residuals plot
plot(m1, resid(.,type = "pearson")~fitted(.)|Dx,
layout=c(2,1), abline = 0,
xlab = "Fitted Values", ylab = "Pearson Residuals", pch = 20, cex = 0.8, aspect = 1.2)
dat = read.table("data/alcohol_use.txt", header = TRUE)
str(dat)
## 'data.frame': 246 obs. of 8 variables:
## $ sid : int 1 1 1 2 2 2 3 3 3 4 ...
## $ coa : int 1 1 1 1 1 1 1 1 1 1 ...
## $ sex : int 0 0 0 1 1 1 1 1 1 1 ...
## $ age14 : int 0 1 2 0 1 2 0 1 2 0 ...
## $ alcuse: num 1.73 2 2 0 0 ...
## $ peer : num 1.265 1.265 1.265 0.894 0.894 ...
## $ cpeer : num 0.247 0.247 0.247 -0.124 -0.124 ...
## $ ccoa : num 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...
sum(sapply(dat, is.na))
## [1] 0
dat$coa %<>% as.factor
dat$sid %<>% as.factor
#Plot
ggplot(dat, aes(x = peer, y = alcuse, shape = coa, linetype = coa))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
labs(x = "Alcohol use among the teenager's peers", y = "Alcohol use of the teenager")+
scale_shape_discrete(name = "Child of a alcoholic parent",breaks =c("0","1"),labels = c("No", "Yes"))+
scale_linetype_discrete(name = "Child of a alcoholic parent",breaks =c("0","1"),labels = c("No", "Yes"))+
theme_bw()+
theme(legend.position = c(0.3,0.8),
legend.background = element_rect(fill = "white", color = "black"))
#Analysis: different intercepts on individual level
summary(m1 <- lmer(alcuse~coa+peer*age14+(1|sid), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
## Data: dat
##
## REML criterion at convergence: 621.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.30588 -0.66394 -0.05233 0.55906 2.60999
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 0.3377 0.5811
## Residual 0.4823 0.6945
## Number of obs: 246, groups: sid, 82
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.31177 0.17225 -1.810
## coa1 0.56513 0.15878 3.559
## peer 0.69586 0.13219 5.264
## age14 0.42469 0.09346 4.544
## peer:age14 -0.15138 0.07481 -2.024
##
## Correlation of Fixed Effects:
## (Intr) coa1 peer age14
## coa1 -0.312
## peer -0.725 -0.134
## age14 -0.543 0.000 0.461
## peer:age14 0.442 0.000 -0.566 -0.814
#different intercepts(individual) and slopes(age)
summary(m2 <- lmer(alcuse~coa+peer*age14+(1+age14|sid), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (1 + age14 | sid)
## Data: dat
##
## REML criterion at convergence: 603.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.59995 -0.40432 -0.07739 0.44372 2.27436
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## sid (Intercept) 0.2595 0.5094
## age14 0.1469 0.3832 -0.05
## Residual 0.3373 0.5808
## Number of obs: 246, groups: sid, 82
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.31382 0.14871 -2.110
## coa1 0.57120 0.14898 3.834
## peer 0.69518 0.11322 6.140
## age14 0.42469 0.10690 3.973
## peer:age14 -0.15138 0.08556 -1.769
##
## Correlation of Fixed Effects:
## (Intr) coa1 peer age14
## coa1 -0.339
## peer -0.708 -0.146
## age14 -0.408 0.000 0.350
## peer:age14 0.332 0.000 -0.429 -0.814
#model selection
anova(m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m1: alcuse ~ coa + peer * age14 + (1 | sid)
## m2: alcuse ~ coa + peer * age14 + (1 + age14 | sid)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 7 620.47 645.01 -303.23 606.47
## m2 9 606.70 638.25 -294.35 588.70 17.765 2 0.0001388 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Based on AIC results, m2 would be better.
That is, in different age stage, the effect of the two factors might differ.
dat$age14 %<>% as.factor
#Child of normal parents
p1 = ggplot(dat[dat$coa=="0",], aes(x = peer, y = alcuse))+
geom_point(alpha = .5)+
stat_smooth(aes(group = age14, color = age14), method = "lm", size = .5)+
ggtitle("Child of normal parents")+
labs(x = "Alcohol use among the teenager's peers", y = "Alcohol use of the teenager")+
scale_color_discrete(name ="Age", labels = c("14","15","16"))+
theme_bw()+
theme(panel.grid = element_blank())
#Child of a alcoholic parent
p2 = ggplot(dat[dat$coa=="1",], aes(x = peer, y = alcuse))+
geom_point(alpha = .5)+
stat_smooth(aes(group = age14, color = age14), method = "lm", size = .5)+
ggtitle("Child of a alcoholic parent")+
labs(x = "Alcohol use among the teenager's peers", y = "Alcohol use of the teenager")+
scale_color_discrete(name ="Age", labels = c("14","15","16"))+
theme_bw()+
theme(panel.grid = element_blank())
grid.arrange(p1,p2,nrow = 1, ncol = 2)
data("sleepstudy", package = "lme4")
dat = sleepstudy
str(dat)
## 'data.frame': 180 obs. of 3 variables:
## $ Reaction: num 250 259 251 321 357 ...
## $ Days : num 0 1 2 3 4 5 6 7 8 9 ...
## $ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 1 1 ...
#Plot
ggplot(dat, aes(x = Days, y = Reaction, group = Subject))+
geom_point()+
geom_line(size = 0.5)+
labs(x = "Days of sleep deprivation", y = "Reaction Time (ms)")+
theme_bw()
#Simple lm
summary(m0 <- lm(Reaction~Days, data = dat))
##
## Call:
## lm(formula = Reaction ~ Days, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -110.848 -27.483 1.546 26.142 139.953
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 251.405 6.610 38.033 < 2e-16 ***
## Days 10.467 1.238 8.454 9.89e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.71 on 178 degrees of freedom
## Multiple R-squared: 0.2865, Adjusted R-squared: 0.2825
## F-statistic: 71.46 on 1 and 178 DF, p-value: 9.894e-15
plot(rstandard(m0), xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0)
#Analysis: different intercepts on individual level
summary(m1 <- lmer(Reaction~Days+(1|Subject), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 | Subject)
## Data: dat
##
## REML criterion at convergence: 1786.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2257 -0.5529 0.0109 0.5188 4.2506
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 1378.2 37.12
## Residual 960.5 30.99
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.4051 9.7467 25.79
## Days 10.4673 0.8042 13.02
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.371
plot(m1, xlab = "Fitted values", ylab = "Standardized residuals")
#different intercept and slope on individual level
summary(m2 <- lmer(Reaction~Days+(1+Days|Subject), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 + Days | Subject)
## Data: dat
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.09 24.740
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.405 6.825 36.84
## Days 10.467 1.546 6.77
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
plot(m2, xlab = "Fitted values", ylab = "Standardized residuals")
#Model selection
anova(m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m1: Reaction ~ Days + (1 | Subject)
## m2: Reaction ~ Days + (1 + Days | Subject)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 4 1802.1 1814.8 -897.04 1794.1
## m2 6 1763.9 1783.1 -875.97 1751.9 42.139 2 7.072e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Based on residuals plots and AIC results, m2 is better
That is, sleep deprivation has differet effect on different individuals.
dat = read.table("data/lecithin.txt", header = TRUE)
str(dat)
## 'data.frame': 235 obs. of 4 variables:
## $ Group : Factor w/ 2 levels "L","P": 2 2 2 2 2 2 2 2 2 2 ...
## $ Visit : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Recall : int 20 14 7 6 9 9 7 18 6 10 ...
## $ Patient: Factor w/ 48 levels "0P2","L26","L27",..: 24 25 26 27 28 29 30 31 32 33 ...
sum(sapply(dat, is.na))
## [1] 0
#The first plot
ggplot(dat, aes(x = Visit, y = Recall, group = Patient))+
geom_point()+
geom_line()+
facet_wrap(~Group)+
labs(x = "Visit", y = "Recall score")+
theme_bw()
#The second plot
ggplot(dat, aes(x = Visit, y = Recall, shape = Group, linetype = Group))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .1)+
labs(x = "Visit", y = "Recall score")+
theme_bw()+
theme(legend.position = c(0.1,0.8),
legend.background = element_rect(fill = "white", color = "black"))
#simple lm
summary(m0 <-lm(Recall~Visit*Group, data = dat))
##
## Call:
## lm(formula = Recall ~ Visit * Group, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2182 -2.9080 0.2909 3.3035 10.2909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8318 1.0087 5.781 2.39e-08 ***
## Visit 1.8773 0.3041 6.172 2.99e-09 ***
## GroupP 4.7482 1.3831 3.433 0.000707 ***
## Visit:GroupP -2.6013 0.4170 -6.238 2.10e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.511 on 231 degrees of freedom
## Multiple R-squared: 0.2361, Adjusted R-squared: 0.2261
## F-statistic: 23.79 on 3 and 231 DF, p-value: 1.867e-13
plot(rstandard(m0), xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0)
#Different intercept per individual
summary(m1 <- lmer(Recall~Visit*Group+(1|Patient), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Recall ~ Visit * Group + (1 | Patient)
## Data: dat
##
## REML criterion at convergence: 1140.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5563 -0.5452 -0.0379 0.5492 3.2297
##
## Random effects:
## Groups Name Variance Std.Dev.
## Patient (Intercept) 16.524 4.065
## Residual 4.097 2.024
## Number of obs: 235, groups: Patient, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.8318 0.9777 5.965
## Visit 1.8773 0.1365 13.756
## GroupP 4.8325 1.3349 3.620
## Visit:GroupP -2.5965 0.1879 -13.821
##
## Correlation of Fixed Effects:
## (Intr) Visit GroupP
## Visit -0.419
## GroupP -0.732 0.307
## Visit:GropP 0.304 -0.726 -0.425
plot(m1, xlab = "Fitted values", ylab = "Standardized residuals")
#Different intercept and slope of visit per individual
summary(m2 <- lmer(Recall~Visit*Group+(1+Visit|Patient), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Recall ~ Visit * Group + (1 + Visit | Patient)
## Data: dat
##
## REML criterion at convergence: 1127
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.66658 -0.42645 0.00929 0.42500 2.55927
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Patient (Intercept) 21.6432 4.6522
## Visit 0.4065 0.6376 -0.48
## Residual 3.1159 1.7652
## Number of obs: 235, groups: Patient, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.8318 1.0675 5.463
## Visit 1.8773 0.1807 10.390
## GroupP 4.8319 1.4597 3.310
## Visit:GroupP -2.5984 0.2482 -10.469
##
## Correlation of Fixed Effects:
## (Intr) Visit GroupP
## Visit -0.558
## GroupP -0.731 0.408
## Visit:GropP 0.406 -0.728 -0.563
plot(m2, xlab = "Fitted values", ylab = "Standardized residuals")
anova(m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m1: Recall ~ Visit * Group + (1 | Patient)
## m2: Recall ~ Visit * Group + (1 + Visit | Patient)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 6 1151.2 1171.9 -569.57 1139.2
## m2 8 1142.6 1170.3 -563.32 1126.6 12.501 2 0.00193 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
According to the results, the effects of time, treatment and interaction between the two folds might differ on individuals.
From the plots, the possible interpretation is that with time passing by, the difference between treatment groups became larger.