library
library(pacman)
pacman::p_load(mlmRev, tidyverse, lme4,merTools,foreign,segmented,DPpackage,dplyr
,ggplot2,HLMdiag,tidyr,nlme,magrittr,lattice)
Question 1
dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/reading_piat.txt",header = T)
dta1$Age <- dta1$Age-6.5
dta1$Wave <- as.factor(dta1$Wave)
dta1$ID <- as.factor(dta1$ID)
str(dta1)
## 'data.frame': 267 obs. of 5 variables:
## $ ID : Factor w/ 89 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ Wave : Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
## $ Age_G: num 6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
## $ Age : num -0.5 1.83 3.83 -0.5 2 ...
## $ PIAT : int 18 35 59 18 25 28 18 23 32 18 ...
dta1_mean <- group_by(dta1,Wave)%>%summarize(mean_AGE = mean(Age),mean_PIAT = mean(PIAT))
# use black and white theme
theme_set(theme_bw())
# load the package for rlm
require(MASS)
#
ggplot() +
stat_smooth(data=dta1, aes(Age, PIAT, group = ID),method = "rlm", se = F, lwd= .1) +
stat_smooth(data=dta1_mean,aes(mean_AGE, mean_PIAT),method = "lm", se = F, col="magenta") +
geom_jitter(data=dta1,aes(Age, PIAT, group = ID),cex = .6, alpha = .5) +
labs(x = "AGE", y = "PIAT score") +theme_bw()

# plot PIAT by Age
ggplot(data=dta1,aes(Age, PIAT, group = ID)) +
geom_smooth(method = "rlm") +
geom_point(cex = 0.7)+
facet_wrap(~ Wave, nrow = 3) +
labs(x = "AGE", y = "PIAT score")

# random intercepts, random slopes
summary(q1 <- lmer(PIAT ~ Age + (Age | ID), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: PIAT ~ Age + (Age | ID)
## Data: dta1
##
## REML criterion at convergence: 1804.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0042 -0.4893 -0.1383 0.4067 3.6892
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 5.507 2.347
## Age 3.377 1.838 0.53
## Residual 27.400 5.235
## Number of obs: 267, groups: ID, 89
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 21.0621 0.5630 37.41
## Age 4.5399 0.2622 17.32
##
## Correlation of Fixed Effects:
## (Intr)
## Age -0.288
# residual plot
plot(q1, pch = 20, cex = 0.5, col = "steelblue",
xlab = "Fitted values", ylab = "Standard residuals")

Question 2
dta2 <- mtcars
str(dta2)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# set up linear spline
dta2 <- dta2 %>%
mutate( wt_intox = factor(ifelse(wt >= 3.582, "Yes", "No")),
wt_above = ifelse(wt < 3.582, 0, wt - 3.582))
# set up theme
ot <- theme_set(theme_bw())
# plot data with three fitted lines
ggplot(dta2, aes(wt, mpg)) +
stat_smooth(method = "lm", se = F, linetype = "dashed")+
geom_point(alpha = .5) +
geom_vline(xintercept = 3.582, linetype = "dotted") +
stat_smooth(aes(group = wt_intox), method = "lm", se = F) +
labs(x = "Weight", y = "Miles per gallon")

# fit a linear spline (piece-wise) model
summary(m1 <- lm(mpg ~ wt + wt_above, data = dta2))
##
## Call:
## lm(formula = mpg ~ wt + wt_above, data = dta2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6453 -1.9341 -0.8157 1.3575 5.8743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.584 2.270 18.763 < 2e-16 ***
## wt -7.299 0.759 -9.617 1.59e-10 ***
## wt_above 4.781 1.431 3.340 0.00231 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.633 on 29 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8092
## F-statistic: 66.73 on 2 and 29 DF, p-value: 1.408e-11
# load segmented piecewise modeling package
# fit a simple linear model first
# initial knot location set at 3
m0 <- lm(mpg ~ wt, data = dta2)
summary(m0)
##
## Call:
## lm(formula = mpg ~ wt, data = dta2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
summary(m2 <- segmented(m0, seg.Z = ~ wt, psi = 3))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3)
##
## Estimated Break-Point(s):
## Est. St.Err
## 3.567 0.310
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.624 2.517 16.935 3.03e-16 ***
## wt -7.317 0.891 -8.212 6.14e-09 ***
## U1.wt 4.766 1.460 3.264 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.678 on 28 degrees of freedom
## Multiple R-Squared: 0.8217, Adjusted R-squared: 0.8026
##
## Convergence attained in 2 iterations with relative change 0
# plot model fit
plot(m2)
with(dta2, points(wt, mpg))
abline(v = m2$psi[2], lty = 3)
grid()

Question 3
dta3 <- read.csv("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/visual_search.csv",header = T,sep = ",")
#
theme_set(theme_bw(base_size = 10))
#
ggplot(dta3, aes(Set.Size, RT)) +
geom_line(aes(group = Participant), alpha = .5) +
stat_smooth(aes(group = Dx), method = "lm", formula = y ~ x) +
geom_point(alpha = 0.5) +
facet_wrap( ~ Dx) +
labs(x = "Set.Size", y = "Response Time")

# set dodge amount
pd <- position_dodge(width=.2)
#
ggplot(dta3, aes(Set.Size, RT, shape = Dx, linetype = Dx)) +
stat_smooth(method = "lm",se = FALSE,col = "gray") +
stat_summary(fun.y = mean, geom = "point", position = pd) +
stat_summary(fun.data = mean_se, geom = "errorbar",
position = pd, linetype = "solid", width = 0.5) +
scale_shape_manual(values = c(1, 2, 16)) +
labs(x = "Sey.Size", y = "Response Time", linetype = "Group", shape = "Group") +
theme(legend.justification = c(0, 1), legend.position = c(0, 1),
legend.background = element_rect(fill = "white", color = "black"))

#
summary(r3 <- lmer(RT ~ Set.Size*Dx + (Set.Size | Participant) , data = dta3))
## Linear mixed model fit by REML ['lmerMod']
## Formula: RT ~ Set.Size * Dx + (Set.Size | Participant)
## Data: dta3
##
## REML criterion at convergence: 2186.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7578 -0.3130 -0.0750 0.3117 6.1372
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Participant (Intercept) 657552.9 810.90
## Set.Size 407.4 20.18 1.00
## Residual 772433.1 878.88
## Number of obs: 132, groups: Participant, 33
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2078.75 270.98 7.671
## Set.Size 73.49 11.40 6.446
## DxControl -1106.05 366.90 -3.015
## Set.Size:DxControl -21.74 15.44 -1.408
##
## Correlation of Fixed Effects:
## (Intr) Set.Sz DxCntr
## Set.Size -0.071
## DxControl -0.739 0.053
## St.Sz:DxCnt 0.053 -0.739 -0.071
#
plot(r3, resid(., type = "pearson") ~ fitted(.) | Dx,
layout=c(3,1), aspect = 2, abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")

Question 5
dta5 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/alcohol_use.txt",header = T)
str(dta5)
## 'data.frame': 246 obs. of 8 variables:
## $ sid : int 1 1 1 2 2 2 3 3 3 4 ...
## $ coa : int 1 1 1 1 1 1 1 1 1 1 ...
## $ sex : int 0 0 0 1 1 1 1 1 1 1 ...
## $ age14 : int 0 1 2 0 1 2 0 1 2 0 ...
## $ alcuse: num 1.73 2 2 0 0 ...
## $ peer : num 1.265 1.265 1.265 0.894 0.894 ...
## $ cpeer : num 0.247 0.247 0.247 -0.124 -0.124 ...
## $ ccoa : num 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...
dta5$sid <- as.factor(dta5$sid)
dta5 %<>% mutate(p_age14 = peer*age14)
#
summary(r5_1 <- lmer(alcuse ~ coa + peer * age14+ (1 | sid) , data = dta5))
## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
## Data: dta5
##
## REML criterion at convergence: 621.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.30588 -0.66394 -0.05233 0.55906 2.60999
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 0.3377 0.5811
## Residual 0.4823 0.6945
## Number of obs: 246, groups: sid, 82
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.31177 0.17225 -1.810
## coa 0.56513 0.15878 3.559
## peer 0.69586 0.13219 5.264
## age14 0.42469 0.09346 4.544
## peer:age14 -0.15138 0.07481 -2.024
##
## Correlation of Fixed Effects:
## (Intr) coa peer age14
## coa -0.312
## peer -0.725 -0.134
## age14 -0.543 0.000 0.461
## peer:age14 0.442 0.000 -0.566 -0.814
#
summary(r5_2 <- lmer(alcuse ~ coa + peer * age14+ (age14 | sid) , data = dta5))
## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (age14 | sid)
## Data: dta5
##
## REML criterion at convergence: 603.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.59995 -0.40432 -0.07739 0.44372 2.27436
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## sid (Intercept) 0.2595 0.5094
## age14 0.1469 0.3832 -0.05
## Residual 0.3373 0.5808
## Number of obs: 246, groups: sid, 82
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.31382 0.14871 -2.110
## coa 0.57120 0.14898 3.834
## peer 0.69518 0.11322 6.140
## age14 0.42469 0.10690 3.973
## peer:age14 -0.15138 0.08556 -1.769
##
## Correlation of Fixed Effects:
## (Intr) coa peer age14
## coa -0.339
## peer -0.708 -0.146
## age14 -0.408 0.000 0.350
## peer:age14 0.332 0.000 -0.429 -0.814
Question 6
dta6 <- sleepstudy
str(dta6)
## 'data.frame': 180 obs. of 3 variables:
## $ Reaction: num 250 259 251 321 357 ...
## $ Days : num 0 1 2 3 4 5 6 7 8 9 ...
## $ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 1 1 ...
#
ggplot(data = dta6 , aes(x = Days,y = Reaction,group = Subject))+
geom_point(aes(alpha = Subject))+
geom_line(aes(alpha = Subject))+
theme(legend.position = "none")

#
summary(r6 <- lmer(Reaction ~ Days + (Days | Subject) , data = dta6))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
## Data: dta6
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.09 24.740
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.405 6.825 36.84
## Days 10.467 1.546 6.77
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
#residual plot
plot(r6, resid(., type = "pearson") ~ fitted(.),
abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")

Question 7
dta7 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/lecithin.txt",header = T)
str(dta7)
## 'data.frame': 235 obs. of 4 variables:
## $ Group : Factor w/ 2 levels "L","P": 2 2 2 2 2 2 2 2 2 2 ...
## $ Visit : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Recall : int 20 14 7 6 9 9 7 18 6 10 ...
## $ Patient: Factor w/ 48 levels "0P2","L26","L27",..: 24 25 26 27 28 29 30 31 32 33 ...
#xyplot
xyplot(Recall ~ Visit | Group, groups = Patient, data = dta7,
type = c("p", "l", "g"),layout = c(2, 1),
xlab = "Visit", ylab = "Height (cm)")

#
summary(r7 <- lmer(Recall ~ Visit*Group + (Visit | Patient) , data = dta7))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Recall ~ Visit * Group + (Visit | Patient)
## Data: dta7
##
## REML criterion at convergence: 1127
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.66658 -0.42645 0.00929 0.42500 2.55927
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Patient (Intercept) 21.6432 4.6522
## Visit 0.4065 0.6376 -0.48
## Residual 3.1159 1.7652
## Number of obs: 235, groups: Patient, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.8318 1.0675 5.463
## Visit 1.8773 0.1807 10.390
## GroupP 4.8319 1.4597 3.310
## Visit:GroupP -2.5984 0.2482 -10.469
##
## Correlation of Fixed Effects:
## (Intr) Visit GroupP
## Visit -0.558
## GroupP -0.731 0.408
## Visit:GropP 0.406 -0.728 -0.563
#
ggplot(dta7, aes(Visit, Recall, shape = Group, linetype = Group)) +
stat_summary(fun.y = mean, geom = "point", position = pd) +
stat_summary(fun.data = mean_se, geom = "errorbar",
position = pd, linetype = "solid", width = 0.37) +
stat_summary(fun.y = mean, geom = "line", position = pd) +
scale_shape_manual(values = c(1, 2, 16)) +
labs(x = "Visit", y = "Recall Score", linetype = "Group", shape = "Group") +
theme(legend.justification = c(0, 1), legend.position = c(0, 1),
legend.background = element_rect(fill = "white", color = "black"))

#Residual plot
plot(r7, resid(., type = "pearson") ~ fitted(.),
abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")
