library

library(pacman)
pacman::p_load(mlmRev, tidyverse, lme4,merTools,foreign,segmented,DPpackage,dplyr
               ,ggplot2,HLMdiag,tidyr,nlme,magrittr,lattice)

Question 1

dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/reading_piat.txt",header = T)
dta1$Age <- dta1$Age-6.5
dta1$Wave <- as.factor(dta1$Wave)
dta1$ID <- as.factor(dta1$ID)
str(dta1)
## 'data.frame':    267 obs. of  5 variables:
##  $ ID   : Factor w/ 89 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Wave : Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
##  $ Age_G: num  6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
##  $ Age  : num  -0.5 1.83 3.83 -0.5 2 ...
##  $ PIAT : int  18 35 59 18 25 28 18 23 32 18 ...
dta1_mean <- group_by(dta1,Wave)%>%summarize(mean_AGE = mean(Age),mean_PIAT = mean(PIAT))

# use black and white theme
theme_set(theme_bw())

# load the package for rlm
require(MASS)

#
ggplot() +
  stat_smooth(data=dta1, aes(Age, PIAT, group = ID),method = "rlm", se = F, lwd= .1) +
  stat_smooth(data=dta1_mean,aes(mean_AGE, mean_PIAT),method = "lm", se = F, col="magenta") +
  geom_jitter(data=dta1,aes(Age, PIAT, group = ID),cex = .6, alpha = .5) +
  labs(x = "AGE", y = "PIAT score") +theme_bw()

# plot PIAT by Age 
ggplot(data=dta1,aes(Age, PIAT, group = ID)) +
  geom_smooth(method = "rlm") +
  geom_point(cex = 0.7)+
  facet_wrap(~ Wave, nrow = 3) +
  labs(x = "AGE", y = "PIAT score")

# random intercepts, random slopes
summary(q1 <- lmer(PIAT ~ Age + (Age | ID), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: PIAT ~ Age + (Age | ID)
##    Data: dta1
## 
## REML criterion at convergence: 1804.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0042 -0.4893 -0.1383  0.4067  3.6892 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  ID       (Intercept)  5.507   2.347        
##           Age          3.377   1.838    0.53
##  Residual             27.400   5.235        
## Number of obs: 267, groups:  ID, 89
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  21.0621     0.5630   37.41
## Age           4.5399     0.2622   17.32
## 
## Correlation of Fixed Effects:
##     (Intr)
## Age -0.288
# residual plot
plot(q1, pch = 20, cex = 0.5, col = "steelblue", 
     xlab = "Fitted values", ylab = "Standard residuals")

Question 2

dta2 <- mtcars
str(dta2)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# set up linear spline
dta2 <- dta2 %>% 
  mutate( wt_intox = factor(ifelse(wt >= 3.582, "Yes", "No")),
          wt_above = ifelse(wt < 3.582, 0, wt - 3.582))
# set up theme
ot <- theme_set(theme_bw())

# plot data with three fitted lines
ggplot(dta2, aes(wt, mpg)) +
  stat_smooth(method = "lm", se = F, linetype = "dashed")+
  geom_point(alpha = .5) +
  geom_vline(xintercept = 3.582, linetype = "dotted") +
  stat_smooth(aes(group = wt_intox), method = "lm", se = F) +
  labs(x = "Weight", y = "Miles per gallon")

# fit a linear spline (piece-wise) model
summary(m1 <- lm(mpg ~ wt + wt_above, data = dta2))
## 
## Call:
## lm(formula = mpg ~ wt + wt_above, data = dta2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6453 -1.9341 -0.8157  1.3575  5.8743 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   42.584      2.270  18.763  < 2e-16 ***
## wt            -7.299      0.759  -9.617 1.59e-10 ***
## wt_above       4.781      1.431   3.340  0.00231 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.633 on 29 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8092 
## F-statistic: 66.73 on 2 and 29 DF,  p-value: 1.408e-11
# load segmented piecewise modeling package
# fit a simple linear model first
# initial knot location set at 3
m0 <- lm(mpg ~ wt, data = dta2)
summary(m0)
## 
## Call:
## lm(formula = mpg ~ wt, data = dta2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5432 -2.3647 -0.1252  1.4096  6.8727 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
## wt           -5.3445     0.5591  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
summary(m2 <- segmented(m0, seg.Z = ~ wt, psi = 3))
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3)
## 
## Estimated Break-Point(s):
##    Est. St.Err 
##  3.567  0.310 
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   42.624      2.517  16.935 3.03e-16 ***
## wt            -7.317      0.891  -8.212 6.14e-09 ***
## U1.wt          4.766      1.460   3.264       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.678 on 28 degrees of freedom
## Multiple R-Squared: 0.8217,  Adjusted R-squared: 0.8026 
## 
## Convergence attained in 2 iterations with relative change 0
# plot model fit
plot(m2)
with(dta2, points(wt, mpg))
abline(v = m2$psi[2], lty = 3)
grid()

Question 3

dta3 <- read.csv("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/visual_search.csv",header = T,sep = ",")


#
theme_set(theme_bw(base_size = 10))

#
ggplot(dta3, aes(Set.Size, RT)) +
  geom_line(aes(group = Participant), alpha = .5) +
  stat_smooth(aes(group = Dx), method = "lm", formula = y ~ x) +
  geom_point(alpha = 0.5) +
  facet_wrap( ~ Dx) +
  labs(x = "Set.Size", y = "Response Time")

# set dodge amount
pd <- position_dodge(width=.2)

# 
ggplot(dta3, aes(Set.Size, RT, shape = Dx, linetype = Dx)) +
    stat_smooth(method = "lm",se = FALSE,col = "gray") +
    stat_summary(fun.y = mean, geom = "point", position = pd) +
    stat_summary(fun.data = mean_se, geom = "errorbar", 
                 position = pd, linetype = "solid", width = 0.5) +
    scale_shape_manual(values = c(1, 2, 16)) +
    labs(x = "Sey.Size", y = "Response Time", linetype = "Group", shape = "Group") +
    theme(legend.justification = c(0, 1), legend.position = c(0, 1),
          legend.background = element_rect(fill = "white", color = "black"))

#
summary(r3 <- lmer(RT ~ Set.Size*Dx + (Set.Size | Participant) , data = dta3))
## Linear mixed model fit by REML ['lmerMod']
## Formula: RT ~ Set.Size * Dx + (Set.Size | Participant)
##    Data: dta3
## 
## REML criterion at convergence: 2186.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7578 -0.3130 -0.0750  0.3117  6.1372 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev. Corr
##  Participant (Intercept) 657552.9 810.90       
##              Set.Size       407.4  20.18   1.00
##  Residual                772433.1 878.88       
## Number of obs: 132, groups:  Participant, 33
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)         2078.75     270.98   7.671
## Set.Size              73.49      11.40   6.446
## DxControl          -1106.05     366.90  -3.015
## Set.Size:DxControl   -21.74      15.44  -1.408
## 
## Correlation of Fixed Effects:
##             (Intr) Set.Sz DxCntr
## Set.Size    -0.071              
## DxControl   -0.739  0.053       
## St.Sz:DxCnt  0.053 -0.739 -0.071
#
plot(r3, resid(., type = "pearson") ~ fitted(.) | Dx, 
     layout=c(3,1), aspect = 2, abline = 0, pch = 20, cex = .8, id = 0.05, 
     xlab = "Ftted values", ylab = "Pearson Residuals")

Question 5

dta5 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/alcohol_use.txt",header = T)
str(dta5)
## 'data.frame':    246 obs. of  8 variables:
##  $ sid   : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ coa   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ sex   : int  0 0 0 1 1 1 1 1 1 1 ...
##  $ age14 : int  0 1 2 0 1 2 0 1 2 0 ...
##  $ alcuse: num  1.73 2 2 0 0 ...
##  $ peer  : num  1.265 1.265 1.265 0.894 0.894 ...
##  $ cpeer : num  0.247 0.247 0.247 -0.124 -0.124 ...
##  $ ccoa  : num  0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...
dta5$sid <- as.factor(dta5$sid)
dta5 %<>% mutate(p_age14 = peer*age14)

#
summary(r5_1 <- lmer(alcuse ~ coa + peer * age14+ (1 | sid) , data = dta5))
## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
##    Data: dta5
## 
## REML criterion at convergence: 621.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.30588 -0.66394 -0.05233  0.55906  2.60999 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sid      (Intercept) 0.3377   0.5811  
##  Residual             0.4823   0.6945  
## Number of obs: 246, groups:  sid, 82
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -0.31177    0.17225  -1.810
## coa          0.56513    0.15878   3.559
## peer         0.69586    0.13219   5.264
## age14        0.42469    0.09346   4.544
## peer:age14  -0.15138    0.07481  -2.024
## 
## Correlation of Fixed Effects:
##            (Intr) coa    peer   age14 
## coa        -0.312                     
## peer       -0.725 -0.134              
## age14      -0.543  0.000  0.461       
## peer:age14  0.442  0.000 -0.566 -0.814
#
summary(r5_2 <- lmer(alcuse ~ coa + peer * age14+ (age14 | sid) , data = dta5))
## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (age14 | sid)
##    Data: dta5
## 
## REML criterion at convergence: 603.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.59995 -0.40432 -0.07739  0.44372  2.27436 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  sid      (Intercept) 0.2595   0.5094        
##           age14       0.1469   0.3832   -0.05
##  Residual             0.3373   0.5808        
## Number of obs: 246, groups:  sid, 82
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -0.31382    0.14871  -2.110
## coa          0.57120    0.14898   3.834
## peer         0.69518    0.11322   6.140
## age14        0.42469    0.10690   3.973
## peer:age14  -0.15138    0.08556  -1.769
## 
## Correlation of Fixed Effects:
##            (Intr) coa    peer   age14 
## coa        -0.339                     
## peer       -0.708 -0.146              
## age14      -0.408  0.000  0.350       
## peer:age14  0.332  0.000 -0.429 -0.814

Question 6

dta6 <- sleepstudy
str(dta6)
## 'data.frame':    180 obs. of  3 variables:
##  $ Reaction: num  250 259 251 321 357 ...
##  $ Days    : num  0 1 2 3 4 5 6 7 8 9 ...
##  $ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 1 1 ...
#
ggplot(data = dta6 , aes(x = Days,y = Reaction,group = Subject))+
  geom_point(aes(alpha = Subject))+
  geom_line(aes(alpha = Subject))+
  theme(legend.position = "none")

#
summary(r6 <- lmer(Reaction ~  Days + (Days | Subject) , data = dta6))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (Days | Subject)
##    Data: dta6
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.09   24.740       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.825   36.84
## Days          10.467      1.546    6.77
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138
#residual plot
plot(r6, resid(., type = "pearson") ~ fitted(.), 
       abline = 0, pch = 20, cex = .8, id = 0.05, 
     xlab = "Ftted values", ylab = "Pearson Residuals")

Question 7

dta7 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/lecithin.txt",header = T)
str(dta7)
## 'data.frame':    235 obs. of  4 variables:
##  $ Group  : Factor w/ 2 levels "L","P": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Visit  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Recall : int  20 14 7 6 9 9 7 18 6 10 ...
##  $ Patient: Factor w/ 48 levels "0P2","L26","L27",..: 24 25 26 27 28 29 30 31 32 33 ...
#xyplot
xyplot(Recall ~  Visit | Group, groups = Patient, data = dta7,
       type = c("p", "l", "g"),layout = c(2, 1),
       xlab = "Visit", ylab = "Height (cm)")

#
summary(r7 <- lmer(Recall ~  Visit*Group + (Visit | Patient) , data = dta7))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Recall ~ Visit * Group + (Visit | Patient)
##    Data: dta7
## 
## REML criterion at convergence: 1127
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.66658 -0.42645  0.00929  0.42500  2.55927 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Patient  (Intercept) 21.6432  4.6522        
##           Visit        0.4065  0.6376   -0.48
##  Residual              3.1159  1.7652        
## Number of obs: 235, groups:  Patient, 48
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)    5.8318     1.0675   5.463
## Visit          1.8773     0.1807  10.390
## GroupP         4.8319     1.4597   3.310
## Visit:GroupP  -2.5984     0.2482 -10.469
## 
## Correlation of Fixed Effects:
##             (Intr) Visit  GroupP
## Visit       -0.558              
## GroupP      -0.731  0.408       
## Visit:GropP  0.406 -0.728 -0.563
#

ggplot(dta7, aes(Visit, Recall, shape = Group, linetype = Group)) +
  stat_summary(fun.y = mean, geom = "point", position = pd) +
  stat_summary(fun.data = mean_se, geom = "errorbar", 
               position = pd, linetype = "solid", width = 0.37) +
  stat_summary(fun.y = mean, geom = "line", position = pd) +
  scale_shape_manual(values = c(1, 2, 16)) +
  labs(x = "Visit", y = "Recall Score", linetype = "Group", shape = "Group") +
  theme(legend.justification = c(0, 1), legend.position = c(0, 1),
        legend.background = element_rect(fill = "white", color = "black"))

#Residual plot
plot(r7, resid(., type = "pearson") ~ fitted(.), 
     abline = 0, pch = 20, cex = .8, id = 0.05, 
     xlab = "Ftted values", ylab = "Pearson Residuals")