Exercises: Covariance patterns models

p = c("tidyverse", "nlme", "car", "magrittr", "lme4", "gridExtra")
lapply(p, library, character.only = TRUE)

Q1

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/weightlifting.txt", header = TRUE, na.strings = ".")
str(dat)

## 'data.frame':    37 obs. of  9 variables:
##  $ ID     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Program: Factor w/ 2 levels "R","W": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Day0   : int  79 83 81 81 80 76 81 77 84 74 ...
##  $ Day2   : int  NA 83 83 81 81 76 84 78 85 75 ...
##  $ Day4   : int  79 85 82 81 82 76 83 79 87 78 ...
##  $ Day6   : int  80 85 82 82 82 76 83 79 89 78 ...
##  $ Day8   : int  80 86 83 82 82 76 85 81 NA 79 ...
##  $ Day10  : int  78 87 83 83 NA 76 85 82 NA 78 ...
##  $ Day12  : int  80 87 82 81 86 75 85 81 86 78 ...

head(dat)

##   ID Program Day0 Day2 Day4 Day6 Day8 Day10 Day12
## 1  1       R   79   NA   79   80   80    78    80
## 2  2       R   83   83   85   85   86    87    87
## 3  3       R   81   83   82   82   83    83    82
## 4  4       R   81   81   81   82   82    83    81
## 5  5       R   80   81   82   82   82    NA    86
## 6  6       R   76   76   76   76   76    76    75

dat %<>% gather(key = Day, value = strength, contains("Day"))
dat %<>% separate(Day, c("prefix","day"), sep = 3)
dat$day %<>% as.numeric
dat$prefix=NULL
dat$ID %<>% as.factor
dat$day %<>% as.integer

#plot
ggplot(dat, aes(x = day, y = strength, group = ID, color = ID))+
  geom_point(alpha = 0.2, na.rm = TRUE)+
  geom_line(alpha = 0.2, na.rm = TRUE)+
  stat_summary(aes(group = Program), fun.y = mean, geom = "point", na.rm = TRUE, color = "navy", size = 1.5)+
  stat_summary(aes(group = Program), fun.y = mean, geom = "line", na.rm = TRUE, color = "navy", size = 1.3)+
  stat_summary(aes(group = Program), fun.data = mean_se, geom = "errorbar", width = .5, na.rm = TRUE, color = "navy")+
  facet_wrap(~Program)+
  scale_x_continuous(breaks = c(0,2,4,6,8,10,12))+
  labs(x = "Day(s)", y = "Strength")+
  theme_bw()+
  theme(legend.position = "none")

datf = na.omit(dat)
datf %<>% mutate(day_f = factor(day))

summary(m0 <- gls(strength~Program, weights = varIdent(form = ~1|day_f), data = datf))

## Generalized least squares fit by REML
##   Model: strength ~ Program 
##   Data: datf 
##        AIC      BIC    logLik
##   1274.613 1305.826 -628.3066
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | day_f 
##  Parameter estimates:
##         0         2         4         6         8        10        12 
## 1.0000000 1.0178766 1.0862158 0.9942133 1.1120794 1.0142258 1.1171382 
## 
## Coefficients:
##                Value Std.Error   t-value p-value
## (Intercept) 80.75039 0.3278372 246.31250  0.0000
## ProgramW     1.31659 0.4371582   3.01171  0.0029
## 
##  Correlation: 
##          (Intr)
## ProgramW -0.75 
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.51141160 -0.81475040 -0.01919753  0.66640424  2.58321595 
## 
## Residual standard error: 3.21213 
## Degrees of freedom: 239 total; 237 residual

summary(m1 <- gls(strength~Program, weights = varIdent(form = ~1|day_f), 
                  correlation = corAR1(form = ~day|ID), data = datf))

## Generalized least squares fit by REML
##   Model: strength ~ Program 
##   Data: datf 
##        AIC      BIC    logLik
##   828.1655 862.8461 -404.0827
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~day | ID 
##  Parameter estimate(s):
##       Phi1 
## -0.9746822 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | day_f 
##  Parameter estimates:
##         0         4         6         8        10        12         2 
## 1.0000000 1.0474298 0.9453299 1.0624712 1.0405211 1.0190228 1.0055976 
## 
## Coefficients:
##                Value Std.Error   t-value p-value
## (Intercept) 80.40492 0.7361776 109.21946  0.0000
## ProgramW     2.15930 0.9782057   2.20741  0.0282
## 
##  Correlation: 
##          (Intr)
## ProgramW -0.753
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.4897401 -0.7315386 -0.1245228  0.6877192  2.6432181 
## 
## Residual standard error: 3.439804 
## Degrees of freedom: 239 total; 237 residual

anova(m0,m1)

##    Model df       AIC       BIC    logLik   Test  L.Ratio p-value
## m0     1  9 1274.6131 1305.8257 -628.3066                        
## m1     2 10  828.1655  862.8461 -404.0827 1 vs 2 448.4476  <.0001

m1 is better

summary(m2 <- gls(strength~Program, weights = varIdent(form = ~1|day_f), 
                  correlation = corCompSymm(form = ~1|ID), data = datf))

## Generalized least squares fit by REML
##   Model: strength ~ Program 
##   Data: datf 
##        AIC      BIC    logLik
##   926.9555 961.6361 -453.4777
## 
## Correlation Structure: Compound symmetry
##  Formula: ~1 | ID 
##  Parameter estimate(s):
##       Rho 
## 0.9441032 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | day_f 
##  Parameter estimates:
##        0        4        6        8       10       12        2 
## 1.000000 1.211367 1.260039 1.337123 1.354596 1.403072 1.142275 
## 
## Coefficients:
##                Value Std.Error t-value p-value
## (Intercept) 77.64349 0.7445963 104.276  0.0000
## ProgramW     1.02776 0.9939638   1.034  0.3022
## 
##  Correlation: 
##          (Intr)
## ProgramW -0.749
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.19837982  0.08789843  0.67773094  1.13146200  2.61098316 
## 
## Residual standard error: 3.897976 
## Degrees of freedom: 239 total; 237 residual

anova(m0,m1,m2)

##    Model df       AIC       BIC    logLik   Test  L.Ratio p-value
## m0     1  9 1274.6131 1305.8257 -628.3066                        
## m1     2 10  828.1655  862.8461 -404.0827 1 vs 2 448.4476  <.0001
## m2     3 10  926.9555  961.6361 -453.4777

m1 is still better than other models.

summary(m3 <- gls(strength~Program, weights = varExp(form = ~day), 
                  correlation = corCompSymm(form = ~1|ID), data = datf))

## Generalized least squares fit by REML
##   Model: strength ~ Program 
##   Data: datf 
##        AIC      BIC    logLik
##   920.4576 937.7979 -455.2288
## 
## Correlation Structure: Compound symmetry
##  Formula: ~1 | ID 
##  Parameter estimate(s):
##       Rho 
## 0.9306234 
## Variance function:
##  Structure: Exponential of variance covariate
##  Formula: ~day 
##  Parameter estimates:
##      expon 
## 0.02641161 
## 
## Coefficients:
##                Value Std.Error   t-value p-value
## (Intercept) 78.30192 0.7585038 103.23208  0.0000
## ProgramW     0.92879 1.0145621   0.91546  0.3609
## 
##  Correlation: 
##          (Intr)
## ProgramW -0.748
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.37991587 -0.04673772  0.62349954  1.13204254  2.79357078 
## 
## Residual standard error: 3.790606 
## Degrees of freedom: 239 total; 237 residual

anova(m0,m1,m2,m3)

##    Model df       AIC       BIC    logLik   Test  L.Ratio p-value
## m0     1  9 1274.6131 1305.8257 -628.3066                        
## m1     2 10  828.1655  862.8461 -404.0827 1 vs 2 448.4476  <.0001
## m2     3 10  926.9555  961.6361 -453.4777                        
## m3     4  5  920.4576  937.7979 -455.2288 3 vs 4   3.5021  0.6231

anova(m1,m3)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m1     1 10 828.1655 862.8461 -404.0827                        
## m3     2  5 920.4576 937.7979 -455.2288 1 vs 2 102.2921  <.0001

m1 is still better

summary(m4 <- lme(strength~Program, random = ~1|ID, weights = varIdent(form = ~1|day_f), 
                  correlation = corCompSymm(form = ~1|ID), data = datf))

## Linear mixed-effects model fit by REML
##  Data: datf 
##        AIC      BIC    logLik
##   910.9188 949.0675 -444.4594
## 
## Random effects:
##  Formula: ~1 | ID
##         (Intercept) Residual
## StdDev:    3.341608 1.730967
## 
## Correlation Structure: Compound symmetry
##  Formula: ~1 | ID 
##  Parameter estimate(s):
##        Rho 
## -0.1666666 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | day_f 
##  Parameter estimates:
##         0         4         6         8        10        12         2 
## 1.0000000 0.3777633 0.4409408 0.4987618 0.6520739 0.7361915 0.7992358 
## Fixed effects: strength ~ Program 
##                Value Std.Error  DF  t-value p-value
## (Intercept) 80.90847 0.8358707 202 96.79544  0.0000
## ProgramW     1.35442 1.1096439  35  1.22059  0.2304
##  Correlation: 
##          (Intr)
## ProgramW -0.753
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.09167743 -0.66831698 -0.05790854  0.64805069  2.98611281 
## 
## Number of Observations: 239
## Number of Groups: 37

anova(m1,m4)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m1     1 10 828.1655 862.8461 -404.0827                        
## m4     2 11 910.9188 949.0675 -444.4594 1 vs 2 80.75333  <.0001

summary(m6 <- lme(strength~Program, random = ~1|ID, data = datf))

## Linear mixed-effects model fit by REML
##  Data: datf 
##        AIC      BIC    logLik
##   929.0306 942.9028 -460.5153
## 
## Random effects:
##  Formula: ~1 | ID
##         (Intercept) Residual
## StdDev:     3.23085 1.252571
## 
## Fixed effects: strength ~ Program 
##                Value Std.Error  DF  t-value p-value
## (Intercept) 80.81873 0.8169969 202 98.92171  0.0000
## ProgramW     1.34095 1.0846601  35  1.23628  0.2246
##  Correlation: 
##          (Intr)
## ProgramW -0.753
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.6546248 -0.5274829  0.0307856  0.5948745  3.0866709 
## 
## Number of Observations: 239
## Number of Groups: 37

anova(m1,m6)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m1     1 10 828.1655 862.8461 -404.0827                        
## m6     2  4 929.0306 942.9028 -460.5153 1 vs 2 112.8651  <.0001

m1 is the best model. That is, the fixed effect model with designated correlation matrix and weights is better than random effect model sometimes.

Q2

data = read.csv("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/claudication.csv", header = TRUE)
dat = data
str(dat)

## 'data.frame':    38 obs. of  8 variables:
##  $ Treatment: Factor w/ 2 levels "ACT","PBO": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PID      : Factor w/ 38 levels "P101","P102",..: 1 5 9 12 17 22 23 28 29 32 ...
##  $ Gender   : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ D0       : int  190 98 155 245 182 140 196 162 195 167 ...
##  $ D1       : int  212 137 145 228 205 138 185 176 232 187 ...
##  $ D2       : int  213 185 196 280 218 187 185 192 199 228 ...
##  $ D3       : int  195 215 189 274 194 195 227 230 185 192 ...
##  $ D4       : int  248 225 176 260 193 205 180 215 200 210 ...

dat %<>% gather(key = month, value = dis, D0,D1,D2,D3,D4)
dat %<>% separate(month, c("prefix", "month"), sep = 1)
dat$month %<>% as.numeric
dat$prefix=NULL
dat %<>% mutate(Time = ifelse(month==0, "D0", 
                             ifelse(month==1, "D1", 
                                    ifelse(month==2, "D2", 
                                           ifelse(month==3, "D3", "D4")))))
dat$Time %<>% as.factor

#Plot
ggplot(dat, aes(x = month, y = dis, group = Gender, color = Treatment))+
  geom_point(alpha = .2)+
  stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "point")+
  stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "line")+
  stat_summary(aes(color = Treatment, group = Treatment), fun.data = mean_se, geom = "errorbar", width = .2)+
  facet_wrap(~Gender)+
  theme_bw()+
  labs(x = "Time(in months)", y = "Distance(in meters)")+
  theme(legend.position=c(0.1,0.85), legend.background = element_rect(color = "black"))

#mean
m <- data %>% group_by(Gender, Treatment) %>% summarise(av_D0 = mean(D0),
                                                        av_D1 = mean(D1),
                                                        av_D2 = mean(D2),
                                                        av_D3 = mean(D3),
                                                        av_D4 = mean(D4)
                                                        )
m[, c(3:7)] = round(m[,c(3:7)], 2)
m

## Source: local data frame [4 x 7]
## Groups: Gender [?]
## 
##   Gender Treatment  av_D0  av_D1  av_D2  av_D3  av_D4
##   <fctr>    <fctr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1      F       ACT 180.38 193.38 207.88 196.75 208.50
## 2      F       PBO 165.56 170.11 175.78 168.11 171.22
## 3      M       ACT 163.17 179.50 195.58 204.08 207.67
## 4      M       PBO 178.33 165.56 173.33 193.67 194.22

There are diffrences between means

#sd
s <- data %>% group_by(Gender, Treatment) %>% summarise(sd(D0),
                                                        sd(D1),
                                                        sd(D2),
                                                        sd(D3),
                                                        sd(D4)
                                                        )
s[, c(3:7)] = round(s[,c(3:7)], 2)
s

## Source: local data frame [4 x 7]
## Groups: Gender [?]
## 
##   Gender Treatment `sd(D0)` `sd(D1)` `sd(D2)` `sd(D3)` `sd(D4)`
##   <fctr>    <fctr>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1      F       ACT    42.18    33.11    58.21    41.66    57.11
## 2      F       PBO    41.53    39.13    47.25    33.43    45.34
## 3      M       ACT    42.35    34.51    40.15    28.40    28.04
## 4      M       PBO    45.01    45.53    43.68    55.38    47.35

There are differences between variances

#cor
cor(subset(data, Gender=="F" & Treatment =="ACT")[,-(1:3)])

##           D0        D1        D2        D3        D4
## D0 1.0000000 0.8021966 0.8724733 0.8483852 0.8757233
## D1 0.8021966 1.0000000 0.8645096 0.7073820 0.9054398
## D2 0.8724733 0.8645096 1.0000000 0.8937510 0.9159571
## D3 0.8483852 0.7073820 0.8937510 1.0000000 0.9251102
## D4 0.8757233 0.9054398 0.9159571 0.9251102 1.0000000

cor(subset(data, Gender=="M" & Treatment =="ACT")[,-(1:3)])

##           D0        D1        D2        D3        D4
## D0 1.0000000 0.8624819 0.8052568 0.5928303 0.4068980
## D1 0.8624819 1.0000000 0.6464277 0.3498338 0.4496799
## D2 0.8052568 0.6464277 1.0000000 0.6704930 0.6323603
## D3 0.5928303 0.3498338 0.6704930 1.0000000 0.5941441
## D4 0.4068980 0.4496799 0.6323603 0.5941441 1.0000000

cor(subset(data, Gender=="F" & Treatment =="PBO")[,-(1:3)])

##           D0        D1        D2        D3        D4
## D0 1.0000000 0.9269134 0.6057892 0.8657560 0.7776916
## D1 0.9269134 1.0000000 0.6826171 0.7707920 0.7579993
## D2 0.6057892 0.6826171 1.0000000 0.3713199 0.4846551
## D3 0.8657560 0.7707920 0.3713199 1.0000000 0.7931043
## D4 0.7776916 0.7579993 0.4846551 0.7931043 1.0000000

cor(subset(data, Gender=="M" & Treatment =="PBO")[,-(1:3)])

##           D0        D1        D2        D3        D4
## D0 1.0000000 0.9271229 0.8530641 0.8833606 0.9202039
## D1 0.9271229 1.0000000 0.7036016 0.7846666 0.9031326
## D2 0.8530641 0.7036016 1.0000000 0.8292800 0.8073104
## D3 0.8833606 0.7846666 0.8292800 1.0000000 0.9197535
## D4 0.9202039 0.9031326 0.8073104 0.9197535 1.0000000

There are strong correlations between observations

summary(m0 <- gls(dis~month+Gender*Treatment+month:Treatment, 
                  correlation = corSymm(form = ~1|PID),
                  weights = varIdent(form = ~1|Time),
                  data = dat))

## Generalized least squares fit by REML
##   Model: dis ~ month + Gender * Treatment + month:Treatment 
##   Data: dat 
##        AIC      BIC    logLik
##   1781.019 1848.533 -869.5095
## 
## Correlation Structure: General
##  Formula: ~1 | PID 
##  Parameter estimate(s):
##  Correlation: 
##   1     2     3     4    
## 2 0.876                  
## 3 0.774 0.721            
## 4 0.777 0.640 0.672      
## 5 0.750 0.730 0.704 0.846
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Time 
##  Parameter estimates:
##        D0        D1        D2        D3        D4 
## 1.0000000 0.8838348 1.0627783 0.9500921 1.0337160 
## 
## Coefficients:
##                          Value Std.Error   t-value p-value
## (Intercept)          177.00300 12.719156 13.916254  0.0000
## month                 10.28326  1.679140  6.124126  0.0000
## GenderM                0.01961 15.640605  0.001254  0.9990
## TreatmentPBO         -11.14603 17.575891 -0.634166  0.5268
## GenderM:TreatmentPBO   0.20352 22.484786  0.009051  0.9928
## month:TreatmentPBO    -7.78950  2.439733 -3.192766  0.0017
## 
##  Correlation: 
##                      (Intr) month  GendrM TrtPBO GM:TPB
## month                -0.304                            
## GenderM              -0.738  0.000                     
## TreatmentPBO         -0.724  0.220  0.534              
## GenderM:TreatmentPBO  0.513  0.000 -0.696 -0.702       
## month:TreatmentPBO    0.210 -0.688  0.000 -0.320  0.000
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.82526759 -0.58373590 -0.06888869  0.53349861  3.70599149 
## 
## Residual standard error: 43.29371 
## Degrees of freedom: 190 total; 184 residual

#Prediction plot and real data points
dat_p = data.frame(dat, fit = fitted(m0))
ggplot(dat_p, aes(x = month, y = fit, group = Gender, color = Treatment))+
  stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "point")+
  stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "line")+
  facet_wrap(~Gender)+
  theme_bw()+
  labs(x = "Time(in months)", y = "Distance(in meters)")+
  theme(legend.position=c(0.1,0.85), legend.background = element_rect(color = "black"))+
  stat_summary(aes(y = dis, color = Treatment, group = Treatment), fun.y = mean, geom = "point", alpha = .2)+
  stat_summary(aes(y = dis, color = Treatment, group = Treatment), fun.y = mean, geom = "line", alpha = .2)+
  stat_summary(aes(y = dis, color = Treatment, group = Treatment), fun.data = mean_se, geom = "errorbar", width = .2, alpha = .2)

Q3

data = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/adas.txt", header = TRUE, na.strings = ".")
dat = data
dat$Treatment = factor(dat$Treatment, levels = c("P", "L", "H"))
dat$PID %<>% as.factor
dat %<>% gather(key = month, value = adas, contains("adas"))
dat %<>% separate(month, c("prefix", "month"), sep = 4)
dat$month %<>% as.integer
dat$prefix=NULL
dat %<>% mutate(Time = as.factor(month))

#Plot
ggplot(dat, aes(x = month, y = adas, group = PID))+
  geom_point(size = .1, alpha = .1, na.rm = TRUE)+
  geom_line(size = .1, alpha = .1, na.rm = TRUE)+
  scale_x_continuous(breaks = c(2,4,6,8,10,12))+
  facet_wrap(~Treatment)+
  stat_summary(aes(group = Treatment, color = Treatment), fun.y = mean, geom = "point", na.rm = TRUE)+
  stat_summary(aes(group = Treatment, color = Treatment), fun.y = mean, geom = "line", na.rm = TRUE)+
  stat_summary(aes(group = Treatment, color = Treatment), fun.data = mean_se, geom = "errorbar", width = .3, na.rm = TRUE)+
  labs(x = "Time (in months)", y = "ADAS score")+
  theme_bw()

#mean
m <- data %>% group_by(Treatment) %>% summarise(av_m2 = mean(adas02, na.rm = TRUE), 
                                                av_m4 = mean(adas04, na.rm = TRUE), 
                                                av_m6 = mean(adas06, na.rm = TRUE),
                                                av_m8 = mean(adas08, na.rm = TRUE), 
                                                av_m10 = mean(adas10, na.rm = TRUE), 
                                                adas12 = mean(adas12, na.rm = TRUE))
m

## # A tibble: 3 × 7
##   Treatment    av_m2    av_m4    av_m6    av_m8   av_m10   adas12
##      <fctr>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1         H 31.58333 34.39130 35.00000 34.95833 34.00000 35.90909
## 2         L 30.64000 34.43478 36.12000 35.20000 37.16000 37.95833
## 3         P 31.58621 34.64286 36.24138 39.17857 39.24138 40.84615

s <- data %>% group_by(Treatment) %>% summarise(sd_m2 = sd(adas02, na.rm = TRUE), 
                                                sd_m4 = sd(adas04, na.rm = TRUE),
                                                sd_m6 = sd(adas06, na.rm = TRUE),
                                                sd_m8 = sd(adas08, na.rm = TRUE),
                                                sd_m10 = sd(adas10, na.rm = TRUE), 
                                                sd_m12 = sd(adas12, na.rm = TRUE))
s

## # A tibble: 3 × 7
##   Treatment    sd_m2    sd_m4    sd_m6     sd_m8    sd_m10    sd_m12
##      <fctr>    <dbl>    <dbl>    <dbl>     <dbl>     <dbl>     <dbl>
## 1         H 9.977874 9.064111 9.044862  9.452302  7.305042  9.354953
## 2         L 7.626052 8.532431 9.070649 10.041580 10.660206 10.339789
## 3         P 8.449502 7.963873 7.702760  8.380107  7.414538  9.172534

Variances might differ between months and treament groups

dat_f = na.omit(dat)
summary(m0 <- gls(adas~Treatment*month, data = dat_f, weights = varIdent(form = ~1|Time)))

## Generalized least squares fit by REML
##   Model: adas ~ Treatment * month 
##   Data: dat_f 
##        AIC      BIC   logLik
##   3279.961 3329.218 -1627.98
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Time 
##  Parameter estimates:
##         2         4         6         8        10        12 
## 1.0000000 0.9656600 0.9739845 1.0531478 0.9747791 1.0943490 
## 
## Coefficients:
##                      Value Std.Error   t-value p-value
## (Intercept)      30.599790 1.5228617 20.093611  0.0000
## TreatmentL        0.253992 2.2402949  0.113374  0.9098
## TreatmentH        1.682556 2.2673593  0.742077  0.4584
## month             0.907921 0.2020499  4.493549  0.0000
## TreatmentL:month -0.273995 0.2956072 -0.926888  0.3545
## TreatmentH:month -0.621001 0.3003345 -2.067700  0.0392
## 
##  Correlation: 
##                  (Intr) TrtmnL TrtmnH month  TrtmL:
## TreatmentL       -0.680                            
## TreatmentH       -0.672  0.457                     
## month            -0.897  0.610  0.602              
## TreatmentL:month  0.613 -0.897 -0.412 -0.684       
## TreatmentH:month  0.603 -0.410 -0.896 -0.673  0.460
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.9539022 -0.7347022 -0.1380573  0.5888577  3.1586040 
## 
## Residual standard error: 8.711074 
## Degrees of freedom: 454 total; 448 residual

summary(m1 <- gls(adas~Treatment*month, data = dat_f, 
                  correlation = corSymm(form = ~1|PID), weights = varIdent(form = ~1|Time)))

## Generalized least squares fit by REML
##   Model: adas ~ Treatment * month 
##   Data: dat_f 
##        AIC      BIC    logLik
##   2758.115 2868.944 -1352.058
## 
## Correlation Structure: General
##  Formula: ~1 | PID 
##  Parameter estimate(s):
##  Correlation: 
##   1     2     3     4     5    
## 2 0.888                        
## 3 0.778 0.824                  
## 4 0.803 0.791 0.865            
## 5 0.793 0.760 0.739 0.868      
## 6 0.766 0.683 0.718 0.810 0.916
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Time 
##  Parameter estimates:
##         2         4         8        10        12         6 
## 1.0000000 0.9326594 1.0210252 1.0059971 1.0502771 0.9436473 
## 
## Coefficients:
##                      Value Std.Error   t-value p-value
## (Intercept)      30.576160 1.6450902 18.586312  0.0000
## TreatmentL        0.917240 2.4154725  0.379735  0.7043
## TreatmentH        1.598239 2.4650553  0.648358  0.5171
## month             0.964399 0.1150145  8.385020  0.0000
## TreatmentL:month -0.300076 0.1671846 -1.794880  0.0733
## TreatmentH:month -0.522063 0.1710964 -3.051280  0.0024
## 
##  Correlation: 
##                  (Intr) TrtmnL TrtmnH month  TrtmL:
## TreatmentL       -0.681                            
## TreatmentH       -0.667  0.455                     
## month            -0.456  0.310  0.304              
## TreatmentL:month  0.314 -0.455 -0.209 -0.688       
## TreatmentH:month  0.306 -0.209 -0.455 -0.672  0.462
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0259439 -0.7883657 -0.1975715  0.5308461  3.0726329 
## 
## Residual standard error: 9.092395 
## Degrees of freedom: 454 total; 448 residual

anova(m0,m1)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m0     1 12 3279.961 3329.218 -1627.980                        
## m1     2 27 2758.115 2868.944 -1352.057 1 vs 2 551.8457  <.0001

m1 is better

summary(m2 <- gls(adas~Treatment*month, data = dat_f, weights = varIdent(form = ~1|Time), 
                  correlation = corAR1(form = ~month|PID)))

## Generalized least squares fit by REML
##   Model: adas ~ Treatment * month 
##   Data: dat_f 
##        AIC      BIC    logLik
##   2772.265 2825.627 -1373.132
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~month | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.9370283 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Time 
##  Parameter estimates:
##         2         4         8        10        12         6 
## 1.0000000 0.9763306 1.0406118 0.9895272 1.0185703 1.0125494 
## 
## Coefficients:
##                      Value Std.Error   t-value p-value
## (Intercept)      30.269621 1.8327914 16.515585  0.0000
## TreatmentL        0.034753 2.6898309  0.012920  0.9897
## TreatmentH        0.870649 2.7432528  0.317378  0.7511
## month             0.933561 0.1673513  5.578449  0.0000
## TreatmentL:month -0.255835 0.2443760 -1.046891  0.2957
## TreatmentH:month -0.525558 0.2492242 -2.108777  0.0355
## 
##  Correlation: 
##                  (Intr) TrtmnL TrtmnH month  TrtmL:
## TreatmentL       -0.681                            
## TreatmentH       -0.668  0.455                     
## month            -0.613  0.418  0.410              
## TreatmentL:month  0.420 -0.614 -0.281 -0.685       
## TreatmentH:month  0.412 -0.281 -0.613 -0.671  0.460
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8738998 -0.6777642 -0.1051968  0.5947320  3.1281170 
## 
## Residual standard error: 9.090338 
## Degrees of freedom: 454 total; 448 residual

anova(m0,m1,m2)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m0     1 12 3279.961 3329.218 -1627.980                        
## m1     2 27 2758.115 2868.944 -1352.057 1 vs 2 551.8457  <.0001
## m2     3 13 2772.265 2825.627 -1373.132 2 vs 3  42.1499   1e-04

m2 is better

#random effect
summary(m3 <- lme(adas~Treatment*month, random = ~1|PID, data = dat_f))

## Linear mixed-effects model fit by REML
##  Data: dat_f 
##       AIC      BIC   logLik
##   2809.16 2841.998 -1396.58
## 
## Random effects:
##  Formula: ~1 | PID
##         (Intercept) Residual
## StdDev:      8.0553 3.988485
## 
## Fixed effects: adas ~ Treatment * month 
##                      Value Std.Error  DF   t-value p-value
## (Intercept)      30.931412 1.6291549 371 18.986170  0.0000
## TreatmentL       -0.022530 2.3921381  77 -0.009418  0.9925
## TreatmentH        1.211469 2.4398321  77  0.496538  0.6209
## month             0.908461 0.0912069 371  9.960441  0.0000
## TreatmentL:month -0.313667 0.1332558 371 -2.353872  0.0191
## TreatmentH:month -0.573026 0.1352116 371 -4.237991  0.0000
##  Correlation: 
##                  (Intr) TrtmnL TrtmnH month  TrtmL:
## TreatmentL       -0.681                            
## TreatmentH       -0.668  0.455                     
## month            -0.386  0.263  0.258              
## TreatmentL:month  0.264 -0.388 -0.176 -0.684       
## TreatmentH:month  0.261 -0.177 -0.383 -0.675  0.462
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -3.9635986 -0.5745229  0.0133242  0.5410091  3.7416613 
## 
## Number of Observations: 454
## Number of Groups: 80

anova(m0,m1,m2,m3)

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m0     1 12 3279.961 3329.218 -1627.980                        
## m1     2 27 2758.115 2868.944 -1352.057 1 vs 2 551.8457  <.0001
## m2     3 13 2772.265 2825.627 -1373.132 2 vs 3  42.1499   1e-04
## m3     4  8 2809.160 2841.998 -1396.580 3 vs 4  46.8947  <.0001

m3 is the best model.
That is, interaction between treatment and time have different effects on different individuals.
In this case, the random effect model is better than all the fixed effect models.

Q4

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/grip_strength.txt", header = TRUE)
str(dat)

## 'data.frame':    201 obs. of  6 variables:
##  $ ID      : int  26 26 26 27 27 27 29 29 29 34 ...
##  $ Baseline: int  175 175 175 165 165 165 175 175 175 178 ...
##  $ Treat   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Gender  : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Time    : int  1 2 3 1 2 3 1 2 3 1 ...
##  $ Strength: int  161 210 230 215 245 265 134 215 139 165 ...

dat$ID %<>% as.factor
dat$Treat %<>% as.factor
dat$Time %<>% as.factor
sum(sapply(dat, is.na))

## [1] 12

ggplot(dat, aes(x = Time, y = Strength, group = ID))+
  geom_point(alpha = .2, size = .2, na.rm = TRUE)+
  geom_line(alpha = .2, size = .2, na.rm = TRUE)+
  facet_wrap(~Gender)+
  stat_summary(aes(group = Treat, color = Treat), fun.y = mean, geom = "point", na.rm = TRUE)+
  stat_summary(aes(group = Treat, color = Treat), fun.y = mean, geom = "line", na.rm = TRUE)+
  stat_summary(aes(group = Treat, color = Treat), fun.data = mean_se, geom = "errorbar", width = .2, na.rm = TRUE)+
  theme_bw()

dat = na.omit(dat)
summary(m1 <- lme(Strength~Baseline+Treat+Gender+Time+
                    Treat:Gender+Treat:Time+Gender:Time+Treat:Gender:Time+
                    Baseline:Treat+Baseline:Gender+Baseline:Time, 
                  random = ~Gender|Time, 
                  data = dat))

## Warning in pt(-abs(tTable[, "t-value"]), tTable[, "DF"]): NaNs produced

## Linear mixed-effects model fit by REML
##  Data: dat 
##        AIC      BIC   logLik
##   1804.938 1871.035 -881.469
## 
## Random effects:
##  Formula: ~Gender | Time
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev   Corr  
## (Intercept) 10.48319 (Intr)
## GenderM     15.72478 0     
## Residual    31.20289       
## 
## Fixed effects: Strength ~ Baseline + Treat + Gender + Time + Treat:Gender +      Treat:Time + Gender:Time + Treat:Gender:Time + Baseline:Treat +      Baseline:Gender + Baseline:Time 
##                          Value Std.Error  DF   t-value p-value
## (Intercept)           25.23477 15.799162 172  1.597222  0.1121
## Baseline               0.82262  0.111836 172  7.355536  0.0000
## Treat2                -6.33103 13.104595 172 -0.483115  0.6296
## GenderM               35.62254 26.579706 172  1.340216  0.1819
## Time2                 10.31298 20.669082   0  0.498957     NaN
## Time3                 10.01182 21.130114   0  0.473818     NaN
## Treat2:GenderM        16.26939 18.979974 172  0.857187  0.3925
## Treat2:Time2           6.91817 14.532133 172  0.476060  0.6346
## Treat2:Time3           1.70443 15.158423 172  0.112441  0.9106
## GenderM:Time2         11.09311 30.309012 172  0.366001  0.7148
## GenderM:Time3         -5.04501 30.619835 172 -0.164763  0.8693
## Baseline:Treat2       -0.00223  0.101527 172 -0.021974  0.9825
## Baseline:GenderM      -0.07486  0.106271 172 -0.704401  0.4821
## Baseline:Time2        -0.09659  0.118335 172 -0.816283  0.4155
## Baseline:Time3        -0.05104  0.120001 172 -0.425347  0.6711
## Treat2:GenderM:Time2  -7.02886 22.105296 172 -0.317972  0.7509
## Treat2:GenderM:Time3 -10.93182 22.612754 172 -0.483436  0.6294
##  Correlation: 
##                      (Intr) Baseln Treat2 GendrM Time2  Time3  Tr2:GM
## Baseline             -0.595                                          
## Treat2               -0.387  0.234                                   
## GenderM              -0.068 -0.091 -0.076                            
## Time2                -0.638  0.243  0.193 -0.027                     
## Time3                -0.617  0.226  0.183 -0.031  0.482              
## Treat2:GenderM        0.058  0.190 -0.044 -0.477 -0.126 -0.127       
## Treat2:Time2          0.226 -0.005 -0.538 -0.118 -0.372 -0.174  0.371
## Treat2:Time3          0.216 -0.003 -0.511 -0.116 -0.170 -0.397  0.360
## GenderM:Time2        -0.027  0.225 -0.120 -0.568  0.037  0.020  0.229
## GenderM:Time3        -0.022  0.215 -0.122 -0.566  0.020  0.018  0.224
## Baseline:Treat2       0.194 -0.327 -0.634  0.352  0.006  0.013 -0.581
## Baseline:GenderM      0.284 -0.477  0.148 -0.554  0.011  0.024  0.180
## Baseline:Time2        0.308 -0.517 -0.013  0.245 -0.489 -0.233 -0.015
## Baseline:Time3        0.291 -0.488 -0.004  0.251 -0.236 -0.487 -0.008
## Treat2:GenderM:Time2 -0.134 -0.021  0.351  0.218  0.223  0.103 -0.570
## Treat2:GenderM:Time3 -0.131 -0.021  0.341  0.216  0.103  0.246 -0.559
##                      Tr2:T2 Tr2:T3 GnM:T2 GnM:T3 Bsln:Tr2 Bsl:GM Bsln:Tm2
## Baseline                                                                 
## Treat2                                                                   
## GenderM                                                                  
## Time2                                                                    
## Time3                                                                    
## Treat2:GenderM                                                           
## Treat2:Time2                                                             
## Treat2:Time3          0.466                                              
## GenderM:Time2         0.221  0.106                                       
## GenderM:Time3         0.109  0.243  0.492                                
## Baseline:Treat2      -0.001 -0.007 -0.004  0.001                         
## Baseline:GenderM     -0.017 -0.013  0.001  0.010 -0.260                  
## Baseline:Time2        0.044  0.014 -0.421 -0.210 -0.003   -0.009         
## Baseline:Time3        0.014  0.041 -0.209 -0.421 -0.017   -0.033  0.488  
## Treat2:GenderM:Time2 -0.655 -0.306 -0.397 -0.192  0.003    0.008  0.015  
## Treat2:GenderM:Time3 -0.312 -0.669 -0.190 -0.406  0.007    0.004  0.014  
##                      Bsl:T3 T2:GM:T2
## Baseline                            
## Treat2                              
## GenderM                             
## Time2                               
## Time3                               
## Treat2:GenderM                      
## Treat2:Time2                        
## Treat2:Time3                        
## GenderM:Time2                       
## GenderM:Time3                       
## Baseline:Treat2                     
## Baseline:GenderM                    
## Baseline:Time2                      
## Baseline:Time3                      
## Treat2:GenderM:Time2  0.014         
## Treat2:GenderM:Time3  0.015  0.477  
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.19443564 -0.52693293 -0.01183254  0.53454474  3.88997716 
## 
## Number of Observations: 189
## Number of Groups: 3

Q5

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/partners.txt", header = TRUE)
str(dat)

## 'data.frame':    464 obs. of  5 variables:
##  $ Sbj    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Gender : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Session: Factor w/ 4 levels "T1","T2","T3",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Time   : Factor w/ 2 levels "B","E": 1 1 1 1 2 2 2 2 1 1 ...
##  $ PANAS  : int  15 16 16 14 35 35 34 41 14 14 ...

sum(sapply(dat, is.na)) #yeah

## [1] 0

dat %<>% mutate(group = paste0(Gender, Time))
dat$group %<>% as.factor
dat$Sbj %<>% as.factor

p = position_dodge(.1)
mm = mean(dat$PANAS)
ggplot(dat, aes(x = Session, y = PANAS, color = group, group = group))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .3, position = p)+
  geom_hline(yintercept = mm, linetype = 2)+
  labs(x = "Session", y = "Mean PANAS")+
  theme_bw()+
  theme(legend.position = "top")+
  scale_color_discrete(name = "Gender:Time", labels = c("F:B", "F:E", "M:B", "M:E"))

g1 = ggplot(dat, aes(x = Session, y = PANAS, color = Gender, group = Gender))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .3)+
  geom_hline(yintercept = mm, linetype = 2)+
  labs(x = "Session", y = "Mean PANAS")+
  theme_bw()+
  theme(legend.position = "top")

g2 = ggplot(dat, aes(x = Session, y = PANAS, color = Time, group = Time))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .3)+
  geom_hline(yintercept = mm, linetype = 2)+
  labs(x = "Session", y = "Mean PANAS")+
  theme_bw()+
  theme(legend.position = "top")

g3 = ggplot(dat, aes(x = Session, y = PANAS))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .3)+
  geom_hline(yintercept = mm, linetype = 2)+
  labs(x = "Session", y = "Mean PANAS")+
  theme_bw()+
  theme(legend.position = "top")

grid.arrange(g1,g2,g3, nrow = 1, ncol = 3)

summary(m0<- lmer(PANAS~Gender*Session*Time+(1|Sbj/Session)+(1|Sbj/Time), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## PANAS ~ Gender * Session * Time + (1 | Sbj/Session) + (1 | Sbj/Time)
##    Data: dat
## 
## REML criterion at convergence: 2749.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8159 -0.5614 -0.0136  0.4159  5.2048 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Session.Sbj (Intercept)  0.2083  0.4564  
##  Time.Sbj    (Intercept) 58.8769  7.6731  
##  Sbj         (Intercept)  0.0000  0.0000  
##  Sbj.1       (Intercept)  0.0000  0.0000  
##  Residual                15.8276  3.9784  
## Number of obs: 464, groups:  Session:Sbj, 116; Time:Sbj, 58; Sbj, 29
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)              23.8636     1.7451  13.675
## GenderM                  -1.0065     3.5520  -0.283
## SessionT2                -2.1136     0.8593  -2.460
## SessionT3                -2.8864     0.8593  -3.359
## SessionT4                -4.7045     0.8593  -5.475
## TimeE                     6.9091     2.4641   2.804
## GenderM:SessionT2         2.6851     1.7490   1.535
## GenderM:SessionT3         2.5292     1.7490   1.446
## GenderM:SessionT4         8.5617     1.7490   4.895
## GenderM:TimeE            -2.1948     5.0155  -0.438
## SessionT2:TimeE           2.2045     1.1995   1.838
## SessionT3:TimeE           1.6591     1.1995   1.383
## SessionT4:TimeE           5.1818     1.1995   4.320
## GenderM:SessionT2:TimeE  -1.9903     2.4415  -0.815
## GenderM:SessionT3:TimeE   1.6266     2.4415   0.666
## GenderM:SessionT4:TimeE  -6.4675     2.4415  -2.649

## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##   vcov(x)     if you need it

anova(m0)

## Analysis of Variance Table
##                     Df  Sum Sq Mean Sq F value
## Gender               1   0.652   0.652  0.0412
## Session              3  89.765  29.922  1.8905
## Time                 1 255.341 255.341 16.1326
## Gender:Session       3 314.408 104.803  6.6215
## Gender:Time          1  10.518  10.518  0.6645
## Session:Time         3 191.817  63.939  4.0397
## Gender:Session:Time  3 195.260  65.087  4.1122

AIC(m0)

## [1] 2791.237

Q6

require(foreign)

## Loading required package: foreign

dat = read.dta("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/bodyfat.dta")
dat$id %<>% as.factor
sum(sapply(dat, is.na))

## [1] 0

dat$age_f = as.factor(dat$age)

ggplot(dat, aes(x = age, y = bodyfat, group = id, color = id))+
  geom_point(size = .1, alpha = .5)+
  geom_line(size = .1, alpha = .5)+
  facet_wrap(~sex)+
  labs(x = "Age", y = "Body fat (kg)")+
  theme_bw()+
  theme(legend.position = "none")

ggplot(dat, aes(x = age, y = bodyfat, group = sex, color = sex))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .2)+
  labs(x = "Age", y = "Body fat (kg)")+
  theme_bw()

summary(m0 <- lme(bodyfat~sex*age, random = ~sex|age,data = dat))

## Linear mixed-effects model fit by REML
##  Data: dat 
##        AIC      BIC    logLik
##   10376.27 10422.09 -5180.134
## 
## Random effects:
##  Formula: ~sex | age
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev       Corr  
## (Intercept) 1.023563e-04 (Intr)
## sexmale     6.886174e-05 -0.001
## Residual    2.357100e+00       
## 
## Fixed effects: bodyfat ~ sex * age 
##                  Value Std.Error   DF   t-value p-value
## (Intercept)  1.7780638 0.5818216 2268  3.056029  0.0023
## sexmale      0.9879022 0.7914043 2268  1.248290  0.2121
## age          0.6387135 0.0446062    1 14.318932  0.0444
## sexmale:age -0.1694899 0.0606550 2268 -2.794329  0.0052
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.992  0.729       
## sexmale:age  0.730 -0.992 -0.735
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.37650737 -0.66432159 -0.08668264  0.59228998  4.35814063 
## 
## Number of Observations: 2273
## Number of Groups: 3

summary(m1 <- lm(bodyfat~sex*age, data = dat))

## 
## Call:
## lm(formula = bodyfat ~ sex * age, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9588 -1.5659 -0.2043  1.3961 10.2726 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.77806    0.58182   3.056  0.00227 ** 
## sexmale      0.98790    0.79140   1.248  0.21205    
## age          0.63871    0.04461  14.319  < 2e-16 ***
## sexmale:age -0.16949    0.06065  -2.794  0.00524 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.357 on 2269 degrees of freedom
## Multiple R-squared:  0.1752, Adjusted R-squared:  0.1741 
## F-statistic: 160.7 on 3 and 2269 DF,  p-value: < 2.2e-16

anova(m1)

## Analysis of Variance Table
## 
## Response: bodyfat
##             Df  Sum Sq Mean Sq  F value    Pr(>F)    
## sex          1   814.7  814.69 146.6341 < 2.2e-16 ***
## age          1  1819.9 1819.87 327.5554 < 2.2e-16 ***
## sex:age      1    43.4   43.38   7.8083  0.005244 ** 
## Residuals 2269 12606.4    5.56                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m0, m1)

##    Model df      AIC      BIC    logLik   Test      L.Ratio p-value
## m0     1  8 10376.27 10422.08 -5180.134                            
## m1     2  5 10370.27 10398.90 -5180.134 1 vs 2 1.631985e-06       1

Random effect model did not perform better than fixed effect model.

m <- dat %>% group_by(sex, age) %>% summarise(mean(bodyfat))
m

## Source: local data frame [6 x 3]
## Groups: sex [?]
## 
##      sex   age `mean(bodyfat)`
##   <fctr> <dbl>           <dbl>
## 1 female    11        8.771233
## 2 female    13       10.150289
## 3 female    15       11.323054
## 4   male    11        7.937471
## 5   male    13        8.844691
## 6   male    15        9.815152

There is defference between sex.

s <- dat %>% group_by(sex, age) %>% summarise(sd(bodyfat))
s

## Source: local data frame [6 x 3]
## Groups: sex [?]
## 
##      sex   age `sd(bodyfat)`
##   <fctr> <dbl>         <dbl>
## 1 female    11      1.925571
## 2 female    13      2.265062
## 3 female    15      2.797142
## 4   male    11      1.983441
## 5   male    13      2.217271
## 6   male    15      2.853605

The variances did bot differ too much.

summary(m2 <- gls(bodyfat~sex*age, correlation = corCompSymm(form = ~1|id), data = dat))

## Generalized least squares fit by REML
##   Model: bodyfat ~ sex * age 
##   Data: dat 
##        AIC      BIC   logLik
##   10261.76 10296.12 -5124.88
## 
## Correlation Structure: Compound symmetry
##  Formula: ~1 | id 
##  Parameter estimate(s):
##       Rho 
## 0.2345844 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.8033821 0.5142196  3.507027  0.0005
## sexmale      0.9717458 0.6993056  1.389587  0.1648
## age          0.6364110 0.0392102 16.230740  0.0000
## sexmale:age -0.1678481 0.0533028 -3.148955  0.0017
## 
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.985  0.725       
## sexmale:age  0.725 -0.985 -0.736
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.37247100 -0.66453990 -0.08650268  0.59241291  4.35717555 
## 
## Residual standard error: 2.357188 
## Degrees of freedom: 2273 total; 2269 residual

anova(m0,m1,m2)

##    Model df      AIC      BIC    logLik   Test L.Ratio p-value
## m0     1  8 10376.27 10422.08 -5180.134                       
## m1     2  5 10370.27 10398.90 -5180.134 1 vs 2   0.000       1
## m2     3  6 10261.76 10296.12 -5124.880 2 vs 3 110.509  <.0001

summary(m3 <- gls(bodyfat~sex*age, correlation = corExp(form = ~1|id), data = dat))

## Generalized least squares fit by REML
##   Model: bodyfat ~ sex * age 
##   Data: dat 
##        AIC      BIC    logLik
##   10199.23 10233.59 -5093.614
## 
## Correlation Structure: Exponential spatial correlation
##  Formula: ~1 | id 
##  Parameter estimate(s):
##     range 
## 0.9272448 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.8099458 0.5504187  3.288307  0.0010
## sexmale      0.9584834 0.7488041  1.280019  0.2007
## age          0.6353528 0.0420303 15.116526  0.0000
## sexmale:age -0.1659740 0.0571591 -2.903722  0.0037
## 
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.987  0.725       
## sexmale:age  0.726 -0.987 -0.735
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.36153867 -0.66481672 -0.08852797  0.59319353  4.34717967 
## 
## Residual standard error: 2.362084 
## Degrees of freedom: 2273 total; 2269 residual

anova(m2,m3) #m3 is better

##    Model df      AIC      BIC    logLik
## m2     1  6 10261.76 10296.12 -5124.880
## m3     2  6 10199.23 10233.59 -5093.614

summary(m4 <- gls(bodyfat~sex*age, correlation = corAR1(form = ~age|id), data = dat))

## Generalized least squares fit by REML
##   Model: bodyfat ~ sex * age 
##   Data: dat 
##        AIC      BIC    logLik
##   10372.27 10406.63 -5180.134
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~age | id 
##  Parameter estimate(s):
## Phi1 
##    0 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.7780638 0.5818214  3.056030  0.0023
## sexmale      0.9879022 0.7914043  1.248290  0.2121
## age          0.6387135 0.0446062 14.318936  0.0000
## sexmale:age -0.1694899 0.0606550 -2.794329  0.0052
## 
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.992  0.729       
## sexmale:age  0.730 -0.992 -0.735
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.37650737 -0.66432158 -0.08668265  0.59228998  4.35814062 
## 
## Residual standard error: 2.3571 
## Degrees of freedom: 2273 total; 2269 residual

anova(m3,m4) #m3 is better

##    Model df      AIC      BIC    logLik
## m3     1  6 10199.23 10233.59 -5093.614
## m4     2  6 10372.27 10406.63 -5180.134

summary(m5 <- gls(bodyfat~sex*age, correlation = corExp(form = ~1|id), weights = varIdent(form = ~1|age_f), data = dat))

## Generalized least squares fit by REML
##   Model: bodyfat ~ sex * age 
##   Data: dat 
##        AIC      BIC    logLik
##   10116.02 10161.84 -5050.012
## 
## Correlation Structure: Exponential spatial correlation
##  Formula: ~1 | id 
##  Parameter estimate(s):
##     range 
## 0.8711536 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | age_f 
##  Parameter estimates:
##       11       13       15 
## 1.000000 1.118751 1.378045 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.7305038 0.5369058  3.223105  0.0013
## sexmale      1.0546176 0.7303529  1.443984  0.1489
## age          0.6419058 0.0428299 14.987327  0.0000
## sexmale:age -0.1739068 0.0582457 -2.985743  0.0029
## 
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.988  0.726       
## sexmale:age  0.726 -0.988 -0.735
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.8830852 -0.7001270 -0.0955765  0.5938367  5.1250317 
## 
## Residual standard error: 2.003283 
## Degrees of freedom: 2273 total; 2269 residual

anova(m3,m5) #m5 is better

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m3     1  6 10199.23 10233.59 -5093.614                        
## m5     2  8 10116.02 10161.84 -5050.012 1 vs 2 87.20272  <.0001

summary(m6 <- gls(bodyfat~sex*age, correlation = corExp(form = ~1|id), weights = varExp(form = ~age), data = dat))

## Generalized least squares fit by REML
##   Model: bodyfat ~ sex * age 
##   Data: dat 
##        AIC      BIC   logLik
##   10116.66 10156.75 -5051.33
## 
## Correlation Structure: Exponential spatial correlation
##  Formula: ~1 | id 
##  Parameter estimate(s):
##     range 
## 0.8825129 
## Variance function:
##  Structure: Exponential of variance covariate
##  Formula: ~age 
##  Parameter estimates:
##      expon 
## 0.08030966 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.7351781 0.5287982  3.281362  0.0010
## sexmale      1.0476842 0.7193111  1.456511  0.1454
## age          0.6412482 0.0422524 15.176594  0.0000
## sexmale:age -0.1729838 0.0574584 -3.010593  0.0026
## 
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.987  0.726       
## sexmale:age  0.726 -0.987 -0.735
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.92165397 -0.70345159 -0.09567755  0.59067039  5.19962262 
## 
## Residual standard error: 0.8161702 
## Degrees of freedom: 2273 total; 2269 residual

anova(m5,m6) #m6 is better because of smaller df

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m5     1  8 10116.02 10161.84 -5050.012                        
## m6     2  7 10116.66 10156.75 -5051.330 1 vs 2 2.635837  0.1045

summary(m7 <- gls(bodyfat~sex*age, correlation = corSymm(form = ~1|id), weights = varExp(form = ~age), data = dat))

## Generalized least squares fit by REML
##   Model: bodyfat ~ sex * age 
##   Data: dat 
##        AIC      BIC    logLik
##   10078.37 10129.92 -5030.187
## 
## Correlation Structure: General
##  Formula: ~1 | id 
##  Parameter estimate(s):
##  Correlation: 
##   1     2    
## 2 0.169      
## 3 0.057 0.467
## Variance function:
##  Structure: Exponential of variance covariate
##  Formula: ~age 
##  Parameter estimates:
##      expon 
## 0.09320165 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.8063673 0.5466440  3.304467  0.0010
## sexmale      0.9706584 0.7433525  1.305785  0.1918
## age          0.6355736 0.0444097 14.311598  0.0000
## sexmale:age -0.1670398 0.0603772 -2.766604  0.0057
## 
##  Correlation: 
##             (Intr) sexmal age   
## sexmale     -0.735              
## age         -0.989  0.727       
## sexmale:age  0.727 -0.989 -0.736
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.8424922 -0.7177972 -0.1027411  0.6076323  5.3372886 
## 
## Residual standard error: 0.6901846 
## Degrees of freedom: 2273 total; 2269 residual

anova(m6,m7) #m7 is better

##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## m6     1  7 10116.66 10156.75 -5051.330                        
## m7     2  9 10078.37 10129.92 -5030.187 1 vs 2 42.28675  <.0001

m7 is the best model.

#Prediction plot
dat_p = data.frame(dat, fit = fitted(m7))
ggplot(dat_p, aes(x = age, y = fit, group = sex, color = sex))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(aes(y = bodyfat), fun.y = mean, geom = "point", alpha = .5)+
  stat_summary(aes(y = bodyfat), fun.y = mean, geom = "line", alpha = .5, linetype = 2)+
  stat_summary(aes(y = bodyfat), fun.data = mean_se, geom = "errorbar", width = .3, alpha = .5, linetype = 2)+
  labs(x = "Age", y = "Body fat (kg)")+
  theme_bw()

The prediciton of m7 is close to real data.(dash line = real data, solid line = prediction)

anova(m7)

## Denom. DF: 2269 
##             numDF   F-value p-value
## (Intercept)     1 26661.471  <.0001
## sex             1    92.544  <.0001
## age             1   328.369  <.0001
## sex:age         1     7.654  0.0057

(1)Yes, body fat increases with age for both girls and boys.
(2)Yes, there is a difference between girls and boys with respect to the increase of the body fat with age.
(3)Yes, girls tend to have more body fat than boys.

Exercises: Covariance patterns models

Yu-Han, Luo

Q1

Q2

Q3

Q4

Q5

Q6