p = c("tidyverse", "nlme", "car", "magrittr", "lme4", "gridExtra")
lapply(p, library, character.only = TRUE)
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/weightlifting.txt", header = TRUE, na.strings = ".")
str(dat)
## 'data.frame': 37 obs. of 9 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Program: Factor w/ 2 levels "R","W": 1 1 1 1 1 1 1 1 1 1 ...
## $ Day0 : int 79 83 81 81 80 76 81 77 84 74 ...
## $ Day2 : int NA 83 83 81 81 76 84 78 85 75 ...
## $ Day4 : int 79 85 82 81 82 76 83 79 87 78 ...
## $ Day6 : int 80 85 82 82 82 76 83 79 89 78 ...
## $ Day8 : int 80 86 83 82 82 76 85 81 NA 79 ...
## $ Day10 : int 78 87 83 83 NA 76 85 82 NA 78 ...
## $ Day12 : int 80 87 82 81 86 75 85 81 86 78 ...
head(dat)
## ID Program Day0 Day2 Day4 Day6 Day8 Day10 Day12
## 1 1 R 79 NA 79 80 80 78 80
## 2 2 R 83 83 85 85 86 87 87
## 3 3 R 81 83 82 82 83 83 82
## 4 4 R 81 81 81 82 82 83 81
## 5 5 R 80 81 82 82 82 NA 86
## 6 6 R 76 76 76 76 76 76 75
dat %<>% gather(key = Day, value = strength, contains("Day"))
dat %<>% separate(Day, c("prefix","day"), sep = 3)
dat$day %<>% as.numeric
dat$prefix=NULL
dat$ID %<>% as.factor
dat$day %<>% as.integer
#plot
ggplot(dat, aes(x = day, y = strength, group = ID, color = ID))+
geom_point(alpha = 0.2, na.rm = TRUE)+
geom_line(alpha = 0.2, na.rm = TRUE)+
stat_summary(aes(group = Program), fun.y = mean, geom = "point", na.rm = TRUE, color = "navy", size = 1.5)+
stat_summary(aes(group = Program), fun.y = mean, geom = "line", na.rm = TRUE, color = "navy", size = 1.3)+
stat_summary(aes(group = Program), fun.data = mean_se, geom = "errorbar", width = .5, na.rm = TRUE, color = "navy")+
facet_wrap(~Program)+
scale_x_continuous(breaks = c(0,2,4,6,8,10,12))+
labs(x = "Day(s)", y = "Strength")+
theme_bw()+
theme(legend.position = "none")
datf = na.omit(dat)
datf %<>% mutate(day_f = factor(day))
summary(m0 <- gls(strength~Program, weights = varIdent(form = ~1|day_f), data = datf))
## Generalized least squares fit by REML
## Model: strength ~ Program
## Data: datf
## AIC BIC logLik
## 1274.613 1305.826 -628.3066
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | day_f
## Parameter estimates:
## 0 2 4 6 8 10 12
## 1.0000000 1.0178766 1.0862158 0.9942133 1.1120794 1.0142258 1.1171382
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 80.75039 0.3278372 246.31250 0.0000
## ProgramW 1.31659 0.4371582 3.01171 0.0029
##
## Correlation:
## (Intr)
## ProgramW -0.75
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.51141160 -0.81475040 -0.01919753 0.66640424 2.58321595
##
## Residual standard error: 3.21213
## Degrees of freedom: 239 total; 237 residual
summary(m1 <- gls(strength~Program, weights = varIdent(form = ~1|day_f),
correlation = corAR1(form = ~day|ID), data = datf))
## Generalized least squares fit by REML
## Model: strength ~ Program
## Data: datf
## AIC BIC logLik
## 828.1655 862.8461 -404.0827
##
## Correlation Structure: ARMA(1,0)
## Formula: ~day | ID
## Parameter estimate(s):
## Phi1
## -0.9746822
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | day_f
## Parameter estimates:
## 0 4 6 8 10 12 2
## 1.0000000 1.0474298 0.9453299 1.0624712 1.0405211 1.0190228 1.0055976
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 80.40492 0.7361776 109.21946 0.0000
## ProgramW 2.15930 0.9782057 2.20741 0.0282
##
## Correlation:
## (Intr)
## ProgramW -0.753
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.4897401 -0.7315386 -0.1245228 0.6877192 2.6432181
##
## Residual standard error: 3.439804
## Degrees of freedom: 239 total; 237 residual
anova(m0,m1)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 9 1274.6131 1305.8257 -628.3066
## m1 2 10 828.1655 862.8461 -404.0827 1 vs 2 448.4476 <.0001
m1 is better
summary(m2 <- gls(strength~Program, weights = varIdent(form = ~1|day_f),
correlation = corCompSymm(form = ~1|ID), data = datf))
## Generalized least squares fit by REML
## Model: strength ~ Program
## Data: datf
## AIC BIC logLik
## 926.9555 961.6361 -453.4777
##
## Correlation Structure: Compound symmetry
## Formula: ~1 | ID
## Parameter estimate(s):
## Rho
## 0.9441032
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | day_f
## Parameter estimates:
## 0 4 6 8 10 12 2
## 1.000000 1.211367 1.260039 1.337123 1.354596 1.403072 1.142275
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 77.64349 0.7445963 104.276 0.0000
## ProgramW 1.02776 0.9939638 1.034 0.3022
##
## Correlation:
## (Intr)
## ProgramW -0.749
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.19837982 0.08789843 0.67773094 1.13146200 2.61098316
##
## Residual standard error: 3.897976
## Degrees of freedom: 239 total; 237 residual
anova(m0,m1,m2)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 9 1274.6131 1305.8257 -628.3066
## m1 2 10 828.1655 862.8461 -404.0827 1 vs 2 448.4476 <.0001
## m2 3 10 926.9555 961.6361 -453.4777
m1 is still better than other models.
summary(m3 <- gls(strength~Program, weights = varExp(form = ~day),
correlation = corCompSymm(form = ~1|ID), data = datf))
## Generalized least squares fit by REML
## Model: strength ~ Program
## Data: datf
## AIC BIC logLik
## 920.4576 937.7979 -455.2288
##
## Correlation Structure: Compound symmetry
## Formula: ~1 | ID
## Parameter estimate(s):
## Rho
## 0.9306234
## Variance function:
## Structure: Exponential of variance covariate
## Formula: ~day
## Parameter estimates:
## expon
## 0.02641161
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 78.30192 0.7585038 103.23208 0.0000
## ProgramW 0.92879 1.0145621 0.91546 0.3609
##
## Correlation:
## (Intr)
## ProgramW -0.748
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.37991587 -0.04673772 0.62349954 1.13204254 2.79357078
##
## Residual standard error: 3.790606
## Degrees of freedom: 239 total; 237 residual
anova(m0,m1,m2,m3)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 9 1274.6131 1305.8257 -628.3066
## m1 2 10 828.1655 862.8461 -404.0827 1 vs 2 448.4476 <.0001
## m2 3 10 926.9555 961.6361 -453.4777
## m3 4 5 920.4576 937.7979 -455.2288 3 vs 4 3.5021 0.6231
anova(m1,m3)
## Model df AIC BIC logLik Test L.Ratio p-value
## m1 1 10 828.1655 862.8461 -404.0827
## m3 2 5 920.4576 937.7979 -455.2288 1 vs 2 102.2921 <.0001
m1 is still better
summary(m4 <- lme(strength~Program, random = ~1|ID, weights = varIdent(form = ~1|day_f),
correlation = corCompSymm(form = ~1|ID), data = datf))
## Linear mixed-effects model fit by REML
## Data: datf
## AIC BIC logLik
## 910.9188 949.0675 -444.4594
##
## Random effects:
## Formula: ~1 | ID
## (Intercept) Residual
## StdDev: 3.341608 1.730967
##
## Correlation Structure: Compound symmetry
## Formula: ~1 | ID
## Parameter estimate(s):
## Rho
## -0.1666666
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | day_f
## Parameter estimates:
## 0 4 6 8 10 12 2
## 1.0000000 0.3777633 0.4409408 0.4987618 0.6520739 0.7361915 0.7992358
## Fixed effects: strength ~ Program
## Value Std.Error DF t-value p-value
## (Intercept) 80.90847 0.8358707 202 96.79544 0.0000
## ProgramW 1.35442 1.1096439 35 1.22059 0.2304
## Correlation:
## (Intr)
## ProgramW -0.753
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.09167743 -0.66831698 -0.05790854 0.64805069 2.98611281
##
## Number of Observations: 239
## Number of Groups: 37
anova(m1,m4)
## Model df AIC BIC logLik Test L.Ratio p-value
## m1 1 10 828.1655 862.8461 -404.0827
## m4 2 11 910.9188 949.0675 -444.4594 1 vs 2 80.75333 <.0001
summary(m6 <- lme(strength~Program, random = ~1|ID, data = datf))
## Linear mixed-effects model fit by REML
## Data: datf
## AIC BIC logLik
## 929.0306 942.9028 -460.5153
##
## Random effects:
## Formula: ~1 | ID
## (Intercept) Residual
## StdDev: 3.23085 1.252571
##
## Fixed effects: strength ~ Program
## Value Std.Error DF t-value p-value
## (Intercept) 80.81873 0.8169969 202 98.92171 0.0000
## ProgramW 1.34095 1.0846601 35 1.23628 0.2246
## Correlation:
## (Intr)
## ProgramW -0.753
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.6546248 -0.5274829 0.0307856 0.5948745 3.0866709
##
## Number of Observations: 239
## Number of Groups: 37
anova(m1,m6)
## Model df AIC BIC logLik Test L.Ratio p-value
## m1 1 10 828.1655 862.8461 -404.0827
## m6 2 4 929.0306 942.9028 -460.5153 1 vs 2 112.8651 <.0001
m1 is the best model. That is, the fixed effect model with designated correlation matrix and weights is better than random effect model sometimes.
data = read.csv("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/claudication.csv", header = TRUE)
dat = data
str(dat)
## 'data.frame': 38 obs. of 8 variables:
## $ Treatment: Factor w/ 2 levels "ACT","PBO": 1 1 1 1 1 1 1 1 1 1 ...
## $ PID : Factor w/ 38 levels "P101","P102",..: 1 5 9 12 17 22 23 28 29 32 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ D0 : int 190 98 155 245 182 140 196 162 195 167 ...
## $ D1 : int 212 137 145 228 205 138 185 176 232 187 ...
## $ D2 : int 213 185 196 280 218 187 185 192 199 228 ...
## $ D3 : int 195 215 189 274 194 195 227 230 185 192 ...
## $ D4 : int 248 225 176 260 193 205 180 215 200 210 ...
dat %<>% gather(key = month, value = dis, D0,D1,D2,D3,D4)
dat %<>% separate(month, c("prefix", "month"), sep = 1)
dat$month %<>% as.numeric
dat$prefix=NULL
dat %<>% mutate(Time = ifelse(month==0, "D0",
ifelse(month==1, "D1",
ifelse(month==2, "D2",
ifelse(month==3, "D3", "D4")))))
dat$Time %<>% as.factor
#Plot
ggplot(dat, aes(x = month, y = dis, group = Gender, color = Treatment))+
geom_point(alpha = .2)+
stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "point")+
stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "line")+
stat_summary(aes(color = Treatment, group = Treatment), fun.data = mean_se, geom = "errorbar", width = .2)+
facet_wrap(~Gender)+
theme_bw()+
labs(x = "Time(in months)", y = "Distance(in meters)")+
theme(legend.position=c(0.1,0.85), legend.background = element_rect(color = "black"))
#mean
m <- data %>% group_by(Gender, Treatment) %>% summarise(av_D0 = mean(D0),
av_D1 = mean(D1),
av_D2 = mean(D2),
av_D3 = mean(D3),
av_D4 = mean(D4)
)
m[, c(3:7)] = round(m[,c(3:7)], 2)
m
## Source: local data frame [4 x 7]
## Groups: Gender [?]
##
## Gender Treatment av_D0 av_D1 av_D2 av_D3 av_D4
## <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 F ACT 180.38 193.38 207.88 196.75 208.50
## 2 F PBO 165.56 170.11 175.78 168.11 171.22
## 3 M ACT 163.17 179.50 195.58 204.08 207.67
## 4 M PBO 178.33 165.56 173.33 193.67 194.22
There are diffrences between means
#sd
s <- data %>% group_by(Gender, Treatment) %>% summarise(sd(D0),
sd(D1),
sd(D2),
sd(D3),
sd(D4)
)
s[, c(3:7)] = round(s[,c(3:7)], 2)
s
## Source: local data frame [4 x 7]
## Groups: Gender [?]
##
## Gender Treatment `sd(D0)` `sd(D1)` `sd(D2)` `sd(D3)` `sd(D4)`
## <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 F ACT 42.18 33.11 58.21 41.66 57.11
## 2 F PBO 41.53 39.13 47.25 33.43 45.34
## 3 M ACT 42.35 34.51 40.15 28.40 28.04
## 4 M PBO 45.01 45.53 43.68 55.38 47.35
There are differences between variances
#cor
cor(subset(data, Gender=="F" & Treatment =="ACT")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.8021966 0.8724733 0.8483852 0.8757233
## D1 0.8021966 1.0000000 0.8645096 0.7073820 0.9054398
## D2 0.8724733 0.8645096 1.0000000 0.8937510 0.9159571
## D3 0.8483852 0.7073820 0.8937510 1.0000000 0.9251102
## D4 0.8757233 0.9054398 0.9159571 0.9251102 1.0000000
cor(subset(data, Gender=="M" & Treatment =="ACT")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.8624819 0.8052568 0.5928303 0.4068980
## D1 0.8624819 1.0000000 0.6464277 0.3498338 0.4496799
## D2 0.8052568 0.6464277 1.0000000 0.6704930 0.6323603
## D3 0.5928303 0.3498338 0.6704930 1.0000000 0.5941441
## D4 0.4068980 0.4496799 0.6323603 0.5941441 1.0000000
cor(subset(data, Gender=="F" & Treatment =="PBO")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.9269134 0.6057892 0.8657560 0.7776916
## D1 0.9269134 1.0000000 0.6826171 0.7707920 0.7579993
## D2 0.6057892 0.6826171 1.0000000 0.3713199 0.4846551
## D3 0.8657560 0.7707920 0.3713199 1.0000000 0.7931043
## D4 0.7776916 0.7579993 0.4846551 0.7931043 1.0000000
cor(subset(data, Gender=="M" & Treatment =="PBO")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.9271229 0.8530641 0.8833606 0.9202039
## D1 0.9271229 1.0000000 0.7036016 0.7846666 0.9031326
## D2 0.8530641 0.7036016 1.0000000 0.8292800 0.8073104
## D3 0.8833606 0.7846666 0.8292800 1.0000000 0.9197535
## D4 0.9202039 0.9031326 0.8073104 0.9197535 1.0000000
There are strong correlations between observations
summary(m0 <- gls(dis~month+Gender*Treatment+month:Treatment,
correlation = corSymm(form = ~1|PID),
weights = varIdent(form = ~1|Time),
data = dat))
## Generalized least squares fit by REML
## Model: dis ~ month + Gender * Treatment + month:Treatment
## Data: dat
## AIC BIC logLik
## 1781.019 1848.533 -869.5095
##
## Correlation Structure: General
## Formula: ~1 | PID
## Parameter estimate(s):
## Correlation:
## 1 2 3 4
## 2 0.876
## 3 0.774 0.721
## 4 0.777 0.640 0.672
## 5 0.750 0.730 0.704 0.846
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Time
## Parameter estimates:
## D0 D1 D2 D3 D4
## 1.0000000 0.8838348 1.0627783 0.9500921 1.0337160
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 177.00300 12.719156 13.916254 0.0000
## month 10.28326 1.679140 6.124126 0.0000
## GenderM 0.01961 15.640605 0.001254 0.9990
## TreatmentPBO -11.14603 17.575891 -0.634166 0.5268
## GenderM:TreatmentPBO 0.20352 22.484786 0.009051 0.9928
## month:TreatmentPBO -7.78950 2.439733 -3.192766 0.0017
##
## Correlation:
## (Intr) month GendrM TrtPBO GM:TPB
## month -0.304
## GenderM -0.738 0.000
## TreatmentPBO -0.724 0.220 0.534
## GenderM:TreatmentPBO 0.513 0.000 -0.696 -0.702
## month:TreatmentPBO 0.210 -0.688 0.000 -0.320 0.000
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.82526759 -0.58373590 -0.06888869 0.53349861 3.70599149
##
## Residual standard error: 43.29371
## Degrees of freedom: 190 total; 184 residual
#Prediction plot and real data points
dat_p = data.frame(dat, fit = fitted(m0))
ggplot(dat_p, aes(x = month, y = fit, group = Gender, color = Treatment))+
stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "point")+
stat_summary(aes(color = Treatment, group = Treatment), fun.y = mean, geom = "line")+
facet_wrap(~Gender)+
theme_bw()+
labs(x = "Time(in months)", y = "Distance(in meters)")+
theme(legend.position=c(0.1,0.85), legend.background = element_rect(color = "black"))+
stat_summary(aes(y = dis, color = Treatment, group = Treatment), fun.y = mean, geom = "point", alpha = .2)+
stat_summary(aes(y = dis, color = Treatment, group = Treatment), fun.y = mean, geom = "line", alpha = .2)+
stat_summary(aes(y = dis, color = Treatment, group = Treatment), fun.data = mean_se, geom = "errorbar", width = .2, alpha = .2)
data = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/adas.txt", header = TRUE, na.strings = ".")
dat = data
dat$Treatment = factor(dat$Treatment, levels = c("P", "L", "H"))
dat$PID %<>% as.factor
dat %<>% gather(key = month, value = adas, contains("adas"))
dat %<>% separate(month, c("prefix", "month"), sep = 4)
dat$month %<>% as.integer
dat$prefix=NULL
dat %<>% mutate(Time = as.factor(month))
#Plot
ggplot(dat, aes(x = month, y = adas, group = PID))+
geom_point(size = .1, alpha = .1, na.rm = TRUE)+
geom_line(size = .1, alpha = .1, na.rm = TRUE)+
scale_x_continuous(breaks = c(2,4,6,8,10,12))+
facet_wrap(~Treatment)+
stat_summary(aes(group = Treatment, color = Treatment), fun.y = mean, geom = "point", na.rm = TRUE)+
stat_summary(aes(group = Treatment, color = Treatment), fun.y = mean, geom = "line", na.rm = TRUE)+
stat_summary(aes(group = Treatment, color = Treatment), fun.data = mean_se, geom = "errorbar", width = .3, na.rm = TRUE)+
labs(x = "Time (in months)", y = "ADAS score")+
theme_bw()
#mean
m <- data %>% group_by(Treatment) %>% summarise(av_m2 = mean(adas02, na.rm = TRUE),
av_m4 = mean(adas04, na.rm = TRUE),
av_m6 = mean(adas06, na.rm = TRUE),
av_m8 = mean(adas08, na.rm = TRUE),
av_m10 = mean(adas10, na.rm = TRUE),
adas12 = mean(adas12, na.rm = TRUE))
m
## # A tibble: 3 × 7
## Treatment av_m2 av_m4 av_m6 av_m8 av_m10 adas12
## <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 H 31.58333 34.39130 35.00000 34.95833 34.00000 35.90909
## 2 L 30.64000 34.43478 36.12000 35.20000 37.16000 37.95833
## 3 P 31.58621 34.64286 36.24138 39.17857 39.24138 40.84615
s <- data %>% group_by(Treatment) %>% summarise(sd_m2 = sd(adas02, na.rm = TRUE),
sd_m4 = sd(adas04, na.rm = TRUE),
sd_m6 = sd(adas06, na.rm = TRUE),
sd_m8 = sd(adas08, na.rm = TRUE),
sd_m10 = sd(adas10, na.rm = TRUE),
sd_m12 = sd(adas12, na.rm = TRUE))
s
## # A tibble: 3 × 7
## Treatment sd_m2 sd_m4 sd_m6 sd_m8 sd_m10 sd_m12
## <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 H 9.977874 9.064111 9.044862 9.452302 7.305042 9.354953
## 2 L 7.626052 8.532431 9.070649 10.041580 10.660206 10.339789
## 3 P 8.449502 7.963873 7.702760 8.380107 7.414538 9.172534
Variances might differ between months and treament groups
dat_f = na.omit(dat)
summary(m0 <- gls(adas~Treatment*month, data = dat_f, weights = varIdent(form = ~1|Time)))
## Generalized least squares fit by REML
## Model: adas ~ Treatment * month
## Data: dat_f
## AIC BIC logLik
## 3279.961 3329.218 -1627.98
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Time
## Parameter estimates:
## 2 4 6 8 10 12
## 1.0000000 0.9656600 0.9739845 1.0531478 0.9747791 1.0943490
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 30.599790 1.5228617 20.093611 0.0000
## TreatmentL 0.253992 2.2402949 0.113374 0.9098
## TreatmentH 1.682556 2.2673593 0.742077 0.4584
## month 0.907921 0.2020499 4.493549 0.0000
## TreatmentL:month -0.273995 0.2956072 -0.926888 0.3545
## TreatmentH:month -0.621001 0.3003345 -2.067700 0.0392
##
## Correlation:
## (Intr) TrtmnL TrtmnH month TrtmL:
## TreatmentL -0.680
## TreatmentH -0.672 0.457
## month -0.897 0.610 0.602
## TreatmentL:month 0.613 -0.897 -0.412 -0.684
## TreatmentH:month 0.603 -0.410 -0.896 -0.673 0.460
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.9539022 -0.7347022 -0.1380573 0.5888577 3.1586040
##
## Residual standard error: 8.711074
## Degrees of freedom: 454 total; 448 residual
summary(m1 <- gls(adas~Treatment*month, data = dat_f,
correlation = corSymm(form = ~1|PID), weights = varIdent(form = ~1|Time)))
## Generalized least squares fit by REML
## Model: adas ~ Treatment * month
## Data: dat_f
## AIC BIC logLik
## 2758.115 2868.944 -1352.058
##
## Correlation Structure: General
## Formula: ~1 | PID
## Parameter estimate(s):
## Correlation:
## 1 2 3 4 5
## 2 0.888
## 3 0.778 0.824
## 4 0.803 0.791 0.865
## 5 0.793 0.760 0.739 0.868
## 6 0.766 0.683 0.718 0.810 0.916
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Time
## Parameter estimates:
## 2 4 8 10 12 6
## 1.0000000 0.9326594 1.0210252 1.0059971 1.0502771 0.9436473
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 30.576160 1.6450902 18.586312 0.0000
## TreatmentL 0.917240 2.4154725 0.379735 0.7043
## TreatmentH 1.598239 2.4650553 0.648358 0.5171
## month 0.964399 0.1150145 8.385020 0.0000
## TreatmentL:month -0.300076 0.1671846 -1.794880 0.0733
## TreatmentH:month -0.522063 0.1710964 -3.051280 0.0024
##
## Correlation:
## (Intr) TrtmnL TrtmnH month TrtmL:
## TreatmentL -0.681
## TreatmentH -0.667 0.455
## month -0.456 0.310 0.304
## TreatmentL:month 0.314 -0.455 -0.209 -0.688
## TreatmentH:month 0.306 -0.209 -0.455 -0.672 0.462
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.0259439 -0.7883657 -0.1975715 0.5308461 3.0726329
##
## Residual standard error: 9.092395
## Degrees of freedom: 454 total; 448 residual
anova(m0,m1)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 12 3279.961 3329.218 -1627.980
## m1 2 27 2758.115 2868.944 -1352.057 1 vs 2 551.8457 <.0001
m1 is better
summary(m2 <- gls(adas~Treatment*month, data = dat_f, weights = varIdent(form = ~1|Time),
correlation = corAR1(form = ~month|PID)))
## Generalized least squares fit by REML
## Model: adas ~ Treatment * month
## Data: dat_f
## AIC BIC logLik
## 2772.265 2825.627 -1373.132
##
## Correlation Structure: ARMA(1,0)
## Formula: ~month | PID
## Parameter estimate(s):
## Phi1
## 0.9370283
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Time
## Parameter estimates:
## 2 4 8 10 12 6
## 1.0000000 0.9763306 1.0406118 0.9895272 1.0185703 1.0125494
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 30.269621 1.8327914 16.515585 0.0000
## TreatmentL 0.034753 2.6898309 0.012920 0.9897
## TreatmentH 0.870649 2.7432528 0.317378 0.7511
## month 0.933561 0.1673513 5.578449 0.0000
## TreatmentL:month -0.255835 0.2443760 -1.046891 0.2957
## TreatmentH:month -0.525558 0.2492242 -2.108777 0.0355
##
## Correlation:
## (Intr) TrtmnL TrtmnH month TrtmL:
## TreatmentL -0.681
## TreatmentH -0.668 0.455
## month -0.613 0.418 0.410
## TreatmentL:month 0.420 -0.614 -0.281 -0.685
## TreatmentH:month 0.412 -0.281 -0.613 -0.671 0.460
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8738998 -0.6777642 -0.1051968 0.5947320 3.1281170
##
## Residual standard error: 9.090338
## Degrees of freedom: 454 total; 448 residual
anova(m0,m1,m2)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 12 3279.961 3329.218 -1627.980
## m1 2 27 2758.115 2868.944 -1352.057 1 vs 2 551.8457 <.0001
## m2 3 13 2772.265 2825.627 -1373.132 2 vs 3 42.1499 1e-04
m2 is better
#random effect
summary(m3 <- lme(adas~Treatment*month, random = ~1|PID, data = dat_f))
## Linear mixed-effects model fit by REML
## Data: dat_f
## AIC BIC logLik
## 2809.16 2841.998 -1396.58
##
## Random effects:
## Formula: ~1 | PID
## (Intercept) Residual
## StdDev: 8.0553 3.988485
##
## Fixed effects: adas ~ Treatment * month
## Value Std.Error DF t-value p-value
## (Intercept) 30.931412 1.6291549 371 18.986170 0.0000
## TreatmentL -0.022530 2.3921381 77 -0.009418 0.9925
## TreatmentH 1.211469 2.4398321 77 0.496538 0.6209
## month 0.908461 0.0912069 371 9.960441 0.0000
## TreatmentL:month -0.313667 0.1332558 371 -2.353872 0.0191
## TreatmentH:month -0.573026 0.1352116 371 -4.237991 0.0000
## Correlation:
## (Intr) TrtmnL TrtmnH month TrtmL:
## TreatmentL -0.681
## TreatmentH -0.668 0.455
## month -0.386 0.263 0.258
## TreatmentL:month 0.264 -0.388 -0.176 -0.684
## TreatmentH:month 0.261 -0.177 -0.383 -0.675 0.462
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.9635986 -0.5745229 0.0133242 0.5410091 3.7416613
##
## Number of Observations: 454
## Number of Groups: 80
anova(m0,m1,m2,m3)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 12 3279.961 3329.218 -1627.980
## m1 2 27 2758.115 2868.944 -1352.057 1 vs 2 551.8457 <.0001
## m2 3 13 2772.265 2825.627 -1373.132 2 vs 3 42.1499 1e-04
## m3 4 8 2809.160 2841.998 -1396.580 3 vs 4 46.8947 <.0001
m3 is the best model.
That is, interaction between treatment and time have different effects on different individuals.
In this case, the random effect model is better than all the fixed effect models.
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/grip_strength.txt", header = TRUE)
str(dat)
## 'data.frame': 201 obs. of 6 variables:
## $ ID : int 26 26 26 27 27 27 29 29 29 34 ...
## $ Baseline: int 175 175 175 165 165 165 175 175 175 178 ...
## $ Treat : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ Time : int 1 2 3 1 2 3 1 2 3 1 ...
## $ Strength: int 161 210 230 215 245 265 134 215 139 165 ...
dat$ID %<>% as.factor
dat$Treat %<>% as.factor
dat$Time %<>% as.factor
sum(sapply(dat, is.na))
## [1] 12
ggplot(dat, aes(x = Time, y = Strength, group = ID))+
geom_point(alpha = .2, size = .2, na.rm = TRUE)+
geom_line(alpha = .2, size = .2, na.rm = TRUE)+
facet_wrap(~Gender)+
stat_summary(aes(group = Treat, color = Treat), fun.y = mean, geom = "point", na.rm = TRUE)+
stat_summary(aes(group = Treat, color = Treat), fun.y = mean, geom = "line", na.rm = TRUE)+
stat_summary(aes(group = Treat, color = Treat), fun.data = mean_se, geom = "errorbar", width = .2, na.rm = TRUE)+
theme_bw()
dat = na.omit(dat)
summary(m1 <- lme(Strength~Baseline+Treat+Gender+Time+
Treat:Gender+Treat:Time+Gender:Time+Treat:Gender:Time+
Baseline:Treat+Baseline:Gender+Baseline:Time,
random = ~Gender|Time,
data = dat))
## Warning in pt(-abs(tTable[, "t-value"]), tTable[, "DF"]): NaNs produced
## Linear mixed-effects model fit by REML
## Data: dat
## AIC BIC logLik
## 1804.938 1871.035 -881.469
##
## Random effects:
## Formula: ~Gender | Time
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 10.48319 (Intr)
## GenderM 15.72478 0
## Residual 31.20289
##
## Fixed effects: Strength ~ Baseline + Treat + Gender + Time + Treat:Gender + Treat:Time + Gender:Time + Treat:Gender:Time + Baseline:Treat + Baseline:Gender + Baseline:Time
## Value Std.Error DF t-value p-value
## (Intercept) 25.23477 15.799162 172 1.597222 0.1121
## Baseline 0.82262 0.111836 172 7.355536 0.0000
## Treat2 -6.33103 13.104595 172 -0.483115 0.6296
## GenderM 35.62254 26.579706 172 1.340216 0.1819
## Time2 10.31298 20.669082 0 0.498957 NaN
## Time3 10.01182 21.130114 0 0.473818 NaN
## Treat2:GenderM 16.26939 18.979974 172 0.857187 0.3925
## Treat2:Time2 6.91817 14.532133 172 0.476060 0.6346
## Treat2:Time3 1.70443 15.158423 172 0.112441 0.9106
## GenderM:Time2 11.09311 30.309012 172 0.366001 0.7148
## GenderM:Time3 -5.04501 30.619835 172 -0.164763 0.8693
## Baseline:Treat2 -0.00223 0.101527 172 -0.021974 0.9825
## Baseline:GenderM -0.07486 0.106271 172 -0.704401 0.4821
## Baseline:Time2 -0.09659 0.118335 172 -0.816283 0.4155
## Baseline:Time3 -0.05104 0.120001 172 -0.425347 0.6711
## Treat2:GenderM:Time2 -7.02886 22.105296 172 -0.317972 0.7509
## Treat2:GenderM:Time3 -10.93182 22.612754 172 -0.483436 0.6294
## Correlation:
## (Intr) Baseln Treat2 GendrM Time2 Time3 Tr2:GM
## Baseline -0.595
## Treat2 -0.387 0.234
## GenderM -0.068 -0.091 -0.076
## Time2 -0.638 0.243 0.193 -0.027
## Time3 -0.617 0.226 0.183 -0.031 0.482
## Treat2:GenderM 0.058 0.190 -0.044 -0.477 -0.126 -0.127
## Treat2:Time2 0.226 -0.005 -0.538 -0.118 -0.372 -0.174 0.371
## Treat2:Time3 0.216 -0.003 -0.511 -0.116 -0.170 -0.397 0.360
## GenderM:Time2 -0.027 0.225 -0.120 -0.568 0.037 0.020 0.229
## GenderM:Time3 -0.022 0.215 -0.122 -0.566 0.020 0.018 0.224
## Baseline:Treat2 0.194 -0.327 -0.634 0.352 0.006 0.013 -0.581
## Baseline:GenderM 0.284 -0.477 0.148 -0.554 0.011 0.024 0.180
## Baseline:Time2 0.308 -0.517 -0.013 0.245 -0.489 -0.233 -0.015
## Baseline:Time3 0.291 -0.488 -0.004 0.251 -0.236 -0.487 -0.008
## Treat2:GenderM:Time2 -0.134 -0.021 0.351 0.218 0.223 0.103 -0.570
## Treat2:GenderM:Time3 -0.131 -0.021 0.341 0.216 0.103 0.246 -0.559
## Tr2:T2 Tr2:T3 GnM:T2 GnM:T3 Bsln:Tr2 Bsl:GM Bsln:Tm2
## Baseline
## Treat2
## GenderM
## Time2
## Time3
## Treat2:GenderM
## Treat2:Time2
## Treat2:Time3 0.466
## GenderM:Time2 0.221 0.106
## GenderM:Time3 0.109 0.243 0.492
## Baseline:Treat2 -0.001 -0.007 -0.004 0.001
## Baseline:GenderM -0.017 -0.013 0.001 0.010 -0.260
## Baseline:Time2 0.044 0.014 -0.421 -0.210 -0.003 -0.009
## Baseline:Time3 0.014 0.041 -0.209 -0.421 -0.017 -0.033 0.488
## Treat2:GenderM:Time2 -0.655 -0.306 -0.397 -0.192 0.003 0.008 0.015
## Treat2:GenderM:Time3 -0.312 -0.669 -0.190 -0.406 0.007 0.004 0.014
## Bsl:T3 T2:GM:T2
## Baseline
## Treat2
## GenderM
## Time2
## Time3
## Treat2:GenderM
## Treat2:Time2
## Treat2:Time3
## GenderM:Time2
## GenderM:Time3
## Baseline:Treat2
## Baseline:GenderM
## Baseline:Time2
## Baseline:Time3
## Treat2:GenderM:Time2 0.014
## Treat2:GenderM:Time3 0.015 0.477
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.19443564 -0.52693293 -0.01183254 0.53454474 3.88997716
##
## Number of Observations: 189
## Number of Groups: 3
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/partners.txt", header = TRUE)
str(dat)
## 'data.frame': 464 obs. of 5 variables:
## $ Sbj : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
## $ Session: Factor w/ 4 levels "T1","T2","T3",..: 1 2 3 4 1 2 3 4 1 2 ...
## $ Time : Factor w/ 2 levels "B","E": 1 1 1 1 2 2 2 2 1 1 ...
## $ PANAS : int 15 16 16 14 35 35 34 41 14 14 ...
sum(sapply(dat, is.na)) #yeah
## [1] 0
dat %<>% mutate(group = paste0(Gender, Time))
dat$group %<>% as.factor
dat$Sbj %<>% as.factor
p = position_dodge(.1)
mm = mean(dat$PANAS)
ggplot(dat, aes(x = Session, y = PANAS, color = group, group = group))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .3, position = p)+
geom_hline(yintercept = mm, linetype = 2)+
labs(x = "Session", y = "Mean PANAS")+
theme_bw()+
theme(legend.position = "top")+
scale_color_discrete(name = "Gender:Time", labels = c("F:B", "F:E", "M:B", "M:E"))
g1 = ggplot(dat, aes(x = Session, y = PANAS, color = Gender, group = Gender))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .3)+
geom_hline(yintercept = mm, linetype = 2)+
labs(x = "Session", y = "Mean PANAS")+
theme_bw()+
theme(legend.position = "top")
g2 = ggplot(dat, aes(x = Session, y = PANAS, color = Time, group = Time))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .3)+
geom_hline(yintercept = mm, linetype = 2)+
labs(x = "Session", y = "Mean PANAS")+
theme_bw()+
theme(legend.position = "top")
g3 = ggplot(dat, aes(x = Session, y = PANAS))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .3)+
geom_hline(yintercept = mm, linetype = 2)+
labs(x = "Session", y = "Mean PANAS")+
theme_bw()+
theme(legend.position = "top")
grid.arrange(g1,g2,g3, nrow = 1, ncol = 3)
summary(m0<- lmer(PANAS~Gender*Session*Time+(1|Sbj/Session)+(1|Sbj/Time), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## PANAS ~ Gender * Session * Time + (1 | Sbj/Session) + (1 | Sbj/Time)
## Data: dat
##
## REML criterion at convergence: 2749.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8159 -0.5614 -0.0136 0.4159 5.2048
##
## Random effects:
## Groups Name Variance Std.Dev.
## Session.Sbj (Intercept) 0.2083 0.4564
## Time.Sbj (Intercept) 58.8769 7.6731
## Sbj (Intercept) 0.0000 0.0000
## Sbj.1 (Intercept) 0.0000 0.0000
## Residual 15.8276 3.9784
## Number of obs: 464, groups: Session:Sbj, 116; Time:Sbj, 58; Sbj, 29
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 23.8636 1.7451 13.675
## GenderM -1.0065 3.5520 -0.283
## SessionT2 -2.1136 0.8593 -2.460
## SessionT3 -2.8864 0.8593 -3.359
## SessionT4 -4.7045 0.8593 -5.475
## TimeE 6.9091 2.4641 2.804
## GenderM:SessionT2 2.6851 1.7490 1.535
## GenderM:SessionT3 2.5292 1.7490 1.446
## GenderM:SessionT4 8.5617 1.7490 4.895
## GenderM:TimeE -2.1948 5.0155 -0.438
## SessionT2:TimeE 2.2045 1.1995 1.838
## SessionT3:TimeE 1.6591 1.1995 1.383
## SessionT4:TimeE 5.1818 1.1995 4.320
## GenderM:SessionT2:TimeE -1.9903 2.4415 -0.815
## GenderM:SessionT3:TimeE 1.6266 2.4415 0.666
## GenderM:SessionT4:TimeE -6.4675 2.4415 -2.649
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
anova(m0)
## Analysis of Variance Table
## Df Sum Sq Mean Sq F value
## Gender 1 0.652 0.652 0.0412
## Session 3 89.765 29.922 1.8905
## Time 1 255.341 255.341 16.1326
## Gender:Session 3 314.408 104.803 6.6215
## Gender:Time 1 10.518 10.518 0.6645
## Session:Time 3 191.817 63.939 4.0397
## Gender:Session:Time 3 195.260 65.087 4.1122
AIC(m0)
## [1] 2791.237
require(foreign)
## Loading required package: foreign
dat = read.dta("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/bodyfat.dta")
dat$id %<>% as.factor
sum(sapply(dat, is.na))
## [1] 0
dat$age_f = as.factor(dat$age)
ggplot(dat, aes(x = age, y = bodyfat, group = id, color = id))+
geom_point(size = .1, alpha = .5)+
geom_line(size = .1, alpha = .5)+
facet_wrap(~sex)+
labs(x = "Age", y = "Body fat (kg)")+
theme_bw()+
theme(legend.position = "none")
ggplot(dat, aes(x = age, y = bodyfat, group = sex, color = sex))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(fun.data = mean_se, geom = "errorbar", width = .2)+
labs(x = "Age", y = "Body fat (kg)")+
theme_bw()
summary(m0 <- lme(bodyfat~sex*age, random = ~sex|age,data = dat))
## Linear mixed-effects model fit by REML
## Data: dat
## AIC BIC logLik
## 10376.27 10422.09 -5180.134
##
## Random effects:
## Formula: ~sex | age
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 1.023563e-04 (Intr)
## sexmale 6.886174e-05 -0.001
## Residual 2.357100e+00
##
## Fixed effects: bodyfat ~ sex * age
## Value Std.Error DF t-value p-value
## (Intercept) 1.7780638 0.5818216 2268 3.056029 0.0023
## sexmale 0.9879022 0.7914043 2268 1.248290 0.2121
## age 0.6387135 0.0446062 1 14.318932 0.0444
## sexmale:age -0.1694899 0.0606550 2268 -2.794329 0.0052
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.992 0.729
## sexmale:age 0.730 -0.992 -0.735
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.37650737 -0.66432159 -0.08668264 0.59228998 4.35814063
##
## Number of Observations: 2273
## Number of Groups: 3
summary(m1 <- lm(bodyfat~sex*age, data = dat))
##
## Call:
## lm(formula = bodyfat ~ sex * age, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9588 -1.5659 -0.2043 1.3961 10.2726
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.77806 0.58182 3.056 0.00227 **
## sexmale 0.98790 0.79140 1.248 0.21205
## age 0.63871 0.04461 14.319 < 2e-16 ***
## sexmale:age -0.16949 0.06065 -2.794 0.00524 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.357 on 2269 degrees of freedom
## Multiple R-squared: 0.1752, Adjusted R-squared: 0.1741
## F-statistic: 160.7 on 3 and 2269 DF, p-value: < 2.2e-16
anova(m1)
## Analysis of Variance Table
##
## Response: bodyfat
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 814.7 814.69 146.6341 < 2.2e-16 ***
## age 1 1819.9 1819.87 327.5554 < 2.2e-16 ***
## sex:age 1 43.4 43.38 7.8083 0.005244 **
## Residuals 2269 12606.4 5.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m1)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 8 10376.27 10422.08 -5180.134
## m1 2 5 10370.27 10398.90 -5180.134 1 vs 2 1.631985e-06 1
Random effect model did not perform better than fixed effect model.
m <- dat %>% group_by(sex, age) %>% summarise(mean(bodyfat))
m
## Source: local data frame [6 x 3]
## Groups: sex [?]
##
## sex age `mean(bodyfat)`
## <fctr> <dbl> <dbl>
## 1 female 11 8.771233
## 2 female 13 10.150289
## 3 female 15 11.323054
## 4 male 11 7.937471
## 5 male 13 8.844691
## 6 male 15 9.815152
There is defference between sex.
s <- dat %>% group_by(sex, age) %>% summarise(sd(bodyfat))
s
## Source: local data frame [6 x 3]
## Groups: sex [?]
##
## sex age `sd(bodyfat)`
## <fctr> <dbl> <dbl>
## 1 female 11 1.925571
## 2 female 13 2.265062
## 3 female 15 2.797142
## 4 male 11 1.983441
## 5 male 13 2.217271
## 6 male 15 2.853605
The variances did bot differ too much.
summary(m2 <- gls(bodyfat~sex*age, correlation = corCompSymm(form = ~1|id), data = dat))
## Generalized least squares fit by REML
## Model: bodyfat ~ sex * age
## Data: dat
## AIC BIC logLik
## 10261.76 10296.12 -5124.88
##
## Correlation Structure: Compound symmetry
## Formula: ~1 | id
## Parameter estimate(s):
## Rho
## 0.2345844
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.8033821 0.5142196 3.507027 0.0005
## sexmale 0.9717458 0.6993056 1.389587 0.1648
## age 0.6364110 0.0392102 16.230740 0.0000
## sexmale:age -0.1678481 0.0533028 -3.148955 0.0017
##
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.985 0.725
## sexmale:age 0.725 -0.985 -0.736
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.37247100 -0.66453990 -0.08650268 0.59241291 4.35717555
##
## Residual standard error: 2.357188
## Degrees of freedom: 2273 total; 2269 residual
anova(m0,m1,m2)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0 1 8 10376.27 10422.08 -5180.134
## m1 2 5 10370.27 10398.90 -5180.134 1 vs 2 0.000 1
## m2 3 6 10261.76 10296.12 -5124.880 2 vs 3 110.509 <.0001
summary(m3 <- gls(bodyfat~sex*age, correlation = corExp(form = ~1|id), data = dat))
## Generalized least squares fit by REML
## Model: bodyfat ~ sex * age
## Data: dat
## AIC BIC logLik
## 10199.23 10233.59 -5093.614
##
## Correlation Structure: Exponential spatial correlation
## Formula: ~1 | id
## Parameter estimate(s):
## range
## 0.9272448
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.8099458 0.5504187 3.288307 0.0010
## sexmale 0.9584834 0.7488041 1.280019 0.2007
## age 0.6353528 0.0420303 15.116526 0.0000
## sexmale:age -0.1659740 0.0571591 -2.903722 0.0037
##
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.987 0.725
## sexmale:age 0.726 -0.987 -0.735
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.36153867 -0.66481672 -0.08852797 0.59319353 4.34717967
##
## Residual standard error: 2.362084
## Degrees of freedom: 2273 total; 2269 residual
anova(m2,m3) #m3 is better
## Model df AIC BIC logLik
## m2 1 6 10261.76 10296.12 -5124.880
## m3 2 6 10199.23 10233.59 -5093.614
summary(m4 <- gls(bodyfat~sex*age, correlation = corAR1(form = ~age|id), data = dat))
## Generalized least squares fit by REML
## Model: bodyfat ~ sex * age
## Data: dat
## AIC BIC logLik
## 10372.27 10406.63 -5180.134
##
## Correlation Structure: ARMA(1,0)
## Formula: ~age | id
## Parameter estimate(s):
## Phi1
## 0
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.7780638 0.5818214 3.056030 0.0023
## sexmale 0.9879022 0.7914043 1.248290 0.2121
## age 0.6387135 0.0446062 14.318936 0.0000
## sexmale:age -0.1694899 0.0606550 -2.794329 0.0052
##
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.992 0.729
## sexmale:age 0.730 -0.992 -0.735
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.37650737 -0.66432158 -0.08668265 0.59228998 4.35814062
##
## Residual standard error: 2.3571
## Degrees of freedom: 2273 total; 2269 residual
anova(m3,m4) #m3 is better
## Model df AIC BIC logLik
## m3 1 6 10199.23 10233.59 -5093.614
## m4 2 6 10372.27 10406.63 -5180.134
summary(m5 <- gls(bodyfat~sex*age, correlation = corExp(form = ~1|id), weights = varIdent(form = ~1|age_f), data = dat))
## Generalized least squares fit by REML
## Model: bodyfat ~ sex * age
## Data: dat
## AIC BIC logLik
## 10116.02 10161.84 -5050.012
##
## Correlation Structure: Exponential spatial correlation
## Formula: ~1 | id
## Parameter estimate(s):
## range
## 0.8711536
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | age_f
## Parameter estimates:
## 11 13 15
## 1.000000 1.118751 1.378045
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.7305038 0.5369058 3.223105 0.0013
## sexmale 1.0546176 0.7303529 1.443984 0.1489
## age 0.6419058 0.0428299 14.987327 0.0000
## sexmale:age -0.1739068 0.0582457 -2.985743 0.0029
##
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.988 0.726
## sexmale:age 0.726 -0.988 -0.735
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.8830852 -0.7001270 -0.0955765 0.5938367 5.1250317
##
## Residual standard error: 2.003283
## Degrees of freedom: 2273 total; 2269 residual
anova(m3,m5) #m5 is better
## Model df AIC BIC logLik Test L.Ratio p-value
## m3 1 6 10199.23 10233.59 -5093.614
## m5 2 8 10116.02 10161.84 -5050.012 1 vs 2 87.20272 <.0001
summary(m6 <- gls(bodyfat~sex*age, correlation = corExp(form = ~1|id), weights = varExp(form = ~age), data = dat))
## Generalized least squares fit by REML
## Model: bodyfat ~ sex * age
## Data: dat
## AIC BIC logLik
## 10116.66 10156.75 -5051.33
##
## Correlation Structure: Exponential spatial correlation
## Formula: ~1 | id
## Parameter estimate(s):
## range
## 0.8825129
## Variance function:
## Structure: Exponential of variance covariate
## Formula: ~age
## Parameter estimates:
## expon
## 0.08030966
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.7351781 0.5287982 3.281362 0.0010
## sexmale 1.0476842 0.7193111 1.456511 0.1454
## age 0.6412482 0.0422524 15.176594 0.0000
## sexmale:age -0.1729838 0.0574584 -3.010593 0.0026
##
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.987 0.726
## sexmale:age 0.726 -0.987 -0.735
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.92165397 -0.70345159 -0.09567755 0.59067039 5.19962262
##
## Residual standard error: 0.8161702
## Degrees of freedom: 2273 total; 2269 residual
anova(m5,m6) #m6 is better because of smaller df
## Model df AIC BIC logLik Test L.Ratio p-value
## m5 1 8 10116.02 10161.84 -5050.012
## m6 2 7 10116.66 10156.75 -5051.330 1 vs 2 2.635837 0.1045
summary(m7 <- gls(bodyfat~sex*age, correlation = corSymm(form = ~1|id), weights = varExp(form = ~age), data = dat))
## Generalized least squares fit by REML
## Model: bodyfat ~ sex * age
## Data: dat
## AIC BIC logLik
## 10078.37 10129.92 -5030.187
##
## Correlation Structure: General
## Formula: ~1 | id
## Parameter estimate(s):
## Correlation:
## 1 2
## 2 0.169
## 3 0.057 0.467
## Variance function:
## Structure: Exponential of variance covariate
## Formula: ~age
## Parameter estimates:
## expon
## 0.09320165
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.8063673 0.5466440 3.304467 0.0010
## sexmale 0.9706584 0.7433525 1.305785 0.1918
## age 0.6355736 0.0444097 14.311598 0.0000
## sexmale:age -0.1670398 0.0603772 -2.766604 0.0057
##
## Correlation:
## (Intr) sexmal age
## sexmale -0.735
## age -0.989 0.727
## sexmale:age 0.727 -0.989 -0.736
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.8424922 -0.7177972 -0.1027411 0.6076323 5.3372886
##
## Residual standard error: 0.6901846
## Degrees of freedom: 2273 total; 2269 residual
anova(m6,m7) #m7 is better
## Model df AIC BIC logLik Test L.Ratio p-value
## m6 1 7 10116.66 10156.75 -5051.330
## m7 2 9 10078.37 10129.92 -5030.187 1 vs 2 42.28675 <.0001
m7 is the best model.
#Prediction plot
dat_p = data.frame(dat, fit = fitted(m7))
ggplot(dat_p, aes(x = age, y = fit, group = sex, color = sex))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
stat_summary(aes(y = bodyfat), fun.y = mean, geom = "point", alpha = .5)+
stat_summary(aes(y = bodyfat), fun.y = mean, geom = "line", alpha = .5, linetype = 2)+
stat_summary(aes(y = bodyfat), fun.data = mean_se, geom = "errorbar", width = .3, alpha = .5, linetype = 2)+
labs(x = "Age", y = "Body fat (kg)")+
theme_bw()
The prediciton of m7 is close to real data.(dash line = real data, solid line = prediction)
anova(m7)
## Denom. DF: 2269
## numDF F-value p-value
## (Intercept) 1 26661.471 <.0001
## sex 1 92.544 <.0001
## age 1 328.369 <.0001
## sex:age 1 7.654 0.0057
(1)Yes, body fat increases with age for both girls and boys.
(2)Yes, there is a difference between girls and boys with respect to the increase of the body fat with age.
(3)Yes, girls tend to have more body fat than boys.