Column 1: Treatment ID
Column 2: Patient ID
Column 3: Gender ID
Column 4: Walking distance at baseline (month 0)
Column 5: Walking distance at month 1
Column 6: Walking distance at month 2
Column 7: Walking distance at month 3
Column 8: Walking distance at month 4
pacman::p_load(tidyr,tidyverse, nlme, car, afex, emmeans, lattice)
# Input data
dta1 <- read.csv("claudication.csv")
str(dta1)
## 'data.frame': 38 obs. of 8 variables:
## $ Treatment: chr "ACT" "ACT" "ACT" "ACT" ...
## $ PID : chr "P101" "P105" "P109" "P112" ...
## $ Gender : chr "M" "M" "M" "M" ...
## $ D0 : int 190 98 155 245 182 140 196 162 195 167 ...
## $ D1 : int 212 137 145 228 205 138 185 176 232 187 ...
## $ D2 : int 213 185 196 280 218 187 185 192 199 228 ...
## $ D3 : int 195 215 189 274 194 195 227 230 185 192 ...
## $ D4 : int 248 225 176 260 193 205 180 215 200 210 ...
head(dta1)
## Treatment PID Gender D0 D1 D2 D3 D4
## 1 ACT P101 M 190 212 213 195 248
## 2 ACT P105 M 98 137 185 215 225
## 3 ACT P109 M 155 145 196 189 176
## 4 ACT P112 M 245 228 280 274 260
## 5 ACT P117 M 182 205 218 194 193
## 6 ACT P122 M 140 138 187 195 205
dta1 <- mutate(dta1,
Treatment = factor(Treatment),
Gender = factor(Gender),
PID = factor(PID))
# long form
dta1L <- gather(dta1, Month, Distance, D0:D4, factor_key=TRUE)
str(dta1L)
## 'data.frame': 190 obs. of 5 variables:
## $ Treatment: Factor w/ 2 levels "ACT","PBO": 1 1 1 1 1 1 1 1 1 1 ...
## $ PID : Factor w/ 38 levels "P101","P102",..: 1 5 9 12 17 22 23 28 29 32 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ Month : Factor w/ 5 levels "D0","D1","D2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Distance : int 190 98 155 245 182 140 196 162 195 167 ...
head(dta1L)
## Treatment PID Gender Month Distance
## 1 ACT P101 M D0 190
## 2 ACT P105 M D0 98
## 3 ACT P109 M D0 155
## 4 ACT P112 M D0 245
## 5 ACT P117 M D0 182
## 6 ACT P122 M D0 140
theme_set(theme_bw())
scatterplotMatrix(~ D0 + D1 + D2 + D3 + D4 | Treatment,
data = dta1,
smooth = F,
by.group = TRUE,
regLine = F,
ellipse = T,
diagonal = T,
pch = c(1, 20))
ggplot(dta1L, aes(Month, Distance,
group = Treatment,
shape = Treatment,
linetype = Treatment,
color = Treatment)) +
stat_summary(fun = mean, geom = "line") +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
scale_shape_manual(values = c(1, 2)) +
labs(x = "TIME (in months)",
y = "Walking distance (in meters)",
linetype = "Treatment", shape = "Treatment") +
theme(legend.justification = c(1, 1),
legend.position = c(1, 1),
legend.background = element_rect(fill = "white",color = "black"))
The walking distance of ACT group is increasing more than the placebo group by month.
ggplot(dta1L, aes(Month, Distance,
group = Treatment,
shape = Treatment,
linetype = Treatment,
color = Treatment)) +
geom_point() +
stat_summary(fun = mean, geom = "line") +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.3) +
scale_shape_manual(values = c(1, 2)) +
facet_wrap( ~ Gender)+
labs(x = "TIME (in months)",
y = "Walking distance (in meters)",
linetype = "Treatment", shape = "Treatment") +
theme(legend.justification = c(1, 1),
legend.position = c(1, 1),
legend.background = element_rect(fill = "white",color = "black"))
Even in different gender, the trend is similar.
# means & sd of differ Treatment and gender
dta1_nid <- dta1 %>% select(-2)
# mean
aggregate(.~ Treatment + Gender, data = dta1_nid, mean, na.rm=T)
## Treatment Gender D0 D1 D2 D3 D4
## 1 ACT F 180.3750 193.3750 207.8750 196.7500 208.5000
## 2 PBO F 165.5556 170.1111 175.7778 168.1111 171.2222
## 3 ACT M 163.1667 179.5000 195.5833 204.0833 207.6667
## 4 PBO M 178.3333 165.5556 173.3333 193.6667 194.2222
# sd
aggregate(.~ Treatment + Gender, data = dta1_nid, sd, na.rm=T)
## Treatment Gender D0 D1 D2 D3 D4
## 1 ACT F 42.18306 33.10994 58.21006 41.65762 57.10892
## 2 PBO F 41.52743 39.13261 47.25404 33.43443 45.34252
## 3 ACT M 42.34669 34.51350 40.14623 28.40441 28.04001
## 4 PBO M 45.00556 45.53051 43.68352 55.37824 47.35182
cor(subset(dta1, Treatment == "ACT" & Gender == "F")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.8021966 0.8724733 0.8483852 0.8757233
## D1 0.8021966 1.0000000 0.8645096 0.7073820 0.9054398
## D2 0.8724733 0.8645096 1.0000000 0.8937510 0.9159571
## D3 0.8483852 0.7073820 0.8937510 1.0000000 0.9251102
## D4 0.8757233 0.9054398 0.9159571 0.9251102 1.0000000
cor(subset(dta1, Treatment == "ACT" & Gender == "M")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.8624819 0.8052568 0.5928303 0.4068980
## D1 0.8624819 1.0000000 0.6464277 0.3498338 0.4496799
## D2 0.8052568 0.6464277 1.0000000 0.6704930 0.6323603
## D3 0.5928303 0.3498338 0.6704930 1.0000000 0.5941441
## D4 0.4068980 0.4496799 0.6323603 0.5941441 1.0000000
cor(subset(dta1, Treatment == "PBO" & Gender == "F")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.9269134 0.6057892 0.8657560 0.7776916
## D1 0.9269134 1.0000000 0.6826171 0.7707920 0.7579993
## D2 0.6057892 0.6826171 1.0000000 0.3713199 0.4846551
## D3 0.8657560 0.7707920 0.3713199 1.0000000 0.7931043
## D4 0.7776916 0.7579993 0.4846551 0.7931043 1.0000000
cor(subset(dta1, Treatment == "PBO" & Gender == "M")[,-(1:3)])
## D0 D1 D2 D3 D4
## D0 1.0000000 0.9271229 0.8530641 0.8833606 0.9202039
## D1 0.9271229 1.0000000 0.7036016 0.7846666 0.9031326
## D2 0.8530641 0.7036016 1.0000000 0.8292800 0.8073104
## D3 0.8833606 0.7846666 0.8292800 1.0000000 0.9197535
## D4 0.9202039 0.9031326 0.8073104 0.9197535 1.0000000
dta1L$Month <- as.numeric(dta1L$Month)
summary(m1 <- gls(Distance ~ Month + Gender*Treatment + Month:Treatment,
weights = varIdent(form = ~ 1 | Month),
correlation = corSymm(form = ~ 1 | PID),
data = dta1L))
## Generalized least squares fit by REML
## Model: Distance ~ Month + Gender * Treatment + Month:Treatment
## Data: dta1L
## AIC BIC logLik
## 1781.019 1848.533 -869.5095
##
## Correlation Structure: General
## Formula: ~1 | PID
## Parameter estimate(s):
## Correlation:
## 1 2 3 4
## 2 0.876
## 3 0.774 0.721
## 4 0.777 0.640 0.672
## 5 0.750 0.730 0.704 0.846
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Month
## Parameter estimates:
## 1 2 3 4 5
## 1.0000000 0.8838348 1.0627779 0.9500948 1.0337142
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 166.71981 13.326806 12.510110 0.0000
## Month 10.28325 1.679138 6.124124 0.0000
## GenderM 0.01944 15.640654 0.001243 0.9990
## TreatmentPBO -3.35659 18.501998 -0.181418 0.8562
## GenderM:TreatmentPBO 0.20371 22.484857 0.009060 0.9928
## Month:TreatmentPBO -7.78949 2.439731 -3.192765 0.0017
##
## Correlation:
## (Intr) Month GendrM TrtPBO GM:TPB
## Month -0.417
## GenderM -0.704 0.000
## TreatmentPBO -0.720 0.300 0.507
## GenderM:TreatmentPBO 0.490 0.000 -0.696 -0.666
## Month:TreatmentPBO 0.287 -0.688 0.000 -0.436 0.000
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.82526479 -0.58373609 -0.06888758 0.53349971 3.70597932
##
## Residual standard error: 43.29372
## Degrees of freedom: 190 total; 184 residual
summary(m2<- gls(Distance ~ Month + Treatment,
weights = varIdent(form = ~ 1 | Month),
correlation = corSymm(form = ~ 1 | PID),
data = dta1L))
## Generalized least squares fit by REML
## Model: Distance ~ Month + Treatment
## Data: dta1L
## AIC BIC logLik
## 1801.3 1859.46 -882.6501
##
## Correlation Structure: General
## Formula: ~1 | PID
## Parameter estimate(s):
## Correlation:
## 1 2 3 4
## 2 0.868
## 3 0.739 0.707
## 4 0.752 0.637 0.670
## 5 0.708 0.729 0.708 0.834
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Month
## Parameter estimates:
## 1 2 3 4 5
## 1.0000000 0.8458121 1.0130766 0.8933380 0.9658770
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 179.41376 8.661846 20.713109 0.0000
## Month 6.45108 1.338622 4.819197 0.0000
## TreatmentPBO -29.03131 10.819309 -2.683287 0.0079
##
## Correlation:
## (Intr) Month
## Month -0.511
## TreatmentPBO -0.592 0.000
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.9643441 -0.5698611 -0.1358135 0.5440175 3.7491840
##
## Residual standard error: 44.72987
## Degrees of freedom: 190 total; 187 residual
m_aov <- afex::aov_car(Distance ~ Treatment*Month + Error(PID/Month), data=dta1L)
## Contrasts set to contr.sum for the following variables: Treatment
knitr::kable(nice(m_aov))
Effect | df | MSE | F | ges | p.value |
---|---|---|---|---|---|
Treatment | 1, 36 | 6925.36 | 2.06 | .043 | .160 |
Month | 3.18, 114.49 | 584.58 | 8.49 *** | .048 | <.001 |
Treatment:Month | 3.18, 114.49 | 584.58 | 2.63 + | .015 | .050 |
emmeans(m_aov, ~ Month | Treatment)
## Treatment = ACT:
## Month emmean SE df lower.CL upper.CL
## X1 171 9.57 55.8 151 190
## X2 186 9.57 55.8 166 205
## X3 201 9.57 55.8 182 220
## X4 202 9.57 55.8 182 221
## X5 208 9.57 55.8 189 228
##
## Treatment = PBO:
## Month emmean SE df lower.CL upper.CL
## X1 172 9.68 58.1 153 192
## X2 168 9.68 58.1 149 188
## X3 175 9.68 58.1 156 194
## X4 181 9.68 58.1 162 201
## X5 183 9.68 58.1 164 203
##
## Warning: EMMs are biased unless design is perfectly balanced
## Confidence level used: 0.95
dta1_b <- gather(dta1, Month, Distance, D1:D4, factor_key=TRUE)
xyplot(Distance ~ D0 | as.factor(Month), groups=Treatment, data=dta1_b,
type=c("p","r","g"), layout=c(5,1),
xlab="Baseline distance (in meters)",
ylab="Distance (in meters)",
auto.key = TRUE)
交叉!
Column 1: Treatment ID (L = Low dose, H = High dose, P = Placebo)
Column 2: Patient ID
Column 3: ADAS score at month 2
Column 4: ADAS score at month 4
Column 5: ADAS score at month 6
Column 6: ADAS score at month 8
Column 7: ADAS score at month 10
Column 8: ADAS score at month 12
dta2 <- read.table("adas.txt", header=T, na.strings='.')
dta2 <- mutate(dta2,
Treatment = factor(Treatment),
PID = factor(PID),
Baseline = adas02)
colnames(dta2)<-c("Treatment", "PID", "2", "4", "6", "8", "10" , "12", "Baseline")
str(dta2)
## 'data.frame': 80 obs. of 9 variables:
## $ Treatment: Factor w/ 3 levels "H","L","P": 2 2 2 2 2 2 2 2 2 2 ...
## $ PID : Factor w/ 80 levels "1","2","3","4",..: 1 5 8 12 13 15 19 21 24 28 ...
## $ 2 : int 22 34 40 24 29 31 22 43 18 25 ...
## $ 4 : int 30 35 41 NA 26 36 27 49 28 24 ...
## $ 6 : int NA 46 41 21 29 41 28 42 29 27 ...
## $ 8 : int 33 37 46 28 26 46 24 48 NA 18 ...
## $ 10 : int 28 31 52 30 NA 52 27 48 25 21 ...
## $ 12 : int 30 35 48 27 36 57 28 46 28 22 ...
## $ Baseline : int 22 34 40 24 29 31 22 43 18 25 ...
head (dta2)
## Treatment PID 2 4 6 8 10 12 Baseline
## 1 L 1 22 30 NA 33 28 30 22
## 2 L 5 34 35 46 37 31 35 34
## 3 L 8 40 41 41 46 52 48 40
## 4 L 12 24 NA 21 28 30 27 24
## 5 L 13 29 26 29 26 NA 36 29
## 6 L 15 31 36 41 46 52 57 31
# long form
dta2L <- gather(dta2, Month, ADAS, "2":"12")
dta2L <- mutate(dta2L,
Baseline = as.numeric(Baseline),
ADAS = as.numeric(ADAS),
Time_f = factor(as.numeric(Month)-2),
Month = factor (Month))
str(dta2L)
## 'data.frame': 480 obs. of 6 variables:
## $ Treatment: Factor w/ 3 levels "H","L","P": 2 2 2 2 2 2 2 2 2 2 ...
## $ PID : Factor w/ 80 levels "1","2","3","4",..: 1 5 8 12 13 15 19 21 24 28 ...
## $ Baseline : num 22 34 40 24 29 31 22 43 18 25 ...
## $ Month : Factor w/ 6 levels "10","12","2",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ ADAS : num 22 34 40 24 29 31 22 43 18 25 ...
## $ Time_f : Factor w/ 6 levels "0","2","4","6",..: 1 1 1 1 1 1 1 1 1 1 ...
head(dta2L)
## Treatment PID Baseline Month ADAS Time_f
## 1 L 1 22 2 22 0
## 2 L 5 34 2 34 0
## 3 L 8 40 2 40 0
## 4 L 12 24 2 24 0
## 5 L 13 29 2 29 0
## 6 L 15 31 2 31 0
p <- ggplot(dta2L, aes(Time_f, ADAS,
group = Treatment,
linetype = Treatment,
shape = Treatment)) +
stat_summary(fun = mean, geom = "line") +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
scale_shape_manual(values=c(1, 2, 7)) +
scale_y_continuous(breaks=seq(20, 60, 10))+
labs(x = "Month", y = "ADAS", linetype = "Treatment", shape = "Treatment") +
theme_minimal() +
theme(legend.position=c(.15,.85))
suppressWarnings(suppressMessages(print(p)))
dta2w <- read.table("adas.txt", h=T, na.strings='.')
dta2w <- dta2w %>%
mutate(Baseline= adas02)
dta2w %>%
dplyr::group_by(Treatment) %>%
dplyr::select(starts_with("adas")) %>%
furniture::table1(digits=2, total=FALSE, test=F, output="html")
## Adding missing grouping variables: `Treatment`
## Using dplyr::group_by() groups: Treatment
H | L | P | |
---|---|---|---|
n = 18 | n = 17 | n = 21 | |
adas02 | |||
30.22 (8.56) | 32.82 (7.51) | 29.62 (6.78) | |
adas04 | |||
32.44 (7.33) | 35.12 (8.64) | 33.43 (6.42) | |
adas06 | |||
33.56 (8.56) | 37.65 (9.09) | 34.86 (6.43) | |
adas08 | |||
33.33 (9.06) | 36.41 (11.09) | 36.38 (6.34) | |
adas10 | |||
33.72 (7.44) | 38.35 (11.19) | 37.57 (7.03) | |
adas12 | |||
34.28 (7.10) | 39.18 (10.77) | 39.05 (8.80) |
# furniture::table1(dta2w, adas02, adas04, adas06, adas08, adas10, adas12, splitby = ~Treatment, digits=2, test=F, output="html")
knitr::kable(cor(dta2w[,3:8], use="pair"))
adas02 | adas04 | adas06 | adas08 | adas10 | adas12 | |
---|---|---|---|---|---|---|
adas02 | 1.0000000 | 0.8954000 | 0.7544220 | 0.7740050 | 0.7006653 | 0.7496129 |
adas04 | 0.8954000 | 1.0000000 | 0.8144181 | 0.7936916 | 0.7006978 | 0.7400989 |
adas06 | 0.7544220 | 0.8144181 | 1.0000000 | 0.8628966 | 0.6716075 | 0.7615946 |
adas08 | 0.7740050 | 0.7936916 | 0.8628966 | 1.0000000 | 0.8472388 | 0.8337049 |
adas10 | 0.7006653 | 0.7006978 | 0.6716075 | 0.8472388 | 1.0000000 | 0.9153379 |
adas12 | 0.7496129 | 0.7400989 | 0.7615946 | 0.8337049 | 0.9153379 | 1.0000000 |
scatterplotMatrix(~ adas02 + adas04 + adas06 + adas08 + adas10 + adas12 | Treatment,
data=dta2w,
col="gray",
smooth=FALSE,
by.group = TRUE,
regLine=FALSE,
ellipse=list(levels=c(.975),
fill=TRUE,
fill.alpha=.1),
diagonal = T,
pch = c(1, 19, 21))
## Warning in scatterplotMatrix.default(X[, -ncol], groups = X[, ncol], ...): number of groups exceeds number of available colors
## colors are recycled
m_aov <- afex::aov_car(ADAS ~ Treatment*Time_f + Error(PID/Time_f), data=dta2L)
## Warning: Missing values for following ID(s):
## 1, 7, 11, 12, 13, 24, 26, 27, 30, 31, 35, 41, 42, 45, 47, 48, 51, 52, 54, 61, 68, 70, 72, 74
## Removing those cases from the analysis.
## Contrasts set to contr.sum for the following variables: Treatment
knitr::kable(nice(m_aov))
Effect | df | MSE | F | ges | p.value |
---|---|---|---|---|---|
Treatment | 2, 53 | 328.01 | 1.10 | .032 | .342 |
Time_f | 3.27, 173.16 | 25.22 | 18.57 *** | .066 | <.001 |
Treatment:Time_f | 6.53, 173.16 | 25.22 | 1.35 | .010 | .234 |
emmeans(m_aov, ~ Time_f | Treatment)
## Treatment = H:
## Time_f emmean SE df lower.CL upper.CL
## X0 30.2 1.93 82.6 26.4 34.1
## X2 32.4 1.93 82.6 28.6 36.3
## X4 33.5 1.93 82.6 29.7 37.4
## X6 33.3 1.93 82.6 29.5 37.2
## X8 33.7 1.93 82.6 29.9 37.6
## X10 34.3 1.93 82.6 30.4 38.1
##
## Treatment = L:
## Time_f emmean SE df lower.CL upper.CL
## X0 32.8 1.96 83.8 28.9 36.7
## X2 35.1 1.96 83.8 31.2 39.0
## X4 37.6 1.96 83.8 33.7 41.5
## X6 36.4 1.96 83.8 32.5 40.3
## X8 38.3 1.96 83.8 34.4 42.2
## X10 39.2 1.96 83.8 35.3 43.1
##
## Treatment = P:
## Time_f emmean SE df lower.CL upper.CL
## X0 29.6 1.87 79.5 25.9 33.3
## X2 33.4 1.87 79.5 29.7 37.1
## X4 34.8 1.87 79.5 31.1 38.6
## X6 36.4 1.87 79.5 32.7 40.1
## X8 37.6 1.87 79.5 33.8 41.3
## X10 39.0 1.87 79.5 35.3 42.7
##
## Warning: EMMs are biased unless design is perfectly balanced
## Confidence level used: 0.95
xyplot(ADAS ~ Baseline | as.factor(Time_f), groups=Treatment, data=dta2L,
type=c("p","r","g"), layout=c(6,1),
xlab="Baseline ADAS",
ylab="ADAS",
auto.key = TRUE)
m_ancova <- gls(ADAS ~ Baseline + Baseline:Time_f + Treatment*Time_f, data=dta2L, correlation = corSymm(form = ~ Month | PID), weights = varIdent(form = ~ 1 | Time_f), na.action = na.exclude, control = glsControl(opt = “optim”))
dta2L$trt_time <- dta2L$Time_f
dta2L$trt_time[dta2L$Treatment=="P"] <- "0"
dta2L$Time_f <- relevel(dta2L$Time_f, ref="0")
dta2L$trt_time <- relevel(dta2L$trt_time, ref="0")
str(dta2L)
## 'data.frame': 480 obs. of 7 variables:
## $ Treatment: Factor w/ 3 levels "H","L","P": 2 2 2 2 2 2 2 2 2 2 ...
## $ PID : Factor w/ 80 levels "1","2","3","4",..: 1 5 8 12 13 15 19 21 24 28 ...
## $ Baseline : num 22 34 40 24 29 31 22 43 18 25 ...
## $ Month : Factor w/ 6 levels "10","12","2",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ ADAS : num 22 34 40 24 29 31 22 43 18 25 ...
## $ Time_f : Factor w/ 6 levels "0","2","4","6",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ trt_time : Factor w/ 6 levels "0","2","4","6",..: 1 1 1 1 1 1 1 1 1 1 ...
m_cgls <- gls(ADAS ~ Time_f + trt_time, data = dta2L, correlation=corSymm(form = ~ Month | PID), weights=varIdent(form = ~ 1 | Time_f), na.action = na.exclude, control=glsControl(opt = ‘optim’))
m_ar1 <- gls(ADAS ~ Time_f + trt_time, data = dta2L, correlation = corAR1(form = ~ Month | PID), weights = varIdent(form = ~ 1 | Time_f), method =“ML”, na.action = na.exclude, control = glsControl(opt = ‘optim’))
m_ar1b <- gls(ADAS ~ Time_f + trt_time, data = dta2L, correlation = corAR1(form = ~ Month | PID), method =“ML”, na.action = na.exclude, control = glsControl(opt = ‘optim’))
m_ar1c <- gls(ADAS ~ Time_f + trt_time, data = dta2L, weights=varIdent(form= ~ 1 | Time_f), method=“ML”, na.action=na.exclude, control=glsControl(opt=‘optim’))
m_cgls <- update(m_cgls, method=“ML”)
anova(m_cgls, m_ar1, m_ar1b)
Use the data set (in Stata dta format) to answer the following questions:
(a) Does body fat increase with age for both girls and boys? YES (b) Is there a difference between girls and boys with respect to the increase of the body fat with age?
(c) Do girls tend to have more body fat than boys?
Column 1: Subject ID
Column 2: Gender ID
Column 3: Age in years
Column 4: Body fat in kilograms
pacman::p_load(foreign)
dta3 <- read.dta("bodyfat.dta",
convert.dates = TRUE,
convert.factors = TRUE,
missing.type = FALSE,
convert.underscore = FALSE,
warn.missing.labels = TRUE)
str(dta3)
## 'data.frame': 2273 obs. of 4 variables:
## $ id : num 1 1 1 2 2 2 3 3 3 4 ...
## $ sex : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 1 1 1 1 ...
## $ age : num 11 13 15 11 13 15 11 13 15 11 ...
## $ bodyfat: num 4 6.2 10.5 8.1 10.4 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "21 May 2011 13:30"
## - attr(*, "formats")= chr [1:4] "%9.0g" "%9.0g" "%9.0g" "%9.0g"
## - attr(*, "types")= int [1:4] 254 254 254 254
## - attr(*, "val.labels")= chr [1:4] "" "labsex" "" ""
## - attr(*, "var.labels")= chr [1:4] "group(u)" "" "" ""
## - attr(*, "version")= int 8
## - attr(*, "label.table")=List of 1
## ..$ labsex: Named int [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "female" "male"
head (dta3)
## id sex age bodyfat
## 1 1 male 11 4.0
## 2 1 male 13 6.2
## 3 1 male 15 10.5
## 4 2 female 11 8.1
## 5 2 female 13 10.4
## 6 2 female 15 15.2
ggplot(dta3, aes(age, bodyfat,
group = id,
color = id)) +
geom_point()+
stat_summary(fun = mean, geom = "line") +
#stat_summary(fun = mean, geom = "point") +
#stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.3) +
#scale_shape_manual(values = c(1, 2)) +
facet_wrap( ~ sex)+
labs(x = "Age (in years)",
y = "Body fat (in %)") +
theme(legend.justification = c(1, 1),
legend.position = c(1, 1),
legend.background = element_rect(fill = "white",color = "black"))
p <- ggplot(dta3, aes(age, bodyfat,
group = sex,
linetype = sex,
shape = sex)) +
stat_summary(fun = mean, geom = "line") +
stat_summary(fun = mean, geom = "point") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
scale_shape_manual(values=c(1, 2)) +
scale_y_continuous(breaks=seq(5, 15))+
labs(x = "Age", y = "Body fat (in %)", linetype = "sex", shape = "sex") +
theme_minimal() +
theme(legend.position=c(.15,.85))
suppressWarnings(suppressMessages(print(p)))
Body fat increase with age for both girls and boys.
dta3w <- reshape(dta3, idvar = "id", timevar = "age", direction = "wide")
dta3w <- dta3w %>% select(1,2,3,5,7)
colnames(dta3w)<-c("id", "sex", "bf11", "bf13", "bf15")
dta3w %>%
dplyr::group_by(sex) %>%
dplyr::select(starts_with("bf")) %>%
furniture::table1(digits=2, total=FALSE, test=T, output="html")
## Adding missing grouping variables: `sex`
## Using dplyr::group_by() groups: sex
female | male | P-Value | |
---|---|---|---|
n = 331 | n = 393 | ||
bf11 | <.001 | ||
8.79 (1.93) | 7.95 (1.91) | ||
bf13 | <.001 | ||
10.16 (2.28) | 8.86 (2.23) | ||
bf15 | <.001 | ||
11.34 (2.80) | 9.82 (2.86) |
knitr::kable(cor(dta3w[,3:5], use="pair"))
bf11 | bf13 | bf15 | |
---|---|---|---|
bf11 | 1.0000000 | 0.2159806 | 0.1060951 |
bf13 | 0.2159806 | 1.0000000 | 0.5013258 |
bf15 | 0.1060951 | 0.5013258 | 1.0000000 |
scatterplotMatrix(~ bf11 + bf13 + bf15 | sex,
data=dta3w, col=8,
smooth=FALSE, regLine=FALSE,
ellipse=list(levels=c(.975),
fill=TRUE,
fill.alpha=.1))
## Warning in scatterplotMatrix.default(X[, -ncol], groups = X[, ncol], ...): number of groups exceeds number of available colors
## colors are recycled
Description Bone Mineral Density Data consisting of 112 girls randomized to receive calcium og placebo. Longitudinal measurements of bone mineral density (g/cm^2) measured approximately every 6th month in 3 years.
Format A data.frame containing 560 (incomplete) observations. The ’person’ column defines the individual girls of the study with measurements at visiting times ’visit’, and age in years ’age’ at the time of visit.
The bone mineral density variable is ’bmd’ (g/cm^2).
Source Vonesh & Chinchilli (1997), Table 5.4.1 on page 228.
pacman::p_load(lava)
data(calcium, package= "lava")
str(calcium)
## 'data.frame': 501 obs. of 6 variables:
## $ bmd : num 0.815 0.875 0.911 0.952 0.97 0.813 0.833 0.855 0.881 0.901 ...
## $ group : Factor w/ 2 levels "C","P": 1 1 1 1 1 2 2 2 2 2 ...
## $ person: int 101 101 101 101 101 102 102 102 102 102 ...
## $ visit : int 1 2 3 4 5 1 2 3 4 5 ...
## $ age : num 10.9 11.4 11.9 12.4 12.9 ...
## $ ctime : int 11078 11266 11436 11625 11807 11078 11266 11427 11616 11791 ...
dta <- calcium
dta$Time_f <- as.factor((dta$visit-1)*0.5)
xyplot(bmd ~ I((visit-1)*0.5) | group, data=dta, col=8,
xlab="Follow-up time (in year)",
ylab="Bone mineral density (mg/cm^3)",
type=c('b','g'),
cex=.5, lwd=.6)
p <- ggplot(dta, aes(Time_f, bmd,
group=group, shape=group, linetype=group)) +
stat_summary(fun=mean, geom="line") +
stat_summary(fun=mean, geom="point") +
stat_summary(fun.data=mean_se, geom="errorbar", width=0.1) +
scale_shape_manual(values=c(1, 2)) +
scale_y_continuous(breaks=seq(860, 1000, by=20))+
labs(x="Time (years since baseline)",
y="Mean bone mineral density (mg/cm^3)",
linetype="group", shape="group") +
theme_minimal()+
theme(legend.position=c(.2, .9))
suppressWarnings(suppressMessages(ggplot2:::print.ggplot(p)))
dta_w <- reshape(calcium, idvar = "person", timevar = "visit", direction = "wide")
dta_w <- dta_w %>% select(1,3,2,6,10,14,18)
colnames(dta_w)<-c("id", "group", "bmd1", "bmd2", "bmd3", "bmd4", "bmd5")
dta_w %>%
dplyr::group_by(group) %>%
dplyr::select(starts_with("bmd")) %>%
furniture::table1(digits=2, total=FALSE, test=FALSE, output="html")
## Adding missing grouping variables: `group`
## Using dplyr::group_by() groups: group
C | P | |
---|---|---|
n = 44 | n = 47 | |
bmd1 | ||
0.88 (0.05) | 0.87 (0.07) | |
bmd2 | ||
0.91 (0.06) | 0.89 (0.07) | |
bmd3 | ||
0.94 (0.06) | 0.92 (0.08) | |
bmd4 | ||
0.97 (0.07) | 0.94 (0.08) | |
bmd5 | ||
0.99 (0.06) | 0.96 (0.07) |
knitr::kable(cor(dta_w[,3:7], use="pair"))
bmd1 | bmd2 | bmd3 | bmd4 | bmd5 | |
---|---|---|---|---|---|
bmd1 | 1.0000000 | 0.9680986 | 0.9365273 | 0.9171087 | 0.8890096 |
bmd2 | 0.9680986 | 1.0000000 | 0.9718850 | 0.9563890 | 0.9369754 |
bmd3 | 0.9365273 | 0.9718850 | 1.0000000 | 0.9800385 | 0.9572398 |
bmd4 | 0.9171087 | 0.9563890 | 0.9800385 | 1.0000000 | 0.9740520 |
bmd5 | 0.8890096 | 0.9369754 | 0.9572398 | 0.9740520 | 1.0000000 |
scatterplotMatrix(~ bmd1 + bmd2 + bmd3 + bmd4 + bmd5,
data=dta_w, col=8,
smooth=FALSE, regLine=FALSE,
ellipse=list(levels=c(.975),
fill=TRUE,
fill.alpha=.1))
m_aov <- afex::aov_car(bmd ~ group*Time_f + Error(person/Time_f), data=dta)
## Warning: Missing values for following ID(s):
## 108, 111, 116, 117, 118, 128, 135, 209, 210, 211, 305, 306, 308, 318, 321, 329, 330, 401, 404, 419, 422
## Removing those cases from the analysis.
## Contrasts set to contr.sum for the following variables: group
knitr::kable(nice(m_aov))
Effect | df | MSE | F | ges | p.value |
---|---|---|---|---|---|
group | 1, 89 | 0.02 | 2.29 | .024 | .134 |
Time_f | 2.23, 198.83 | 0.00 | 594.84 *** | .217 | <.001 |
group:Time_f | 2.23, 198.83 | 0.00 | 4.74 ** | .002 | .008 |
emmeans(m_aov, ~ Time_f | group)
## group = C:
## Time_f emmean SE df lower.CL upper.CL
## X0 0.881 0.01 97.1 0.861 0.901
## X0.5 0.909 0.01 97.1 0.889 0.928
## X1 0.938 0.01 97.1 0.918 0.958
## X1.5 0.965 0.01 97.1 0.945 0.985
## X2 0.988 0.01 97.1 0.968 1.008
##
## group = P:
## Time_f emmean SE df lower.CL upper.CL
## X0 0.870 0.01 96.6 0.850 0.890
## X0.5 0.890 0.01 96.6 0.870 0.910
## X1 0.915 0.01 96.6 0.895 0.935
## X1.5 0.941 0.01 96.6 0.922 0.961
## X2 0.958 0.01 96.6 0.938 0.978
##
## Warning: EMMs are biased unless design is perfectly balanced
## Confidence level used: 0.95