# Loading package
pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest, ggplot2)
# data file link
fL <- "asian.dta"
dta1 <- read.dta(fL)
dta1$id <- factor(dta1$id)
dta1$gender <- factor(dta1$gender, 1:2, labels = c("M", "F")) # 把原本的1,2 改成"M", "F"
dta1$brthwt <- dta1$brthwt/1000 # 出生體重原本單位是g,改成kg方便跟weight gains單位統一
dta1$occ <- factor(dta1$occ) # 場域編號改成factor變項
glimpse(dta1) # It's a little like "str()" applied to a data frame
## Rows: 198
## Columns: 6
## $ id <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, ...
## $ occ <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, ...
## $ age <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.191...
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, ...
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.15...
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, ...
head(dta1)
## id occ age weight brthwt gender
## 1 45 1 0.1368925 5.171 4.140 M
## 2 45 2 0.6570842 10.860 4.140 M
## 3 45 3 1.2183436 13.150 4.140 M
## 4 45 4 1.4291581 13.200 4.140 M
## 5 45 5 2.2724161 15.880 4.140 M
## 6 258 1 0.1916496 5.300 3.155 F
ggplot(dta1, aes(age, weight, group = id, color = gender))+
geom_point()+
geom_line() +
labs(x = "Age (year)", y = "Weight (kg)") +
theme(legend.position = c(.9, .2))
The body weight change by age between male and female.
Male > Female?
ggplot(dta1, aes(age, weight, group = id, color = gender))+
geom_point()+
geom_line() +
facet_wrap( ~ gender)+
labs(x = "Age (year)", y = "Weight (kg)") +
theme(legend.position = c(.9, .2))
對照生長曲線過去提及3、4個月大時是出生體重的大約2倍;1歲時是出生體重的大約3倍;2歲時是出生體重的大約4倍,此資料好像也可以印證類似的說法。
Polynomial regression line 因為原本資料呈現的狀況不是線性關係,試著以Polynomial regression 來找合適的模型,但polynominal也可能估不準?(正弦曲線sine wave 到極值就會轉向), 所以後來有logistic regression?
ggplot(dta1, aes(age, weight, color = gender))+
geom_point()+
stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
labs(x = "Age (year)", y = "Weight (kg)")+
theme(legend.position = c(.9, .2))
dta1$age2 <- dta1$age^2 #age squre
Welch–Satterthwaite?
Fixed effect: birthwt, gender, age, age2
Random effect (random intercept and slope): age
# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age | id), data = dta1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta1
##
## REML criterion at convergence: 496.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81353 -0.43471 -0.03652 0.45573 2.79813
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.1920 0.4381
## age 0.2702 0.5198 0.07
## Residual 0.3346 0.5784
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.02225 0.54924 57.65949 1.861 0.06782 .
## brthwt 0.87755 0.16616 53.16001 5.281 2.43e-06 ***
## genderF -0.51922 0.16698 52.23221 -3.109 0.00303 **
## age 7.73933 0.23674 135.03625 32.691 < 2e-16 ***
## age2 -1.67125 0.08762 114.78740 -19.074 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age
## brthwt -0.963
## genderF -0.240 0.095
## age -0.221 0.058 -0.003
## age2 0.199 -0.051 0.002 -0.927
The estimated weight is (1.02 ± 0.44) + 0.88(brthwt) - 0.52(female) + (7.74 ± 0.52)(age) - 1.67(age2)
Kenward-Roger
# Type 3 tests, KR-method - This is better
summary(m0k <- afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta1))
## Contrasts set to contr.sum for the following variables: gender, id
## Numerical variables NOT centered on 0: brthwt, age, age2
## If in interactions, interpretation of lower order (e.g., main) effects difficult.
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: data
##
## REML criterion at convergence: 498
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81353 -0.43471 -0.03652 0.45573 2.79813
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.1920 0.4381
## age 0.2702 0.5198 0.07
## Residual 0.3346 0.5784
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.76264 0.53537 58.73938 1.425 0.15959
## brthwt 0.87755 0.16616 53.16001 5.281 2.43e-06 ***
## gender1 0.25961 0.08349 52.23221 3.109 0.00303 **
## age 7.73933 0.23674 135.03625 32.691 < 2e-16 ***
## age2 -1.67125 0.08762 114.78741 -19.074 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendr1 age
## brthwt -0.973
## gender1 0.090 -0.095
## age -0.227 0.058 0.003
## age2 0.204 -0.051 -0.002 -0.927
sjPlot::tab_model(m0, m0k, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
weight | weight | |||
---|---|---|---|---|
Predictors | Estimates | std. Error | Estimates | std. Error |
(Intercept) | 1.02 | 0.55 | 0.76 | 0.54 |
brthwt | 0.88 | 0.17 | 0.88 | 0.17 |
gender [F] | -0.52 | 0.17 | ||
age | 7.74 | 0.24 | 7.74 | 0.24 |
age2 | -1.67 | 0.09 | -1.67 | 0.09 |
gender1 | 0.26 | 0.08 | ||
Random Effects | ||||
σ2 | 0.33 | 0.33 | ||
τ00 | 0.19 id | 0.19 id | ||
τ11 | 0.27 id.age | 0.27 id.age | ||
ρ01 | 0.07 id | 0.07 id | ||
ICC | 0.68 | 0.68 |
Four data collection time-point -> 3 knots (切在每次間格的中間?)
# construct piecewise lines defined by knots
dta1$d1 <- dta1$age - 0.4; dta1$d1[dta1$d1 < 0] <- 0
dta1$d2 <- dta1$age - 0.9; dta1$d2[dta1$d2 < 0] <- 0
dta1$d3 <- dta1$age - 1.5; dta1$d3[dta1$d3 < 0] <- 0
Fixed effect: birthwt, gender, age, d1, d2, d3
Random effect (random intercept and slope): age
summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta1))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
## Data: dta1
##
## REML criterion at convergence: 465
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81475 -0.41746 -0.01623 0.42323 2.51409
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.2476 0.4976
## age 0.3273 0.5721 -0.12
## Residual 0.2523 0.5023
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.4941 0.5600 65.7487 0.882 0.380826
## brthwt 0.8949 0.1617 51.4586 5.533 1.07e-06 ***
## genderF -0.5459 0.1627 50.8735 -3.356 0.001504 **
## age 10.3595 1.0792 83.2345 9.599 4.03e-15 ***
## d1 -6.1891 1.8004 80.6638 -3.438 0.000931 ***
## d2 -1.6641 1.3071 82.8518 -1.273 0.206547
## d3 -0.3684 0.9920 80.3757 -0.371 0.711367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age d1 d2
## brthwt -0.927
## genderF -0.231 0.099
## age -0.339 0.045 -0.002
## d1 0.313 -0.039 -0.001 -0.985
## d2 -0.239 0.032 0.012 0.816 -0.884
## d3 0.167 -0.031 -0.019 -0.533 0.606 -0.886
The estimated weight is (0.49 ± 0.5) + 0.9(brthwt) - 0.55(female) + (10.36 ± 0.57)(age) - 6.12(d1) -1.66(d2) -0.37(d3)
Y hat (written ŷ ) is the predicted value of y (the dependent variable) in m1.
dta1fm1 <- data.frame(dta1, yhat=fitted(m1))
點是實際的體重值
線是預測的體重值
ggplot(dta1fm1, aes(age, weight, group=id, color = gender))+
geom_point()+
geom_line(aes(age, yhat))+
facet_wrap(~ gender)+
labs(x = "Age (year)", y = "Weight (kg)")
quantile(dta1$age)
## 0% 25% 50% 75% 100%
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012
看年齡對體重的影響(Simple linear regression)
summary(m0 <- lm(weight ~ age, data=dta1))
##
## Call:
## lm(formula = weight ~ age, data = dta1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2994 -1.0341 -0.2524 1.0947 4.2614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2021 0.1779 29.24 <2e-16 ***
## age 3.3640 0.1332 25.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared: 0.765, Adjusted R-squared: 0.7638
## F-statistic: 637.9 on 1 and 196 DF, p-value: < 2.2e-16
Estimate weight = 5.20 + 3.36(age)
Piecewise regression based on quantile
summary(m0z <- segmented::segmented(m0, seg.Z= ~ age,
psi=list(age=c(0.6, 1.4)),
control=seg.control(display=FALSE)))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6,
## 1.4)), control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.age 0.318 0.139
## psi2.age 2.300 0.047
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.269 1.391 1.631 0.105
## age 16.373 10.311 1.588 0.114
## U1.age -13.734 10.312 -1.332 NA
## U2.age -8.539 3.457 -2.470 NA
##
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528, Adjusted R-squared: 0.849
##
## Convergence attained in 1 iter. (rel. change 1.6843e-06)
psi: estimated break-points and relevant (approximate) standard errors
The estimated weight is 2.26 -13.79(U1.age) -8.54(U1.age) + 16.43(age)
plot(m0z)
with(dta1, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()
從出生至四個月大(0.317)間年齡對體重的影響大於“四個月(0.317)至2歲間(2.3)” ,2歲後年齡對體重的影響逐漸降低。
Column 1: Mean reaction time in milliseconds
Column 2: Days of sleep deprivation
Column 3: Truck driver ID
# load package
library(lme4)
# 18 subjects (3hrs sleep) 10 observations on each, p64 of bates
# the "new" random effects (vs nlme) see Bates book 2010
# sleep deprivation 3hrs/night truckers
#input
data(sleepstudy, package="lme4")
# nuber of row and column
dim(sleepstudy)
## [1] 180 3
# top of six value
head(sleepstudy)
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## 6 414.6901 5 308
# with function (8 subjects, 10 observations on each)
with(sleepstudy, table(Subject))
## Subject
## 308 309 310 330 331 332 333 334 335 337 349 350 351 352 369 370 371 372
## 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
bivariate scatterplots or time-series plots
#
lattice::xyplot(Reaction ~ Days | Subject, data=sleepstudy,
xlab = "Days of sleep deprivation",
ylab = "Average reaction time (ms)",
type = c("g","p","r"),
as.table = TRUE)
The average reaction time of the most of the subject is increase by the days of sleep deprivation. Only No.335 in the opposite.
facet by subject
# unneeded
m0 <- lmer(Reaction ~ Days | Subject, data = sleepstudy)
# unneeded
sleepstudy1 <- sleepstudy %>%
mutate(coef1 = fitted(m0))
sleepstudy1$Subject <- factor(sleepstudy1$Subject, levels = c("309", "310", "335", "349", "351", "370", "371", "334", "330", "369", "332", "331", "350", "333", "372", "352", "308", "337"))
ggplot(sleepstudy1 , aes(Days, Reaction)) +
stat_smooth(method = "lm",
formula = y ~ x,
se = FALSE,
lwd = rel(.5)) +
geom_point (size = rel(.5)) +
facet_wrap ( ~ Subject, nrow = 1) +
labs(x = "Deprivation (in Days)",
y = "Reaction time (in ms)")
ggplot2::ggplot(sleepstudy1, aes(Days, Reaction)) +
geom_point() +
stat_smooth(method = "lm", aes(Days, Reaction), se = TRUE) +
labs(x = "Deprivation (in Days)",
y = "Reaction time (in ms)") +
theme_bw()
## `geom_smooth()` using formula 'y ~ x'
The impact of Days of sleep deprivation on the reaction time by each of subjects.
# Fit a list of lm or glm objects with a common model for different subgroups of the data.
sleeplmList <- lmList(Reaction ~ Days | Subject, data = sleepstudy)
# summary by subjects
sleeplmList
## Call: lmList(formula = Reaction ~ Days | Subject, data = sleepstudy)
## Coefficients:
## (Intercept) Days
## 308 244.1927 21.764702
## 309 205.0549 2.261785
## 310 203.4842 6.114899
## 330 289.6851 3.008073
## 331 285.7390 5.266019
## 332 264.2516 9.566768
## 333 275.0191 9.142045
## 334 240.1629 12.253141
## 335 263.0347 -2.881034
## 337 290.1041 19.025974
## 349 215.1118 13.493933
## 350 225.8346 19.504017
## 351 261.1470 6.433498
## 352 276.3721 13.566549
## 369 254.9681 11.348109
## 370 210.4491 18.056151
## 371 253.6360 9.188445
## 372 267.0448 11.298073
##
## Degrees of freedom: 180 total; 144 residual
## Residual standard error: 25.59182
#mean int and slope match lmer Fixed effects results p.67
mean(coef(sleeplmList)[,1])
## [1] 251.4051
mean(coef(sleeplmList)[,2])
## [1] 10.46729
Mean intercept = 251.4051
Mean slope (Days) = 10.46729
# these are too big as they should be,
# compare with variance random effects p.67
# mle subtracts off wobble in estimated indiv regressions Y on t
# - SSR/SST
var(coef(sleeplmList)[,1])
## [1] 838.3423
var(coef(sleeplmList)[,2])
## [1] 43.01034
# quantile of intercept and slope
quantile(coef(sleeplmList)[,1])
## 0% 25% 50% 75% 100%
## 203.4842 229.4167 258.0576 273.0255 290.1041
quantile(coef(sleeplmList)[,2])
## 0% 25% 50% 75% 100%
## -2.881034 6.194548 10.432421 13.548395 21.764702
Should consider the random effect of slope?
# stem-and-leaf plot
stem(coef(sleeplmList)[,2])
##
## The decimal point is 1 digit(s) to the right of the |
##
## -0 | 3
## 0 | 23
## 0 | 56699
## 1 | 011234
## 1 | 89
## 2 | 02
correlation between intercept and slope is -0.14
cor(coef(sleeplmList)[,1], coef(sleeplmList)[,2])
## [1] -0.1375534
# first 2 do REML, second pair match
# Bates p.67 in doing MLE (REML FALSE)
sleeplmer <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
summary(sleeplmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.10 24.741
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 251.405 6.825 17.000 36.838 < 2e-16 ***
## Days 10.467 1.546 17.000 6.771 3.26e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
sleeplmer2 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy, REML=FALSE)
summary(sleeplmer2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## AIC BIC logLik deviance df.resid
## 1763.9 1783.1 -876.0 1751.9 174
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9416 -0.4656 0.0289 0.4636 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 565.48 23.780
## Days 32.68 5.717 0.08
## Residual 654.95 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 251.405 6.632 18.001 37.907 < 2e-16 ***
## Days 10.467 1.502 18.000 6.968 1.65e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
Column 1: Subject ID
Column 2: Current Age (years)
Column 3: Age at Menarche (years)
Column 4: Time relative to Menarche (years)
Column 5: Percent Body Fat
# load them
pacman::p_load(mlmRev, tidyverse, lme4, foreign, merTools, sjPlot)
# download a copy of data from the following link and save it to the
# data folder
# https://content.sph.harvard.edu/fitzmaur/ala2e/fat.dta
# input data
dta3 <- read.dta("fat.dta")
# rename
names(dta3) <-c("ID", "Age", "Age_m", "Time", "PBF")
# show first 6 obs.
head(dta3)
## ID Age Age_m Time PBF
## 1 1 9.32 13.19 -3.87 7.94
## 2 1 10.33 13.19 -2.86 15.65
## 3 1 11.24 13.19 -1.95 13.51
## 4 1 12.19 13.19 -1.00 23.23
## 5 1 13.24 13.19 0.05 10.52
## 6 1 14.24 13.19 1.05 20.45
# construct indicator variable for after mc
dta3 <- dta3 %>%
mutate(ID = factor(ID),
Time_a = ifelse(Time > 0, Time, 0),
Menarche = as.factor(ifelse(Time > 0, "T", "F")))
# show data structure
str(dta3)
## 'data.frame': 1049 obs. of 7 variables:
## $ ID : Factor w/ 162 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Age : num 9.32 10.33 11.24 12.19 13.24 ...
## $ Age_m : num 13.2 13.2 13.2 13.2 13.2 ...
## $ Time : num -3.87 -2.86 -1.95 -1 0.05 ...
## $ PBF : num 7.94 15.65 13.51 23.23 10.52 ...
## $ Time_a : num 0 0 0 0 0.05 ...
## $ Menarche: Factor w/ 2 levels "F","T": 1 1 1 1 2 2 1 1 1 1 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "23 Mar 2011 15:46"
## - attr(*, "formats")= chr [1:5] "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int [1:5] 254 254 254 254 254
## - attr(*, "val.labels")= chr [1:5] "" "" "" "" ...
## - attr(*, "var.labels")= chr [1:5] "" "" "" "" ...
## - attr(*, "version")= int 12
# black and white theme
theme_set(theme_minimal())
ggplot(subset(dta3, Menarche=="F"),
aes(Time, PBF, group=ID)) +
#geom_point()+
geom_line(col="Rosy Brown 1")+
stat_smooth(aes(group=1),method='lm',formula=y~x, col="Indian Red 1")+
labs(x="Age (in year)",
y="% Body fat")
The percent body fat is not increased by age before the menarche among 162 girls.
ggplot(subset(dta3, Menarche=="T"),
aes(Time, PBF, group=ID)) +
# geom_point()+
geom_line(col="cyan3")+
stat_smooth(aes(group=1),method='lm',formula=y~x, col="cyan4")+
labs(x="Age (in year)",
y="% Body fat")
# baseline model if you want one
mb_0 <- lmer(PBF ~ Time + (1 | ID), data = subset(dta3, Menarche=='F'))
tab_model(mb_0)
PBF | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 20.20 | 19.06 – 21.34 | <0.001 |
Time | -0.09 | -0.33 – 0.16 | 0.494 |
Random Effects | |||
σ2 | 10.02 | ||
τ00 ID | 43.81 | ||
ICC | 0.81 | ||
N ID | 162 | ||
Observations | 497 | ||
Marginal R2 / Conditional R2 | 0.000 / 0.814 |
ma_0 <- lmer(PBF ~ Time + (Time | ID), data=subset(dta3, Menarche=='T'))
tab_model(ma_0)
PBF | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 22.59 | 21.42 – 23.75 | <0.001 |
Time | 1.97 | 1.68 – 2.25 | <0.001 |
Random Effects | |||
σ2 | 7.82 | ||
τ00 ID | 46.21 | ||
τ11 ID.Time | 1.42 | ||
ρ01 ID | -0.55 | ||
ICC | 0.82 | ||
N ID | 160 | ||
Observations | 552 | ||
Marginal R2 / Conditional R2 | 0.123 / 0.844 |
betas_a <- na.omit(coef(lmList(PBF ~ Time | ID, data=subset(dta3, Menarche=='T'))))
car::dataEllipse(betas_a[,1],betas_a[,2])
m4 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data =dta3)
dta_full <- dta3 %>%
mutate(yhat = fitted(m4))
# individual connected lines different color for before and after
# add group regression lines
# dta_full
ggplot(dta_full, aes(Time, yhat, color = Menarche)) +
geom_line(aes(group = ID)) +
geom_point(alpha = 0.5) +
stat_smooth(method="lm", aes(group = Menarche)) +
guides(color = F) +
labs(x = "Age relative to menarche (in years)",
y = "Percent body fat")
## `geom_smooth()` using formula 'y ~ x'
# baseline model if you want one
summary(m0 <- lmer(PBF ~ Time + (Time | ID), data=dta3))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PBF ~ Time + (Time | ID)
## Data: dta3
##
## REML criterion at convergence: 6196.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7577 -0.5961 0.0221 0.6169 3.0981
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 38.2178 6.1821
## Time 0.6938 0.8329 -0.24
## Residual 11.6215 3.4090
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 23.04978 0.49985 159.73871 46.11 <2e-16 ***
## Time 1.55480 0.08484 145.73759 18.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Time -0.199
## convergence code: 0
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
# model with 3 random effects associated with an individual
summary(m1 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data = dta3))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: PBF ~ Time + Time_a + (Time + Time_a | ID)
## Data: dta3
##
## REML criterion at convergence: 6062.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7742 -0.5900 -0.0359 0.5946 3.3798
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 45.942 6.778
## Time 1.631 1.277 0.29
## Time_a 2.750 1.658 -0.54 -0.83
## Residual 9.473 3.078
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 21.3614 0.5646 161.5610 37.837 < 2e-16 ***
## Time 0.4171 0.1572 108.4614 2.654 0.00915 **
## Time_a 2.0471 0.2280 132.6773 8.980 2.32e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Time
## Time 0.351
## Time_a -0.515 -0.872
# estimate random effects by simulation
m1_re <- REsim(m1)
# plot for random effects
plotREsim(m1_re)
# residual plot
plot(m1, resid(., type = "pearson") ~ fitted(.),
abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")