The data consist of weight gains of 68 Asian children in a British community. Measurements of weight were recorded for children on up to five occasions visiting a clinic. The ages at which the measurements were taken are roughly to target examination dates of 6 weeks, then 8, 12, 27 months.
pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest)
fL <- "http://www.stata-press.com/data/mlmus3/asian.dta"
dta <- read.dta(fL)
str(dta)
## 'data.frame': 198 obs. of 6 variables:
## $ id : int 45 45 45 45 45 258 258 258 258 287 ...
## $ occ : int 1 2 3 4 5 1 2 3 4 1 ...
## $ age : num 0.137 0.657 1.218 1.429 2.272 ...
## $ weight: num 5.17 10.86 13.15 13.2 15.88 ...
## $ brthwt: int 4140 4140 4140 4140 4140 3155 3155 3155 3155 3850 ...
## $ gender: int 1 1 1 1 1 2 2 2 2 1 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "29 Jan 2005 17:33"
## - attr(*, "formats")= chr [1:6] "%8.0g" "%9.0g" "%8.0g" "%8.0g" ...
## - attr(*, "types")= int [1:6] 252 251 254 254 252 252
## - attr(*, "val.labels")= chr [1:6] "" "" "" "" ...
## - attr(*, "var.labels")= chr [1:6] "" "" "" "" ...
## - attr(*, "expansion.fields")=List of 5
## ..$ : chr [1:3] "_dta" "ReS_i" "id"
## ..$ : chr [1:3] "_dta" "ReS_ver" "v.2"
## ..$ : chr [1:3] "_dta" "ReS_j" "occ"
## ..$ : chr [1:3] "_dta" "ReS_str" "0"
## ..$ : chr [1:3] "_dta" "ReS_Xij" "weight age"
## - attr(*, "version")= int 8
#id 轉為factor
dta$id <- factor(dta$id)
#性別重命名為M,F
dta$gender <- factor(dta$gender, 1:2, labels = c("M", "F"))
#brthwt改單位為kg
dta$brthwt <- dta$brthwt/1000
#occ 轉為factor
dta$occ <- factor(dta$occ)
#改變資料呈現型態
glimpse(dta)
## Rows: 198
## Columns: 6
## $ id <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, ...
## $ occ <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, ...
## $ age <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.191...
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, ...
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.15...
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, ...
head(dta)
## id occ age weight brthwt gender
## 1 45 1 0.1368925 5.171 4.140 M
## 2 45 2 0.6570842 10.860 4.140 M
## 3 45 3 1.2183436 13.150 4.140 M
## 4 45 4 1.4291581 13.200 4.140 M
## 5 45 5 2.2724161 15.880 4.140 M
## 6 258 1 0.1916496 5.300 3.155 F
ggplot(dta, aes(age, weight, group = id, color=gender))+
geom_point()+
geom_line() +
facet_wrap( ~ gender)+
labs(x = "Age (year)", y = "Weight (kg)") +
theme(legend.position = c(.9, .2))
# ggplot, age(x), weight(y), regression line
ggplot(dta, aes(age, weight, color = gender))+
geom_point()+
stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
labs(x = "Age (year)", y = "Weight (kg)")+
theme(legend.position = c(.9, .2))
age 與 wight 呈曲線關係
dta$age2 <- dta$age^2
# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age |id),
data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
##
## REML criterion at convergence: 496.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81353 -0.43471 -0.03652 0.45573 2.79813
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.1920 0.4381
## age 0.2702 0.5198 0.07
## Residual 0.3346 0.5784
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.02225 0.54924 57.65949 1.861 0.06782 .
## brthwt 0.87755 0.16616 53.16001 5.281 2.43e-06 ***
## genderF -0.51922 0.16698 52.23221 -3.109 0.00303 **
## age 7.73933 0.23674 135.03625 32.691 < 2e-16 ***
## age2 -1.67125 0.08762 114.78740 -19.074 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age
## brthwt -0.963
## genderF -0.240 0.095
## age -0.221 0.058 -0.003
## age2 0.199 -0.051 0.002 -0.927
# Type 3 tests, KR-method - This is better
afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta)
## Contrasts set to contr.sum for the following variables: gender, id
## Numerical variables NOT centered on 0: brthwt, age, age2
## If in interactions, interpretation of lower order (e.g., main) effects difficult.
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
## Mixed Model Anova Table (Type 3 tests, KR-method)
##
## Model: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
## Effect df F p.value
## 1 brthwt 1, 62.42 26.79 *** <.001
## 2 gender 1, 61.58 9.31 ** .003
## 3 age 1, 141.54 1046.79 *** <.001
## 4 age2 1, 122.93 355.47 *** <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
dta$d1 <- dta$age - 0.4; dta$d1[dta$d1 < 0] <- 0
dta$d2 <- dta$age - 0.9; dta$d2[dta$d2 < 0] <- 0
dta$d3 <- dta$age - 1.5; dta$d3[dta$d3 < 0] <- 0
summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
## Data: dta
##
## REML criterion at convergence: 465
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81475 -0.41746 -0.01623 0.42323 2.51409
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.2476 0.4976
## age 0.3273 0.5721 -0.12
## Residual 0.2523 0.5023
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.4941 0.5600 65.7487 0.882 0.380826
## brthwt 0.8949 0.1617 51.4586 5.533 1.07e-06 ***
## genderF -0.5459 0.1627 50.8735 -3.356 0.001504 **
## age 10.3595 1.0792 83.2345 9.599 4.03e-15 ***
## d1 -6.1891 1.8004 80.6638 -3.438 0.000931 ***
## d2 -1.6641 1.3071 82.8518 -1.273 0.206547
## d3 -0.3684 0.9920 80.3757 -0.371 0.711367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age d1 d2
## brthwt -0.927
## genderF -0.231 0.099
## age -0.339 0.045 -0.002
## d1 0.313 -0.039 -0.001 -0.985
## d2 -0.239 0.032 0.012 0.816 -0.884
## d3 0.167 -0.031 -0.019 -0.533 0.606 -0.886
dtafm1 <- data.frame(dta, yhat=fitted(m1))
ggplot(dtafm1, aes(age, weight, group=id, color = gender))+
geom_point()+
geom_line(aes(age, yhat))+
facet_wrap(~ gender)+
labs(x = "Age (year)", y = "Weight (kg)")
quantile(dta$age)
## 0% 25% 50% 75% 100%
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012
summary(m0 <- lm(weight ~ age, data=dta))
##
## Call:
## lm(formula = weight ~ age, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2994 -1.0341 -0.2524 1.0947 4.2614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2021 0.1779 29.24 <2e-16 ***
## age 3.3640 0.1332 25.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared: 0.765, Adjusted R-squared: 0.7638
## F-statistic: 637.9 on 1 and 196 DF, p-value: < 2.2e-16
summary(m0z <- segmented::segmented(m0, seg.Z= ~ age,
psi=list(age=c(0.6, 1.4)),
control=seg.control(display=FALSE)))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6,
## 1.4)), control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.age 0.317 0.138
## psi2.age 2.300 0.047
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.262 1.391 1.626 0.106
## age 16.425 10.311 1.593 0.113
## U1.age -13.786 10.312 -1.337 NA
## U2.age -8.539 3.457 -2.470 NA
##
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528, Adjusted R-squared: 0.849
##
## Convergence attained in 1 iter. (rel. change 6.9923e-08)
plot(m0z)
with(dta, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()