The data consist of weight gains of 68 Asian children in a British community. Measurements of weight were recorded for children on up to five occasions visiting a clinic. The ages at which the measurements were taken are roughly to target examination dates of 6 weeks, then 8, 12, 27 months.
pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest)
fL <- "http://www.stata-press.com/data/mlmus3/asian.dta"
dta <- read.dta(fL)
dta$id <- factor(dta$id)
dta$gender <- factor(dta$gender, 1:2, labels = c("M", "F"))
dta$brthwt <- dta$brthwt/1000
dta$occ <- factor(dta$occ)
將dta的id,gender,brthwt,occ設為factor
glimpse(dta)
## Rows: 198
## Columns: 6
## $ id <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, ...
## $ occ <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, ...
## $ age <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.191...
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, ...
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.15...
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, ...
顯示dta
head(dta)
## id occ age weight brthwt gender
## 1 45 1 0.1368925 5.171 4.140 M
## 2 45 2 0.6570842 10.860 4.140 M
## 3 45 3 1.2183436 13.150 4.140 M
## 4 45 4 1.4291581 13.200 4.140 M
## 5 45 5 2.2724161 15.880 4.140 M
## 6 258 1 0.1916496 5.300 3.155 F
ggplot(dta, aes(age, weight, group = id, color=gender))+
geom_point()+
geom_line() +
facet_wrap( ~ gender)+
labs(x = "Age (year)", y = "Weight (kg)") +
theme(legend.position = c(.9, .2))
繪出年齡與體重的散佈圖,並且以“性別”分面,呈現“M”,“F”表示男性女性。
ggplot(dta, aes(age, weight, color = gender))+
geom_point()+
stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
labs(x = "Age (year)", y = "Weight (kg)")+
theme(legend.position = c(.9, .2))
替此xy散佈圖加上趨勢線
dta$age2 <- dta$age^2
從dta中取出物件欄位資料(這裡的物件指的是“age”)
# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age |id),
data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
##
## REML criterion at convergence: 496.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81353 -0.43471 -0.03652 0.45573 2.79813
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.1920 0.4381
## age 0.2702 0.5198 0.07
## Residual 0.3346 0.5784
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.02225 0.54924 57.65949 1.861 0.06782 .
## brthwt 0.87755 0.16616 53.16001 5.281 2.43e-06 ***
## genderF -0.51922 0.16698 52.23221 -3.109 0.00303 **
## age 7.73933 0.23674 135.03625 32.691 < 2e-16 ***
## age2 -1.67125 0.08762 114.78740 -19.074 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age
## brthwt -0.963
## genderF -0.240 0.095
## age -0.221 0.058 -0.003
## age2 0.199 -0.051 0.002 -0.927
依變項(weight)~自變項(brthwt)+自變項(gender)+自變項(age)+自變項(age2)
# Type 3 tests, KR-method - This is better
afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta)
## Contrasts set to contr.sum for the following variables: gender, id
## Numerical variables NOT centered on 0: brthwt, age, age2
## If in interactions, interpretation of lower order (e.g., main) effects difficult.
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
## Mixed Model Anova Table (Type 3 tests, KR-method)
##
## Model: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
## Effect df F p.value
## 1 brthwt 1, 62.42 26.79 *** <.001
## 2 gender 1, 61.58 9.31 ** .003
## 3 age 1, 141.54 1046.79 *** <.001
## 4 age2 1, 122.93 355.47 *** <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# construct piecewise lines defined by knots
dta$d1 <- dta$age - 0.4; dta$d1[dta$d1 < 0] <- 0
dta$d2 <- dta$age - 0.9; dta$d2[dta$d2 < 0] <- 0
dta$d3 <- dta$age - 1.5; dta$d3[dta$d3 < 0] <- 0
將dta中依“age”分成三段
summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
## Data: dta
##
## REML criterion at convergence: 465
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.81475 -0.41746 -0.01623 0.42323 2.51409
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.2476 0.4976
## age 0.3273 0.5721 -0.12
## Residual 0.2523 0.5023
## Number of obs: 198, groups: id, 68
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.4941 0.5600 65.7487 0.882 0.380826
## brthwt 0.8949 0.1617 51.4586 5.533 1.07e-06 ***
## genderF -0.5459 0.1627 50.8735 -3.356 0.001504 **
## age 10.3595 1.0792 83.2345 9.599 4.03e-15 ***
## d1 -6.1891 1.8004 80.6638 -3.438 0.000931 ***
## d2 -1.6641 1.3071 82.8518 -1.273 0.206547
## d3 -0.3684 0.9920 80.3757 -0.371 0.711367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) brthwt gendrF age d1 d2
## brthwt -0.927
## genderF -0.231 0.099
## age -0.339 0.045 -0.002
## d1 0.313 -0.039 -0.001 -0.985
## d2 -0.239 0.032 0.012 0.816 -0.884
## d3 0.167 -0.031 -0.019 -0.533 0.606 -0.886
Correlation of Fixed Effects中可以得知:
d1,d2,d3此三個分段與age的相關分別為-0.985,0.816,-0.533
故為高度負相關,正相關,負相關。
也就是兒童在age0.4以前年齡增長與體重無太大相關
轉折點過後(age0.9起)體重會隨著年齡增長而增加。到了第三段起相關性又降低。
dtafm1 <- data.frame(dta, yhat=fitted(m1))
ggplot(dtafm1, aes(age, weight, group=id, color = gender))+
geom_point()+
geom_line(aes(age, yhat))+
facet_wrap(~ gender)+
labs(x = "Age (year)", y = "Weight (kg)")
男性與女性分開的散佈圖,分別畫出迴歸線
quantile(dta$age)
## 0% 25% 50% 75% 100%
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012
summary(m0 <- lm(weight ~ age, data=dta))
##
## Call:
## lm(formula = weight ~ age, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2994 -1.0341 -0.2524 1.0947 4.2614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2021 0.1779 29.24 <2e-16 ***
## age 3.3640 0.1332 25.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared: 0.765, Adjusted R-squared: 0.7638
## F-statistic: 637.9 on 1 and 196 DF, p-value: < 2.2e-16
summary(m0z <- segmented::segmented(m0, seg.Z= ~ age,
psi=list(age=c(0.6, 1.4)),
control=seg.control(display=FALSE)))
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6,
## 1.4)), control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.age 0.315 0.135
## psi2.age 2.300 0.047
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.240 1.391 1.610 0.109
## age 16.590 10.311 1.609 0.109
## U1.age -13.950 10.313 -1.353 NA
## U2.age -8.541 3.457 -2.471 NA
##
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528, Adjusted R-squared: 0.849
##
## Convergence attained in 1 iter. (rel. change 7.2397e-08)
找出何處為適合的轉折處(最小平方法),得出別在年齡為
0.317,2.300為轉折。
plot(m0z)
with(dta, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()
繪出分段的圖形