0.1 Data

The data consist of weight gains of 68 Asian children in a British community. Measurements of weight were recorded for children on up to five occasions visiting a clinic. The ages at which the measurements were taken are roughly to target examination dates of 6 weeks, then 8, 12, 27 months.

1 load package

pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest)

2 load Data

fL <- "http://www.stata-press.com/data/mlmus3/asian.dta"
dta <- read.dta(fL)
dta$id <- factor(dta$id)
dta$gender <- factor(dta$gender, 1:2, labels = c("M", "F"))
dta$brthwt <- dta$brthwt/1000
dta$occ <- factor(dta$occ)

將dta的id,gender,brthwt,occ設為factor

glimpse(dta)
## Rows: 198
## Columns: 6
## $ id     <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, ...
## $ occ    <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, ...
## $ age    <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.191...
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, ...
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.15...
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, ...

顯示dta

head(dta)
##    id occ       age weight brthwt gender
## 1  45   1 0.1368925  5.171  4.140      M
## 2  45   2 0.6570842 10.860  4.140      M
## 3  45   3 1.2183436 13.150  4.140      M
## 4  45   4 1.4291581 13.200  4.140      M
## 5  45   5 2.2724161 15.880  4.140      M
## 6 258   1 0.1916496  5.300  3.155      F

3 Plot

ggplot(dta, aes(age, weight, group = id, color=gender))+
 geom_point()+ 
 geom_line() +
 facet_wrap( ~ gender)+
 labs(x = "Age (year)", y = "Weight (kg)") +
 theme(legend.position = c(.9, .2))

繪出年齡與體重的散佈圖,並且以“性別”分面,呈現“M”,“F”表示男性女性。

ggplot(dta, aes(age, weight, color = gender))+
 geom_point()+
 stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
 labs(x = "Age (year)", y = "Weight (kg)")+
 theme(legend.position = c(.9, .2))

替此xy散佈圖加上趨勢線

dta$age2 <- dta$age^2

從dta中取出物件欄位資料(這裡的物件指的是“age”)

4 Analysis

# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age |id),
data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
##    Data: dta
## 
## REML criterion at convergence: 496.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.81353 -0.43471 -0.03652  0.45573  2.79813 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  id       (Intercept) 0.1920   0.4381       
##           age         0.2702   0.5198   0.07
##  Residual             0.3346   0.5784       
## Number of obs: 198, groups:  id, 68
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   1.02225    0.54924  57.65949   1.861  0.06782 .  
## brthwt        0.87755    0.16616  53.16001   5.281 2.43e-06 ***
## genderF      -0.51922    0.16698  52.23221  -3.109  0.00303 ** 
## age           7.73933    0.23674 135.03625  32.691  < 2e-16 ***
## age2         -1.67125    0.08762 114.78740 -19.074  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) brthwt gendrF age   
## brthwt  -0.963                     
## genderF -0.240  0.095              
## age     -0.221  0.058 -0.003       
## age2     0.199 -0.051  0.002 -0.927

依變項(weight)~自變項(brthwt)+自變項(gender)+自變項(age)+自變項(age2)

# Type 3 tests, KR-method - This is better
afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta)
## Contrasts set to contr.sum for the following variables: gender, id
## Numerical variables NOT centered on 0: brthwt, age, age2
## If in interactions, interpretation of lower order (e.g., main) effects difficult.
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
## Mixed Model Anova Table (Type 3 tests, KR-method)
## 
## Model: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
##   Effect        df           F p.value
## 1 brthwt  1, 62.42   26.79 ***   <.001
## 2 gender  1, 61.58     9.31 **    .003
## 3    age 1, 141.54 1046.79 ***   <.001
## 4   age2 1, 122.93  355.47 ***   <.001
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# construct piecewise lines defined by knots
dta$d1 <- dta$age - 0.4; dta$d1[dta$d1 < 0] <- 0
dta$d2 <- dta$age - 0.9; dta$d2[dta$d2 < 0] <- 0
dta$d3 <- dta$age - 1.5; dta$d3[dta$d3 < 0] <- 0

將dta中依“age”分成三段

summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
##    Data: dta
## 
## REML criterion at convergence: 465
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.81475 -0.41746 -0.01623  0.42323  2.51409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  id       (Intercept) 0.2476   0.4976        
##           age         0.3273   0.5721   -0.12
##  Residual             0.2523   0.5023        
## Number of obs: 198, groups:  id, 68
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   0.4941     0.5600 65.7487   0.882 0.380826    
## brthwt        0.8949     0.1617 51.4586   5.533 1.07e-06 ***
## genderF      -0.5459     0.1627 50.8735  -3.356 0.001504 ** 
## age          10.3595     1.0792 83.2345   9.599 4.03e-15 ***
## d1           -6.1891     1.8004 80.6638  -3.438 0.000931 ***
## d2           -1.6641     1.3071 82.8518  -1.273 0.206547    
## d3           -0.3684     0.9920 80.3757  -0.371 0.711367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) brthwt gendrF age    d1     d2    
## brthwt  -0.927                                   
## genderF -0.231  0.099                            
## age     -0.339  0.045 -0.002                     
## d1       0.313 -0.039 -0.001 -0.985              
## d2      -0.239  0.032  0.012  0.816 -0.884       
## d3       0.167 -0.031 -0.019 -0.533  0.606 -0.886

Correlation of Fixed Effects中可以得知:
d1,d2,d3此三個分段與age的相關分別為-0.985,0.816,-0.533
故為高度負相關,正相關,負相關。
也就是兒童在age0.4以前年齡增長與體重無太大相關
轉折點過後(age0.9起)體重會隨著年齡增長而增加。到了第三段起相關性又降低。

dtafm1 <- data.frame(dta, yhat=fitted(m1))
ggplot(dtafm1, aes(age, weight, group=id, color = gender))+
 geom_point()+
 geom_line(aes(age, yhat))+
 facet_wrap(~ gender)+
 labs(x = "Age (year)", y = "Weight (kg)")

男性與女性分開的散佈圖,分別畫出迴歸線

quantile(dta$age)
##        0%       25%       50%       75%      100% 
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012
summary(m0 <- lm(weight ~ age, data=dta))
## 
## Call:
## lm(formula = weight ~ age, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2994 -1.0341 -0.2524  1.0947  4.2614 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.2021     0.1779   29.24   <2e-16 ***
## age           3.3640     0.1332   25.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared:  0.765,  Adjusted R-squared:  0.7638 
## F-statistic: 637.9 on 1 and 196 DF,  p-value: < 2.2e-16
summary(m0z <- segmented::segmented(m0, seg.Z= ~ age, 
                            psi=list(age=c(0.6, 1.4)),
                            control=seg.control(display=FALSE)))
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6, 
##     1.4)), control = seg.control(display = FALSE))
## 
## Estimated Break-Point(s):
##            Est. St.Err
## psi1.age 0.315  0.135
## psi2.age 2.300  0.047
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    2.240      1.391   1.610    0.109
## age           16.590     10.311   1.609    0.109
## U1.age       -13.950     10.313  -1.353       NA
## U2.age        -8.541      3.457  -2.471       NA
## 
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528,  Adjusted R-squared: 0.849 
## 
## Convergence attained in 1 iter. (rel. change 7.2397e-08)

找出何處為適合的轉折處(最小平方法),得出別在年齡為
0.317,2.300為轉折。

plot(m0z)
with(dta, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()

繪出分段的圖形