The data consist of weight gains of 68 Asian children in a British community. Measurements of weight were recorded for children on up to five occasions visiting a clinic. The ages at which the measurements were taken are roughly to target examination dates of 6 weeks, then 8, 12, 27 months.

1 load package

pacman::p_load(tidyverse, afex, segmented, foreign, haven, lme4, lmerTest)

2 data input

fL <- "http://www.stata-press.com/data/mlmus3/asian.dta"
dta <- read.dta(fL)
str(dta)

## 'data.frame':    198 obs. of  6 variables:
##  $ id    : int  45 45 45 45 45 258 258 258 258 287 ...
##  $ occ   : int  1 2 3 4 5 1 2 3 4 1 ...
##  $ age   : num  0.137 0.657 1.218 1.429 2.272 ...
##  $ weight: num  5.17 10.86 13.15 13.2 15.88 ...
##  $ brthwt: int  4140 4140 4140 4140 4140 3155 3155 3155 3155 3850 ...
##  $ gender: int  1 1 1 1 1 2 2 2 2 1 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "29 Jan 2005 17:33"
##  - attr(*, "formats")= chr [1:6] "%8.0g" "%9.0g" "%8.0g" "%8.0g" ...
##  - attr(*, "types")= int [1:6] 252 251 254 254 252 252
##  - attr(*, "val.labels")= chr [1:6] "" "" "" "" ...
##  - attr(*, "var.labels")= chr [1:6] "" "" "" "" ...
##  - attr(*, "expansion.fields")=List of 5
##   ..$ : chr [1:3] "_dta" "ReS_i" "id"
##   ..$ : chr [1:3] "_dta" "ReS_ver" "v.2"
##   ..$ : chr [1:3] "_dta" "ReS_j" "occ"
##   ..$ : chr [1:3] "_dta" "ReS_str" "0"
##   ..$ : chr [1:3] "_dta" "ReS_Xij" "weight age"
##  - attr(*, "version")= int 8

3 data management

#id 轉為factor
dta$id <- factor(dta$id)
#性別重命名為M,F
dta$gender <- factor(dta$gender, 1:2, labels = c("M", "F"))
#brthwt改單位為kg
dta$brthwt <- dta$brthwt/1000
#occ 轉為factor
dta$occ <- factor(dta$occ)

#改變資料呈現型態
glimpse(dta)

## Rows: 198
## Columns: 6
## $ id     <fct> 45, 45, 45, 45, 45, 258, 258, 258, 258, 287, 287, 287, 287, ...
## $ occ    <fct> 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 2, ...
## $ age    <dbl> 0.1368925, 0.6570842, 1.2183436, 1.4291581, 2.2724161, 0.191...
## $ weight <dbl> 5.171, 10.860, 13.150, 13.200, 15.880, 5.300, 9.740, 9.980, ...
## $ brthwt <dbl> 4.140, 4.140, 4.140, 4.140, 4.140, 3.155, 3.155, 3.155, 3.15...
## $ gender <fct> M, M, M, M, M, F, F, F, F, M, M, M, M, F, F, F, F, F, M, M, ...

head(dta)

##    id occ       age weight brthwt gender
## 1  45   1 0.1368925  5.171  4.140      M
## 2  45   2 0.6570842 10.860  4.140      M
## 3  45   3 1.2183436 13.150  4.140      M
## 4  45   4 1.4291581 13.200  4.140      M
## 5  45   5 2.2724161 15.880  4.140      M
## 6 258   1 0.1916496  5.300  3.155      F

4 ggplot, age(x), weight(y), id(rep), facet by gender

ggplot(dta, aes(age, weight, group = id, color=gender))+
 geom_point()+ 
 geom_line() +
 facet_wrap( ~ gender)+
 labs(x = "Age (year)", y = "Weight (kg)") +
 theme(legend.position = c(.9, .2))

# ggplot, age(x), weight(y), regression line

ggplot(dta, aes(age, weight, color = gender))+
 geom_point()+
 stat_smooth(method = "lm", formula = y ~ poly(x, 2)) +
 labs(x = "Age (year)", y = "Weight (kg)")+
 theme(legend.position = c(.9, .2))

age 與 wight 呈曲線關係

5 create age square

dta$age2 <- dta$age^2

6 Satterthwaite’s method, random age slope

# t-tests use Satterthwaite's method
summary(m0 <- lmer(weight ~ brthwt + gender + age + age2 + (age |id),
data = dta))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + age2 + (age | id)
##    Data: dta
## 
## REML criterion at convergence: 496.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.81353 -0.43471 -0.03652  0.45573  2.79813 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  id       (Intercept) 0.1920   0.4381       
##           age         0.2702   0.5198   0.07
##  Residual             0.3346   0.5784       
## Number of obs: 198, groups:  id, 68
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   1.02225    0.54924  57.65949   1.861  0.06782 .  
## brthwt        0.87755    0.16616  53.16001   5.281 2.43e-06 ***
## genderF      -0.51922    0.16698  52.23221  -3.109  0.00303 ** 
## age           7.73933    0.23674 135.03625  32.691  < 2e-16 ***
## age2         -1.67125    0.08762 114.78740 -19.074  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) brthwt gendrF age   
## brthwt  -0.963                     
## genderF -0.240  0.095              
## age     -0.221  0.058 -0.003       
## age2     0.199 -0.051  0.002 -0.927

7 KR-method

# Type 3 tests, KR-method - This is better
afex::mixed(weight ~ brthwt + gender + age + age2 + (age | id), data = dta)

## Contrasts set to contr.sum for the following variables: gender, id

## Numerical variables NOT centered on 0: brthwt, age, age2
## If in interactions, interpretation of lower order (e.g., main) effects difficult.

## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]

## Mixed Model Anova Table (Type 3 tests, KR-method)
## 
## Model: weight ~ brthwt + gender + age + age2 + (age | id)
## Data: dta
##   Effect        df           F p.value
## 1 brthwt  1, 62.42   26.79 ***   <.001
## 2 gender  1, 61.58     9.31 **    .003
## 3    age 1, 141.54 1046.79 ***   <.001
## 4   age2 1, 122.93  355.47 ***   <.001
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

8 construct piecewise lines defined by knots

dta$d1 <- dta$age - 0.4; dta$d1[dta$d1 < 0] <- 0
dta$d2 <- dta$age - 0.9; dta$d2[dta$d2 < 0] <- 0
dta$d3 <- dta$age - 1.5; dta$d3[dta$d3 < 0] <- 0

9 control segment point in the model

summary(m1 <- lmer(weight ~ brthwt + gender + age + d1 + d2 + d3 +
(age | id), data = dta))

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weight ~ brthwt + gender + age + d1 + d2 + d3 + (age | id)
##    Data: dta
## 
## REML criterion at convergence: 465
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.81475 -0.41746 -0.01623  0.42323  2.51409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  id       (Intercept) 0.2476   0.4976        
##           age         0.3273   0.5721   -0.12
##  Residual             0.2523   0.5023        
## Number of obs: 198, groups:  id, 68
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   0.4941     0.5600 65.7487   0.882 0.380826    
## brthwt        0.8949     0.1617 51.4586   5.533 1.07e-06 ***
## genderF      -0.5459     0.1627 50.8735  -3.356 0.001504 ** 
## age          10.3595     1.0792 83.2345   9.599 4.03e-15 ***
## d1           -6.1891     1.8004 80.6638  -3.438 0.000931 ***
## d2           -1.6641     1.3071 82.8518  -1.273 0.206547    
## d3           -0.3684     0.9920 80.3757  -0.371 0.711367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) brthwt gendrF age    d1     d2    
## brthwt  -0.927                                   
## genderF -0.231  0.099                            
## age     -0.339  0.045 -0.002                     
## d1       0.313 -0.039 -0.001 -0.985              
## d2      -0.239  0.032  0.012  0.816 -0.884       
## d3       0.167 -0.031 -0.019 -0.533  0.606 -0.886

10 create yhat

dtafm1 <- data.frame(dta, yhat=fitted(m1))

11 plot age and yhat

ggplot(dtafm1, aes(age, weight, group=id, color = gender))+
 geom_point()+
 geom_line(aes(age, yhat))+
 facet_wrap(~ gender)+
 labs(x = "Age (year)", y = "Weight (kg)")

quantile(dta$age)

##        0%       25%       50%       75%      100% 
## 0.1149897 0.6461328 0.9965777 1.4709103 2.5462012

summary(m0 <- lm(weight ~ age, data=dta))

## 
## Call:
## lm(formula = weight ~ age, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2994 -1.0341 -0.2524  1.0947  4.2614 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.2021     0.1779   29.24   <2e-16 ***
## age           3.3640     0.1332   25.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.471 on 196 degrees of freedom
## Multiple R-squared:  0.765,  Adjusted R-squared:  0.7638 
## F-statistic: 637.9 on 1 and 196 DF,  p-value: < 2.2e-16

12 Piecewise regression

summary(m0z <- segmented::segmented(m0, seg.Z= ~ age, 
                            psi=list(age=c(0.6, 1.4)),
                            control=seg.control(display=FALSE)))

## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~age, psi = list(age = c(0.6, 
##     1.4)), control = seg.control(display = FALSE))
## 
## Estimated Break-Point(s):
##            Est. St.Err
## psi1.age 0.317  0.138
## psi2.age 2.300  0.047
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    2.262      1.391   1.626    0.106
## age           16.425     10.311   1.593    0.113
## U1.age       -13.786     10.312  -1.337       NA
## U2.age        -8.539      3.457  -2.470       NA
## 
## Residual standard error: 1.176 on 192 degrees of freedom
## Multiple R-Squared: 0.8528,  Adjusted R-squared: 0.849 
## 
## Convergence attained in 1 iter. (rel. change 6.9923e-08)

plot(m0z)
with(dta, points(age, weight))
abline(v = m0z$psi[,2], lty = 3)
grid()

Random coefficients models

jenny hsu