title: “Girls hieghts data example” author: “Szu-Yu Chen” date: 26 September 2020 ——
# load data set
dta <- read.csv("C:/Users/ASUS/Desktop/data/girl_height.csv", h=T)
options(digits=4, show.signif.stars=FALSE)
pacman::p_load(tidyverse, nlme, broom, magrittr, MuMIn)
str(dta)
## 'data.frame': 100 obs. of 4 variables:
## $ Height: num 111 116 122 126 130 ...
## $ Girl : chr "G1" "G1" "G1" "G1" ...
## $ Age : int 6 7 8 9 10 6 7 8 9 10 ...
## $ Mother: chr "S" "S" "S" "S" ...
class(dta)
## [1] "data.frame"
head(dta)
## Height Girl Age Mother
## 1 111.0 G1 6 S
## 2 116.4 G1 7 S
## 3 121.7 G1 8 S
## 4 126.3 G1 9 S
## 5 130.5 G1 10 S
## 6 110.0 G2 6 S
plot(dta)
dta %>% dplyr::select(Height, Age)%>% var %>% diag
## Height Age
## 90.28 2.02
ggplot(dta, aes(x = Age, y = Height, color = Mother)) +
geom_point() +
stat_smooth(aes(group = 1), method = "lm") +
labs(x = "Age (scaled)", y = "Height (cm)") +
theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'
m0 <- gls(Height ~ Age, data = dta)
m1 <- update(m0, corr = corAR1(form = ~ 1 | Girl), method = "ML")
m2 <- update(m0, weights = varIdent(form = ~ 1 | Mother))
m3 <- update(m1, weights = varIdent(form = ~ 1 | Mother))
lapply(list(m0, m1, m2, m3), summary)
## [[1]]
## Generalized least squares fit by REML
## Model: Height ~ Age
## Data: dta
## AIC BIC logLik
## 607.5 615.3 -300.8
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.52 2.8439 29.02 0
## Age 5.72 0.3501 16.33 0
##
## Correlation:
## (Intr)
## Age -0.985
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8561 -0.7402 -0.1696 0.7974 2.6348
##
## Residual standard error: 4.951
## Degrees of freedom: 100 total; 98 residual
##
## [[2]]
## Generalized least squares fit by maximum likelihood
## Model: Height ~ Age
## Data: dta
## AIC BIC logLik
## 353.3 363.8 -172.7
##
## Correlation Structure: AR(1)
## Formula: ~1 | Girl
## Parameter estimate(s):
## Phi
## 0.9782
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.47 1.380 59.75 0
## Age 5.68 0.111 51.21 0
##
## Correlation:
## (Intr)
## Age -0.643
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8432 -0.7005 -0.1174 0.8831 2.7934
##
## Residual standard error: 4.781
## Degrees of freedom: 100 total; 98 residual
##
## [[3]]
## Generalized least squares fit by REML
## Model: Height ~ Age
## Data: dta
## AIC BIC logLik
## 594 606.9 -292
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Mother
## Parameter estimates:
## S M T
## 1.0000 0.8014 1.8171
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.46 2.3331 35.34 0
## Age 5.55 0.2872 19.32 0
##
## Correlation:
## (Intr)
## Age -0.985
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.88260 -0.64836 0.07568 0.75427 2.40785
##
## Residual standard error: 3.961
## Degrees of freedom: 100 total; 98 residual
##
## [[4]]
## Generalized least squares fit by maximum likelihood
## Model: Height ~ Age
## Data: dta
## AIC BIC logLik
## 337.6 353.2 -162.8
##
## Correlation Structure: AR(1)
## Formula: ~1 | Girl
## Parameter estimate(s):
## Phi
## 0.9787
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Mother
## Parameter estimates:
## S M T
## 1.0000 0.6923 1.5494
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.47 1.1305 72.95 0
## Age 5.56 0.0903 61.55 0
##
## Correlation:
## (Intr)
## Age -0.639
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.77189 -0.71727 0.06484 0.80091 2.56095
##
## Residual standard error: 4.265
## Degrees of freedom: 100 total; 98 residual
acf(resid(m0, type = "normalized"), main = "m0")
acf(resid(m1, type = "normalized"), main = "m1")
acf(resid(m2, type = "normalized"), main = "m2")
acf(resid(m3, type = "normalized"), main = "m3")
## the end