options(digits=4, show.signif.stars=FALSE)
pacman::p_load(tidyverse, nlme, broom, magrittr, MuMIn)
gheight<-read.csv("girl_height.csv", header= T)
str(gheight)
## 'data.frame': 100 obs. of 4 variables:
## $ Height: num 111 116 122 126 130 ...
## $ Girl : chr "G1" "G1" "G1" "G1" ...
## $ Age : int 6 7 8 9 10 6 7 8 9 10 ...
## $ Mother: chr "S" "S" "S" "S" ...
subdta <- gheight %>%
select(Height, Age)
##descriptive statistic
subdta %>% cor
## Height Age
## Height 1.0000 0.8551
## Age 0.8551 1.0000
subdta %>% var %>% diag
## Height Age
## 90.28 2.02
ot <- theme_set(theme_bw())
身高與年齡相關性高correlation=0.85 身高變異數也滿大的Height variance=90.28 年齡變異數倒是還好age variance=2.02
#lm plot
ggplot(gheight, aes(x = Age, y = Height, color = Mother)) +
geom_point() +
stat_smooth(aes(group = 1), method = "lm") +
labs(x = "Age (scaled)", y = "Height (cm)") +
theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'
#ordinary regression
#ordinary regression
m0 <- gls(Height ~ Age, data = gheight)
#regression with autocorrelation
m1 <- update(m0, corr = corAR1(form = ~ 1 | Girl), method = "ML")
#regression with unequal variances
m2 <- update(m0, weights = varIdent(form = ~ 1 | Mother))
#regression with autocorrelation and unequal variances
m3 <- update(m1, weights = varIdent(form = ~ 1 | Mother))
#model selection with AICc
model.sel(m0, m1, m2, m3)
## Model selection table
## (Intrc) Age family correlation weights REML
## m3 82.47 5.558 gaussian(identity) corAR1(~1|Girl) varIdent(~1|Mother) FALSE
## m1 82.47 5.684 gaussian(identity) corAR1(~1|Girl) FALSE
## m2 82.46 5.549 gaussian(identity) varIdent(~1|Mother)
## m0 82.52 5.716 gaussian(identity)
## df logLik AICc delta weight
## m3 6 -162.8 338.5 0.00 1
## m1 4 -172.7 353.8 15.26 0
## m2 5 -292.0 594.6 256.14 0
## m0 3 -300.8 607.8 269.27 0
## Models ranked by AICc(x)
#summary model output
lapply(list(m0, m1, m2, m3), summary)
## [[1]]
## Generalized least squares fit by REML
## Model: Height ~ Age
## Data: gheight
## AIC BIC logLik
## 607.5 615.3 -300.8
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.52 2.8439 29.02 0
## Age 5.72 0.3501 16.33 0
##
## Correlation:
## (Intr)
## Age -0.985
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8561 -0.7402 -0.1696 0.7974 2.6348
##
## Residual standard error: 4.951
## Degrees of freedom: 100 total; 98 residual
##
## [[2]]
## Generalized least squares fit by maximum likelihood
## Model: Height ~ Age
## Data: gheight
## AIC BIC logLik
## 353.3 363.8 -172.7
##
## Correlation Structure: AR(1)
## Formula: ~1 | Girl
## Parameter estimate(s):
## Phi
## 0.9782
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.47 1.380 59.75 0
## Age 5.68 0.111 51.21 0
##
## Correlation:
## (Intr)
## Age -0.643
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8432 -0.7005 -0.1174 0.8831 2.7934
##
## Residual standard error: 4.781
## Degrees of freedom: 100 total; 98 residual
##
## [[3]]
## Generalized least squares fit by REML
## Model: Height ~ Age
## Data: gheight
## AIC BIC logLik
## 594 606.9 -292
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Mother
## Parameter estimates:
## S M T
## 1.0000 0.8014 1.8171
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.46 2.3331 35.34 0
## Age 5.55 0.2872 19.32 0
##
## Correlation:
## (Intr)
## Age -0.985
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.88260 -0.64836 0.07568 0.75427 2.40785
##
## Residual standard error: 3.961
## Degrees of freedom: 100 total; 98 residual
##
## [[4]]
## Generalized least squares fit by maximum likelihood
## Model: Height ~ Age
## Data: gheight
## AIC BIC logLik
## 337.6 353.2 -162.8
##
## Correlation Structure: AR(1)
## Formula: ~1 | Girl
## Parameter estimate(s):
## Phi
## 0.9787
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Mother
## Parameter estimates:
## S M T
## 1.0000 0.6923 1.5494
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.47 1.1305 72.95 0
## Age 5.56 0.0903 61.55 0
##
## Correlation:
## (Intr)
## Age -0.639
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.77189 -0.71727 0.06484 0.80091 2.56095
##
## Residual standard error: 4.265
## Degrees of freedom: 100 total; 98 residual
四個模型比較,看起來考慮到自相關的模型(m1,m3)配適比較好。 其中又以考慮到自相關及不相等變異數的m3模型配適最好(AIC最小) ## autocorrelation
acf(resid(m0, type = "normalized"), main = "m0")
acf(resid(m1, type = "normalized"), main = "m1")
acf(resid(m2, type = "normalized"), main = "m2")
acf(resid(m3, type = "normalized"), main = "m3")
用acf再驗證m1和m3的lag effect 呈現截尾。屬平穩序列。
1.在ggplot stat_smooth(aes(group = 1))語法中,不太曉得group=1代表的是什麼?