options(digits=4, show.signif.stars=FALSE)

0.1 load packages

pacman::p_load(tidyverse, nlme, broom, magrittr, MuMIn)

0.2 data input and subdata

gheight<-read.csv("girl_height.csv", header= T)
str(gheight)
## 'data.frame':    100 obs. of  4 variables:
##  $ Height: num  111 116 122 126 130 ...
##  $ Girl  : chr  "G1" "G1" "G1" "G1" ...
##  $ Age   : int  6 7 8 9 10 6 7 8 9 10 ...
##  $ Mother: chr  "S" "S" "S" "S" ...
subdta <- gheight %>%
  select(Height, Age)

##descriptive statistic

subdta %>% cor
##        Height    Age
## Height 1.0000 0.8551
## Age    0.8551 1.0000
subdta %>% var %>% diag
## Height    Age 
##  90.28   2.02
ot <- theme_set(theme_bw())

身高與年齡相關性高correlation=0.85 身高變異數也滿大的Height variance=90.28 年齡變異數倒是還好age variance=2.02

0.3 regression

#lm plot
ggplot(gheight, aes(x = Age, y = Height, color = Mother)) +
 geom_point() +
 stat_smooth(aes(group = 1), method = "lm") +
 labs(x = "Age (scaled)", y = "Height (cm)") +
 theme(legend.position = "NONE")
## `geom_smooth()` using formula 'y ~ x'

#ordinary regression

#ordinary regression
m0 <- gls(Height ~ Age, data = gheight)
#regression with autocorrelation
m1 <- update(m0, corr = corAR1(form = ~ 1 | Girl), method = "ML")
#regression with unequal variances
m2 <- update(m0, weights = varIdent(form = ~ 1 | Mother))
#regression with autocorrelation and unequal variances
m3 <- update(m1, weights = varIdent(form = ~ 1 | Mother))

0.4 model comparison

#model selection with AICc
model.sel(m0, m1, m2, m3)
## Model selection table 
##    (Intrc)   Age             family     correlation             weights  REML
## m3   82.47 5.558 gaussian(identity) corAR1(~1|Girl) varIdent(~1|Mother) FALSE
## m1   82.47 5.684 gaussian(identity) corAR1(~1|Girl)                     FALSE
## m2   82.46 5.549 gaussian(identity)                 varIdent(~1|Mother)      
## m0   82.52 5.716 gaussian(identity)                                          
##    df logLik  AICc  delta weight
## m3  6 -162.8 338.5   0.00      1
## m1  4 -172.7 353.8  15.26      0
## m2  5 -292.0 594.6 256.14      0
## m0  3 -300.8 607.8 269.27      0
## Models ranked by AICc(x)
#summary model output
lapply(list(m0, m1, m2, m3), summary)
## [[1]]
## Generalized least squares fit by REML
##   Model: Height ~ Age 
##   Data: gheight 
##     AIC   BIC logLik
##   607.5 615.3 -300.8
## 
## Coefficients:
##             Value Std.Error t-value p-value
## (Intercept) 82.52    2.8439   29.02       0
## Age          5.72    0.3501   16.33       0
## 
##  Correlation: 
##     (Intr)
## Age -0.985
## 
## Standardized residuals:
##     Min      Q1     Med      Q3     Max 
## -1.8561 -0.7402 -0.1696  0.7974  2.6348 
## 
## Residual standard error: 4.951 
## Degrees of freedom: 100 total; 98 residual
## 
## [[2]]
## Generalized least squares fit by maximum likelihood
##   Model: Height ~ Age 
##   Data: gheight 
##     AIC   BIC logLik
##   353.3 363.8 -172.7
## 
## Correlation Structure: AR(1)
##  Formula: ~1 | Girl 
##  Parameter estimate(s):
##    Phi 
## 0.9782 
## 
## Coefficients:
##             Value Std.Error t-value p-value
## (Intercept) 82.47     1.380   59.75       0
## Age          5.68     0.111   51.21       0
## 
##  Correlation: 
##     (Intr)
## Age -0.643
## 
## Standardized residuals:
##     Min      Q1     Med      Q3     Max 
## -1.8432 -0.7005 -0.1174  0.8831  2.7934 
## 
## Residual standard error: 4.781 
## Degrees of freedom: 100 total; 98 residual
## 
## [[3]]
## Generalized least squares fit by REML
##   Model: Height ~ Age 
##   Data: gheight 
##   AIC   BIC logLik
##   594 606.9   -292
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Mother 
##  Parameter estimates:
##      S      M      T 
## 1.0000 0.8014 1.8171 
## 
## Coefficients:
##             Value Std.Error t-value p-value
## (Intercept) 82.46    2.3331   35.34       0
## Age          5.55    0.2872   19.32       0
## 
##  Correlation: 
##     (Intr)
## Age -0.985
## 
## Standardized residuals:
##      Min       Q1      Med       Q3      Max 
## -1.88260 -0.64836  0.07568  0.75427  2.40785 
## 
## Residual standard error: 3.961 
## Degrees of freedom: 100 total; 98 residual
## 
## [[4]]
## Generalized least squares fit by maximum likelihood
##   Model: Height ~ Age 
##   Data: gheight 
##     AIC   BIC logLik
##   337.6 353.2 -162.8
## 
## Correlation Structure: AR(1)
##  Formula: ~1 | Girl 
##  Parameter estimate(s):
##    Phi 
## 0.9787 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Mother 
##  Parameter estimates:
##      S      M      T 
## 1.0000 0.6923 1.5494 
## 
## Coefficients:
##             Value Std.Error t-value p-value
## (Intercept) 82.47    1.1305   72.95       0
## Age          5.56    0.0903   61.55       0
## 
##  Correlation: 
##     (Intr)
## Age -0.639
## 
## Standardized residuals:
##      Min       Q1      Med       Q3      Max 
## -1.77189 -0.71727  0.06484  0.80091  2.56095 
## 
## Residual standard error: 4.265 
## Degrees of freedom: 100 total; 98 residual

四個模型比較,看起來考慮到自相關的模型(m1,m3)配適比較好。 其中又以考慮到自相關及不相等變異數的m3模型配適最好(AIC最小) ## autocorrelation

acf(resid(m0, type = "normalized"), main = "m0")

acf(resid(m1, type = "normalized"), main = "m1")

acf(resid(m2, type = "normalized"), main = "m2")

acf(resid(m3, type = "normalized"), main = "m3")

用acf再驗證m1和m3的lag effect 呈現截尾。屬平穩序列。

0.5 Question

1.在ggplot stat_smooth(aes(group = 1))語法中,不太曉得group=1代表的是什麼?

  1. regression中form = ~ 1,代表的意思是? 在網路上找到的資料寫“an optional one-sided formula of the form ~ v, or ~ v | g, specifying a variance covariate v and, optionally, a grouping factor g for the coefficients. The variance covariate is ignored in this variance function. When a grouping factor is present in form, a different coefficient value is used for each of its levels less one reference level (see description section below). Several grouping variables may be simultaneously specified, separated by the * operator, like in ~ v | g1 * g2 * g3. In this case, the levels of each grouping variable are pasted together and the resulting factor is used to group the observations. Defaults to ~ 1.”