# regression with correlations over time and in clusters
# set significant decimal points
# handle package loading automatically
# load packages
pacman::p_load(tidyverse, nlme, broom, magrittr, MuMIn)
# load data set
# Read the height measurements; the first row of the CSV holds column names.
# stringsAsFactors = TRUE keeps the text columns (Girl, Mother) as factors on
# R >= 4.0 as well, so the structure matches the output recorded below.
girlheight <- read.csv(
  "girl_height.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
# inspect the structure of the loaded data (a plain data.frame, per the output)
str(girlheight)
## 'data.frame': 100 obs. of 4 variables:
## $ Height: num 111 116 122 126 130 ...
## $ Girl : Factor w/ 20 levels "G1","G10","G11",..: 1 1 1 1 1 12 12 12 12 12 ...
## $ Age : int 6 7 8 9 10 6 7 8 9 10 ...
## $ Mother: Factor w/ 3 levels "M","S","T": 2 2 2 2 2 2 2 2 2 2 ...
## Height Girl Age Mother
## 1 111.0 G1 6 S
## 2 116.4 G1 7 S
## 3 121.7 G1 8 S
## 4 126.3 G1 9 S
## 5 130.5 G1 10 S
## 6 110.0 G2 6 S
## Height Age
## Height 1.0000000 0.8551367
## Age 0.8551367 1.0000000
# Scatter plot of Height against Age with points coloured by Mother.
# aes(group = 1) on the smoother overrides the colour grouping, so one linear
# fit is drawn through all points rather than one line per Mother level.
ggplot(girlheight, aes(x = Age, y = Height, color = Mother)) +
  geom_point() +
  stat_smooth(aes(group = 1), method = "lm") +
  labs(x = "Age (scaled)", y = "Height (cm)") +
  # the documented keyword for hiding the legend is lowercase "none"
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## ordinary regression
## Model selection table
## (Intrc) Age correlation weights REML df logLik AICc delta weight
## m3 82.47 5.558 crAR1(Girl) vrI(Mth) F 6 -162.798 338.5 0.00 1
## m1 82.47 5.684 crAR1(Girl) F 4 -172.671 353.8 15.26 0
## m2 82.46 5.549 vrI(Mth) 5 -291.999 594.6 256.14 0
## m0 82.52 5.716 3 -300.761 607.8 269.27 0
## Abbreviations:
## correlation: crAR1(Girl) = 'corAR1(~1|Girl)'
## weights: vrI(Mth) = 'varIdent(~1|Mother)'
## REML: F = 'FALSE'
## Models ranked by AICc(x)
## [[1]]
## Generalized least squares fit by REML
## Model: Height ~ Age
## Data: girlheight
## AIC BIC logLik
## 607.5218 615.2767 -300.7609
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.5240 2.843942 29.01747 0
## Age 5.7165 0.350065 16.32982 0
##
## Correlation:
## (Intr)
## Age -0.985
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8561136 -0.7401528 -0.1695731 0.7974431 2.6347966
##
## Residual standard error: 4.950667
## Degrees of freedom: 100 total; 98 residual
##
## [[2]]
## Generalized least squares fit by maximum likelihood
## Model: Height ~ Age
## Data: girlheight
## AIC BIC logLik
## 353.3411 363.7618 -172.6705
##
## Correlation Structure: AR(1)
## Formula: ~1 | Girl
## Parameter estimate(s):
## Phi
## 0.9781674
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.47456 1.3803440 59.74927 0
## Age 5.68379 0.1109941 51.20803 0
##
## Correlation:
## (Intr)
## Age -0.643
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8432405 -0.7004707 -0.1173561 0.8830952 2.7934077
##
## Residual standard error: 4.780948
## Degrees of freedom: 100 total; 98 residual
##
## [[3]]
## Generalized least squares fit by REML
## Model: Height ~ Age
## Data: girlheight
## AIC BIC logLik
## 593.998 606.9228 -291.999
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Mother
## Parameter estimates:
## S M T
## 1.0000000 0.8013775 1.8171062
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.46268 2.3330969 35.34473 0
## Age 5.54936 0.2871844 19.32334 0
##
## Correlation:
## (Intr)
## Age -0.985
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.88259506 -0.64836209 0.07567592 0.75426807 2.40785034
##
## Residual standard error: 3.96064
## Degrees of freedom: 100 total; 98 residual
##
## [[4]]
## Generalized least squares fit by maximum likelihood
## Model: Height ~ Age
## Data: girlheight
## AIC BIC logLik
## 337.597 353.228 -162.7985
##
## Correlation Structure: AR(1)
## Formula: ~1 | Girl
## Parameter estimate(s):
## Phi
## 0.9786546
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | Mother
## Parameter estimates:
## S M T
## 1.0000000 0.6922724 1.5493547
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 82.47292 1.1305458 72.94965 0
## Age 5.55833 0.0902996 61.55430 0
##
## Correlation:
## (Intr)
## Age -0.639
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.77188602 -0.71727072 0.06483833 0.80091331 2.56095242
##
## Residual standard error: 4.264505
## Degrees of freedom: 100 total; 98 residual