fecfat1 and fecfat2. Is it similar to the estimate above?Learning objectives
Exercises
I found this lab very helpful for visualizing grouped data and practicing heat maps/spaghetti plots. Cleaning multiple variables of the Georgia Birthweight Dataset allowed for a better understanding of cleaning data. I experimented with this data by seeing how setting different seeds in the beginning of the data would change the rest of the lab. This allowed for different simulations/numbers to be run while making our lab reproducible.
library(pheatmap)
library(RColorBrewer)
mycol <- rev(colorRampPalette(colors = c('#d8b365','#f5f5f5','#5ab4ac'))(100))
pheatmap(select(simfun(sigma_subj = sqrt(3), sigma_resid = 1), fecfat1:fecfat2), color = mycol)
Also play with the values of sigma_subj and sigma_resid to see what effect this has on the heatmap.
library(ggplot2)
set.seed(1)
library(tidyr)
simfun(sigma_subj = sqrt(3), sigma_resid = sqrt(1)) %>%
pivot_longer(cols = starts_with("fecfat")) %>%
ggplot(aes(x=name, y=value, group=id)) + geom_line(alpha = 0.25)
Also play with the values of sigma_subj and sigma_resid to see what effect this has on the spaghetti plot
set.seed(1)
df_wide <- simfun(n = 10000, sigma_subj = sqrt(4), sigma_resid = sqrt(1))
df <- pivot_longer(df_wide, cols = starts_with("fecfat"))
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
fit <- lme(value ~ 1, data = df, random = ~1|id)
summary(fit)
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 78952.58 78976.29 -39473.29
##
## Random effects:
## Formula: ~1 | id
## (Intercept) Residual
## StdDev: 2.026972 0.9988031
##
## Fixed effects: value ~ 1
## Value Std.Error DF t-value p-value
## (Intercept) 9.988608 0.0214649 10000 465.3462 0
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.695148369 -0.506748348 0.004597958 0.511724338 2.743339009
##
## Number of Observations: 20000
## Number of Groups: 10000
intervals(fit)
## Approximate 95% confidence intervals
##
## Fixed effects:
## lower est. upper
## (Intercept) 9.946533 9.988608 10.03068
## attr(,"label")
## [1] "Fixed effects:"
##
## Random Effects:
## Level: id
## lower est. upper
## sd((Intercept)) 1.995785 2.026972 2.058646
##
## Within-group standard error:
## lower est. upper
## 0.9888043 0.9988031 1.0089031
Recall ICC for subject \(i\), measurements \(j\) and \(k\): \[\begin{equation*} \begin{aligned} ICC & = corr(x_{ij}, x_{ik}) \\ & = \frac{\sigma_{subj}^2}{\sigma_{subj}^2 + \sigma_{\epsilon}^2} \\ & = \frac{\tau_{00}^2}{\tau_{00}^2 + \sigma_\epsilon^2} \end{aligned} \end{equation*}\]
2.026972^2 / (2.026972^2 + 0.9988031^2)
## [1] 0.8046291
ICClme <- function(fit){
cors <- as.numeric(VarCorr(fit))
cors[1] / (cors[1] + cors[2])
}
ICClme(fit)
## [1] 0.804629
fecfat1 and fecfat2. Is it similar to the estimate above?select(df_wide, starts_with("fec")) %>% cor()
## fecfat1 fecfat2
## fecfat1 1.000000 0.804624
## fecfat2 0.804624 1.000000
momagemomid to a factorlibrary(readr)
ga <- read_csv("gababies.csv") %>%
mutate(momage = na_if(momage, 99)) %>%
mutate(agebin = cut(initage, breaks = c(0, 17, 100))) %>%
mutate(momid = factor(momid)) %>%
mutate(lowbrth = recode_factor(lowbrth, `0` = "normal", `1` = "low"))
ggplot(ga, aes(x = birthord, y=bweight, group = birthord)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width=0.2, alpha = 0.25) +
labs(title = "Georgia birthweight dataset") +
xlab("Birth order") + ylab("Birth weight (g)") +
theme_grey(base_size = 10)
Figure 3: Birth weight as a function of birth order in the Georgia birthweight dataset.
ggplot(ga, aes(x=birthord, y = bweight, group = momid)) +
geom_line(alpha = 0.25) +
labs(title = "Georgia birthweight dataset") +
xlab("Birth order") + ylab("Birth weight (g)") +
theme_grey(base_size = 10)
library(nlme)
gafit1 <- lme(bweight ~ birthord, data = ga, random = ~1|momid)
intervals(gafit1)
## Approximate 95% confidence intervals
##
## Fixed effects:
## lower est. upper
## (Intercept) 2913.20418 2995.640 3078.07582
## birthord 27.07478 46.608 66.14122
## attr(,"label")
## [1] "Fixed effects:"
##
## Random Effects:
## Level: momid
## lower est. upper
## sd((Intercept)) 323.1724 367.2676 417.3794
##
## Within-group standard error:
## lower est. upper
## 423.7298 445.0228 467.3859
qqnorm(residuals(gafit1, type = "pearson"), main = "Pearson residuals QQ plot")
qqline(residuals(gafit1, type = "pearson"))
qqnorm(ranef(gafit1)[, 1], main = "Random Intercepts QQ plot")
qqline(ranef(gafit1)[, 1])
gafit2 <- lme(bweight ~ birthord*agebin, data = ga, random = ~1|momid)
summary(gafit2)
## Linear mixed-effects model fit by REML
## Data: ga
## AIC BIC logLik
## 15303.27 15332.69 -7645.635
##
## Random effects:
## Formula: ~1 | momid
## (Intercept) Residual
## StdDev: 364.4842 444.9955
##
## Fixed effects: bweight ~ birthord * agebin
## Value Std.Error DF t-value p-value
## (Intercept) 2978.3873 52.75519 798 56.45676 0.0000
## birthord 38.6175 12.53633 798 3.08044 0.0021
## agebin(17,100] 46.6289 86.72900 198 0.53764 0.5914
## birthord:agebin(17,100] 21.5961 20.60960 798 1.04786 0.2950
## Correlation:
## (Intr) brthrd a(17,1
## birthord -0.713
## agebin(17,100] -0.608 0.434
## birthord:agebin(17,100] 0.434 -0.608 -0.713
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.24250712 -0.43312124 0.04338865 0.53148398 3.30316603
##
## Number of Observations: 1000
## Number of Groups: 200
gafit3 <- lme(bweight ~ birthord*initwght, data = ga, random = ~1|momid)
summary(gafit3)
## Linear mixed-effects model fit by REML
## Data: ga
## AIC BIC logLik
## 15198.28 15227.7 -7593.138
##
## Random effects:
## Formula: ~1 | momid
## (Intercept) Residual
## StdDev: 274.526 431.555
##
## Fixed effects: bweight ~ birthord * initwght
## Value Std.Error DF t-value p-value
## (Intercept) 600.3298 199.59828 798 3.007690 0.0027
## birthord 409.8405 51.45612 798 7.964855 0.0000
## initwght 0.7941 0.06499 198 12.217420 0.0000
## birthord:initwght -0.1204 0.01676 798 -7.186579 0.0000
## Correlation:
## (Intr) brthrd intwgh
## birthord -0.773
## initwght -0.982 0.760
## birthord:initwght 0.760 -0.982 -0.773
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.329582374 -0.455327000 0.007433181 0.531084518 4.791970708
##
## Number of Observations: 1000
## Number of Groups: 200
library(gee)
gagee1 <- gee(bweight ~ birthord*agebin, data = ga, id = momid, corstr = "unstructured")
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) birthord agebin(17,100]
## 2978.38730 38.61746 46.62891
## birthord:agebin(17,100]
## 21.59605
summary(gagee1)
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Identity
## Variance to Mean Relation: Gaussian
## Correlation Structure: Unstructured
##
## Call:
## gee(formula = bweight ~ birthord * agebin, id = momid, data = ga,
## corstr = "unstructured")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -2863.40368 -294.16890 29.24113 339.16644 1756.59632
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) 2978.92557 48.99654 60.7986933 45.82626 65.0047740
## birthord 37.24333 12.48136 2.9839156 12.24704 3.0410073
## agebin(17,100] 52.18848 80.54981 0.6479032 82.95506 0.6291175
## birthord:agebin(17,100] 20.18655 20.51923 0.9837866 20.41810 0.9886593
##
## Estimated Scale Parameter: 330087.7
## Number of Iterations: 3
##
## Working Correlation
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0000000 0.2153033 0.3092689 0.2545510 0.3836927
## [2,] 0.2153033 1.0000000 0.4809124 0.4351138 0.3949443
## [3,] 0.3092689 0.4809124 1.0000000 0.6344401 0.4349807
## [4,] 0.2545510 0.4351138 0.6344401 1.0000000 0.4484055
## [5,] 0.3836927 0.3949443 0.4349807 0.4484055 1.0000000
gagee2 <- gee(bweight ~ birthord*initwght, data = ga, id = momid, corstr = "unstructured")
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) birthord initwght birthord:initwght
## 600.3298240 409.8405317 0.7940549 -0.1204130
summary(gagee2)
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Identity
## Variance to Mean Relation: Gaussian
## Correlation Structure: Unstructured
##
## Call:
## gee(formula = bweight ~ birthord * initwght, id = momid, data = ga,
## corstr = "unstructured")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -2748.50271 -242.04489 11.70037 278.75451 2920.36648
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) 852.1852972 168.85833158 5.046747 193.39200182 4.406518
## birthord 207.2267183 50.87766438 4.073039 56.85291664 3.644962
## initwght 0.7119308 0.05498405 12.947951 0.06172936 11.533099
## birthord:initwght -0.0547416 0.01656691 -3.304274 0.01818505 -3.010253
##
## Estimated Scale Parameter: 268548.1
## Number of Iterations: 7
##
## Working Correlation
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.00000000 -0.1644909 -0.1041232 -0.1045992 -0.02349959
## [2,] -0.16449093 1.0000000 0.6376520 0.5986444 0.43603797
## [3,] -0.10412317 0.6376520 1.0000000 0.7606906 0.41963808
## [4,] -0.10459919 0.5986444 0.7606906 1.0000000 0.46289783
## [5,] -0.02349959 0.4360380 0.4196381 0.4628978 1.00000000
pivot_wider(ga, values_from = bweight, names_from=birthord, id_cols = momid) %>%
select(-momid) %>%
pheatmap(clustering_distance_rows = "correlation", clustering_distance_cols = "correlation", scale = "column")