# load the following libraries: 
# tidyverse, broom, table1, GGally, car, easystats, datawizard, modelsummary and marginaleffects

library(tidyverse)
library(broom)
library(table1)
library(GGally)
library(car)
library(easystats)
library(datawizard)
library(modelsummary)
library(marginaleffects)

# load your dataset and assign it to a variable using read_rds("data/natality.rds") 
data <- read_rds("data/natality.rds")
save(data, file="natality.RData")

The lab specifications can be found here.

Exercise 1

Use the table1 package to create a table with appropriate descriptive information about the data set. Use the examples provided in the following vignette to help you build a table.

# Adjust levels for smoking
levels(data$CIG_REC) <- list("Non smoker" = "No", "Smoker" = "Yes")

# Adjust label for prior births (living)
label(data$PRIORLIVE) <- "Prior births (living)"

# Adjust label for prior births (dead)
label(data$PRIORDEAD) <- "Prior births (dead)"

# Creating table 1

caption <- "Maternal demographics stratified by smoking"

table1(~ DBWT + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE +
         PRIORDEAD + BMI | CIG_REC, data=data, overall = FALSE, caption=caption)
Maternal demographics stratified by smoking
Non smoker
(N=1564)
Smoker
(N=124)
Birth Weight (g)
Mean (SD) 3250 (601) 3070 (597)
Median [Min, Max] 3290 [369, 5880] 3090 [630, 4220]
Risk Factors Reported
No 1088 (69.6%) 84 (67.7%)
Yes 476 (30.4%) 40 (32.3%)
Mother Age
Mean (SD) 29.0 (5.79) 27.8 (5.79)
Median [Min, Max] 29.0 [15.0, 49.0] 28.0 [17.0, 42.0]
Mother Race/Hispanic Origin
NH White 844 (54.0%) 94 (75.8%)
NH Black 266 (17.0%) 14 (11.3%)
NH Other 125 (8.0%) 10 (8.1%)
Hispanic 329 (21.0%) 6 (4.8%)
Marital Status
Married 1017 (65.0%) 40 (32.3%)
Unmarried 547 (35.0%) 84 (67.7%)
Mother Education
lower 177 (11.3%) 34 (27.4%)
HS 377 (24.1%) 45 (36.3%)
Some college 437 (27.9%) 37 (29.8%)
Uni 573 (36.6%) 8 (6.5%)
Prior births (living)
0 601 (38.4%) 38 (30.6%)
1 516 (33.0%) 41 (33.1%)
2 265 (16.9%) 17 (13.7%)
3 93 (5.9%) 17 (13.7%)
4+ 89 (5.7%) 11 (8.9%)
Prior births (dead)
0 1551 (99.2%) 123 (99.2%)
1+ 13 (0.8%) 1 (0.8%)
Mother BMI
Mean (SD) 26.9 (6.32) 27.3 (6.42)
Median [Min, Max] 25.6 [15.6, 58.4] 26.3 [16.5, 50.1]
# Set back levels for smoking to "No" and "Yes"
levels(data$CIG_REC) <- list("No" = "Non smoker", "Yes" = "Smoker")

Answer: Description / interpretation of the table: Smokers represent approximately 7% of the total sample, with non-Hispanic White being overrepresented among the smokers, compared to non-smokers (75.8% vs 54.0%). A simple comparison of means suggests birth weight of children born from smokers is 180 g lower in average. Non-smoking mothers were more commonly married and attended university, compared to mothers who were smokers.

Exercise 2

Use the ggpairs function to explore the association between the different variables. Try to replicate the figure below, or create your own variation.

# The matrix as given in the exercise 
plot1 <- ggpairs(data, columns = c("DBWT", "risks", "MAGER", "DMAR", "BMI"), 
                columnLabels = c("Birth weight (g)", "Risk Factors Reported", "Mother Age", "Marital Status", "Mother BMI"),
                upper = list(continuous = "density", combo = "facethist", discrete = "colbar"),  
                lower = list(continuous = "cor", combo = "facethist", discrete = "barDiag"),  
                diag = list(continuous = wrap("densityDiag", alpha=0.5), discrete = "densityDiag"), 
                mapping = aes(colour=data$CIG_REC)) 

plot1 + theme_minimal() + ggtitle('Natality in the United States (2018)') 

Exercise 3

Fit four linear models, each nested within the other: the null model, the unadjusted model, the full model and the optimal model. Use the step function find the optimal model using the stepwise method. Then use anova to compare the four models. What is your conclusion? Now compare your models using modelsummary function. What does the number in the (brackets) mean? How can you identify the optimal model just by looking at the \(R^2\) and the AIC?

# Specify the different models
M.NULL <- lm(DBWT ~ 1, data)
M.UNADJUSTED <- lm(DBWT ~ CIG_REC, data)
# The dot represents all variables except the outcome DBWT
M.FULL <- lm(DBWT ~ ., data)  

# Finding the optimal model
optimal <- step(M.NULL,
     scope = list(upper = M.FULL),
     direction="both",
     data=data)
## Start:  AIC=21609.55
## DBWT ~ 1
## 
##             Df Sum of Sq       RSS   AIC
## + DMAR       1  28672236 583166851 21530
## + MRACEHISP  3  28557357 583281730 21535
## + PRIORLIVE  4   7485192 604353895 21597
## + MEDUC      3   5189885 606649203 21601
## + CIG_REC    1   3685187 608153901 21601
## + risks      1   3277949 608561138 21602
## + BMI        1   3170730 608668358 21603
## + MAGER      1   2941408 608897679 21603
## + PRIORDEAD  1    849628 610989460 21609
## <none>                   611839087 21610
## 
## Step:  AIC=21530.53
## DBWT ~ DMAR
## 
##             Df Sum of Sq       RSS   AIC
## + MRACEHISP  3  15359037 567807814 21492
## + risks      1   4395289 578771562 21520
## + BMI        1   3607980 579558871 21522
## + PRIORLIVE  4   4500359 578666492 21526
## + CIG_REC    1    978350 582188501 21530
## + PRIORDEAD  1    818812 582348039 21530
## <none>                   583166851 21530
## + MAGER      1     82475 583084376 21532
## + MEDUC      3    313468 582853384 21536
## - DMAR       1  28672236 611839087 21610
## 
## Step:  AIC=21491.48
## DBWT ~ DMAR + MRACEHISP
## 
##             Df Sum of Sq       RSS   AIC
## + BMI        1   4532441 563275373 21480
## + risks      1   3924653 563883161 21482
## + CIG_REC    1   2181695 565626119 21487
## + PRIORLIVE  4   3880005 563927809 21488
## + PRIORDEAD  1    846700 566961114 21491
## <none>                   567807814 21492
## + MAGER      1     52434 567755381 21493
## + MEDUC      3     98014 567709800 21497
## - MRACEHISP  3  15359037 583166851 21530
## - DMAR       1  15473916 583281730 21535
## 
## Step:  AIC=21479.95
## DBWT ~ DMAR + MRACEHISP + BMI
## 
##             Df Sum of Sq       RSS   AIC
## + risks      1   6059709 557215664 21464
## + CIG_REC    1   2362075 560913299 21475
## + PRIORLIVE  4   3457999 559817375 21478
## + PRIORDEAD  1    896345 562379028 21479
## <none>                   563275373 21480
## + MAGER      1    125890 563149483 21482
## + MEDUC      3    115009 563160364 21486
## - BMI        1   4532441 567807814 21492
## - MRACEHISP  3  16283498 579558871 21522
## - DMAR       1  15325756 578601129 21523
## 
## Step:  AIC=21463.69
## DBWT ~ DMAR + MRACEHISP + BMI + risks
## 
##             Df Sum of Sq       RSS   AIC
## + PRIORLIVE  4   5580134 551635530 21455
## + CIG_REC    1   2199370 555016294 21459
## + PRIORDEAD  1   1025923 556189741 21463
## <none>                   557215664 21464
## + MAGER      1     22588 557193076 21466
## + MEDUC      3     69675 557145989 21470
## - risks      1   6059709 563275373 21480
## - BMI        1   6667497 563883161 21482
## - MRACEHISP  3  15921403 573137067 21505
## - DMAR       1  16431163 573646827 21511
## 
## Step:  AIC=21454.7
## DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE
## 
##             Df Sum of Sq       RSS   AIC
## + CIG_REC    1   2707991 548927539 21448
## + PRIORDEAD  1    921529 550714001 21454
## <none>                   551635530 21455
## + MAGER      1    280671 551354859 21456
## + MEDUC      3    173640 551461890 21460
## - PRIORLIVE  4   5580134 557215664 21464
## - BMI        1   6486820 558122350 21472
## - risks      1   8181845 559817375 21478
## - MRACEHISP  3  15145908 566781438 21494
## - DMAR       1  14456460 566091990 21496
## 
## Step:  AIC=21448.4
## DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC
## 
##             Df Sum of Sq       RSS   AIC
## + PRIORDEAD  1    922493 548005046 21448
## <none>                   548927539 21448
## + MAGER      1    337056 548590483 21449
## + MEDUC      3     75431 548852108 21454
## - CIG_REC    1   2707991 551635530 21455
## - PRIORLIVE  4   6088755 555016294 21459
## - BMI        1   6662694 555590232 21467
## - risks      1   8131760 557059299 21471
## - DMAR       1  10955889 559883427 21480
## - MRACEHISP  3  16576572 565504111 21493
## 
## Step:  AIC=21447.56
## DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + 
##     PRIORDEAD
## 
##             Df Sum of Sq       RSS   AIC
## <none>                   548005046 21448
## - PRIORDEAD  1    922493 548927539 21448
## + MAGER      1    361028 547644018 21448
## + MEDUC      3     82260 547922786 21453
## - CIG_REC    1   2708955 550714001 21454
## - PRIORLIVE  4   5984714 553989760 21458
## - BMI        1   6749558 554754604 21466
## - risks      1   8234365 556239411 21471
## - DMAR       1  10970232 558975278 21479
## - MRACEHISP  3  16594210 564599256 21492
# Looking at model residuals 
par(mfrow=c(2,2)) 
plot(optimal) 

plot(M.FULL) 

par(mfrow=c(1,1)) 

## --> By checking the residuals, it seems that overall, the requirements of linear least squares regression assumptions are met. There are some extreme values (outliers), reaching Cook’s distance. 

# check collinearity
predictors <- model.matrix(optimal) [,-1] 
cor_matrix <- cor(predictors) 
print(cor_matrix) 
##                   DMARUnmarried MRACEHISPNH Black MRACEHISPNH Other
## DMARUnmarried      1.0000000000       0.277607412     -0.0743242682
## MRACEHISPNH Black  0.2776074117       1.000000000     -0.1314797424
## MRACEHISPNH Other -0.0743242682      -0.131479742      1.0000000000
## MRACEHISPHispanic  0.1466633695      -0.221896879     -0.1467082922
## BMI                0.0221022898       0.067376236     -0.0414747295
## risksYes          -0.0528611229       0.001409027      0.0271740638
## PRIORLIVE1        -0.0917042886      -0.069075190      0.0021049894
## PRIORLIVE2        -0.0309074959       0.013759458     -0.0208035109
## PRIORLIVE3        -0.0005936758      -0.008042953      0.0194897798
## PRIORLIVE4+       -0.0019788298       0.009527976     -0.0369825806
## CIG_RECYes         0.1766934687      -0.040099934      0.0006942985
## PRIORDEAD1+       -0.0031513911      -0.005659484      0.0211994431
##                   MRACEHISPHispanic          BMI     risksYes    PRIORLIVE1
## DMARUnmarried           0.146663369  0.022102290 -0.052861123 -0.0917042886
## MRACEHISPNH Black      -0.221896879  0.067376236  0.001409027 -0.0690751903
## MRACEHISPNH Other      -0.146708292 -0.041474730  0.027174064  0.0021049894
## MRACEHISPHispanic       1.000000000  0.038097591  0.011590058 -0.0364607526
## BMI                     0.038097591  1.000000000  0.201200894  0.0163043024
## risksYes                0.011590058  0.201200894  1.000000000  0.0566984062
## PRIORLIVE1             -0.036460753  0.016304302  0.056698406  1.0000000000
## PRIORLIVE2              0.035973387  0.007976993  0.106160712 -0.3142882820
## PRIORLIVE3              0.025091586  0.016491059  0.080099966 -0.1852845206
## PRIORLIVE4+             0.045011818  0.042715973  0.067713361 -0.1761047402
## CIG_RECYes             -0.105948563  0.016278885  0.010325103  0.0004005345
## PRIORDEAD1+             0.003628763 -0.013132945  0.024393056 -0.0086085705
##                     PRIORLIVE2    PRIORLIVE3  PRIORLIVE4+    CIG_RECYes
## DMARUnmarried     -0.030907496 -0.0005936758 -0.001978830  0.1766934687
## MRACEHISPNH Black  0.013759458 -0.0080429534  0.009527976 -0.0400999339
## MRACEHISPNH Other -0.020803511  0.0194897798 -0.036982581  0.0006942985
## MRACEHISPHispanic  0.035973387  0.0250915863  0.045011818 -0.1059485628
## BMI                0.007976993  0.0164910591  0.042715973  0.0162788847
## risksYes           0.106160712  0.0800999664  0.067713361  0.0103251028
## PRIORLIVE1        -0.314288282 -0.1852845206 -0.176104740  0.0004005345
## PRIORLIVE2         1.000000000 -0.1182428086 -0.112384559 -0.0226182822
## PRIORLIVE3        -0.118242809  1.0000000000 -0.066254838  0.0820598176
## PRIORLIVE4+       -0.112384559 -0.0662548376  1.000000000  0.0351471325
## CIG_RECYes        -0.022618282  0.0820598176  0.035147133  1.0000000000
## PRIORDEAD1+        0.046599450 -0.0241451070  0.032390671 -0.0007119841
##                     PRIORDEAD1+
## DMARUnmarried     -0.0031513911
## MRACEHISPNH Black -0.0056594837
## MRACEHISPNH Other  0.0211994431
## MRACEHISPHispanic  0.0036287626
## BMI               -0.0131329455
## risksYes           0.0243930559
## PRIORLIVE1        -0.0086085705
## PRIORLIVE2         0.0465994497
## PRIORLIVE3        -0.0241451070
## PRIORLIVE4+        0.0323906711
## CIG_RECYes        -0.0007119841
## PRIORDEAD1+        1.0000000000
vif_values <- vif(optimal) 
vif_values
##               GVIF Df GVIF^(1/(2*Df))
## DMAR      1.223057  1        1.105919
## MRACEHISP 1.212509  3        1.032637
## BMI       1.052965  1        1.026141
## risks     1.097282  1        1.047512
## PRIORLIVE 1.095732  4        1.011493
## CIG_REC   1.086856  1        1.042524
## PRIORDEAD 1.005190  1        1.002592
## --> VIF values of less than 10, suggesting no collinearity. 

# Compare models
tidy(anova(M.NULL, M.UNADJUSTED, optimal, M.FULL))
## # A tibble: 4 × 7
##   term                      df.residual    rss    df   sumsq statistic   p.value
##   <chr>                           <dbl>  <dbl> <dbl>   <dbl>     <dbl>     <dbl>
## 1 DBWT ~ 1                         1687 6.12e8    NA NA         NA     NA       
## 2 DBWT ~ CIG_REC                   1686 6.08e8     1  3.69e6    11.2    8.14e- 4
## 3 DBWT ~ DMAR + MRACEHISP …        1675 5.48e8    11  6.01e7    16.7    1.29e-31
## 4 DBWT ~ CIG_REC + risks +…        1671 5.47e8     4  5.89e5     0.449  7.73e- 1
# Displaying the four models by each other
modelsummary(
  list(
    null = M.NULL, 
    unadjasted = M.UNADJUSTED, 
    optimal = optimal,
    full = M.FULL),
  fmt = 1,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full
(Intercept) 3234.7 (14.7)*** 3247.9 (15.2)*** 3080.6 (64.6)*** 3162.4 (107.0)***
CIG_RECYes −179.1 (56.0)** −160.1 (55.6)** −156.6 (56.3)**
DMARUnmarried −184.3 (31.8)*** −189.8 (35.1)***
MRACEHISPNH Black −292.1 (41.7)*** −290.2 (41.8)***
MRACEHISPNH Other −121.7 (52.8)* −120.8 (53.0)*
MRACEHISPHispanic −74.5 (38.1)+ −72.5 (39.1)+
BMI 10.3 (2.3)*** 10.5 (2.3)***
risksYes −158.8 (31.7)*** −152.9 (32.1)***
PRIORLIVE1 130.7 (33.8)*** 140.5 (34.7)***
PRIORLIVE2 107.2 (41.7)* 123.4 (43.9)**
PRIORLIVE3 138.8 (60.0)* 161.3 (63.1)*
PRIORLIVE4+ 131.9 (62.3)* 162.5 (67.6)*
PRIORDEAD1+ 258.4 (153.9)+ 265.5 (154.2)+
MAGER −3.8 (3.1)
MEDUCHS 8.4 (49.9)
MEDUCSome college −0.2 (50.8)
MEDUCUni 31.6 (56.9)
Num.Obs. 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105
R2 Adj. 0.000 0.005 0.098 0.097
AIC 26401.9 26393.7 26239.9 26246.1
BIC 26412.7 26410.0 26315.9 26343.8
Log.Lik. −13198.942 −13193.843 −13105.946 −13105.039
F 10.217 16.259 12.291
RMSE 602.05 600.23 569.78 569.47
# Alternative: 95% CI instead of SE in brackets
modelsummary(
  list(
    null = M.NULL, 
    unadjasted = M.UNADJUSTED, 
    optimal = optimal,
    full = M.FULL),
  fmt = 1,
  estimate  = "{estimate} ({conf.low}, {conf.high})",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full
(Intercept) 3234.7 (3206.0, 3263.5) 3247.9 (3218.1, 3277.7) 3080.6 (2953.8, 3207.3) 3162.4 (2952.5, 3372.3)
CIG_RECYes −179.1 (−289.0, −69.2) −160.1 (−269.2, −51.0) −156.6 (−267.1, −46.1)
DMARUnmarried −184.3 (−246.7, −121.9) −189.8 (−258.6, −121.0)
MRACEHISPNH Black −292.1 (−373.9, −210.4) −290.2 (−372.2, −208.1)
MRACEHISPNH Other −121.7 (−225.2, −18.1) −120.8 (−224.6, −16.9)
MRACEHISPHispanic −74.5 (−149.2, 0.2) −72.5 (−149.2, 4.3)
BMI 10.3 (5.8, 14.7) 10.5 (6.0, 15.0)
risksYes −158.8 (−220.9, −96.7) −152.9 (−215.8, −90.0)
PRIORLIVE1 130.7 (64.4, 197.0) 140.5 (72.4, 208.6)
PRIORLIVE2 107.2 (25.3, 189.1) 123.4 (37.3, 209.5)
PRIORLIVE3 138.8 (21.1, 256.4) 161.3 (37.4, 285.2)
PRIORLIVE4+ 131.9 (9.7, 254.2) 162.5 (30.0, 295.0)
PRIORDEAD1+ 258.4 (−43.4, 560.3) 265.5 (−36.9, 567.9)
MAGER −3.8 (−9.9, 2.2)
MEDUCHS 8.4 (−89.4, 106.3)
MEDUCSome college −0.2 (−100.0, 99.5)
MEDUCUni 31.6 (−80.0, 143.2)
Num.Obs. 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105
R2 Adj. 0.000 0.005 0.098 0.097
AIC 26401.9 26393.7 26239.9 26246.1
BIC 26412.7 26410.0 26315.9 26343.8
Log.Lik. −13198.942 −13193.843 −13105.946 −13105.039
F 10.217 16.259 12.291
RMSE 602.05 600.23 569.78 569.47
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Answer:

What does the number in the (brackets) mean? It is the SE. How can you identify the optimal model just by looking at the \(R^2\) and the AIC? Lowest AIC indicate the optimal model. Conclusion: The optimal model has the lowest AIC. The R-squared was similar for the optimal and full model. In the ANOVA test, the full model was not significantly better than the optimal model. So we conclude that indeed the optimal model is the best model.

Exercise 4

Center and scale all the continuous variables, and create a forest plot for the optimal model.

# Scaling
data.ctr <- data |> 
  standardize(two_sd = TRUE)

# Fitting the model
M.OPT.ctr <- lm(
  DBWT ~ DMAR + MRACEHISP + BMI + 
    risks + PRIORLIVE + CIG_REC + 
    PRIORDEAD, 
  data = data.ctr
  )

# creating a forest plot
M.OPT.ctr %>% 
  model_parameters() %>% 
  plot()

Answer: All parameters except PRIORDEAD in the optimal model are associated with the outcome. Being umarried (compared to married), NH Black, NH Other or Hispanic (compared to NH White), having no risk factors and smoking are associated with lower birthweight. Having higher BMI and having one or more prior live births is associated with higher birthweight.

Exercise 5

How do the effect estimate of smoking, the standard error of its estimate, its 95% confidence interval, and its statistical significance change between the un-adjusted, optimal and full models? Please use the centered and scaled models to answer this question.

# Specify the different models
M.NULL.ctr <- lm(DBWT ~ 1, data.ctr)
M.UNADJUSTED.ctr <- lm(DBWT ~ CIG_REC, data.ctr)
# The dot represents all variables except the outcome DBWT
M.FULL.ctr <- lm(DBWT ~ ., data.ctr) 

# Displaying the four models by each other, now using the centered and standardized data
modelsummary(
  list(
    null = M.NULL.ctr, 
    unadjasted = M.UNADJUSTED.ctr, 
    optimal = M.OPT.ctr,
    full = M.FULL.ctr),
  fmt = 2,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full
(Intercept) 0.00 (0.01) 0.01 (0.01) 0.10 (0.02)*** 0.08 (0.05)+
CIG_RECYes −0.15 (0.05)** −0.13 (0.05)** −0.13 (0.05)**
DMARUnmarried −0.15 (0.03)*** −0.16 (0.03)***
MRACEHISPNH Black −0.24 (0.03)*** −0.24 (0.03)***
MRACEHISPNH Other −0.10 (0.04)* −0.10 (0.04)*
MRACEHISPHispanic −0.06 (0.03)+ −0.06 (0.03)+
BMI 0.11 (0.02)*** 0.11 (0.02)***
risksYes −0.13 (0.03)*** −0.13 (0.03)***
PRIORLIVE1 0.11 (0.03)*** 0.12 (0.03)***
PRIORLIVE2 0.09 (0.03)* 0.10 (0.04)**
PRIORLIVE3 0.12 (0.05)* 0.13 (0.05)*
PRIORLIVE4+ 0.11 (0.05)* 0.13 (0.06)*
PRIORDEAD1+ 0.21 (0.13)+ 0.22 (0.13)+
MAGER −0.04 (0.03)
MEDUCHS 0.01 (0.04)
MEDUCSome college 0.00 (0.04)
MEDUCUni 0.03 (0.05)
Num.Obs. 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105
R2 Adj. 0.000 0.005 0.098 0.097
AIC 2453.3 2445.1 2291.3 2297.5
BIC 2464.1 2461.4 2367.3 2395.2
Log.Lik. −1224.636 −1219.537 −1131.640 −1130.732
F 10.217 16.259 12.291
RMSE 0.50 0.50 0.47 0.47
# Show models next to each other, now with 95% CI's and p-values
modelsummary(
  list(
    null = M.NULL.ctr, 
    unadjasted = M.UNADJUSTED.ctr, 
    optimal = M.OPT.ctr,
    full = M.FULL.ctr),
  fmt = 3,
  estimate  = "{estimate} ({conf.low}, {conf.high}) p={p.value}",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full
(Intercept) 0.000 (−0.024, 0.024) p=1.000 0.011 (−0.014, 0.036) p=0.386 0.101 (0.055, 0.148) p=<0.001 0.082 (−0.012, 0.177) p=0.088
CIG_RECYes −0.149 (−0.240, −0.057) p=0.001 −0.133 (−0.224, −0.042) p=0.004 −0.130 (−0.222, −0.038) p=0.005
DMARUnmarried −0.153 (−0.205, −0.101) p=<0.001 −0.158 (−0.215, −0.100) p=<0.001
MRACEHISPNH Black −0.243 (−0.310, −0.175) p=<0.001 −0.241 (−0.309, −0.173) p=<0.001
MRACEHISPNH Other −0.101 (−0.187, −0.015) p=0.021 −0.100 (−0.187, −0.014) p=0.023
MRACEHISPHispanic −0.062 (−0.124, 0.000) p=0.051 −0.060 (−0.124, 0.004) p=0.064
BMI 0.108 (0.061, 0.154) p=<0.001 0.110 (0.063, 0.157) p=<0.001
risksYes −0.132 (−0.183, −0.080) p=<0.001 −0.127 (−0.179, −0.075) p=<0.001
PRIORLIVE1 0.109 (0.053, 0.164) p=<0.001 0.117 (0.060, 0.173) p=<0.001
PRIORLIVE2 0.089 (0.021, 0.157) p=0.010 0.102 (0.031, 0.174) p=0.005
PRIORLIVE3 0.115 (0.018, 0.213) p=0.021 0.134 (0.031, 0.237) p=0.011
PRIORLIVE4+ 0.110 (0.008, 0.211) p=0.034 0.135 (0.025, 0.245) p=0.016
PRIORDEAD1+ 0.215 (−0.036, 0.465) p=0.093 0.220 (−0.031, 0.471) p=0.085
MAGER −0.037 (−0.095, 0.021) p=0.214
MEDUCHS 0.007 (−0.074, 0.088) p=0.866
MEDUCSome college 0.000 (−0.083, 0.083) p=0.996
MEDUCUni 0.026 (−0.066, 0.119) p=0.579
Num.Obs. 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105
R2 Adj. 0.000 0.005 0.098 0.097
AIC 2453.3 2445.1 2291.3 2297.5
BIC 2464.1 2461.4 2367.3 2395.2
Log.Lik. −1224.636 −1219.537 −1131.640 −1130.732
F 10.217 16.259 12.291
RMSE 0.50 0.50 0.47 0.47
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001

Answer: The effect estimate of smoking on birthweigth is slightly smaller in the adjusted (both full adjustment and optimal adjustment) compared to the unadjusted analysis. Statistical significance remains. The width of the 95% CI was similar in the adjusted model (−0.22, −0.04) and unadjusted model (−0.24, −0.06). The p-value in the effect estimate of smoking was <0.01 in both the unadjusted and adjusted models.

Exercise 6

Does the association between cigarette smoking during pregnancy and birth weight depend on mother’s education? Modify the FULL model appropriately to be able to answer this question. Use anova to test whether the model with the interaction is significantly better than the one without it. Regardless of statistical significance, create a dumbell plot illustrating the smoking effect at each level of mother’s education and estimate the cigarette smoking effect at each level of mother’s education. Hint: When creating the dataset for plotting, include the value zero for each predictor not included in the interaction (the reference level for all categorical variables). Picking the value zero in a centered predictor is like choosing the mean of that variable, which is a reasonable choice.

# Create new model with interaction between smoking, education level and ethnicity (as is done in the code provided by the exercise)
M.FULL.int.ctr <- lm(DBWT ~ . + CIG_REC:MEDUC:MRACEHISP, data.ctr)

# Create new model with interaction between smoking, education level and ethnicity (as is done in the code provided by the exercise), but now making sure it also takes into account the relevant two-way interactions

M.FULL.int.ctrA <- lm(DBWT ~ . + (CIG_REC + MEDUC + MRACEHISP)^3, data.ctr)

# Create new model with interaction between smoking and education level (and not ethnicity; as the question is actually whether the association between cigarette smoking during pregnancy and birth weight depends on mother’s education, and not whether is also depends on race)
M.FULL.int.ctrB <- lm(DBWT ~ . + CIG_REC:MEDUC, data.ctr)

modelsummary(
  list(
    null = M.NULL.ctr, 
    unadjasted = M.UNADJUSTED.ctr, 
    optimal = M.OPT.ctr,
    full = M.FULL.ctr,
    interaction_smo_edu_ethn = M.FULL.int.ctr,
    interaction_smo_edu_ethn2 = M.FULL.int.ctrA,
    interaction_smo_edu = M.FULL.int.ctrB),
  fmt = 3,
  estimate  = "{estimate} ({std.error}){stars} p={p.value}",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full interaction_smo_edu_ethn  interaction_smo_edu_ethn2 interaction_smo_edu
(Intercept) 0.000 (0.012) p=1.000 0.011 (0.013) p=0.386 0.101 (0.024)*** p=<0.001 0.082 (0.048)+ p=0.088 −1.189 (0.793) p=0.134 −0.008 (0.074) p=0.911 0.062 (0.050) p=0.214
CIG_RECYes −0.149 (0.047)** p=0.001 −0.133 (0.046)** p=0.004 −0.130 (0.047)** p=0.005 0.489 (0.343) p=0.155 0.125 (0.117) p=0.287 −0.018 (0.091) p=0.845
DMARUnmarried −0.153 (0.026)*** p=<0.001 −0.158 (0.029)*** p=<0.001 −0.157 (0.029)*** p=<0.001 −0.157 (0.029)*** p=<0.001 −0.157 (0.029)*** p=<0.001
MRACEHISPNH Black −0.243 (0.035)*** p=<0.001 −0.241 (0.035)*** p=<0.001 0.576 (0.399) p=0.149 −0.106 (0.114) p=0.353 −0.239 (0.035)*** p=<0.001
MRACEHISPNH Other −0.101 (0.044)* p=0.021 −0.100 (0.044)* p=0.023 0.660 (0.390)+ p=0.091 −0.164 (0.165) p=0.322 −0.102 (0.044)* p=0.020
MRACEHISPHispanic −0.062 (0.032)+ p=0.051 −0.060 (0.032)+ p=0.064 0.663 (0.400)+ p=0.097 0.040 (0.084) p=0.633 −0.056 (0.033)+ p=0.085
BMI 0.108 (0.024)*** p=<0.001 0.110 (0.024)*** p=<0.001 0.107 (0.024)*** p=<0.001 0.107 (0.024)*** p=<0.001 0.108 (0.024)*** p=<0.001
risksYes −0.132 (0.026)*** p=<0.001 −0.127 (0.027)*** p=<0.001 −0.125 (0.027)*** p=<0.001 −0.125 (0.027)*** p=<0.001 −0.127 (0.027)*** p=<0.001
PRIORLIVE1 0.109 (0.028)*** p=<0.001 0.117 (0.029)*** p=<0.001 0.123 (0.029)*** p=<0.001 0.123 (0.029)*** p=<0.001 0.117 (0.029)*** p=<0.001
PRIORLIVE2 0.089 (0.035)* p=0.010 0.102 (0.036)** p=0.005 0.106 (0.037)** p=0.004 0.106 (0.037)** p=0.004 0.104 (0.036)** p=0.004
PRIORLIVE3 0.115 (0.050)* p=0.021 0.134 (0.052)* p=0.011 0.137 (0.053)** p=0.010 0.137 (0.053)** p=0.010 0.129 (0.053)* p=0.015
PRIORLIVE4+ 0.110 (0.052)* p=0.034 0.135 (0.056)* p=0.016 0.137 (0.057)* p=0.016 0.137 (0.057)* p=0.016 0.135 (0.056)* p=0.016
PRIORDEAD1+ 0.215 (0.128)+ p=0.093 0.220 (0.128)+ p=0.085 0.222 (0.128)+ p=0.085 0.222 (0.128)+ p=0.085 0.225 (0.128)+ p=0.080
MAGER −0.037 (0.030) p=0.214 −0.042 (0.030) p=0.164 −0.042 (0.030) p=0.164 −0.036 (0.030) p=0.225
MEDUCHS 0.007 (0.041) p=0.866 0.235 (0.477) p=0.623 0.119 (0.077) p=0.123 0.026 (0.045) p=0.556
MEDUCSome college 0.000 (0.042) p=0.996 −0.230 (0.478) p=0.630 0.085 (0.076) p=0.266 0.025 (0.045) p=0.571
MEDUCUni 0.026 (0.047) p=0.579 0.533 (0.486) p=0.273 0.110 (0.076) p=0.146 0.044 (0.049) p=0.364
CIG_RECNo × MRACEHISPNH White × MEDUClower 1.180 (0.794) p=0.137
CIG_RECYes × MRACEHISPNH White × MEDUClower 0.816 (0.530) p=0.124
CIG_RECNo × MRACEHISPNH Black × MEDUClower 0.498 (0.497) p=0.316
CIG_RECYes × MRACEHISPNH Black × MEDUClower −0.697 (0.444) p=0.117
CIG_RECNo × MRACEHISPNH Other × MEDUClower 0.357 (0.518) p=0.491
CIG_RECYes × MRACEHISPNH Other × MEDUClower −0.034 (0.422) p=0.935
CIG_RECNo × MRACEHISPHispanic × MEDUClower 0.558 (0.483) p=0.248
CIG_RECNo × MRACEHISPNH White × MEDUCHS 1.065 (0.793) p=0.180
CIG_RECYes × MRACEHISPNH White × MEDUCHS 0.418 (0.529) p=0.430
CIG_RECNo × MRACEHISPNH Black × MEDUCHS 0.175 (0.492) p=0.722
CIG_RECYes × MRACEHISPNH Black × MEDUCHS −0.690 (0.422) p=0.103 0.613 (0.406) p=0.131
CIG_RECNo × MRACEHISPNH Other × MEDUCHS 0.305 (0.514) p=0.553
CIG_RECYes × MRACEHISPNH Other × MEDUCHS −0.225 (0.485) p=0.642 0.143 (0.481) p=0.766
CIG_RECNo × MRACEHISPHispanic × MEDUCHS 0.315 (0.484) p=0.515
CIG_RECNo × MRACEHISPNH White × MEDUCSome college 1.495 (0.793)+ p=0.060
CIG_RECYes × MRACEHISPNH White × MEDUCSome college 0.814 (0.532) p=0.126
CIG_RECNo × MRACEHISPNH Black × MEDUCSome college 0.724 (0.493) p=0.142
CIG_RECYes × MRACEHISPNH Black × MEDUCSome college −0.119 (0.393) p=0.762 0.670 (0.378)+ p=0.076
CIG_RECNo × MRACEHISPNH Other × MEDUCSome college 0.719 (0.503) p=0.153
CIG_RECYes × MRACEHISPNH Other × MEDUCSome college 0.193 (0.485) p=0.691 0.182 (0.470) p=0.698
CIG_RECNo × MRACEHISPHispanic × MEDUCSome college 0.779 (0.485) p=0.108
CIG_RECNo × MRACEHISPNH White × MEDUCUni 0.758 (0.395)+ p=0.055
CIG_RECYes × MEDUCHS −0.283 (0.145)+ p=0.052 −0.121 (0.117) p=0.302
CIG_RECYes × MEDUCSome college −0.317 (0.153)* p=0.038 −0.202 (0.122)+ p=0.096
CIG_RECYes × MEDUCUni −0.394 (0.228)+ p=0.085 −0.065 (0.192) p=0.736
CIG_RECYes × MRACEHISPNH Black −0.832 (0.313)** p=0.008
CIG_RECYes × MRACEHISPNH Other −0.027 (0.305) p=0.930
CIG_RECYes × MRACEHISPHispanic −0.194 (0.360) p=0.590
MRACEHISPNH Black × MEDUCHS −0.208 (0.130) p=0.109
MRACEHISPNH Other × MEDUCHS 0.064 (0.215) p=0.765
MRACEHISPHispanic × MEDUCHS −0.127 (0.104) p=0.221
MRACEHISPNH Black × MEDUCSome college −0.089 (0.129) p=0.488
MRACEHISPNH Other × MEDUCSome college 0.048 (0.183) p=0.795
MRACEHISPHispanic × MEDUCSome college −0.094 (0.105) p=0.370
MRACEHISPNH Black × MEDUCUni −0.076 (0.132) p=0.565
MRACEHISPNH Other × MEDUCUni 0.066 (0.178) p=0.712
MRACEHISPHispanic × MEDUCUni −0.136 (0.109) p=0.213
CIG_RECYes × MRACEHISPHispanic × MEDUCHS 0.525 (0.503) p=0.297
CIG_RECYes × MRACEHISPHispanic × MEDUCSome college 0.097 (0.507) p=0.849
CIG_RECYes × MRACEHISPNH Other × MEDUCUni 0.785 (0.499) p=0.116
Num.Obs. 1688 1688 1688 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105 0.117 0.117 0.107
R2 Adj. 0.000 0.005 0.098 0.097 0.096 0.096 0.097
AIC 2453.3 2445.1 2291.3 2297.5 2319.6 2319.6 2300.6
BIC 2464.1 2461.4 2367.3 2395.2 2536.9 2536.9 2414.6
Log.Lik. −1224.636 −1219.537 −1131.640 −1130.732 −1119.811 −1119.811 −1129.286
F 10.217 16.259 12.291 10.500
RMSE 0.50 0.50 0.47 0.47 0.47 0.47 0.47
# Compare models
tidy(anova(M.FULL.ctr, M.FULL.int.ctr, M.FULL.int.ctrA, M.FULL.int.ctrB))
## # A tibble: 4 × 7
##   term                           df.residual   rss    df sumsq statistic p.value
##   <chr>                                <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>
## 1 DBWT ~ CIG_REC + risks + MAGE…        1671  377.    NA NA       NA      NA    
## 2 DBWT ~ CIG_REC + risks + MAGE…        1649  372.    22  4.85     0.976   0.492
## 3 DBWT ~ CIG_REC + risks + MAGE…        1649  372.     0  0       NA      NA    
## 4 DBWT ~ CIG_REC + risks + MAGE…        1668  377.   -19 -4.21     0.980   0.482
# Estimate the outcome variable
# using marginaleffects package
overallpredictions <- marginaleffects::predictions(M.FULL.int.ctr, 
            newdata = datagrid(
              MRACEHISP = c("NH White", "NH Black", "NH Other", "Hispanic"),
              MEDUC   = c("lower", "HS", "Some college", "Uni"),
              CIG_REC = c("No", "Yes")
              )
            ) |> 
  as.data.frame() |> 
  mutate(DBWT = estimate)  |> 
  select(DBWT, MRACEHISP, CIG_REC, MEDUC) 

levels(overallpredictions$CIG_REC) <- list("No Smoking" ="No" , "Smoking" ="Yes" )

ggplot(overallpredictions,aes(x=CIG_REC,y=DBWT,group=MEDUC,col=MEDUC))+geom_point() + facet_wrap(.~MRACEHISP) + geom_line() + theme_bw() + xlab("") + ylab("Birth weight (centered, scaled)")+ theme(legend.position = "bottom") + labs(color="Mother's Education") + theme_minimal()

Answer: The p-values of the interaction terms in the model with the two-way interaction term smoking x education were all above 0.05, suggesting no evidence of significant interaction between smoking and education level (not taking ethnicity into account). This answers the question as posed in the exercise (Does the association between cigarette smoking during pregnancy and birth weight depend on mother’s education?). When we look at the 3-way interaction model (the one that also takes into account the relevant 2-way interactions),thus taking into account a potential differential association across races, there seems to be significant interaction between smoking and ‘some college’, and between smoking and NH Black. But the 3-way interaction terms themselves are not significant. Based on the ANOVA test, the model with both the 2-way and 3-way interaction terms is not significantly better than the full model (without the interaction terms). When looking at the Dumbell plot, showing the 3-way interaction between smoking, education level and ethnicity, the seem to be some differential associations, but from the p-value of the interaction terms we conclude that significant interaction is not present.

Exercise 7

Does the association between mother’s BMI and birth-weight differ between mothers of different race/ethnicity? Modify the OPTIMAL model appropriately to be able to answer this question, using the dataset with the centered variables. Use anova to test whether the model with the interaction is significantly better than the one without it. Regardless of statistical significance, estimate the BMI effect at each level of mother’s race/ethnicity and plot the BMI effect at each level of mother’s race/ethnicity using a dumbell plot.

# Create new model with interaction between BMI and race

M.OPT.int.ctr <- lm(DBWT ~ DMAR + risks + PRIORLIVE + CIG_REC + PRIORDEAD + BMI * MRACEHISP, 
    data = data.ctr)

modelsummary(
  list(
    null = M.NULL.ctr, 
    unadjasted = M.UNADJUSTED.ctr, 
    optimal = M.OPT.ctr,
    full = M.FULL.ctr,
    interaction_bmi_race = M.OPT.int.ctr),
  fmt = 3,
  estimate  = "{estimate} ({std.error}){stars} p={p.value}",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full interaction_bmi_race
(Intercept) 0.000 (0.012) p=1.000 0.011 (0.013) p=0.386 0.101 (0.024)*** p=<0.001 0.082 (0.048)+ p=0.088 0.103 (0.024)*** p=<0.001
CIG_RECYes −0.149 (0.047)** p=0.001 −0.133 (0.046)** p=0.004 −0.130 (0.047)** p=0.005 −0.136 (0.046)** p=0.003
DMARUnmarried −0.153 (0.026)*** p=<0.001 −0.158 (0.029)*** p=<0.001 −0.155 (0.026)*** p=<0.001
MRACEHISPNH Black −0.243 (0.035)*** p=<0.001 −0.241 (0.035)*** p=<0.001 −0.240 (0.035)*** p=<0.001
MRACEHISPNH Other −0.101 (0.044)* p=0.021 −0.100 (0.044)* p=0.023 −0.092 (0.044)* p=0.038
MRACEHISPHispanic −0.062 (0.032)+ p=0.051 −0.060 (0.032)+ p=0.064 −0.061 (0.032)+ p=0.055
BMI 0.108 (0.024)*** p=<0.001 0.110 (0.024)*** p=<0.001 0.120 (0.032)*** p=<0.001
risksYes −0.132 (0.026)*** p=<0.001 −0.127 (0.027)*** p=<0.001 −0.132 (0.026)*** p=<0.001
PRIORLIVE1 0.109 (0.028)*** p=<0.001 0.117 (0.029)*** p=<0.001 0.108 (0.028)*** p=<0.001
PRIORLIVE2 0.089 (0.035)* p=0.010 0.102 (0.036)** p=0.005 0.090 (0.035)** p=0.010
PRIORLIVE3 0.115 (0.050)* p=0.021 0.134 (0.052)* p=0.011 0.115 (0.050)* p=0.021
PRIORLIVE4+ 0.110 (0.052)* p=0.034 0.135 (0.056)* p=0.016 0.108 (0.052)* p=0.036
PRIORDEAD1+ 0.215 (0.128)+ p=0.093 0.220 (0.128)+ p=0.085 0.207 (0.128) p=0.106
MAGER −0.037 (0.030) p=0.214
MEDUCHS 0.007 (0.041) p=0.866
MEDUCSome college 0.000 (0.042) p=0.996
MEDUCUni 0.026 (0.047) p=0.579
BMI × MRACEHISPNH Black −0.050 (0.059) p=0.393
BMI × MRACEHISPNH Other 0.120 (0.098) p=0.221
BMI × MRACEHISPHispanic −0.046 (0.063) p=0.467
Num.Obs. 1688 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105 0.106
R2 Adj. 0.000 0.005 0.098 0.097 0.098
AIC 2453.3 2445.1 2291.3 2297.5 2294.1
BIC 2464.1 2461.4 2367.3 2395.2 2386.4
Log.Lik. −1224.636 −1219.537 −1131.640 −1130.732 −1130.048
F 10.217 16.259 12.291 13.219
RMSE 0.50 0.50 0.47 0.47 0.47
# Compare models

tidy(anova(M.FULL.ctr, M.OPT.int.ctr))
## # A tibble: 2 × 7
##   term                          df.residual   rss    df  sumsq statistic p.value
##   <chr>                               <dbl> <dbl> <dbl>  <dbl>     <dbl>   <dbl>
## 1 DBWT ~ CIG_REC + risks + MAG…        1671  377.    NA NA            NA      NA
## 2 DBWT ~ DMAR + risks + PRIORL…        1672  377.    -1  0.306        NA      NA
# Create data for Dummbell plot (with maternal education = some college)
BMI_mean.ctr <- mean(data.ctr$BMI)
BMI_sd.ctr <- sd(data.ctr$BMI)
BMI_mean_plus_sd.ctr <- BMI_mean.ctr + BMI_sd.ctr

newdata2 <- mutate(data.frame(
  BMI = c(0, 0.5, 0, 0.5, 0, 0.5, 0, 0.5),  
  MRACEHISP   = c("Hispanic", "Hispanic", "NH Black", "NH Black",  "NH Other", "NH Other", "NH White", "NH White")), 
    risks = "No", 
    DMAR = "Married", 
    PRIORLIVE = "0", 
    PRIORDEAD = "0",
    CIG_REC = "No")

newdata2 |> 
  mutate(
    DBWT = predict(
      M.OPT.int.ctr, 
      newdata2
      )) |> select(MRACEHISP, BMI, DBWT)
##   MRACEHISP BMI        DBWT
## 1  Hispanic 0.0  0.04215825
## 2  Hispanic 0.5  0.07928655
## 3  NH Black 0.0 -0.13666489
## 4  NH Black 0.5 -0.10182893
## 5  NH Other 0.0  0.01078647
## 6  NH Other 0.5  0.13096466
## 7  NH White 0.0  0.10284377
## 8  NH White 0.5  0.16283965
# Dumbell plot
Db_bmi_race <- tribble(
  ~MRACEHISP, ~BMI_mean.ctr, ~BMI_mean_plus_sd.ctr, 
  "Hispanic",    0.04215825,  0.07928655, 
  "NH Black",       -0.13666489,  -0.10182893, 
  "NH Other",        0.01078647,         0.13096466, 
  "NH White",        0.10284377,         0.16283965
  ) |> ggplot() + 
  geom_segment(
    aes(
      x = factor("Mean BMI", levels = c("Mean BMI", "Mean + 1SD BMI")), 
      y = BMI_mean.ctr, 
      xend = factor("Mean + 1SD BMI", levels = c("Mean BMI", "Mean + 1SD BMI")), 
      yend = BMI_mean_plus_sd.ctr, 
      color = MRACEHISP
      ),
    linewidth = 2
    ) + 
  geom_point(
    aes(x = factor("Mean + 1SD BMI", levels = c("Mean BMI", "Mean + 1SD BMI")), y = BMI_mean_plus_sd.ctr, color = MRACEHISP), 
    size = 6
    ) + 
  geom_point(
    aes(x = factor("Mean BMI", levels = c("Mean BMI", "Mean + 1SD BMI")), y = BMI_mean.ctr, color = MRACEHISP), 
    size = 6
    ) +
  scale_y_continuous(name="Birth weight (centered, scaled") +
  scale_x_discrete(name = NULL, limits = c("Mean BMI", "Mean + 1SD BMI")) 

Db_bmi_race + theme_minimal() + theme(axis.title.x = element_blank())

Answer: No evidence of significant interaction between BMI and race based on p-values of interaction terms >0.05. Model with interaction term does not perform better than the model without the interaction term. In the dumbell plot it seems like addition of 1SD for BMI is associated with a stronger increase in birthweight among NH Other women, compared to the other ethnic groups. However, based on the p-value of the interaction term, this difference is not statistically significant.

Exercise 8

Does the association between mother’s BMI and birth-weight depend on mother’s age? Modify the FULL model appropriately to be able to answer this question, using the dataset with the centered variables. Use anova to test whether the model with the interaction is significantly better than the one without it. Regardless of statistical significance, estimate the BMI effect for mothers age 20, 30, and 40 years and make a plot illustrating the BMI effect at each of these ages.

# Create new model with interaction between smoking and education level

M.FULL.int.ctr2 <- lm(DBWT ~ . + BMI * MAGER, data.ctr)

modelsummary(
  list(
    null = M.NULL.ctr, 
    unadjasted = M.UNADJUSTED.ctr, 
    optimal = M.OPT.ctr,
    full = M.FULL.ctr,
    interaction_bmi_age = M.FULL.int.ctr2),
  fmt = 3,
  estimate  = "{estimate} ({std.error}){stars} p={p.value}",
  statistic = NULL,
  stars = TRUE
  )
null unadjasted optimal full interaction_bmi_age
(Intercept) 0.000 (0.012) p=1.000 0.011 (0.013) p=0.386 0.101 (0.024)*** p=<0.001 0.082 (0.048)+ p=0.088 0.083 (0.048)+ p=0.086
CIG_RECYes −0.149 (0.047)** p=0.001 −0.133 (0.046)** p=0.004 −0.130 (0.047)** p=0.005 −0.131 (0.047)** p=0.005
DMARUnmarried −0.153 (0.026)*** p=<0.001 −0.158 (0.029)*** p=<0.001 −0.158 (0.029)*** p=<0.001
MRACEHISPNH Black −0.243 (0.035)*** p=<0.001 −0.241 (0.035)*** p=<0.001 −0.240 (0.035)*** p=<0.001
MRACEHISPNH Other −0.101 (0.044)* p=0.021 −0.100 (0.044)* p=0.023 −0.101 (0.044)* p=0.022
MRACEHISPHispanic −0.062 (0.032)+ p=0.051 −0.060 (0.032)+ p=0.064 −0.059 (0.033)+ p=0.068
BMI 0.108 (0.024)*** p=<0.001 0.110 (0.024)*** p=<0.001 0.110 (0.024)*** p=<0.001
risksYes −0.132 (0.026)*** p=<0.001 −0.127 (0.027)*** p=<0.001 −0.127 (0.027)*** p=<0.001
PRIORLIVE1 0.109 (0.028)*** p=<0.001 0.117 (0.029)*** p=<0.001 0.116 (0.029)*** p=<0.001
PRIORLIVE2 0.089 (0.035)* p=0.010 0.102 (0.036)** p=0.005 0.102 (0.036)** p=0.005
PRIORLIVE3 0.115 (0.050)* p=0.021 0.134 (0.052)* p=0.011 0.134 (0.052)* p=0.011
PRIORLIVE4+ 0.110 (0.052)* p=0.034 0.135 (0.056)* p=0.016 0.135 (0.056)* p=0.016
PRIORDEAD1+ 0.215 (0.128)+ p=0.093 0.220 (0.128)+ p=0.085 0.217 (0.128)+ p=0.090
MAGER −0.037 (0.030) p=0.214 −0.037 (0.030) p=0.213
MEDUCHS 0.007 (0.041) p=0.866 0.007 (0.041) p=0.870
MEDUCSome college 0.000 (0.042) p=0.996 0.000 (0.042) p=0.992
MEDUCUni 0.026 (0.047) p=0.579 0.025 (0.047) p=0.596
MAGER × BMI −0.024 (0.047) p=0.605
Num.Obs. 1688 1688 1688 1688 1688
R2 0.000 0.006 0.104 0.105 0.105
R2 Adj. 0.000 0.005 0.098 0.097 0.096
AIC 2453.3 2445.1 2291.3 2297.5 2299.2
BIC 2464.1 2461.4 2367.3 2395.2 2402.4
Log.Lik. −1224.636 −1219.537 −1131.640 −1130.732 −1130.597
F 10.217 16.259 12.291 11.578
RMSE 0.50 0.50 0.47 0.47 0.47
# Compare models
tidy(anova(M.FULL.ctr, M.FULL.int.ctr2))
## # A tibble: 2 × 7
##   term                         df.residual   rss    df   sumsq statistic p.value
##   <chr>                              <dbl> <dbl> <dbl>   <dbl>     <dbl>   <dbl>
## 1 DBWT ~ CIG_REC + risks + MA…        1671  377.    NA NA         NA      NA    
## 2 DBWT ~ CIG_REC + risks + MA…        1670  377.     1  0.0604     0.267   0.605
# Create new data for plot
(20-mean(data$MAGER))/(2*sd(data$MAGER))
## [1] -0.7714721
(30-mean(data$MAGER))/(2*sd(data$MAGER))
## [1] 0.09042803
(40-mean(data$MAGER))/(2*sd(data$MAGER))
## [1] 0.9523281
newdata3 <- mutate(data.frame(
  MAGER = c(-0.77, 0.09, 0.95, -0.77, 0.09, 0.95),  
  BMI   = c(0, 0, 0, 0.5, 0.5, 0.5) 
  ), risks = "No", 
    MRACEHISP = "NH White", 
    DMAR = "Married", 
    PRIORLIVE = "0", 
    PRIORDEAD = "0", 
    CIG_REC = "No",
    MEDUC = "lower"
    )

newdata3 |> 
  mutate(
    DBWT = predict(
      M.FULL.int.ctr2, 
      newdata3
      )) |> select(MAGER, BMI, DBWT)
##   MAGER BMI       DBWT
## 1 -0.77 0.0 0.11161284
## 2  0.09 0.0 0.07976423
## 3  0.95 0.0 0.04791563
## 4 -0.77 0.5 0.17591559
## 5  0.09 0.5 0.13364017
## 6  0.95 0.5 0.09136475
# Dumbell plot
Db_bmi_age <- tribble(
  ~MAGER, ~BMI_mean.ctr, ~BMI_mean_plus_sd.ctr, 
  "20",    0.11161284,  0.17591559, 
  "30",       0.07976423,  0.13364017, 
  "40",        0.04791563,         0.09136475
  ) |> ggplot() + 
  geom_segment(
    aes(
      x = "Mean BMI", y = BMI_mean.ctr, 
      xend = "Mean BMI + 1SD", yend = BMI_mean_plus_sd.ctr, 
      color = MAGER
      ),
    linewidth = 2
    ) + 
  geom_point(
    aes(x = "Mean BMI", y = BMI_mean.ctr, color = MAGER), 
    size = 6
    ) + 
  geom_point(
    aes(x = "Mean BMI + 1SD", y = BMI_mean_plus_sd.ctr, color = MAGER), 
    size = 6
    ) +
  scale_y_continuous(name="Birth weight (centered, scaled")

Db_bmi_age + theme_minimal() + theme(axis.title.x = element_blank())

Answer: No evidence of significant interaction between BMI and age for birthweight based on p-values of the interaction term > 0.05. Also in dumbell plot, the lines are more or less parallel, showing no evidence of interaction. Model with interaction term BMI x Age does not perform better than the full model based on AIC.