Multiple linear least square regression

# 1. load the library "tidyverse"
library(tidyverse)
library(broom)
library(table1)
library(car)
library(easystats)
library(modelsummary)
library(GGally)

# 2. use the read_csv file to read the dataset
natality <- read_rds("data/natality.rds")

Exercise 1

Question: Use the table1 package to create a table with appropriate descriptive information about the data set. Use the examples provided in the following vignette to help you build a table.

# Create Tittle of Table
tittle  <- "Maternal demographics stratified by smoking"

# Change Labels Names
label(natality$PRIORLIVE)       <- "Prior births (living)"
label(natality$PRIORDEAD)       <- "Prior births (dead)"

table1(~ DBWT + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE + PRIORDEAD +BMI| CIG_REC, overall = FALSE, caption=tittle, data=natality)

Maternal demographics stratified by smoking
	No (N=1564)	Yes (N=124)
Birth Weight (g)
Mean (SD)	3250 (601)	3070 (597)
Median [Min, Max]	3290 [369, 5880]	3090 [630, 4220]
Risk Factors Reported
No	1088 (69.6%)	84 (67.7%)
Yes	476 (30.4%)	40 (32.3%)
Mother Age
Mean (SD)	29.0 (5.79)	27.8 (5.79)
Median [Min, Max]	29.0 [15.0, 49.0]	28.0 [17.0, 42.0]
Mother Race/Hispanic Origin
NH White	844 (54.0%)	94 (75.8%)
NH Black	266 (17.0%)	14 (11.3%)
NH Other	125 (8.0%)	10 (8.1%)
Hispanic	329 (21.0%)	6 (4.8%)
Marital Status
Married	1017 (65.0%)	40 (32.3%)
Unmarried	547 (35.0%)	84 (67.7%)
Mother Education
lower	177 (11.3%)	34 (27.4%)
HS	377 (24.1%)	45 (36.3%)
Some college	437 (27.9%)	37 (29.8%)
Uni	573 (36.6%)	8 (6.5%)
Prior births (living)
0	601 (38.4%)	38 (30.6%)
1	516 (33.0%)	41 (33.1%)
2	265 (16.9%)	17 (13.7%)
3	93 (5.9%)	17 (13.7%)
4+	89 (5.7%)	11 (8.9%)
Prior births (dead)
0	1551 (99.2%)	123 (99.2%)
1+	13 (0.8%)	1 (0.8%)
Mother BMI
Mean (SD)	26.9 (6.32)	27.3 (6.42)
Median [Min, Max]	25.6 [15.6, 58.4]	26.3 [16.5, 50.1]

Answer: [Replace this with your answer]

Exercise 2

Question: Use the ggpairs function to explore the association between the different variables. Try to replicate the figure below, or create your own variation.

# Write your code here

data(natality) 

tittle_plot  <- "Natality in the United States (2018)"
tittle = tittle_plot


ggpairs(
  natality, c(1,3,4,6,10),
  title = tittle_plot, 
  mapping = ggplot2::aes(color = CIG_REC),
  upper = list(continuous = wrap("density", alpha = 0.9),
          combo = "facethist", discrete = wrap("colbar")),
  lower = list(continuous = wrap("cor"), combo = NULL),
  diag = list(continuous = wrap("densityDiag", colour = NA, alpha = 0.6), discrete = wrap("densityDiag")),
  legend = c(3,5),
  showStrips = FALSE,
   cardinality_threshold = 5
  ) + 
  labs(colour = "Smoking") + 
  theme_minimal() + 
  theme(legend.position = "bottom")

Answer: [Replace this with your answer]

Exercise 3

Question: Fit four linear models, each nested within the other: the null model, the unadjusted model, the full model and the optimal model. Use the step function find the optimal model using the stepwise method. Then use anova to compare the four models. What is your conclusion? Now compare your models using modelsummary function. What does the number in the (brackets) mean? How can you identify the optimal model just by looking at the R2 and the AIC?

M.NULL <- lm(DBWT ~ 1, natality)
M.UNADJUSTED <- lm(DBWT ~ CIG_REC, natality)
# The dot represents all variables except the outcome DBWT
M.FULL <- lm(DBWT ~ ., natality)  

# Finding the optimal model
step(M.NULL,
     scope = list(upper = M.FULL),
     direction="both",
     data=natality)

## Start:  AIC=21609.55
## DBWT ~ 1
## 
##             Df Sum of Sq       RSS   AIC
## + DMAR       1  28672236 583166851 21530
## + MRACEHISP  3  28557357 583281730 21535
## + PRIORLIVE  4   7485192 604353895 21597
## + MEDUC      3   5189885 606649203 21601
## + CIG_REC    1   3685187 608153901 21601
## + risks      1   3277949 608561138 21602
## + BMI        1   3170730 608668358 21603
## + MAGER      1   2941408 608897679 21603
## + PRIORDEAD  1    849628 610989460 21609
## <none>                   611839087 21610
## 
## Step:  AIC=21530.53
## DBWT ~ DMAR
## 
##             Df Sum of Sq       RSS   AIC
## + MRACEHISP  3  15359037 567807814 21492
## + risks      1   4395289 578771562 21520
## + BMI        1   3607980 579558871 21522
## + PRIORLIVE  4   4500359 578666492 21526
## + CIG_REC    1    978350 582188501 21530
## + PRIORDEAD  1    818812 582348039 21530
## <none>                   583166851 21530
## + MAGER      1     82475 583084376 21532
## + MEDUC      3    313468 582853384 21536
## - DMAR       1  28672236 611839087 21610
## 
## Step:  AIC=21491.48
## DBWT ~ DMAR + MRACEHISP
## 
##             Df Sum of Sq       RSS   AIC
## + BMI        1   4532441 563275373 21480
## + risks      1   3924653 563883161 21482
## + CIG_REC    1   2181695 565626119 21487
## + PRIORLIVE  4   3880005 563927809 21488
## + PRIORDEAD  1    846700 566961114 21491
## <none>                   567807814 21492
## + MAGER      1     52434 567755381 21493
## + MEDUC      3     98014 567709800 21497
## - MRACEHISP  3  15359037 583166851 21530
## - DMAR       1  15473916 583281730 21535
## 
## Step:  AIC=21479.95
## DBWT ~ DMAR + MRACEHISP + BMI
## 
##             Df Sum of Sq       RSS   AIC
## + risks      1   6059709 557215664 21464
## + CIG_REC    1   2362075 560913299 21475
## + PRIORLIVE  4   3457999 559817375 21478
## + PRIORDEAD  1    896345 562379028 21479
## <none>                   563275373 21480
## + MAGER      1    125890 563149483 21482
## + MEDUC      3    115009 563160364 21486
## - BMI        1   4532441 567807814 21492
## - MRACEHISP  3  16283498 579558871 21522
## - DMAR       1  15325756 578601129 21523
## 
## Step:  AIC=21463.69
## DBWT ~ DMAR + MRACEHISP + BMI + risks
## 
##             Df Sum of Sq       RSS   AIC
## + PRIORLIVE  4   5580134 551635530 21455
## + CIG_REC    1   2199370 555016294 21459
## + PRIORDEAD  1   1025923 556189741 21463
## <none>                   557215664 21464
## + MAGER      1     22588 557193076 21466
## + MEDUC      3     69675 557145989 21470
## - risks      1   6059709 563275373 21480
## - BMI        1   6667497 563883161 21482
## - MRACEHISP  3  15921403 573137067 21505
## - DMAR       1  16431163 573646827 21511
## 
## Step:  AIC=21454.7
## DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE
## 
##             Df Sum of Sq       RSS   AIC
## + CIG_REC    1   2707991 548927539 21448
## + PRIORDEAD  1    921529 550714001 21454
## <none>                   551635530 21455
## + MAGER      1    280671 551354859 21456
## + MEDUC      3    173640 551461890 21460
## - PRIORLIVE  4   5580134 557215664 21464
## - BMI        1   6486820 558122350 21472
## - risks      1   8181845 559817375 21478
## - MRACEHISP  3  15145908 566781438 21494
## - DMAR       1  14456460 566091990 21496
## 
## Step:  AIC=21448.4
## DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC
## 
##             Df Sum of Sq       RSS   AIC
## + PRIORDEAD  1    922493 548005046 21448
## <none>                   548927539 21448
## + MAGER      1    337056 548590483 21449
## + MEDUC      3     75431 548852108 21454
## - CIG_REC    1   2707991 551635530 21455
## - PRIORLIVE  4   6088755 555016294 21459
## - BMI        1   6662694 555590232 21467
## - risks      1   8131760 557059299 21471
## - DMAR       1  10955889 559883427 21480
## - MRACEHISP  3  16576572 565504111 21493
## 
## Step:  AIC=21447.56
## DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + 
##     PRIORDEAD
## 
##             Df Sum of Sq       RSS   AIC
## <none>                   548005046 21448
## - PRIORDEAD  1    922493 548927539 21448
## + MAGER      1    361028 547644018 21448
## + MEDUC      3     82260 547922786 21453
## - CIG_REC    1   2708955 550714001 21454
## - PRIORLIVE  4   5984714 553989760 21458
## - BMI        1   6749558 554754604 21466
## - risks      1   8234365 556239411 21471
## - DMAR       1  10970232 558975278 21479
## - MRACEHISP  3  16594210 564599256 21492

## 
## Call:
## lm(formula = DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + 
##     CIG_REC + PRIORDEAD, data = natality)
## 
## Coefficients:
##       (Intercept)      DMARUnmarried  MRACEHISPNH Black  MRACEHISPNH Other  
##           3080.56            -184.27            -292.14            -121.70  
## MRACEHISPHispanic                BMI           risksYes         PRIORLIVE1  
##            -74.49              10.26            -158.81             130.69  
##        PRIORLIVE2         PRIORLIVE3        PRIORLIVE4+         CIG_RECYes  
##            107.17             138.78             131.95            -160.08  
##       PRIORDEAD1+  
##            258.43

M.OPT <- lm(formula = DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + PRIORDEAD, data = natality)

#Anova 
anova(M.NULL, M.UNADJUSTED, M.OPT, M.FULL)

## Analysis of Variance Table
## 
## Model 1: DBWT ~ 1
## Model 2: DBWT ~ CIG_REC
## Model 3: DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + 
##     PRIORDEAD
## Model 4: DBWT ~ CIG_REC + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE + 
##     PRIORDEAD + BMI
##   Res.Df       RSS Df Sum of Sq       F    Pr(>F)    
## 1   1687 611839087                                   
## 2   1686 608153901  1   3685187 11.2491 0.0008144 ***
## 3   1675 548005046 11  60148855 16.6914 < 2.2e-16 ***
## 4   1671 547416336  4    588710  0.4493 0.7730024    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Displaying the four models by each other
modelsummary(
  list(
    null = M.NULL, 
    unadjasted = M.UNADJUSTED, 
    opt = M.OPT, 
    full = M.FULL
    ), 
  stars = TRUE
  )

	null	unadjasted	opt	full
(Intercept)	3234.746***	3247.903***	3080.559***	3162.373***
	(14.658)	(15.187)	(64.625)	(107.021)
CIG_RECYes		−179.096**	−160.083**	−156.593**
		(56.032)	(55.632)	(56.325)
DMARUnmarried			−184.275***	−189.782***
			(31.823)	(35.084)
MRACEHISPNH Black			−292.140***	−290.175***
			(41.690)	(41.829)
MRACEHISPNH Other			−121.695*	−120.774*
			(52.796)	(52.960)
MRACEHISPHispanic			−74.492+	−72.464+
			(38.075)	(39.120)
BMI			10.261***	10.493***
			(2.259)	(2.277)
risksYes			−158.808***	−152.902***
			(31.655)	(32.055)
PRIORLIVE1			130.686***	140.483***
			(33.814)	(34.725)
PRIORLIVE2			107.170*	123.385**
			(41.746)	(43.880)
PRIORLIVE3			138.776*	161.298*
			(59.972)	(63.149)
PRIORLIVE4+			131.949*	162.508*
			(62.333)	(67.562)
PRIORDEAD1+			258.434+	265.467+
			(153.905)	(154.184)
MAGER				−3.836
				(3.085)
MEDUCHS				8.448
				(49.909)
MEDUCSome college				−0.240
				(50.838)
MEDUCUni				31.610
				(56.902)
Num.Obs.	1688	1688	1688	1688
R2	0.000	0.006	0.104	0.105
R2 Adj.	0.000	0.005	0.098	0.097
AIC	26401.9	26393.7	26239.9	26246.1
BIC	26412.7	26410.0	26315.9	26343.8
Log.Lik.	−13198.942	−13193.843	−13105.946	−13105.039
F		10.217	16.259	12.291
RMSE	602.05	600.23	569.78	569.47
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001

Answer: In the anova we can see that model 2 is better than model 1, model 3 is better than model 2, but model 4 is not better than model 3,therefore model 3 is the best model.

When you compare the models using modelsummary function Model 3 is the best predictor or fitted, because it has the highest R2 and the lowest AIC. The brackets is the standard error.

In conclusion the optimal model (model 3) is the best predictor compare to the other models.

Exercise 4

Question: Center and scale all the continuous variables, and create a forest plot for the optimal model.

# Scaling or standarizing or centering
natality.ctr <- natality |>
  mutate_if(is.numeric, scale) |>
  mutate_if(is.numeric, ~ as.numeric(.x / 2))

# Fitting the model
M.OPT.ctr <- lm(
  DBWT ~ DMAR + MRACEHISP + BMI + 
    risks + PRIORLIVE + CIG_REC + 
    PRIORDEAD, 
  data = natality.ctr
  )

# creating a forest plot
M.OPT.ctr %>% 
  model_parameters() %>% 
  plot()

Exercise 5

Question: How do the effect estimate of smoking, the standard error of its estimate, its 95% confidence interval, and its statistical significance change between the un-adjusted, optimal and full models? Please use the centered and scaled models to answer this question.

M.FULL |> tidy(conf.int = TRUE)

## # A tibble: 17 × 7
##    term              estimate std.error statistic   p.value conf.low conf.high
##    <chr>                <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
##  1 (Intercept)       3162.       107.    29.5     9.73e-155  2952.     3372.  
##  2 CIG_RECYes        -157.        56.3   -2.78    5.49e-  3  -267.      -46.1 
##  3 risksYes          -153.        32.1   -4.77    2.00e-  6  -216.      -90.0 
##  4 MAGER               -3.84       3.09  -1.24    2.14e-  1    -9.89      2.22
##  5 MRACEHISPNH Black -290.        41.8   -6.94    5.70e- 12  -372.     -208.  
##  6 MRACEHISPNH Other -121.        53.0   -2.28    2.27e-  2  -225.      -16.9 
##  7 MRACEHISPHispanic  -72.5       39.1   -1.85    6.41e-  2  -149.        4.26
##  8 DMARUnmarried     -190.        35.1   -5.41    7.24e-  8  -259.     -121.  
##  9 MEDUCHS              8.45      49.9    0.169   8.66e-  1   -89.4     106.  
## 10 MEDUCSome college   -0.240     50.8   -0.00472 9.96e-  1  -100.       99.5 
## 11 MEDUCUni            31.6       56.9    0.556   5.79e-  1   -80.0     143.  
## 12 PRIORLIVE1         140.        34.7    4.05    5.46e-  5    72.4     209.  
## 13 PRIORLIVE2         123.        43.9    2.81    4.98e-  3    37.3     209.  
## 14 PRIORLIVE3         161.        63.1    2.55    1.07e-  2    37.4     285.  
## 15 PRIORLIVE4+        163.        67.6    2.41    1.63e-  2    30.0     295.  
## 16 PRIORDEAD1+        265.       154.     1.72    8.53e-  2   -36.9     568.  
## 17 BMI                 10.5        2.28   4.61    4.36e-  6     6.03     15.0

Answer: The full model has the lowest effect estimated compare to the other models. The the standard error is similar in all models. The 95% confidence interval for smoking changes between models, the Full model has the narrowest CI. The p value is significant in all models, but the un-adjusted model has the smallest p value.

Exercise 6

Question: Does the association between cigarette smoking during pregnancy and birth weight depend on mother’s education? Modify the FULL model appropriately to be able to answer this question. Use anova to test whether the model with the interaction is significantly better than the one without it. Regardless of statistical significance, create a dumbell plot illustrating the smoking effect at each level of mother’s education and estimate the cigarette smoking effect at each level of mother’s education. Hint: When creating the dataset for plotting, include the value zero for each predictor not included in the interaction (the reference level for all categorical variables). Picking the value zero in a centered predictor is like choosing the mean of that variable, which is a reasonable choice.

M.FULL.ctr <- lm(formula = DBWT ~ ., data = natality.ctr)

M.FULL_int.ctr <- lm(formula = DBWT ~ . + CIG_REC * MEDUC, data = natality.ctr)

newdata <- data.frame(
  CIG_REC = c("No",    "Yes",   "No", "Yes", "No",           "Yes",          "No",  "Yes"),  
  MEDUC   = c("lower", "lower", "HS", "HS",  "Some college", "Some college", "Uni", "Uni") 
  ) |> 
  mutate(
    risks = "No", 
    MAGER = 0, 
    MRACEHISP = "NH White", 
    DMAR = "Married", 
    PRIORLIVE = "0", 
    PRIORDEAD = "0", 
    BMI = 0
    )

newdata |> 
  mutate(
    DBWT = predict(
      M.FULL_int.ctr, 
      newdata
      )
  ) |> select(CIG_REC, MEDUC, DBWT)

##   CIG_REC        MEDUC        DBWT
## 1      No        lower  0.06227469
## 2     Yes        lower  0.04447809
## 3      No           HS  0.08854779
## 4     Yes           HS -0.05014673
## 5      No Some college  0.08777361
## 6     Yes Some college -0.13236808
## 7      No          Uni  0.10674030
## 8     Yes          Uni  0.02404656

# Create your dumbells

tribble(
  ~Education, ~No_Smoking, ~Smoking, 
  "Lower",    0.06227469,  0.04447809, 
  "HS",       0.08854779,  -0.05014673, 
  "Some college",  0.08777361,   -0.13236808, 
  "Uni",        0.10674030,       0.02404656    
  ) |> ggplot() +
  ylab("Birth weight (centred , scaled)") +
  xlab("") +
  geom_segment(
    aes(
      x = "No Smoking", y = No_Smoking, 
      xend = "Smoking", yend = Smoking, 
      color = Education
      ),
    linewidth = 2
    ) + 
  geom_point(
    aes(x = "No Smoking", y = No_Smoking, color = Education), 
    size = 6
    ) + 
  geom_point(
    aes(x = "Smoking", y = Smoking, color = Education), 
    size = 6
    )

anova(M.FULL.ctr, M.FULL_int.ctr)

## Analysis of Variance Table
## 
## Model 1: DBWT ~ CIG_REC + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE + 
##     PRIORDEAD + BMI
## Model 2: DBWT ~ CIG_REC + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE + 
##     PRIORDEAD + BMI + CIG_REC * MEDUC
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   1671 377.34                           
## 2   1668 376.70  3   0.64613 0.9537 0.4138

Answer: The association between cigarette smoking during pregnancy and birth weight depend on mother’s education, the slopes for each education group is different, therefore there is interaction.

In the anova test we can observe that the model with the interaction is not significantly better p >0.05 than the one without it.

Exercise 7

Question: Does the association between mother’s BMI and birth-weight differ between mothers of different race/ethnicity? Modify the OPTIMAL model appropriately to be able to answer this question, using the dataset with the centered variables. Use anova to test whether the model with the interaction is significantly better than the one without it. Regardless of statistical significance, estimate the BMI effect at each level of mother’s race/ethnicity and plot the BMI effect at each level of mother’s race/ethnicity using a dumbell plot.

# Full model with interaction 
M.OPT_INTR.ctr <- lm(
   DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + PRIORDEAD + BMI:MRACEHISP, 
  data = natality.ctr
  )


newdata_opt <- data.frame(
  BMI = c(6.942179e-17, 2,   6.942179e-17, 2,  6.942179e-17, 2,   6.942179e-17,    2),  
  MRACEHISP   = c("NH White", "NH White", "NH Black", "NH Black",  "NH Other", "NH Other", "Hispanic", "Hispanic") 
  ) |> 
  mutate(
    risks = "No", 
    MAGER = 0, 
    DMAR = "Married", 
    PRIORLIVE = "0", 
    CIG_REC = "No",
    PRIORDEAD = "0"
    )

newdata_opt  |> 
  mutate(
    DBWT = predict(
      M.OPT_INTR.ctr, 
      newdata_opt
      )
  ) |> select(BMI, MRACEHISP, DBWT)

##            BMI MRACEHISP         DBWT
## 1 6.942179e-17  NH White  0.102843774
## 2 2.000000e+00  NH White  0.342827274
## 3 6.942179e-17  NH Black -0.136664894
## 4 2.000000e+00  NH Black  0.002678953
## 5 6.942179e-17  NH Other  0.010786473
## 6 2.000000e+00  NH Other  0.491499237
## 7 6.942179e-17  Hispanic  0.042158252
## 8 2.000000e+00  Hispanic  0.190671430

# Create your dumbells

tribble(
  ~Race, ~mean_bmi, ~bmi_sd, 
  "NH White",    0.102843774,  0.342827274, 
  "NH Black",       -0.136664894,  0.002678953, 
  "NH Other",  0.010786473,   0.491499237, 
  "Hispanic",        0.042158252,      0.190671430              
  ) |> ggplot() +
  ylab("Birth weight (centred , scaled)") +
  xlab("") +
  geom_segment(
    aes(
      x = "mean(BMI)", y = mean_bmi, 
      xend = "mean(BMI) + sd (BMI)", yend = bmi_sd, 
      color = Race
      ),
    linewidth = 2
    ) + 
  geom_point(
    aes(x = "mean(BMI)", y = mean_bmi, color = Race), 
    size = 6
    ) + 
  geom_point(
    aes(x = "mean(BMI) + sd (BMI)", y = bmi_sd, color = Race), 
    size = 6
    )

anova(M.OPT.ctr, M.OPT_INTR.ctr)

## Analysis of Variance Table
## 
## Model 1: DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + 
##     PRIORDEAD
## Model 2: DBWT ~ DMAR + MRACEHISP + BMI + risks + PRIORLIVE + CIG_REC + 
##     PRIORDEAD + BMI:MRACEHISP
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   1675 377.75                           
## 2   1672 377.04  3   0.71163 1.0519 0.3685

Answer: The association between mother’s BMI and birth-weight differ between mothers of different race/ethnicity, because the slopes of different groups are different there is interaction. In the anova test we can observe that the model with the interaction is not significantly better p >0.05 than the one without it.

Exercise 8

Question: Does the association between mother’s BMI and birth-weight depend on mother’s age? Modify the FULL model appropriately to be able to answer this question, using the dataset with the centered variables. Use anova to test whether the model with the interaction is significantly better than the one without it. Regardless of statistical significance, estimate the BMI effect for mothers age 20, 30, and 40 years and make a plot illustrating the BMI effect at each of these ages.

# Full model without interaction 
M.FULL.ctr <- lm(formula = DBWT ~ ., data = natality.ctr)
M.FULL_age_int.ctr <- lm( DBWT ~ . + BMI:MAGER, data = natality.ctr)

newdata_full <- data.frame(
  BMI = c(6.942179e-17, 2,   6.942179e-17, 2,  6.942179e-17, 2),  
  MAGER   = c(20, 20, 30, 30,  40, 40) 
  ) |> 
  mutate(
    risks = "No", 
    MEDUC = "lower", 
    MRACEHISP = "NH White", 
    DMAR = "Married", 
    PRIORLIVE = "0", 
    PRIORDEAD = "0", 
    CIG_REC = "No"
    )

newdata_full |> 
  mutate(
    DBWT = predict(
      M.FULL_age_int.ctr, 
      newdata_full
      )
  ) |> select(BMI, MAGER, DBWT)

##            BMI MAGER       DBWT
## 1 6.942179e-17    20 -0.6575681
## 2 2.000000e+00    20 -1.4076355
## 3 6.942179e-17    30 -1.0279007
## 4 2.000000e+00    30 -2.2629360
## 5 6.942179e-17    40 -1.3982333
## 6 2.000000e+00    40 -3.1182366

# Create your dumbells

tribble(
  ~Age,    ~mean_bmi,     ~bmi_sd, 
  "20",    -0.6575681,  -1.4076355, 
  "30",    -1.0279007,  -2.2629360, 
  "40",   -1.3982333,   -3.1182366              
  ) |> ggplot() +
  ylab("Birth weight (centred , scaled)") +
  xlab("") +
  geom_segment(
    aes(
      x = "mean(BMI)", y = mean_bmi, 
      xend = "mean(BMI) + sd (BMI)", yend = bmi_sd, 
      color = Age
      ),
    linewidth = 2
    ) + 
  geom_point(
    aes(x = "mean(BMI)", y = mean_bmi, color = Age), 
    size = 6
    ) + 
  geom_point(
    aes(x = "mean(BMI) + sd (BMI)", y = bmi_sd, color = Age), 
    size = 6
    )

anova(M.FULL.ctr, M.FULL_age_int.ctr)

## Analysis of Variance Table
## 
## Model 1: DBWT ~ CIG_REC + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE + 
##     PRIORDEAD + BMI
## Model 2: DBWT ~ CIG_REC + risks + MAGER + MRACEHISP + DMAR + MEDUC + PRIORLIVE + 
##     PRIORDEAD + BMI + BMI:MAGER
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1   1671 377.34                           
## 2   1670 377.28  1  0.060384 0.2673 0.6052

Answer: The association between mother’s BMI and birth-weight seems to depend on mother’s age, the slopes are different between each group, therefore there is interaction.