How to analyze confounding and effect measure modification

please see the detail here.

Practice in assessing confounding and modification

  • Load package and data set
## Importing required packages
library(tidyverse)   
library(skimr)      
## Read in clean data set
df_dia <- read_csv("C:\\Users\\hed2\\Downloads\\code-storage\\code\\diabetes-data.csv")
  • Skim the dataset
## Skim the dataset
skim(df_dia)
Data summary
Name df_dia
Number of rows 16458
Number of columns 9
_______________________
Column type frequency:
character 1
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
readmitted 0 1 2 3 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
race 0 1 0.27 0.44 0 0 0 1 1 ▇▁▁▁▃
gender 0 1 0.48 0.50 0 0 0 1 1 ▇▁▁▁▇
age 0 1 0.56 0.50 0 0 1 1 1 ▆▁▁▁▇
hospital_stay 0 1 4.82 3.10 1 2 4 7 14 ▇▆▂▂▁
HbA1c 0 1 0.70 0.46 0 0 1 1 1 ▃▁▁▁▇
diabetesMed 0 1 0.84 0.37 0 1 1 1 1 ▂▁▁▁▇
admit_source 0 1 0.34 0.47 0 0 0 1 1 ▇▁▁▁▅
patient_visits 0 1 0.95 2.08 0 0 0 1 38 ▇▁▁▁▁
  • Check the structure of the data
## Explore data set
glimpse(df_dia)
## Rows: 16,458
## Columns: 9
## $ race           <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0…
## $ gender         <dbl> 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0…
## $ age            <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0…
## $ hospital_stay  <dbl> 4, 3, 12, 8, 11, 7, 2, 7, 13, 6, 6, 9, 7, 5, 2, 4, 8, 1…
## $ HbA1c          <dbl> 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1…
## $ diabetesMed    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1…
## $ readmitted     <chr> "NO", "NO", "NO", "NO", ">30", ">30", "NO", ">30", "NO"…
## $ admit_source   <dbl> 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0…
## $ patient_visits <dbl> 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0…
  • Create (three-way) interaction terms
## Create interaction terms
df_dia <- df_dia %>% 
  mutate(
    padmit = patient_visits*admit_source,
    hadmit = HbA1c*admit_source,
    hpatient = HbA1c*patient_visits,
    hpadmit = HbA1c*patient_visits*admit_source)%>% 
  mutate_at(vars(diabetesMed, HbA1c, admit_source, hadmit), as.factor)
## Check the structure of the data
glimpse(df_dia)
## Rows: 16,458
## Columns: 13
## $ race           <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0…
## $ gender         <dbl> 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0…
## $ age            <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0…
## $ hospital_stay  <dbl> 4, 3, 12, 8, 11, 7, 2, 7, 13, 6, 6, 9, 7, 5, 2, 4, 8, 1…
## $ HbA1c          <fct> 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1…
## $ diabetesMed    <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1…
## $ readmitted     <chr> "NO", "NO", "NO", "NO", ">30", ">30", "NO", ">30", "NO"…
## $ admit_source   <fct> 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0…
## $ patient_visits <dbl> 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ padmit         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hadmit         <fct> 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0…
## $ hpatient       <dbl> 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hpadmit        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…

Explore whether it is a confounder

if the covariate is not sig, it must be not confounder. in other words, the regresion model can adjust confoudners.

## patient_visits is   confounder  
model7 <- glm(diabetesMed ~ HbA1c + admit_source + patient_visits ,
              df_dia, family = "binomial")

## Print the coefficients of the model
coef(summary(model7))
##                  Estimate Std. Error   z value      Pr(>|z|)
## (Intercept)    1.03209664 0.03730247 27.668317 1.680415e-168
## HbA1c1         0.81957667 0.04330696 18.924825  7.122506e-80
## admit_source1  0.10412221 0.04560026  2.283369  2.240866e-02
## patient_visits 0.03830031 0.01198813  3.194852  1.399024e-03

patient_visits is not confounder but independent predictor, because it is sig but there is no change in the coefficient of hba1c.

Before and after adjusting

##  
model8 <- glm(diabetesMed ~ HbA1c + admit_source   ,
              df_dia, family = "binomial")

## Print the coefficients of the model
coef(summary(model8))
##                Estimate Std. Error   z value      Pr(>|z|)
## (Intercept)   1.0649027 0.03595553 29.617213 8.970841e-193
## HbA1c1        0.8226772 0.04328767 19.004884  1.553891e-80
## admit_source1 0.1018356 0.04558166  2.234135  2.547421e-02

examine the primary association of interest at different levels of a potential confounding factor.

curve((1.03209664  +0.81957667*0      +0.10412221       +0.03830031 *x)   , from=1, to=80, n=300, xlab="patient_visits", ylab="log odds", 
             col="red", lwd=2, main="confounding of exposure and patient_visits"  )

curve((1.03209664  +0.81957667*1      +0.10412221       +0.03830031 *x )  , from=1, to=80, n=300, add=T, xlab="patient_visits", ylab="log odds", 
             col="blue", lwd=2, main="confounding of exposure and patient_visits"  )

- Plot confounding in odds

curve( exp(1.03209664  +0.81957667*0      +0.10412221       +0.03830031 *x)   , from=1, to=80, n=300, xlab="patient_visits", ylab="odds", 
             col="red", lwd=2, main="confounding of exposure and patient_visits"  )

curve( exp(1.03209664  +0.81957667*1      +0.10412221       +0.03830031 *x )  , from=1, to=80, n=300, add=T, xlab="patient_visits", ylab="odds", 
             col="blue", lwd=2, main="confounding of exposure and patient_visits"  )

- Plot confounding in odds ratio

curve( exp(1.03209664  +0.81957667*0      +0.10412221       +0.03830031 *x) / exp(1.03209664  +0.81957667*1      +0.10412221       +0.03830031 *x )  , from=1, to=80, n=300,   xlab="patient_visits", ylab="OR", 
             col="blue", lwd=2, main="confounding of exposure and patient_visits"  )

Explore whether it is a effect measure modification

although the covariate is not sig, it may be emm as long as the interaction is sig. This occurs when an association between an exposure X and an outcome Y can depend on another variable Z, and Z does not have to be directly related to X and/or Y.

  • Step 1: Full model
## Step 1: Full model
full_model <- glm(as.factor(diabetesMed) ~ HbA1c + admit_source + patient_visits 
                  + padmit + hadmit + hpatient + hpadmit, 
                  df_dia , family = "binomial")
## Print the summary of the model
coef(summary(full_model)) 
##                   Estimate Std. Error    z value      Pr(>|z|)
## (Intercept)     0.99629700 0.04501059 22.1347222 1.464124e-108
## HbA1c1          0.89285523 0.05843413 15.2796880  1.044395e-52
## admit_source1  -0.01041378 0.08018456 -0.1298726  8.966672e-01
## patient_visits  0.11539004 0.02872722  4.0167496  5.900637e-05
## padmit          0.06891114 0.05741569  1.2002143  2.300561e-01
## hadmit1         0.09181157 0.10348724  0.8871777  3.749833e-01
## hpatient       -0.13333094 0.03217501 -4.1439282  3.414067e-05
## hpadmit         0.01171417 0.06624062  0.1768427  8.596320e-01
  • Step 2: Remove no sig variables
## Step 3:  
model5 <- glm(diabetesMed ~ HbA1c + patient_visits + hpatient,
              df_dia, family = "binomial")

## Print the coefficients of the model
coef(summary(model5))
##                  Estimate Std. Error   z value      Pr(>|z|)
## (Intercept)     0.9940305 0.03728822 26.658030 1.444410e-156
## HbA1c1          0.9267593 0.04817753 19.236338  1.837393e-82
## patient_visits  0.1340291 0.02505776  5.348807  8.853588e-08
## hpatient       -0.1322058 0.02815558 -4.695546  2.658952e-06

equation with exposure, outcome and interaction

# 
# y=0.9940305 +0.9267593*HbA1c1           +0.1340291*patient_visits  - 0.1322058* HbA1c1*patient_visits
# y0=0.9940305 +0.9267593*0           +0.1340291*patient_visits  - 0.1322058* 0*patient_visits
# y1=0.9940305 +0.9267593*1           +0.1340291*patient_visits  - 0.1322058* 1*patient_visits
  • Plot interaction of exposure and patient_visits in log odds
curve( (0.9940305 +0.9267593*0           +0.1340291*x  - 0.1322058* 0*x), from=1, to=20, n=300, xlab="patient_visits", ylab="log odds", 
             col="red", lwd=2, main="interaction of exposure and patient_visits"  )

curve( (0.9940305 +0.9267593*1           +0.1340291*x  - 0.1322058* 1*x), from=1, to=20, n=300, add=TRUE, xlab="patient_visits", ylab="log odds", 
             col="blue", lwd=2, main="interaction of exposure and patient_visits"  )

  • Plot interaction of exposure and patient_visits in odds
curve( exp(0.9940305 +0.9267593*0           +0.1340291*x  - 0.1322058* 0*x), from=1, to=20, n=300, xlab="patient_visits", ylab="odds", 
             col="red", lwd=2, main="interaction of exposure and patient_visits"  )

curve( exp(0.9940305 +0.9267593*1           +0.1340291*x  - 0.1322058* 1*x), from=1, to=20, n=300, add=TRUE, xlab="patient_visits", ylab="odds", 
             col="blue", lwd=2, main="interaction of exposure and patient_visits"  )

  • Plot interaction of exposure and patient_visits in odds ratio
curve(  exp(0.9940305 +0.9267593*0           +0.1340291*x  - 0.1322058* 0*x)/ exp(0.9940305 +0.9267593*1           +0.1340291*x  - 0.1322058* 1*x), from=1, to=20, n=300,  xlab="patient_visits", ylab="OR", 
             col="blue", lwd=2, main="interaction of exposure and patient_visits"  )

This suggests that patient visits modify the outcome-exposure relationship, while others do not modify the relationship between HbA1c levels (exposure) and diabetic medication change (outcome). a covariate cannot confound and modify the exposure-outcome relationship simultaneously.

  • Get the confidence interval and OR
## Get the confidence interval
CI <- confint(model5)
## Waiting for profiling to be done...
CI
##                     2.5 %      97.5 %
## (Intercept)     0.9212491  1.06742675
## HbA1c1          0.8322821  1.02114631
## patient_visits  0.0864433  0.18463824
## hpatient       -0.1884240 -0.07800111
## Find the odds ratio
Odds_ratios <- exp(model5$coefficients)
Odds_ratios
##    (Intercept)         HbA1c1 patient_visits       hpatient 
##      2.7021034      2.5263088      1.1434261      0.8761607