Tabulating and Plotting Regression
Results
Single regression model table
analytic_set <- readxl::read_xlsx(paste0(alt_path, "analytic_set.xlsx"))
analytic_set %>%
lm(BPXSY1 ~ log2(LBXNFOA) + RIAGENDR + age + as.factor(RIDRETH3) + as.factor(edu_cat) + inc_ratio, data = .) %>%
gtsummary::tbl_regression()
| Characteristic |
Beta |
95% CI |
p-value |
| log2(LBXNFOA) |
1.1 |
0.10, 2.0 |
0.030 |
| RIAGENDR |
|
|
|
| Female |
— |
— |
|
| Male |
3.7 |
1.8, 5.5 |
<0.001 |
| age |
0.45 |
0.38, 0.51 |
<0.001 |
| as.factor(RIDRETH3) |
|
|
|
| Mexican American |
— |
— |
|
| Non-Hispanic Asian |
-2.1 |
-5.5, 1.4 |
0.2 |
| Non-Hispanic Black |
3.3 |
0.30, 6.4 |
0.031 |
| Non-Hispanic White |
-4.8 |
-7.6, -1.9 |
0.001 |
| Other Hispanic |
-4.7 |
-8.4, -0.90 |
0.015 |
| Other Race - Including Multi-Racial |
0.01 |
-4.5, 4.5 |
>0.9 |
| as.factor(edu_cat) |
|
|
|
| 0 |
— |
— |
|
| 1 |
-0.76 |
-3.3, 1.8 |
0.6 |
| 2 |
-3.6 |
-6.9, -0.25 |
0.035 |
| inc_ratio |
-0.20 |
-0.83, 0.44 |
0.5 |
Warning: We try not to do this, right?
Main results table
PFASa | estimateb | 95% CI | p-value |
|---|
n-PFOA | 1.05 | (0.1, 2) | 0.03 |
branched-PFOA | 0.65 | (-1.65, 2.95) | 0.58 |
n-PFOS | -0.19 | (-1, 0.62) | 0.64 |
PFHxS | 0.66 | (-0.13, 1.45) | 0.10 |
PFNA | 0.41 | (-0.41, 1.23) | 0.32 |
aPFAS measured in plasma |
bChange in systolic blood pressure in mmHg per doubled PFAS. Adjusted for sex, age, self-reported ethnicity, education category, and income-to-poverty ratio. |
Forest plot
clean_table %>% ggplot() +
geom_point(aes(x = PFAS, y = estimate)) +
geom_errorbar(aes(x = PFAS, ymin = LB, ymax = UB)) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(x = "Plasma PFAS", y = "Increase in systolic BP, per doubled PFAS (mmHg)") +
coord_flip() + theme_bw()
Stratum-Specific Estimates (with
Interactions)
What if we believe that the effects differ by ethnicity and we fit a
model with interactions?
nfoa_intx_res <- analytic_set %>%
mutate(ETHNIC_CAT = case_when(grepl("Other", RIDRETH3) ~ "Other", T ~ RIDRETH3)) %>%
lm(BPXSY1 ~ log2(LBXNFOA)*as.factor(ETHNIC_CAT) + RIAGENDR + age + as.factor(edu_cat) + inc_ratio, data = .) %>% broom::tidy()
nfoa_intx_res
Plot interaction terms
We could plot the “effects” of the interaction terms:
nfoa_intx_res %>%
filter(grepl(":", term)) %>%
mutate(LB = estimate - 1.96*std.error, UB = estimate + 1.96*std.error) %>%
ggplot() +
# geom_point(aes(x = term, y = exp(estimate))) + geom_errorbar(aes(x = term, ymin = exp(LB), ymax = exp(UB))) +
# geom_hline(yintercept = 1, linetype = "dashed") + scale_y_log10() +
geom_point(aes(x = term, y = estimate)) + geom_errorbar(aes(x = term, ymin = LB, ymax = UB)) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(x = "Ethnic Group", y = "DIFFERENCE in effect on SBP per doubled n-PFOA",
title = "Interaction between ethnicity and n-PFOA effect,",
subtitle = "compared to Mexican Americans") +
coord_flip() + theme_bw()
But, it is probably more intuitive to plot the stratum-specific ORs.
That requires us to add coefficients (e.g. beta_exposure +
beta_interaction_term) and compute the appropriate standard errors for
those compound coefficients.
Stratum-specific estimates
We need to save the original (un-tidied) regression model and then
call the lincom (linear combination) command from the
biostat3 package.
nfoa_intx_raw <- analytic_set %>%
mutate(ETHNIC_CAT = case_when(grepl("Other", RIDRETH3) ~ "Other", T ~ RIDRETH3)) %>%
lm(BPXSY1 ~ log2(LBXNFOA)*as.factor(ETHNIC_CAT) + RIAGENDR + age + as.factor(edu_cat) + inc_ratio, data = .)
Let’s verify the output by comparing the main coefficient and
confidence interval with the output from lincom. This corresponds to the
effect of doubling n-PFOA on SBP in Mexican Americans (the reference
group):
# BETA =
summary(nfoa_intx_raw)$coefficients[2,1]
## [1] 5.62826
#95% CI LB =
summary(nfoa_intx_raw)$coefficients[2,1] - 1.96*summary(nfoa_intx_raw)$coefficients[2,2]
## [1] 3.009435
#95% CI UB =
summary(nfoa_intx_raw)$coefficients[2,1] + 1.96*summary(nfoa_intx_raw)$coefficients[2,2]
## [1] 8.247085
# LINCOM RESULTS:
effect_mexican <- biostat3::lincom(nfoa_intx_raw, c("log2(LBXNFOA)")) %>% as_tibble() %>% mutate(ethnicity = "Mexican American")
## Loading required namespace: car
effect_mexican
Now lets compute the effects for the other ethnic groups:
# STRATUM-SPECIFIC EFFECTS FOR Non-Hispanic Asian:
effect_asian <- biostat3::lincom(nfoa_intx_raw, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Non-Hispanic Asian")) %>% as_tibble() %>%
mutate(ethnicity = "Non-Hispanic Asian")
# STRATUM-SPECIFIC EFFECTS FOR Non-Hispanic Black:
effect_black <- biostat3::lincom(nfoa_intx_raw, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Non-Hispanic Black")) %>% as_tibble() %>%
mutate(ethnicity = "Non-Hispanic Black")
# STRATUM-SPECIFIC EFFECTS FOR Non-Hispanic White:
effect_white <- biostat3::lincom(nfoa_intx_raw, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Non-Hispanic White")) %>% as_tibble() %>%
mutate(ethnicity = "Non-Hispanic White")
# STRATUM-SPECIFIC EFFECTS FOR Other ethnicity:
effect_other <- biostat3::lincom(nfoa_intx_raw, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Other")) %>% as_tibble() %>%
mutate(ethnicity = "Other / Mixed-Ethnicity")
Plot stratum-specific estimates
Let’s merge these effects and plot:
stratum_effects <-
effect_mexican %>%
add_row(effect_asian) %>%
add_row(effect_black) %>%
add_row(effect_white) %>%
add_row(effect_other)
stratum_effects %>% ggplot() +
geom_point(aes(x = ethnicity, y = Estimate)) +
geom_errorbar(aes(x = ethnicity, ymin = `2.5 %`, ymax = `97.5 %`)) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(x = "Ethnic Group", y = "Increase in SBP (mmHg) per doubled n-PFOA",
title = "Ethnicity-specific effects of n-PFOA on SBP") +
coord_flip() + theme_bw()
Confirm with relevel-ing
We can also produce each stratum-specific effect by using “relevel”,
so that the desired group is the baseline. Then, we can just read off
the main coefficient. It should give nearly identical results as the
lincom approach:
For Non-Hispanic Asian:
For Non-Hispanic Black:
For Non-Hispanic White:
For Other / Mixed-Ethnicty:
For Ratio Measures
If we want to do this for ratio measures, its the same steps, we just
have to remember to exponentiate the final results.
Let’s create a HI_BP indicator and compute stratum specific ORs for
HI_BP:
nfoa_intx_OR <- analytic_set %>%
mutate(HI_BP = case_when(BPXSY1 >= 140 ~ 1, T ~ 0)) %>%
mutate(ETHNIC_CAT = case_when(grepl("Other", RIDRETH3) ~ "Other", T ~ RIDRETH3)) %>%
glm(HI_BP ~ log2(LBXNFOA)*as.factor(ETHNIC_CAT) + RIAGENDR + age + as.factor(edu_cat) + inc_ratio, data = ., family = binomial(link = "logit"))
OR_res <-
mutate(as_tibble(biostat3::lincom(nfoa_intx_OR, c("log2(LBXNFOA)"))), ethnicity = "Mexican American") %>%
add_row(mutate(as_tibble(biostat3::lincom(nfoa_intx_OR, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Non-Hispanic Asian"))), ethnicity = "Non-Hispanic Asian")) %>%
add_row(mutate(as_tibble(biostat3::lincom(nfoa_intx_OR, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Non-Hispanic Black"))), ethnicity = "Non-Hispanic Black")) %>%
add_row(mutate(as_tibble(biostat3::lincom(nfoa_intx_OR, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Non-Hispanic White"))), ethnicity = "Non-Hispanic White")) %>%
add_row(mutate(as_tibble(biostat3::lincom(nfoa_intx_OR, c("log2(LBXNFOA) + log2(LBXNFOA):as.factor(ETHNIC_CAT)Other"))), ethnicity = "Other / Mixed-Ethnicity"))
OR_res
Then, let’s exponentiate the resultant coefficients and confidence
limits and plot the results:
OR_res %>%
mutate(OR = exp(Estimate), LB = exp(`2.5 %`), UB = exp(`97.5 %`)) %>%
ggplot() +
geom_point(aes(x = ethnicity, y = OR)) +
geom_errorbar(aes(x = ethnicity, ymin = LB, ymax = UB)) +
geom_hline(yintercept = 1, linetype = "dashed") +
labs(x = "Ethnic Group", y = "Odds Ratio of High SBP (> 140 mmHg) per doubled n-PFOA",
title = "Ethnicity-specific effects") +
coord_flip() + theme_bw()
