theme for nice plotting [OPTIONAL]

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(survey)
library(broom)
library(gt)
library(ggplot2)
library(gtsummary)
library(flextable)
## 
## Attaching package: 'flextable'
## The following objects are masked from 'package:gtsummary':
## 
##     as_flextable, continuous_summary
# theme for nice plotting
theme_nice <- theme_classic()+
                theme(
                  axis.line.y.left = element_line(colour = "black"),
                  axis.line.y.right = element_line(colour = "black"),
                  axis.line.x.bottom = element_line(colour = "black"),
                  axis.line.x.top = element_line(colour = "black"),
                  axis.text.y = element_text(colour = "black", size = 12),
                  axis.text.x = element_text(color = "black", size = 12),
                  axis.ticks = element_line(color = "black")) +
                theme(
                  axis.ticks.length = unit(-0.25, "cm"), 
                  axis.text.x = element_text(margin=unit(c(0.5,0.5,0.5,0.5), "cm")), 
                  axis.text.y = element_text(margin=unit(c(0.5,0.5,0.5,0.5), "cm")))
gc()
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 2229282 119.1    4618982 246.7         NA  2667662 142.5
## Vcells 3741995  28.6    8388608  64.0      16384  6137534  46.9

https://cran.r-project.org/web/packages/sjPlot/vignettes/plot_interactions.html

Load Libraries

Load Data

data <- read_dta("3.sample_long.dta")

data$bsisolation<- as.numeric(data$bsisolation)

Age Distribution

result_age<-tabyl(data$age1)
## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## ℹ The deprecated feature was likely used in the janitor package.
##   Please report the issue at <]8;;https://github.com/sfirke/janitor/issueshttps://github.com/sfirke/janitor/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
result_age
##  data$age1    n      percent
##         52 1934 2.119661e-02
##         53 3269 3.582819e-02
##         54 2522 2.764108e-02
##         55 2821 3.091812e-02
##         56 2934 3.215660e-02
##         57 3435 3.764755e-02
##         58 4129 4.525378e-02
##         59 4733 5.187361e-02
##         60 4506 4.938569e-02
##         61 4495 4.926513e-02
##         62 4240 4.647034e-02
##         63 4285 4.696354e-02
##         64 4111 4.505650e-02
##         65 4065 4.455234e-02
##         66 3470 3.803115e-02
##         67 3367 3.690227e-02
##         68 3458 3.789963e-02
##         69 3121 3.420611e-02
##         70 2466 2.702732e-02
##         71 2260 2.476957e-02
##         72 2584 2.832060e-02
##         73 2133 2.337765e-02
##         74 2065 2.263237e-02
##         75 1861 2.039653e-02
##         76 1694 1.856621e-02
##         77 1706 1.869773e-02
##         78 1629 1.785382e-02
##         79 1449 1.588102e-02
##         80 1205 1.320678e-02
##         81 1079 1.182582e-02
##         82  894 9.798227e-03
##         83  644 7.058230e-03
##         84  551 6.038952e-03
##         85  451 4.942953e-03
##         86  404 4.427834e-03
##         87  391 4.285354e-03
##         88  289 3.167436e-03
##         89  159 1.742638e-03
##         90  139 1.523438e-03
##         91   90 9.863987e-04
##         92   79 8.658388e-04
##         93   40 4.383994e-04
##         94   35 3.835995e-04
##         95   23 2.520797e-04
##         96    9 9.863987e-05
##         97   11 1.205598e-04
##         98    3 3.287996e-05
##        100    3 3.287996e-05

Race Distribution

table(data$race)
## 
##     1     2     3 
## 73943 10645  6653

Isolation Distribution

table(data$bsisolation1)
## 
##     0     1     2     3     4     5 
##  2599 16799 33275 27963  9443  1162

Foreign Distribution

table(data$foreign)
## 
##     0     1 
## 83820  7421

Exercise Distribution

table(data$exercise)
## 
##     0     1 
## 47421 43820

Demential Distribution - Those at risk of dementia

table(data$dement1)
## 
##     0 
## 91241

#Ever having dementia

table(data$dement_ever)
## 
##     0     1 
## 75845 15396

first_diag_d

table(data$first_diag_d)
## 
##    2    3    4    5    6    7    8    9 
## 1082 1305 1711 1868 2460 2307 2238 2425
tabyl(data$dement_ever, data$first_diag_d, show_na = TRUE)
##  data$dement_ever     n   percent
##                 0 75845 0.8312601
##                 1 15396 0.1687399
table(data$lastwave)
## 
##     1     2     3     4     5     6     7     8     9 
##   312   926  1917  2908  4061  5917  6046  9226 59928
table(data$durwave_d)
## 
##     1     2     3     4     5     6     7     8     9 
##   312  2004  2944  4114  5043  7007  6790  9211 53816
table(data$durwave_d)
## 
##     1     2     3     4     5     6     7     8     9 
##   312  2004  2944  4114  5043  7007  6790  9211 53816
table(data$csmokem)
## 
##     1     2     3 
## 80620  9902   719
data$bsisolation3c<- as.factor(data$bsisolation3c)
#data$female<- as.factor(data$female)

#Recoding Race
data$race<- as.numeric(data$race)
data$race <- car::Recode(data$race,
                           recodes = "1='NH White' ; 2= 'NH Black'; 3='Hispanic'; else=NA")

data$race <- factor(data$race, levels = c("NH White", "NH Black", "Hispanic"))
data$race <- relevel(data$race, ref = "NH White")

#Recoding Education 
data$educ<- as.numeric(data$educ)
data$educ <- car::Recode(data$educ,
                           recodes = "1='LTHS' ; 2= 'HS/GED'; 3='Some college'; 4='College+';else=NA")
data$educ<- as.factor(data$educ)
data$educ <- relevel(data$educ, ref = "LTHS")

Relabel Key variable

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
label(data$female)<-"Sex"
label(data$bsisolation3c)<-"Isolation"
label(data$age)<- "Age"
label(data$educ)<- "Educational Attainment"
label(data$foreign)<- "Born in the U.S"
label(data$proxyb) <- "proxyb"
label(data$race)<- "Race/Ethnicity"

Statistical Analyses (same as Zhang SSM 2021)

Set up the survey design

survey_design <- svydesign(
  id = ~1,  # Assuming a simple random sample without clustering
  strata = ~raestrat,  # Specify the strata variable
  weights = ~r5wtresp,  # Specify the sampling weights variable
  data = data
)

Fit a logistic regression model

model <- svyglm(
  formula = `_Y` ~ bsisolation3c + female + race + age + educ + foreign + proxyb,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation3c + female + race + age + 
##     educ + foreign + proxyb, design = survey_design, family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -11.862351   0.240043 -49.418  < 2e-16 ***
## bsisolation3c2     0.563919   0.058178   9.693  < 2e-16 ***
## bsisolation3c3     0.771131   0.069280  11.131  < 2e-16 ***
## female             0.217901   0.052184   4.176 2.97e-05 ***
## raceNH Black       0.916662   0.065920  13.906  < 2e-16 ***
## raceHispanic       0.515361   0.096989   5.314 1.08e-07 ***
## age                0.103145   0.002927  35.237  < 2e-16 ***
## educCollege+      -0.974471   0.075928 -12.834  < 2e-16 ***
## educHS/GED        -0.556622   0.056454  -9.860  < 2e-16 ***
## educSome college  -0.752622   0.068827 -10.935  < 2e-16 ***
## foreign           -0.087064   0.094462  -0.922    0.357    
## proxyb             2.537125   0.056581  44.841  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8766902)
## 
## Number of Fisher Scoring iterations: 7

Extract odds ratios and confidence intervals

gtsummary::tbl_regression(model, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
Isolation


    1 — —
    2 1.76 1.57, 1.97 <0.001
    3 2.16 1.89, 2.48 <0.001
Sex 1.24 1.12, 1.38 <0.001
Race/Ethnicity


    NH White — —
    NH Black 2.50 2.20, 2.85 <0.001
    Hispanic 1.67 1.38, 2.02 <0.001
Age 1.11 1.10, 1.12 <0.001
Educational Attainment


    LTHS — —
    College+ 0.38 0.33, 0.44 <0.001
    HS/GED 0.57 0.51, 0.64 <0.001
    Some college 0.47 0.41, 0.54 <0.001
Born in the U.S 0.92 0.76, 1.10 0.4
proxyb 12.6 11.3, 14.1 <0.001
1 OR = Odds Ratio, CI = Confidence Interval
gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  2645158 141.3    4618982 246.7         NA  4618982 246.7
## Vcells 27056084 206.5   60275782 459.9      16384 60275782 459.9
set_gtsummary_theme(theme_gtsummary_compact(set_theme = TRUE))
## Setting theme `Compact`
## Setting theme `Compact`
t1 <- tbl_regression(model, exp = TRUE) %>% bold_p() 
gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  2648663 141.5    4618982 246.7         NA  4618982 246.7
## Vcells 27066862 206.6   60346358 460.5      16384 60346358 460.5
tbl_merge_ex1 <-
  tbl_merge( #Table merge function
    tbls = list(t1), # Selecting the tables to be included in the combined table output 
    # More objects can be added here
    tab_spanner = c("**Main Model**") # Naming the models
  ) %>% 
  bold_labels()%>% 
  italicize_levels()%>% 
  as_flex_table() #gtsummary packages can crete objects as gt, flextable or huxtable, and many more. These are useful pacakges to further formatting tables according to presentation and publication needs.

#as_gt() 

tbl_merge_ex1

Main Model

Characteristic

OR1

95% CI1

p-value

Isolation

1

—

—

2

1.76

1.57, 1.97

<0.001

3

2.16

1.89, 2.48

<0.001

Sex

1.24

1.12, 1.38

<0.001

Race/Ethnicity

NH White

—

—

NH Black

2.50

2.20, 2.85

<0.001

Hispanic

1.67

1.38, 2.02

<0.001

Age

1.11

1.10, 1.12

<0.001

Educational Attainment

LTHS

—

—

College+

0.38

0.33, 0.44

<0.001

HS/GED

0.57

0.51, 0.64

<0.001

Some college

0.47

0.41, 0.54

<0.001

Born in the U.S

0.92

0.76, 1.10

0.4

proxyb

12.6

11.3, 14.1

<0.001

1OR = Odds Ratio, CI = Confidence Interval

table_two <- tbl_merge_ex1 %>% 
  colformat_char( #This function is from flextable package
      j = c(1,4), # These are the no. of columns.
      prefix = "(", #Putting parenthesis around 95% confidence interval
      suffix = ")") %>%
    set_table_properties(width = 1, layout = "autofit")

table_two

Main Model

Characteristic

OR1

95% CI1

p-value

(Isolation)

(1)

—

—

(2)

1.76

1.57, 1.97

(<0.001)

(3)

2.16

1.89, 2.48

(<0.001)

(Sex)

1.24

1.12, 1.38

(<0.001)

(Race/Ethnicity)

(NH White)

—

—

(NH Black)

2.50

2.20, 2.85

(<0.001)

(Hispanic)

1.67

1.38, 2.02

(<0.001)

(Age)

1.11

1.10, 1.12

(<0.001)

(Educational Attainment)

(LTHS)

—

—

(College+)

0.38

0.33, 0.44

(<0.001)

(HS/GED)

0.57

0.51, 0.64

(<0.001)

(Some college)

0.47

0.41, 0.54

(<0.001)

(Born in the U.S)

0.92

0.76, 1.10

(0.4)

(proxyb)

12.6

11.3, 14.1

(<0.001)

1OR = Odds Ratio, CI = Confidence Interval

Model 2

model2 <- svyglm(
  formula = `_Y` ~ factor(bsisolation3c, levels = c(2, 1, 3)) + female + race + age + educ + foreign + proxyb,
  design = survey_design,
  family = quasibinomial(link = "logit")
)
model2
## Stratified Independent Sampling design (with replacement)
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Call:  svyglm(formula = `_Y` ~ factor(bsisolation3c, levels = c(2, 1, 
##     3)) + female + race + age + educ + foreign + proxyb, design = survey_design, 
##     family = quasibinomial(link = "logit"))
## 
## Coefficients:
##                                 (Intercept)  
##                                   -11.29843  
## factor(bsisolation3c, levels = c(2, 1, 3))1  
##                                    -0.56392  
## factor(bsisolation3c, levels = c(2, 1, 3))3  
##                                     0.20721  
##                                      female  
##                                     0.21790  
##                                raceNH Black  
##                                     0.91666  
##                                raceHispanic  
##                                     0.51536  
##                                         age  
##                                     0.10314  
##                                educCollege+  
##                                    -0.97447  
##                                  educHS/GED  
##                                    -0.55662  
##                            educSome college  
##                                    -0.75262  
##                                     foreign  
##                                    -0.08706  
##                                      proxyb  
##                                     2.53713  
## 
## Degrees of Freedom: 91240 Total (i.e. Null);  91178 Residual
## Null Deviance:       23530 
## Residual Deviance: 17510     AIC: NA
gtsummary::tbl_regression(model2, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
factor(bsisolation3c, levels = c(2, 1, 3))


    2 — —
    1 0.57 0.51, 0.64 <0.001
    3 1.23 1.10, 1.38 <0.001
Sex 1.24 1.12, 1.38 <0.001
Race/Ethnicity


    NH White — —
    NH Black 2.50 2.20, 2.85 <0.001
    Hispanic 1.67 1.38, 2.02 <0.001
Age 1.11 1.10, 1.12 <0.001
Educational Attainment


    LTHS — —
    College+ 0.38 0.33, 0.44 <0.001
    HS/GED 0.57 0.51, 0.64 <0.001
    Some college 0.47 0.41, 0.54 <0.001
Born in the U.S 0.92 0.76, 1.10 0.4
proxyb 12.6 11.3, 14.1 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

*Mediators

Model 3

model3 <- svyglm(
  formula = `_Y` ~ bsisolation3c + female + race + age + educ + foreign + proxyb +lnincome,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model3)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation3c + female + race + age + 
##     educ + foreign + proxyb + lnincome, design = survey_design, 
##     family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -10.287535   0.285127 -36.081  < 2e-16 ***
## bsisolation3c2     0.533390   0.058428   9.129  < 2e-16 ***
## bsisolation3c3     0.707599   0.069932  10.118  < 2e-16 ***
## female             0.177948   0.052599   3.383 0.000717 ***
## raceNH Black       0.843803   0.066784  12.635  < 2e-16 ***
## raceHispanic       0.413464   0.100043   4.133 3.59e-05 ***
## age                0.102038   0.002913  35.024  < 2e-16 ***
## educCollege+      -0.839985   0.077459 -10.844  < 2e-16 ***
## educHS/GED        -0.507171   0.056983  -8.900  < 2e-16 ***
## educSome college  -0.676100   0.069501  -9.728  < 2e-16 ***
## foreign           -0.118895   0.096560  -1.231 0.218210    
## proxyb             2.541502   0.056871  44.689  < 2e-16 ***
## lnincome          -0.148914   0.015764  -9.447  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8637416)
## 
## Number of Fisher Scoring iterations: 7
gtsummary::tbl_regression(model3, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
Isolation


    1 — —
    2 1.70 1.52, 1.91 <0.001
    3 2.03 1.77, 2.33 <0.001
Sex 1.19 1.08, 1.32 <0.001
Race/Ethnicity


    NH White — —
    NH Black 2.33 2.04, 2.65 <0.001
    Hispanic 1.51 1.24, 1.84 <0.001
Age 1.11 1.10, 1.11 <0.001
Educational Attainment


    LTHS — —
    College+ 0.43 0.37, 0.50 <0.001
    HS/GED 0.60 0.54, 0.67 <0.001
    Some college 0.51 0.44, 0.58 <0.001
Born in the U.S 0.89 0.73, 1.07 0.2
proxyb 12.7 11.4, 14.2 <0.001
lnincome 0.86 0.84, 0.89 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

Model 4

model4 <- svyglm(
  formula = `_Y` ~ bsisolation3c + female + race + age + educ + foreign + proxyb +chronic +dep2c,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model4)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation3c + female + race + age + 
##     educ + foreign + proxyb + chronic + dep2c, design = survey_design, 
##     family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -12.074463   0.239383 -50.440  < 2e-16 ***
## bsisolation3c2     0.502393   0.058390   8.604  < 2e-16 ***
## bsisolation3c3     0.669936   0.069137   9.690  < 2e-16 ***
## female             0.205732   0.052559   3.914 9.07e-05 ***
## raceNH Black       0.872454   0.066448  13.130  < 2e-16 ***
## raceHispanic       0.468673   0.097002   4.832 1.36e-06 ***
## age                0.100843   0.002976  33.881  < 2e-16 ***
## educCollege+      -0.849707   0.076690 -11.080  < 2e-16 ***
## educHS/GED        -0.511945   0.056577  -9.049  < 2e-16 ***
## educSome college  -0.686765   0.069043  -9.947  < 2e-16 ***
## foreign           -0.056380   0.094351  -0.598     0.55    
## proxyb             2.570361   0.056794  45.258  < 2e-16 ***
## chronic            0.181369   0.023894   7.590 3.22e-14 ***
## dep2c              0.545048   0.050108  10.878  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8757591)
## 
## Number of Fisher Scoring iterations: 7
gtsummary::tbl_regression(model4, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
Isolation


    1 — —
    2 1.65 1.47, 1.85 <0.001
    3 1.95 1.71, 2.24 <0.001
Sex 1.23 1.11, 1.36 <0.001
Race/Ethnicity


    NH White — —
    NH Black 2.39 2.10, 2.73 <0.001
    Hispanic 1.60 1.32, 1.93 <0.001
Age 1.11 1.10, 1.11 <0.001
Educational Attainment


    LTHS — —
    College+ 0.43 0.37, 0.50 <0.001
    HS/GED 0.60 0.54, 0.67 <0.001
    Some college 0.50 0.44, 0.58 <0.001
Born in the U.S 0.95 0.79, 1.14 0.6
proxyb 13.1 11.7, 14.6 <0.001
chronic 1.20 1.14, 1.26 <0.001
RECODE of cesdm 1.72 1.56, 1.90 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

Model 5

model5 <- svyglm(
  formula = `_Y` ~ bsisolation3c + female + race + age + educ + foreign + proxyb +lnincome +chronic +dep2c,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model5)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation3c + female + race + age + 
##     educ + foreign + proxyb + lnincome + chronic + dep2c, design = survey_design, 
##     family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -10.664031   0.288311 -36.988  < 2e-16 ***
## bsisolation3c2     0.474906   0.058615   8.102 5.47e-16 ***
## bsisolation3c3     0.614940   0.069792   8.811  < 2e-16 ***
## female             0.170500   0.052910   3.222 0.001271 ** 
## raceNH Black       0.808122   0.067228  12.021  < 2e-16 ***
## raceHispanic       0.380218   0.099633   3.816 0.000136 ***
## age                0.100047   0.002959  33.806  < 2e-16 ***
## educCollege+      -0.733733   0.078050  -9.401  < 2e-16 ***
## educHS/GED        -0.469222   0.057072  -8.222  < 2e-16 ***
## educSome college  -0.619363   0.069739  -8.881  < 2e-16 ***
## foreign           -0.083974   0.096258  -0.872 0.383005    
## proxyb             2.572519   0.057048  45.094  < 2e-16 ***
## lnincome          -0.133846   0.016379  -8.172 3.08e-16 ***
## chronic            0.178664   0.023917   7.470 8.09e-14 ***
## dep2c              0.521572   0.050461  10.336  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8626207)
## 
## Number of Fisher Scoring iterations: 7
gtsummary::tbl_regression(model4, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
Isolation


    1 — —
    2 1.65 1.47, 1.85 <0.001
    3 1.95 1.71, 2.24 <0.001
Sex 1.23 1.11, 1.36 <0.001
Race/Ethnicity


    NH White — —
    NH Black 2.39 2.10, 2.73 <0.001
    Hispanic 1.60 1.32, 1.93 <0.001
Age 1.11 1.10, 1.11 <0.001
Educational Attainment


    LTHS — —
    College+ 0.43 0.37, 0.50 <0.001
    HS/GED 0.60 0.54, 0.67 <0.001
    Some college 0.50 0.44, 0.58 <0.001
Born in the U.S 0.95 0.79, 1.14 0.6
proxyb 13.1 11.7, 14.6 <0.001
chronic 1.20 1.14, 1.26 <0.001
RECODE of cesdm 1.72 1.56, 1.90 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

*Interactions with Gender/Race

Model 6

model6 <- svyglm(
  formula = `_Y` ~ bsisolation + female + race + age + educ + foreign + proxyb +lnincome +chronic +dep2c + bsisolation*female ,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model6)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation + female + race + age + educ + 
##     foreign + proxyb + lnincome + chronic + dep2c + bsisolation * 
##     female, design = survey_design, family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -11.298084   0.295486 -38.236  < 2e-16 ***
## bsisolation          0.357254   0.048470   7.371 1.71e-13 ***
## female               0.556933   0.167950   3.316 0.000913 ***
## raceNH Black         0.804768   0.067278  11.962  < 2e-16 ***
## raceHispanic         0.386641   0.099599   3.882 0.000104 ***
## age                  0.099447   0.002976  33.421  < 2e-16 ***
## educCollege+        -0.731113   0.077836  -9.393  < 2e-16 ***
## educHS/GED          -0.468144   0.057075  -8.202 2.39e-16 ***
## educSome college    -0.615805   0.069746  -8.829  < 2e-16 ***
## foreign             -0.085453   0.096271  -0.888 0.374744    
## proxyb               2.594883   0.057794  44.899  < 2e-16 ***
## lnincome            -0.133435   0.016422  -8.125 4.51e-16 ***
## chronic              0.181897   0.023887   7.615 2.67e-14 ***
## dep2c                0.513403   0.050347  10.197  < 2e-16 ***
## bsisolation:female  -0.131497   0.054962  -2.393 0.016736 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8615173)
## 
## Number of Fisher Scoring iterations: 7
gtsummary::tbl_regression(model6, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
bsisolation 1.43 1.30, 1.57 <0.001
Sex 1.75 1.26, 2.43 <0.001
Race/Ethnicity


    NH White — —
    NH Black 2.24 1.96, 2.55 <0.001
    Hispanic 1.47 1.21, 1.79 <0.001
Age 1.10 1.10, 1.11 <0.001
Educational Attainment


    LTHS — —
    College+ 0.48 0.41, 0.56 <0.001
    HS/GED 0.63 0.56, 0.70 <0.001
    Some college 0.54 0.47, 0.62 <0.001
Born in the U.S 0.92 0.76, 1.11 0.4
proxyb 13.4 12.0, 15.0 <0.001
lnincome 0.88 0.85, 0.90 <0.001
chronic 1.20 1.14, 1.26 <0.001
RECODE of cesdm 1.67 1.51, 1.84 <0.001
bsisolation * Sex 0.88 0.79, 0.98 0.017
1 OR = Odds Ratio, CI = Confidence Interval

Model 7 # Race

model7 <- svyglm(
  formula = `_Y` ~ bsisolation3c + female + race + age + educ + foreign + proxyb +lnincome +chronic +dep2c + bsisolation3c*race ,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model7)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation3c + female + race + age + 
##     educ + foreign + proxyb + lnincome + chronic + dep2c + bsisolation3c * 
##     race, design = survey_design, family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -10.690472   0.287092 -37.237  < 2e-16 ***
## bsisolation3c2                0.553893   0.066572   8.320  < 2e-16 ***
## bsisolation3c3                0.675565   0.076004   8.889  < 2e-16 ***
## female                        0.172195   0.053122   3.241  0.00119 ** 
## raceNH Black                  0.966590   0.130988   7.379 1.61e-13 ***
## raceHispanic                  0.726259   0.168542   4.309 1.64e-05 ***
## age                           0.099781   0.002963  33.675  < 2e-16 ***
## educCollege+                 -0.728419   0.078149  -9.321  < 2e-16 ***
## educHS/GED                   -0.470105   0.057051  -8.240  < 2e-16 ***
## educSome college             -0.617716   0.069727  -8.859  < 2e-16 ***
## foreign                      -0.074784   0.095109  -0.786  0.43169    
## proxyb                        2.569044   0.057115  44.980  < 2e-16 ***
## lnincome                     -0.134668   0.016272  -8.276  < 2e-16 ***
## chronic                       0.179968   0.023861   7.542 4.66e-14 ***
## dep2c                         0.520178   0.050445  10.312  < 2e-16 ***
## bsisolation3c2:raceNH Black  -0.310624   0.166232  -1.869  0.06168 .  
## bsisolation3c3:raceNH Black  -0.103777   0.167623  -0.619  0.53585    
## bsisolation3c2:raceHispanic  -0.420118   0.200191  -2.099  0.03586 *  
## bsisolation3c3:raceHispanic  -0.544706   0.261037  -2.087  0.03692 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8615356)
## 
## Number of Fisher Scoring iterations: 7
gtsummary::tbl_regression(model7, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
Isolation


    1 — —
    2 1.74 1.53, 1.98 <0.001
    3 1.97 1.69, 2.28 <0.001
Sex 1.19 1.07, 1.32 0.001
Race/Ethnicity


    NH White — —
    NH Black 2.63 2.03, 3.40 <0.001
    Hispanic 2.07 1.49, 2.88 <0.001
Age 1.10 1.10, 1.11 <0.001
Educational Attainment


    LTHS — —
    College+ 0.48 0.41, 0.56 <0.001
    HS/GED 0.62 0.56, 0.70 <0.001
    Some college 0.54 0.47, 0.62 <0.001
Born in the U.S 0.93 0.77, 1.12 0.4
proxyb 13.1 11.7, 14.6 <0.001
lnincome 0.87 0.85, 0.90 <0.001
chronic 1.20 1.14, 1.25 <0.001
RECODE of cesdm 1.68 1.52, 1.86 <0.001
Isolation * Race/Ethnicity


    2 * NH Black 0.73 0.53, 1.02 0.062
    3 * NH Black 0.90 0.65, 1.25 0.5
    2 * Hispanic 0.66 0.44, 0.97 0.036
    3 * Hispanic 0.58 0.35, 0.97 0.037
1 OR = Odds Ratio, CI = Confidence Interval

Model 8 # Race and sex

model8 <- svyglm(
  formula = `_Y` ~ bsisolation3c + female + race + age + educ + foreign + proxyb +lnincome +chronic +dep2c + bsisolation3c*race + bsisolation3c*female ,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model8)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation3c + female + race + age + 
##     educ + foreign + proxyb + lnincome + chronic + dep2c + bsisolation3c * 
##     race + bsisolation3c * female, design = survey_design, family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -10.703404   0.286915 -37.305  < 2e-16 ***
## bsisolation3c2                0.517289   0.093476   5.534 3.14e-08 ***
## bsisolation3c3                0.849056   0.121786   6.972 3.15e-12 ***
## female                        0.216297   0.093304   2.318   0.0204 *  
## raceNH Black                  0.965864   0.131026   7.372 1.70e-13 ***
## raceHispanic                  0.725857   0.168776   4.301 1.70e-05 ***
## age                           0.099825   0.002953  33.809  < 2e-16 ***
## educCollege+                 -0.727481   0.077985  -9.328  < 2e-16 ***
## educHS/GED                   -0.472118   0.057004  -8.282  < 2e-16 ***
## educSome college             -0.621386   0.069674  -8.919  < 2e-16 ***
## foreign                      -0.075139   0.095155  -0.790   0.4297    
## proxyb                        2.582863   0.058092  44.462  < 2e-16 ***
## lnincome                     -0.136151   0.016155  -8.428  < 2e-16 ***
## chronic                       0.181256   0.023908   7.581 3.45e-14 ***
## dep2c                         0.518433   0.050350  10.297  < 2e-16 ***
## bsisolation3c2:raceNH Black  -0.311005   0.166180  -1.871   0.0613 .  
## bsisolation3c3:raceNH Black  -0.097256   0.167621  -0.580   0.5618    
## bsisolation3c2:raceHispanic  -0.416229   0.200698  -2.074   0.0381 *  
## bsisolation3c3:raceHispanic  -0.542198   0.261635  -2.072   0.0382 *  
## bsisolation3c2:female         0.045460   0.115619   0.393   0.6942    
## bsisolation3c3:female        -0.246244   0.141629  -1.739   0.0821 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8626164)
## 
## Number of Fisher Scoring iterations: 7
gtsummary::tbl_regression(model8, exp = TRUE) 
Characteristic OR1 95% CI1 p-value
Isolation


    1 — —
    2 1.68 1.40, 2.01 <0.001
    3 2.34 1.84, 2.97 <0.001
Sex 1.24 1.03, 1.49 0.020
Race/Ethnicity


    NH White — —
    NH Black 2.63 2.03, 3.40 <0.001
    Hispanic 2.07 1.48, 2.88 <0.001
Age 1.10 1.10, 1.11 <0.001
Educational Attainment


    LTHS — —
    College+ 0.48 0.41, 0.56 <0.001
    HS/GED 0.62 0.56, 0.70 <0.001
    Some college 0.54 0.47, 0.62 <0.001
Born in the U.S 0.93 0.77, 1.12 0.4
proxyb 13.2 11.8, 14.8 <0.001
lnincome 0.87 0.85, 0.90 <0.001
chronic 1.20 1.14, 1.26 <0.001
RECODE of cesdm 1.68 1.52, 1.85 <0.001
Isolation * Race/Ethnicity


    2 * NH Black 0.73 0.53, 1.01 0.061
    3 * NH Black 0.91 0.65, 1.26 0.6
    2 * Hispanic 0.66 0.45, 0.98 0.038
    3 * Hispanic 0.58 0.35, 0.97 0.038
Isolation * Sex


    2 * Sex 1.05 0.83, 1.31 0.7
    3 * Sex 0.78 0.59, 1.03 0.082
1 OR = Odds Ratio, CI = Confidence Interval

Predicted Probability Plot

library(sjPlot)
library(sjmisc)
## 
## Attaching package: 'sjmisc'
## The following objects are masked from 'package:janitor':
## 
##     remove_empty_cols, remove_empty_rows
library(ggplot2)
data(efc)
theme_set(theme_sjplot())
#install.packages("margins")
library(margins)

Model

model_race <- svyglm(
  formula = `_Y` ~ bsisolation + race + female+ age + educ + foreign + proxyb+ bsisolation*race,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model_race)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation + race + female + age + educ + 
##     foreign + proxyb + bsisolation * race, design = survey_design, 
##     family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -12.402964   0.239579 -51.770  < 2e-16 ***
## bsisolation                0.357253   0.028088  12.719  < 2e-16 ***
## raceNH Black               1.050075   0.208139   5.045 4.54e-07 ***
## raceHispanic               0.996343   0.398212   2.502   0.0123 *  
## female                     0.217539   0.052634   4.133 3.58e-05 ***
## age                        0.102272   0.002955  34.614  < 2e-16 ***
## educCollege+              -0.961585   0.075940 -12.662  < 2e-16 ***
## educHS/GED                -0.551378   0.056408  -9.775  < 2e-16 ***
## educSome college          -0.738161   0.068845 -10.722  < 2e-16 ***
## foreign                   -0.076573   0.092960  -0.824   0.4101    
## proxyb                     2.544657   0.056559  44.991  < 2e-16 ***
## bsisolation:raceNH Black  -0.045755   0.063065  -0.726   0.4681    
## bsisolation:raceHispanic  -0.158641   0.133842  -1.185   0.2359    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8649419)
## 
## Number of Fisher Scoring iterations: 7
model_gender <- svyglm(
  formula = `_Y` ~ bsisolation+female+ race+ age + educ + foreign + proxyb+bsisolation*female,
  design = survey_design,
  family = quasibinomial(link = "logit")
)

summary(model_gender)
## 
## Call:
## svyglm(formula = `_Y` ~ bsisolation + female + race + age + educ + 
##     foreign + proxyb + bsisolation * female, design = survey_design, 
##     family = quasibinomial(link = "logit"))
## 
## Survey design:
## svydesign(id = ~1, strata = ~raestrat, weights = ~r5wtresp, data = data)
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -12.546505   0.242966 -51.639  < 2e-16 ***
## bsisolation          0.402618   0.048036   8.382  < 2e-16 ***
## female               0.532724   0.165284   3.223  0.00127 ** 
## raceNH Black         0.911807   0.066037  13.807  < 2e-16 ***
## raceHispanic         0.521327   0.097000   5.374  7.7e-08 ***
## age                  0.102542   0.002942  34.859  < 2e-16 ***
## educCollege+        -0.968390   0.075650 -12.801  < 2e-16 ***
## educHS/GED          -0.553962   0.056466  -9.811  < 2e-16 ***
## educSome college    -0.745557   0.068897 -10.821  < 2e-16 ***
## foreign             -0.087836   0.094504  -0.929  0.35266    
## proxyb               2.557965   0.057209  44.712  < 2e-16 ***
## bsisolation:female  -0.106798   0.054084  -1.975  0.04831 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.8721745)
## 
## Number of Fisher Scoring iterations: 7

Simple

theme_set(theme_sjplot())
plot_model(model_gender, type = "pred", terms = c("bsisolation","female"))

# Plot predictive margins
plot_model(model_race, type = "pred", terms = c("bsisolation", "race"))

Race

theme_set(theme_sjplot())
plot_model(model_race, type = "pred", terms = c("bsisolation", "race"))

# Plot predictive margins
plot_model_race <- plot_model(model_race, type = "pred", terms = c("bsisolation", "race"))

plot_data <- as.data.frame(plot_model_race$data)


# Check the structure of the extracted data
str(plot_data)
## 'data.frame':    18 obs. of  7 variables:
##  $ x        : num  0 0 0 1 1 1 2 2 2 3 ...
##  $ predicted: num  0.00709 0.02 0.01898 0.01011 0.02712 ...
##  $ std.error: num  0.0988 0.1961 0.3899 0.0782 0.1427 ...
##  $ conf.low : num  0.00585 0.01371 0.00893 0.00868 0.02064 ...
##  $ conf.high: num  0.00859 0.0291 0.03988 0.01176 0.03556 ...
##  $ group    : Factor w/ 3 levels "NH White","NH Black",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ group_col: Factor w/ 3 levels "NH White","NH Black",..: 1 2 3 1 2 3 1 2 3 1 ...
##  - attr(*, "legend.labels")= chr [1:3] "NH White" "NH Black" "Hispanic"
##  - attr(*, "x.is.factor")= chr "0"
##  - attr(*, "continuous.group")= logi FALSE
##  - attr(*, "rawdata")='data.frame':  91241 obs. of  4 variables:
##   ..$ response: num [1:91241] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..- attr(*, "label")= chr "0: Event did not happen; 1: Event happened"
##   .. ..- attr(*, "format.stata")= chr "%9.0g"
##   ..$ x       : num [1:91241] 3 1 1 1 1 1 1 2 1 2 ...
##   ..$ group   : Factor w/ 3 levels "NH White","NH Black",..: 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "label")= chr "Race/Ethnicity"
##   ..$ facet   : Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "title")= chr "Predicted probabilities of 0: Event did not happen; 1: Event happened"
##  - attr(*, "x.title")= chr "bsisolation"
##  - attr(*, "y.title")= chr "0: Event did not happen; 1: Event happened"
##  - attr(*, "legend.title")= chr "Race/Ethnicity"
##  - attr(*, "constant.values")=List of 5
##   ..$ female : num 0.571
##   ..$ age    : num 70.5
##   ..$ educ   : chr "LTHS"
##   ..$ foreign: num 0.0731
##   ..$ proxyb : num 0.0511
##  - attr(*, "terms")= chr [1:2] "bsisolation" "race"
##  - attr(*, "original.terms")= chr [1:2] "bsisolation" "race"
##  - attr(*, "at.list")=List of 2
##   ..$ bsisolation: num [1:6] 0 1 2 3 4 5
##   ..$ race       : chr [1:3] "NH White" "NH Black" "Hispanic"
##  - attr(*, "ci.lvl")= num 0.95
##  - attr(*, "type")= chr "fe"
##  - attr(*, "response.name")= chr "_Y"
##  - attr(*, "family")= chr "quasibinomial"
##  - attr(*, "link")= chr "logit"
##  - attr(*, "logistic")= chr "1"
##  - attr(*, "link_inverse")=function (eta)  
##  - attr(*, "link_function")=function (mu)  
##  - attr(*, "is.trial")= chr "0"
##  - attr(*, "fitfun")= chr "glm"
##  - attr(*, "model.name")= chr "model"
# Create a ggplot with shaded confidence intervals
ggplot(plot_data, aes(x = x, y = predicted, color = group, fill = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3, size = 0) +
  geom_line() +
  labs(
    title = "Predictive Margins of Race/Ethnicity with 95% CIs",
    x = "Social Isolation",
    y = "Predicted Probability of Risk of Dementia",
    color = "Race/Ethnicity",
    fill = "Race/Ethnicity"
  ) +
  theme_minimal() +
  guides(color = guide_legend(title = "Race/Ethnicity"), fill = guide_legend(title = "Race/Ethnicity")) +
  theme_nice +
  scale_y_continuous(
    breaks = seq(0, 0.1, 0.02)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Gender

plot_model_gender <- plot_model(model_gender, type = "pred", terms = c("bsisolation", "female"))

plot_data_g <- as.data.frame(plot_model_gender$data)

# Check the structure of the extracted data
str(plot_data_g)
## 'data.frame':    12 obs. of  7 variables:
##  $ x        : num  0 0 1 1 2 2 3 3 4 4 ...
##  $ predicted: num  0.00554 0.0094 0.00826 0.01259 0.01231 ...
##  $ std.error: num  0.1455 0.1055 0.1057 0.0829 0.0761 ...
##  $ conf.low : num  0.00417 0.00766 0.00673 0.01072 0.01062 ...
##  $ conf.high: num  0.00735 0.01153 0.01014 0.01478 0.01426 ...
##  $ group    : Factor w/ 2 levels "0.male","1.female": 1 2 1 2 1 2 1 2 1 2 ...
##  $ group_col: Factor w/ 2 levels "0.male","1.female": 1 2 1 2 1 2 1 2 1 2 ...
##  - attr(*, "legend.labels")= chr [1:2] "0.male" "1.female"
##  - attr(*, "x.is.factor")= chr "0"
##  - attr(*, "continuous.group")= logi FALSE
##  - attr(*, "rawdata")='data.frame':  91241 obs. of  4 variables:
##   ..$ response: num [1:91241] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..- attr(*, "label")= chr "0: Event did not happen; 1: Event happened"
##   .. ..- attr(*, "format.stata")= chr "%9.0g"
##   ..$ x       : num [1:91241] 3 1 1 1 1 1 1 2 1 2 ...
##   ..$ group   : Factor w/ 2 levels "0.male","1.female": 2 1 1 1 1 1 1 1 2 2 ...
##   .. ..- attr(*, "label")= chr "Sex"
##   ..$ facet   : Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "title")= chr "Predicted probabilities of 0: Event did not happen; 1: Event happened"
##  - attr(*, "x.title")= chr "bsisolation"
##  - attr(*, "y.title")= chr "0: Event did not happen; 1: Event happened"
##  - attr(*, "legend.title")= chr "Sex"
##  - attr(*, "constant.values")=List of 5
##   ..$ race   : chr "NH White"
##   ..$ age    : num 70.5
##   ..$ educ   : chr "LTHS"
##   ..$ foreign: num 0.0731
##   ..$ proxyb : num 0.0511
##  - attr(*, "terms")= chr [1:2] "bsisolation" "female"
##  - attr(*, "original.terms")= chr [1:2] "bsisolation" "female"
##  - attr(*, "at.list")=List of 2
##   ..$ bsisolation: num [1:6] 0 1 2 3 4 5
##   ..$ female     : dbl+lbl [1:2] 0, 1
##    .. ..@ label       : chr "Sex"
##    .. ..@ format.stata: chr "%9.0g"
##    .. ..@ labels      : Named num [1:2] 0 1
##    .. .. ..- attr(*, "names")= chr [1:2] "0.male" "1.female"
##  - attr(*, "ci.lvl")= num 0.95
##  - attr(*, "type")= chr "fe"
##  - attr(*, "response.name")= chr "_Y"
##  - attr(*, "family")= chr "quasibinomial"
##  - attr(*, "link")= chr "logit"
##  - attr(*, "logistic")= chr "1"
##  - attr(*, "link_inverse")=function (eta)  
##  - attr(*, "link_function")=function (mu)  
##  - attr(*, "is.trial")= chr "0"
##  - attr(*, "fitfun")= chr "glm"
##  - attr(*, "model.name")= chr "model"
plot_data_g$group <- car::Recode(plot_data_g$group,
                           recodes = "'0.male'='Male' ; '1.female'= 'Female'; else=NA")

# Create a ggplot with shaded confidence intervals
ggplot(plot_data_g, aes(x = x, y = predicted, color = group, fill = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3, size = 0) +
  geom_line() +
  labs(
    title = "Predictive Margins of Sex with 95% CIs",
    x = "Social Isolation",
    y = "Predicted Probability of Risk of Dementia",
    color = "Sex",
    fill = "Sex"
  ) +
  theme_minimal() +
  guides(color = guide_legend(title = "Sex"), fill = guide_legend(title = "Sex")) +
  theme_nice +
  scale_y_continuous(
    breaks = seq(0, 0.6, 0.01)
  )