Add logistic regression formulas

logformula1 <- "came_out_friend ~ COHORT+gender+lesbian_gay+white+latino+W1POVERTYCAT+US_born+W1CONVERSION+GEDUC2+ACE_index+COHORT*white+lesbian_gay*white"

logformula2 <- "came_out_family ~ COHORT+gender+lesbian_gay+white+latino+W1POVERTYCAT+US_born+W1CONVERSION+GEDUC2+ACE_index"


logformula3 <- "outed_to_family~COHORT+gender+lesbian_gay+white+latino+W1POVERTYCAT+US_born+W1CONVERSION+GEDUC2+ACE_index+COHORT*white+lesbian_gay*white"

Check the models without survey weights

Add in Survey Weights

survey_design <- svydesign(ids=~0, weights=~survey_weights,
                           data=df2)
log_model1_weights <- svyglm(logformula1,
                       data=df2, family=stats::binomial(link=logit),
                       design=survey_design,
                       subset=NULL,start=NULL, rescale=TRUE, 
                       deff=FALSE,influence=FALSE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(log_model1_weights)
## 
## Call:
## svyglm(formula = logformula1, design = survey_design, subset = NULL, 
##     family = stats::binomial(link = logit), start = NULL, rescale = TRUE, 
##     data = df2, deff = FALSE, influence = FALSE)
## 
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.48299    0.88724   5.053 4.93e-07 ***
## COHORTMiddle                  0.61498    0.69831   0.881  0.37865    
## COHORTOlder                  -0.72021    0.67327  -1.070  0.28493    
## genderMan                    -1.15743    0.38677  -2.993  0.00282 ** 
## genderNon-binary/genderqueer  0.54411    0.68543   0.794  0.42743    
## lesbian_gayNo                -1.90705    0.55370  -3.444  0.00059 ***
## whiteYes                      0.74825    0.81804   0.915  0.36051    
## latinoYes                     1.39834    0.64803   2.158  0.03111 *  
## W1POVERTYCAT100-199%         -0.08283    0.62440  -0.133  0.89448    
## W1POVERTYCAT200-299%         -0.28218    0.65845  -0.429  0.66831    
## W1POVERTYCAT300%+            -0.06775    0.60749  -0.112  0.91121    
## US_bornNo                    -0.75826    0.63172  -1.200  0.23022    
## W1CONVERSIONYes               0.68501    0.69014   0.993  0.32109    
## GEDUC2More than high school  -0.52130    0.42845  -1.217  0.22392    
## ACE_index                     0.08882    0.08234   1.079  0.28093    
## COHORTMiddle:whiteYes        -0.27270    1.00377  -0.272  0.78591    
## COHORTOlder:whiteYes          0.18330    0.86091   0.213  0.83143    
## lesbian_gayNo:whiteYes       -0.01162    0.83035  -0.014  0.98884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.204209)
## 
## Number of Fisher Scoring iterations: 7
#log_model1_weights %>% gtsummary::tbl_regression(exp = TRUE) 
#log_model1_weights$coefficients

model1_df = modelr::add_predictions(data=df2,
                        model=log_model1_weights,
                        type = "response")

model1_df = model1_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model1_df$pred = as_factor(model1_df$pred)
table(model1_df$pred)
## 
##    1 
## 1416

Model 1: Came Out to Friends

It seems that the logistic regression model for this variable is not accurate in the prediction, most likely due to the small number of people that never came out (n= 56 or 4% of the total)

log_model2_weights <- svyglm(logformula2,
                             data=df2, family=stats::binomial(link=logit),
                             design=survey_design,
                             subset=NULL,start=NULL, rescale=TRUE, 
                             deff=FALSE,influence=FALSE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(log_model2_weights)
## 
## Call:
## svyglm(formula = logformula2, design = survey_design, subset = NULL, 
##     family = stats::binomial(link = logit), start = NULL, rescale = TRUE, 
##     data = df2, deff = FALSE, influence = FALSE)
## 
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.64820    0.42835   6.182 8.27e-10 ***
## COHORTMiddle                  0.93955    0.24541   3.828 0.000135 ***
## COHORTOlder                   0.17588    0.24239   0.726 0.468198    
## genderMan                    -0.53397    0.22067  -2.420 0.015655 *  
## genderNon-binary/genderqueer  0.26492    0.35783   0.740 0.459220    
## lesbian_gayNo                -2.23458    0.26913  -8.303 2.36e-16 ***
## whiteYes                      0.40802    0.27114   1.505 0.132603    
## latinoYes                     0.53263    0.32665   1.631 0.103204    
## W1POVERTYCAT100-199%         -0.07640    0.31583  -0.242 0.808894    
## W1POVERTYCAT200-299%         -0.12444    0.36766  -0.338 0.735066    
## W1POVERTYCAT300%+             0.15636    0.29671   0.527 0.598283    
## US_bornNo                    -0.28549    0.33650  -0.848 0.396345    
## W1CONVERSIONYes               0.45633    0.50101   0.911 0.362547    
## GEDUC2More than high school  -0.35527    0.22297  -1.593 0.111313    
## ACE_index                     0.10999    0.04893   2.248 0.024737 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9910466)
## 
## Number of Fisher Scoring iterations: 5
t2 = log_model2_weights %>% gtsummary::tbl_regression(exp = TRUE) %>% 
  add_significance_stars(hide_ci = FALSE,
                         hide_p = FALSE,
                         thresholds = c(0.01, 0.05, 0.1))
#t2

model2_df = modelr::add_predictions(data=df2,
                        model=log_model2_weights,
                        type = "response")

model2_df = model2_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model2_df$pred = as_factor(model2_df$pred)
model2_df$came_out_family = model2_df$came_out_family %>% recode_factor("No" = 0,"Yes" = 1)
yardstick::accuracy(model2_df,
                    truth=came_out_family,
                    estimate=pred)
## # A tibble: 1 × 3
##   .metric  .estimator .estimate
##   <chr>    <chr>          <dbl>
## 1 accuracy binary         0.853
#yardstick::metrics(model2_df,truth=came_out_family,estimate=pred)


options(scipen=999)
#overall effect of significant variable on the outcome
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=2)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=4)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=6)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=15)
log_model3_weights <- svyglm(logformula3,
                             data=df2, family=stats::binomial(link=logit),
                             design=survey_design,
                             subset=NULL,start=NULL, rescale=TRUE, 
                             deff=FALSE,influence=FALSE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(log_model3_weights)
## 
## Call:
## svyglm(formula = logformula3, design = survey_design, subset = NULL, 
##     family = stats::binomial(link = logit), start = NULL, rescale = TRUE, 
##     data = df2, deff = FALSE, influence = FALSE)
## 
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
## 
## Coefficients:
##                              Estimate Std. Error t value        Pr(>|t|)    
## (Intercept)                   1.33404    0.34231   3.897        0.000102 ***
## COHORTMiddle                  0.84182    0.32425   2.596        0.009524 ** 
## COHORTOlder                   0.73909    0.38663   1.912        0.056131 .  
## genderMan                    -0.42716    0.17001  -2.513        0.012099 *  
## genderNon-binary/genderqueer  0.07978    0.28928   0.276        0.782756    
## lesbian_gayNo                -1.80727    0.27121  -6.664 0.0000000000383 ***
## whiteYes                     -0.32250    0.33964  -0.950        0.342517    
## latinoYes                     0.33686    0.25229   1.335        0.182030    
## W1POVERTYCAT100-199%         -0.22899    0.24618  -0.930        0.352445    
## W1POVERTYCAT200-299%          0.06897    0.28159   0.245        0.806538    
## W1POVERTYCAT300%+            -0.41534    0.23016  -1.805        0.071360 .  
## US_bornNo                    -0.32118    0.31440  -1.022        0.307156    
## W1CONVERSIONYes               1.31724    0.34767   3.789        0.000158 ***
## GEDUC2More than high school  -0.51370    0.17143  -2.997        0.002778 ** 
## ACE_index                     0.10492    0.03547   2.958        0.003153 ** 
## COHORTMiddle:whiteYes        -0.19290    0.38691  -0.499        0.618165    
## COHORTOlder:whiteYes          0.03985    0.43806   0.091        0.927524    
## lesbian_gayNo:whiteYes        0.34235    0.32795   1.044        0.296712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.033545)
## 
## Number of Fisher Scoring iterations: 4
t3 = log_model3_weights %>% gtsummary::tbl_regression(exp = TRUE) %>% 
  add_significance_stars(hide_ci = FALSE,
                         hide_p = FALSE,
                         thresholds = c(0.01, 0.05, 0.1))
#t3

model3_df = modelr::add_predictions(data=df2,
                        model=log_model3_weights,
                        type = "response")

model3_df = model3_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model3_df$pred = as_factor(model3_df$pred)
model3_df$outed_to_family = model3_df$outed_to_family %>% recode_factor("No" = 0,"Yes" = 1)
yardstick::accuracy(model3_df,outed_to_family,pred)
## # A tibble: 1 × 3
##   .metric  .estimator .estimate
##   <chr>    <chr>          <dbl>
## 1 accuracy binary         0.706
# yardstick::metrics(model3_df,outed_to_family,pred)
#table(model3_df$outed_to_family)
#table(model3_df$pred)

#Wald test for significant variables
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=2)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=3)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=4)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=6)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=11)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=13)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=14)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=15)
tbl_merge <-
  tbl_merge(
    tbls = list(t2, t3),
    tab_spanner = c("**Came out to Family**", "**Outed to Family**")
  )
tbl_merge 
Characteristic Came out to Family Outed to Family
OR1,2 SE2 95% CI2 p-value OR1,2 SE2 95% CI2 p-value
COHORT
Younger
Middle 2.56*** 0.245 1.58, 4.14 <0.001 2.32*** 0.324 1.23, 4.38 0.010
Older 1.19 0.242 0.74, 1.92 0.5 2.09* 0.387 0.98, 4.47 0.056
gender
Woman
Man 0.59** 0.221 0.38, 0.90 0.016 0.65** 0.170 0.47, 0.91 0.012
Non-binary/genderqueer 1.30 0.358 0.65, 2.63 0.5 1.08 0.289 0.61, 1.91 0.8
lesbian_gay
Yes
No 0.11*** 0.269 0.06, 0.18 <0.001 0.16*** 0.271 0.10, 0.28 <0.001
white
No
Yes 1.50 0.271 0.88, 2.56 0.13 0.72 0.340 0.37, 1.41 0.3
latino
No
Yes 1.70 0.327 0.90, 3.23 0.10 1.40 0.252 0.85, 2.30 0.2
W1POVERTYCAT
<100%
100-199% 0.93 0.316 0.50, 1.72 0.8 0.80 0.246 0.49, 1.29 0.4
200-299% 0.88 0.368 0.43, 1.82 0.7 1.07 0.282 0.62, 1.86 0.8
300%+ 1.17 0.297 0.65, 2.09 0.6 0.66* 0.230 0.42, 1.04 0.071
US_born
Yes
No 0.75 0.336 0.39, 1.45 0.4 0.73 0.314 0.39, 1.34 0.3
W1CONVERSION
No
Yes 1.58 0.501 0.59, 4.22 0.4 3.73*** 0.348 1.89, 7.38 <0.001
GEDUC2
High school less
More than high school 0.70 0.223 0.45, 1.09 0.11 0.60*** 0.171 0.43, 0.84 0.003
ACE_index 1.12** 0.049 1.01, 1.23 0.025 1.11*** 0.035 1.04, 1.19 0.003
COHORT * white
Middle * Yes 0.82 0.387 0.39, 1.76 0.6
Older * Yes 1.04 0.438 0.44, 2.46 >0.9
lesbian_gay * white
No * Yes 1.41 0.328 0.74, 2.68 0.3
1 *p<0.1; **p<0.05; ***p<0.01
2 OR = Odds Ratio, SE = Standard Error, CI = Confidence Interval
tbl_merge  %>%
  as_flex_table() %>%
  flextable::save_as_docx(path="log_model_table.docx")