2.2_Logistic_Regression

Check the models without survey weights

Add in Survey Weights

survey_design <- svydesign(ids=~0, weights=~survey_weights,
                           data=df2)

log_model1_weights <- svyglm(logformula1,
                       data=df2, family=stats::binomial(link=logit),
                       design=survey_design,
                       subset=NULL,start=NULL, rescale=TRUE, 
                       deff=FALSE,influence=FALSE)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(log_model1_weights)

## 
## Call:
## svyglm(formula = logformula1, design = survey_design, subset = NULL, 
##     family = stats::binomial(link = logit), start = NULL, rescale = TRUE, 
##     data = df2, deff = FALSE, influence = FALSE)
## 
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.48299    0.88724   5.053 4.93e-07 ***
## COHORTMiddle                  0.61498    0.69831   0.881  0.37865    
## COHORTOlder                  -0.72021    0.67327  -1.070  0.28493    
## genderMan                    -1.15743    0.38677  -2.993  0.00282 ** 
## genderNon-binary/genderqueer  0.54411    0.68543   0.794  0.42743    
## lesbian_gayNo                -1.90705    0.55370  -3.444  0.00059 ***
## whiteYes                      0.74825    0.81804   0.915  0.36051    
## latinoYes                     1.39834    0.64803   2.158  0.03111 *  
## W1POVERTYCAT100-199%         -0.08283    0.62440  -0.133  0.89448    
## W1POVERTYCAT200-299%         -0.28218    0.65845  -0.429  0.66831    
## W1POVERTYCAT300%+            -0.06775    0.60749  -0.112  0.91121    
## US_bornNo                    -0.75826    0.63172  -1.200  0.23022    
## W1CONVERSIONYes               0.68501    0.69014   0.993  0.32109    
## GEDUC2More than high school  -0.52130    0.42845  -1.217  0.22392    
## ACE_index                     0.08882    0.08234   1.079  0.28093    
## COHORTMiddle:whiteYes        -0.27270    1.00377  -0.272  0.78591    
## COHORTOlder:whiteYes          0.18330    0.86091   0.213  0.83143    
## lesbian_gayNo:whiteYes       -0.01162    0.83035  -0.014  0.98884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.204209)
## 
## Number of Fisher Scoring iterations: 7

#log_model1_weights %>% gtsummary::tbl_regression(exp = TRUE) 
#log_model1_weights$coefficients

model1_df = modelr::add_predictions(data=df2,
                        model=log_model1_weights,
                        type = "response")

model1_df = model1_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model1_df$pred = as_factor(model1_df$pred)
table(model1_df$pred)

## 
##    1 
## 1416

Model 1: Came Out to Friends

It seems that the logistic regression model for this variable is not accurate in the prediction, most likely due to the small number of people that never came out (n= 56 or 4% of the total)

log_model2_weights <- svyglm(logformula2,
                             data=df2, family=stats::binomial(link=logit),
                             design=survey_design,
                             subset=NULL,start=NULL, rescale=TRUE, 
                             deff=FALSE,influence=FALSE)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(log_model2_weights)

## 
## Call:
## svyglm(formula = logformula2, design = survey_design, subset = NULL, 
##     family = stats::binomial(link = logit), start = NULL, rescale = TRUE, 
##     data = df2, deff = FALSE, influence = FALSE)
## 
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.64820    0.42835   6.182 8.27e-10 ***
## COHORTMiddle                  0.93955    0.24541   3.828 0.000135 ***
## COHORTOlder                   0.17588    0.24239   0.726 0.468198    
## genderMan                    -0.53397    0.22067  -2.420 0.015655 *  
## genderNon-binary/genderqueer  0.26492    0.35783   0.740 0.459220    
## lesbian_gayNo                -2.23458    0.26913  -8.303 2.36e-16 ***
## whiteYes                      0.40802    0.27114   1.505 0.132603    
## latinoYes                     0.53263    0.32665   1.631 0.103204    
## W1POVERTYCAT100-199%         -0.07640    0.31583  -0.242 0.808894    
## W1POVERTYCAT200-299%         -0.12444    0.36766  -0.338 0.735066    
## W1POVERTYCAT300%+             0.15636    0.29671   0.527 0.598283    
## US_bornNo                    -0.28549    0.33650  -0.848 0.396345    
## W1CONVERSIONYes               0.45633    0.50101   0.911 0.362547    
## GEDUC2More than high school  -0.35527    0.22297  -1.593 0.111313    
## ACE_index                     0.10999    0.04893   2.248 0.024737 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9910466)
## 
## Number of Fisher Scoring iterations: 5

t2 = log_model2_weights %>% gtsummary::tbl_regression(exp = TRUE) %>% 
  add_significance_stars(hide_ci = FALSE,
                         hide_p = FALSE,
                         thresholds = c(0.01, 0.05, 0.1))
#t2

model2_df = modelr::add_predictions(data=df2,
                        model=log_model2_weights,
                        type = "response")

model2_df = model2_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model2_df$pred = as_factor(model2_df$pred)
model2_df$came_out_family = model2_df$came_out_family %>% recode_factor("No" = 0,"Yes" = 1)
yardstick::accuracy(model2_df,
                    truth=came_out_family,
                    estimate=pred)

## # A tibble: 1 × 3
##   .metric  .estimator .estimate
##   <chr>    <chr>          <dbl>
## 1 accuracy binary         0.853

#yardstick::metrics(model2_df,truth=came_out_family,estimate=pred)


options(scipen=999)
#overall effect of significant variable on the outcome
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=2)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=4)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=6)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=15)

log_model3_weights <- svyglm(logformula3,
                             data=df2, family=stats::binomial(link=logit),
                             design=survey_design,
                             subset=NULL,start=NULL, rescale=TRUE, 
                             deff=FALSE,influence=FALSE)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(log_model3_weights)

## 
## Call:
## svyglm(formula = logformula3, design = survey_design, subset = NULL, 
##     family = stats::binomial(link = logit), start = NULL, rescale = TRUE, 
##     data = df2, deff = FALSE, influence = FALSE)
## 
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
## 
## Coefficients:
##                              Estimate Std. Error t value        Pr(>|t|)    
## (Intercept)                   1.33404    0.34231   3.897        0.000102 ***
## COHORTMiddle                  0.84182    0.32425   2.596        0.009524 ** 
## COHORTOlder                   0.73909    0.38663   1.912        0.056131 .  
## genderMan                    -0.42716    0.17001  -2.513        0.012099 *  
## genderNon-binary/genderqueer  0.07978    0.28928   0.276        0.782756    
## lesbian_gayNo                -1.80727    0.27121  -6.664 0.0000000000383 ***
## whiteYes                     -0.32250    0.33964  -0.950        0.342517    
## latinoYes                     0.33686    0.25229   1.335        0.182030    
## W1POVERTYCAT100-199%         -0.22899    0.24618  -0.930        0.352445    
## W1POVERTYCAT200-299%          0.06897    0.28159   0.245        0.806538    
## W1POVERTYCAT300%+            -0.41534    0.23016  -1.805        0.071360 .  
## US_bornNo                    -0.32118    0.31440  -1.022        0.307156    
## W1CONVERSIONYes               1.31724    0.34767   3.789        0.000158 ***
## GEDUC2More than high school  -0.51370    0.17143  -2.997        0.002778 ** 
## ACE_index                     0.10492    0.03547   2.958        0.003153 ** 
## COHORTMiddle:whiteYes        -0.19290    0.38691  -0.499        0.618165    
## COHORTOlder:whiteYes          0.03985    0.43806   0.091        0.927524    
## lesbian_gayNo:whiteYes        0.34235    0.32795   1.044        0.296712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.033545)
## 
## Number of Fisher Scoring iterations: 4

t3 = log_model3_weights %>% gtsummary::tbl_regression(exp = TRUE) %>% 
  add_significance_stars(hide_ci = FALSE,
                         hide_p = FALSE,
                         thresholds = c(0.01, 0.05, 0.1))
#t3

model3_df = modelr::add_predictions(data=df2,
                        model=log_model3_weights,
                        type = "response")

model3_df = model3_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model3_df$pred = as_factor(model3_df$pred)
model3_df$outed_to_family = model3_df$outed_to_family %>% recode_factor("No" = 0,"Yes" = 1)
yardstick::accuracy(model3_df,outed_to_family,pred)

## # A tibble: 1 × 3
##   .metric  .estimator .estimate
##   <chr>    <chr>          <dbl>
## 1 accuracy binary         0.706

# yardstick::metrics(model3_df,outed_to_family,pred)
#table(model3_df$outed_to_family)
#table(model3_df$pred)

#Wald test for significant variables
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=2)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=3)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=4)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=6)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=11)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=13)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=14)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=15)

tbl_merge <-
  tbl_merge(
    tbls = list(t2, t3),
    tab_spanner = c("**Came out to Family**", "**Outed to Family**")
  )
tbl_merge

Characteristic	Came out to Family				Outed to Family
Characteristic	OR^1,2	SE²	95% CI²	p-value	OR^1,2	SE²	95% CI²	p-value
COHORT
Younger	—	—	—		—	—	—
Middle	2.56***	0.245	1.58, 4.14	<0.001	2.32***	0.324	1.23, 4.38	0.010
Older	1.19	0.242	0.74, 1.92	0.5	2.09*	0.387	0.98, 4.47	0.056
gender
Woman	—	—	—		—	—	—
Man	0.59**	0.221	0.38, 0.90	0.016	0.65**	0.170	0.47, 0.91	0.012
Non-binary/genderqueer	1.30	0.358	0.65, 2.63	0.5	1.08	0.289	0.61, 1.91	0.8
lesbian_gay
Yes	—	—	—		—	—	—
No	0.11***	0.269	0.06, 0.18	<0.001	0.16***	0.271	0.10, 0.28	<0.001
white
No	—	—	—		—	—	—
Yes	1.50	0.271	0.88, 2.56	0.13	0.72	0.340	0.37, 1.41	0.3
latino
No	—	—	—		—	—	—
Yes	1.70	0.327	0.90, 3.23	0.10	1.40	0.252	0.85, 2.30	0.2
W1POVERTYCAT
<100%	—	—	—		—	—	—
100-199%	0.93	0.316	0.50, 1.72	0.8	0.80	0.246	0.49, 1.29	0.4
200-299%	0.88	0.368	0.43, 1.82	0.7	1.07	0.282	0.62, 1.86	0.8
300%+	1.17	0.297	0.65, 2.09	0.6	0.66*	0.230	0.42, 1.04	0.071
US_born
Yes	—	—	—		—	—	—
No	0.75	0.336	0.39, 1.45	0.4	0.73	0.314	0.39, 1.34	0.3
W1CONVERSION
No	—	—	—		—	—	—
Yes	1.58	0.501	0.59, 4.22	0.4	3.73***	0.348	1.89, 7.38	<0.001
GEDUC2
High school less	—	—	—		—	—	—
More than high school	0.70	0.223	0.45, 1.09	0.11	0.60***	0.171	0.43, 0.84	0.003
ACE_index	1.12**	0.049	1.01, 1.23	0.025	1.11***	0.035	1.04, 1.19	0.003
COHORT * white
Middle * Yes					0.82	0.387	0.39, 1.76	0.6
Older * Yes					1.04	0.438	0.44, 2.46	>0.9
lesbian_gay * white
No * Yes					1.41	0.328	0.74, 2.68	0.3
¹ p<0.1; p<0.05; **p<0.01
² OR = Odds Ratio, SE = Standard Error, CI = Confidence Interval

tbl_merge  %>%
  as_flex_table() %>%
  flextable::save_as_docx(path="log_model_table.docx")

2.2_Logistic_Regression

Matthew Kusen

5/2/2022

Add logistic regression formulas

Check the models without survey weights

Add in Survey Weights

Model 1: Came Out to Friends