logformula1 <- "came_out_friend ~ COHORT+gender+lesbian_gay+white+latino+W1POVERTYCAT+US_born+W1CONVERSION+GEDUC2+ACE_index+COHORT*white+lesbian_gay*white"
logformula2 <- "came_out_family ~ COHORT+gender+lesbian_gay+white+latino+W1POVERTYCAT+US_born+W1CONVERSION+GEDUC2+ACE_index"
logformula3 <- "outed_to_family~COHORT+gender+lesbian_gay+white+latino+W1POVERTYCAT+US_born+W1CONVERSION+GEDUC2+ACE_index+COHORT*white+lesbian_gay*white"
survey_design <- svydesign(ids=~0, weights=~survey_weights,
data=df2)
log_model1_weights <- svyglm(logformula1,
data=df2, family=stats::binomial(link=logit),
design=survey_design,
subset=NULL,start=NULL, rescale=TRUE,
deff=FALSE,influence=FALSE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(log_model1_weights)
##
## Call:
## svyglm(formula = logformula1, design = survey_design, subset = NULL,
## family = stats::binomial(link = logit), start = NULL, rescale = TRUE,
## data = df2, deff = FALSE, influence = FALSE)
##
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.48299 0.88724 5.053 4.93e-07 ***
## COHORTMiddle 0.61498 0.69831 0.881 0.37865
## COHORTOlder -0.72021 0.67327 -1.070 0.28493
## genderMan -1.15743 0.38677 -2.993 0.00282 **
## genderNon-binary/genderqueer 0.54411 0.68543 0.794 0.42743
## lesbian_gayNo -1.90705 0.55370 -3.444 0.00059 ***
## whiteYes 0.74825 0.81804 0.915 0.36051
## latinoYes 1.39834 0.64803 2.158 0.03111 *
## W1POVERTYCAT100-199% -0.08283 0.62440 -0.133 0.89448
## W1POVERTYCAT200-299% -0.28218 0.65845 -0.429 0.66831
## W1POVERTYCAT300%+ -0.06775 0.60749 -0.112 0.91121
## US_bornNo -0.75826 0.63172 -1.200 0.23022
## W1CONVERSIONYes 0.68501 0.69014 0.993 0.32109
## GEDUC2More than high school -0.52130 0.42845 -1.217 0.22392
## ACE_index 0.08882 0.08234 1.079 0.28093
## COHORTMiddle:whiteYes -0.27270 1.00377 -0.272 0.78591
## COHORTOlder:whiteYes 0.18330 0.86091 0.213 0.83143
## lesbian_gayNo:whiteYes -0.01162 0.83035 -0.014 0.98884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.204209)
##
## Number of Fisher Scoring iterations: 7
#log_model1_weights %>% gtsummary::tbl_regression(exp = TRUE)
#log_model1_weights$coefficients
model1_df = modelr::add_predictions(data=df2,
model=log_model1_weights,
type = "response")
model1_df = model1_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model1_df$pred = as_factor(model1_df$pred)
table(model1_df$pred)
##
## 1
## 1416
It seems that the logistic regression model for this variable is not accurate in the prediction, most likely due to the small number of people that never came out (n= 56 or 4% of the total)
log_model2_weights <- svyglm(logformula2,
data=df2, family=stats::binomial(link=logit),
design=survey_design,
subset=NULL,start=NULL, rescale=TRUE,
deff=FALSE,influence=FALSE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(log_model2_weights)
##
## Call:
## svyglm(formula = logformula2, design = survey_design, subset = NULL,
## family = stats::binomial(link = logit), start = NULL, rescale = TRUE,
## data = df2, deff = FALSE, influence = FALSE)
##
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.64820 0.42835 6.182 8.27e-10 ***
## COHORTMiddle 0.93955 0.24541 3.828 0.000135 ***
## COHORTOlder 0.17588 0.24239 0.726 0.468198
## genderMan -0.53397 0.22067 -2.420 0.015655 *
## genderNon-binary/genderqueer 0.26492 0.35783 0.740 0.459220
## lesbian_gayNo -2.23458 0.26913 -8.303 2.36e-16 ***
## whiteYes 0.40802 0.27114 1.505 0.132603
## latinoYes 0.53263 0.32665 1.631 0.103204
## W1POVERTYCAT100-199% -0.07640 0.31583 -0.242 0.808894
## W1POVERTYCAT200-299% -0.12444 0.36766 -0.338 0.735066
## W1POVERTYCAT300%+ 0.15636 0.29671 0.527 0.598283
## US_bornNo -0.28549 0.33650 -0.848 0.396345
## W1CONVERSIONYes 0.45633 0.50101 0.911 0.362547
## GEDUC2More than high school -0.35527 0.22297 -1.593 0.111313
## ACE_index 0.10999 0.04893 2.248 0.024737 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9910466)
##
## Number of Fisher Scoring iterations: 5
t2 = log_model2_weights %>% gtsummary::tbl_regression(exp = TRUE) %>%
add_significance_stars(hide_ci = FALSE,
hide_p = FALSE,
thresholds = c(0.01, 0.05, 0.1))
#t2
model2_df = modelr::add_predictions(data=df2,
model=log_model2_weights,
type = "response")
model2_df = model2_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model2_df$pred = as_factor(model2_df$pred)
model2_df$came_out_family = model2_df$came_out_family %>% recode_factor("No" = 0,"Yes" = 1)
yardstick::accuracy(model2_df,
truth=came_out_family,
estimate=pred)
## # A tibble: 1 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy binary 0.853
#yardstick::metrics(model2_df,truth=came_out_family,estimate=pred)
options(scipen=999)
#overall effect of significant variable on the outcome
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=2)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=4)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=6)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=15)
log_model3_weights <- svyglm(logformula3,
data=df2, family=stats::binomial(link=logit),
design=survey_design,
subset=NULL,start=NULL, rescale=TRUE,
deff=FALSE,influence=FALSE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(log_model3_weights)
##
## Call:
## svyglm(formula = logformula3, design = survey_design, subset = NULL,
## family = stats::binomial(link = logit), start = NULL, rescale = TRUE,
## data = df2, deff = FALSE, influence = FALSE)
##
## Survey design:
## svydesign(ids = ~0, weights = ~survey_weights, data = df2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.33404 0.34231 3.897 0.000102 ***
## COHORTMiddle 0.84182 0.32425 2.596 0.009524 **
## COHORTOlder 0.73909 0.38663 1.912 0.056131 .
## genderMan -0.42716 0.17001 -2.513 0.012099 *
## genderNon-binary/genderqueer 0.07978 0.28928 0.276 0.782756
## lesbian_gayNo -1.80727 0.27121 -6.664 0.0000000000383 ***
## whiteYes -0.32250 0.33964 -0.950 0.342517
## latinoYes 0.33686 0.25229 1.335 0.182030
## W1POVERTYCAT100-199% -0.22899 0.24618 -0.930 0.352445
## W1POVERTYCAT200-299% 0.06897 0.28159 0.245 0.806538
## W1POVERTYCAT300%+ -0.41534 0.23016 -1.805 0.071360 .
## US_bornNo -0.32118 0.31440 -1.022 0.307156
## W1CONVERSIONYes 1.31724 0.34767 3.789 0.000158 ***
## GEDUC2More than high school -0.51370 0.17143 -2.997 0.002778 **
## ACE_index 0.10492 0.03547 2.958 0.003153 **
## COHORTMiddle:whiteYes -0.19290 0.38691 -0.499 0.618165
## COHORTOlder:whiteYes 0.03985 0.43806 0.091 0.927524
## lesbian_gayNo:whiteYes 0.34235 0.32795 1.044 0.296712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.033545)
##
## Number of Fisher Scoring iterations: 4
t3 = log_model3_weights %>% gtsummary::tbl_regression(exp = TRUE) %>%
add_significance_stars(hide_ci = FALSE,
hide_p = FALSE,
thresholds = c(0.01, 0.05, 0.1))
#t3
model3_df = modelr::add_predictions(data=df2,
model=log_model3_weights,
type = "response")
model3_df = model3_df %>% mutate(pred = ifelse(pred > 0.5,1,0))
model3_df$pred = as_factor(model3_df$pred)
model3_df$outed_to_family = model3_df$outed_to_family %>% recode_factor("No" = 0,"Yes" = 1)
yardstick::accuracy(model3_df,outed_to_family,pred)
## # A tibble: 1 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy binary 0.706
# yardstick::metrics(model3_df,outed_to_family,pred)
#table(model3_df$outed_to_family)
#table(model3_df$pred)
#Wald test for significant variables
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=2)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=3)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=4)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=6)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=11)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=13)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=14)
#wald.test(b = coef(log_model2_weights), Sigma = vcov(log_model2_weights),Terms=15)
tbl_merge <-
tbl_merge(
tbls = list(t2, t3),
tab_spanner = c("**Came out to Family**", "**Outed to Family**")
)
tbl_merge
| Characteristic | Came out to Family | Outed to Family | ||||||
|---|---|---|---|---|---|---|---|---|
| OR1,2 | SE2 | 95% CI2 | p-value | OR1,2 | SE2 | 95% CI2 | p-value | |
| COHORT | ||||||||
| Younger | — | — | — | — | — | — | ||
| Middle | 2.56*** | 0.245 | 1.58, 4.14 | <0.001 | 2.32*** | 0.324 | 1.23, 4.38 | 0.010 |
| Older | 1.19 | 0.242 | 0.74, 1.92 | 0.5 | 2.09* | 0.387 | 0.98, 4.47 | 0.056 |
| gender | ||||||||
| Woman | — | — | — | — | — | — | ||
| Man | 0.59** | 0.221 | 0.38, 0.90 | 0.016 | 0.65** | 0.170 | 0.47, 0.91 | 0.012 |
| Non-binary/genderqueer | 1.30 | 0.358 | 0.65, 2.63 | 0.5 | 1.08 | 0.289 | 0.61, 1.91 | 0.8 |
| lesbian_gay | ||||||||
| Yes | — | — | — | — | — | — | ||
| No | 0.11*** | 0.269 | 0.06, 0.18 | <0.001 | 0.16*** | 0.271 | 0.10, 0.28 | <0.001 |
| white | ||||||||
| No | — | — | — | — | — | — | ||
| Yes | 1.50 | 0.271 | 0.88, 2.56 | 0.13 | 0.72 | 0.340 | 0.37, 1.41 | 0.3 |
| latino | ||||||||
| No | — | — | — | — | — | — | ||
| Yes | 1.70 | 0.327 | 0.90, 3.23 | 0.10 | 1.40 | 0.252 | 0.85, 2.30 | 0.2 |
| W1POVERTYCAT | ||||||||
| <100% | — | — | — | — | — | — | ||
| 100-199% | 0.93 | 0.316 | 0.50, 1.72 | 0.8 | 0.80 | 0.246 | 0.49, 1.29 | 0.4 |
| 200-299% | 0.88 | 0.368 | 0.43, 1.82 | 0.7 | 1.07 | 0.282 | 0.62, 1.86 | 0.8 |
| 300%+ | 1.17 | 0.297 | 0.65, 2.09 | 0.6 | 0.66* | 0.230 | 0.42, 1.04 | 0.071 |
| US_born | ||||||||
| Yes | — | — | — | — | — | — | ||
| No | 0.75 | 0.336 | 0.39, 1.45 | 0.4 | 0.73 | 0.314 | 0.39, 1.34 | 0.3 |
| W1CONVERSION | ||||||||
| No | — | — | — | — | — | — | ||
| Yes | 1.58 | 0.501 | 0.59, 4.22 | 0.4 | 3.73*** | 0.348 | 1.89, 7.38 | <0.001 |
| GEDUC2 | ||||||||
| High school less | — | — | — | — | — | — | ||
| More than high school | 0.70 | 0.223 | 0.45, 1.09 | 0.11 | 0.60*** | 0.171 | 0.43, 0.84 | 0.003 |
| ACE_index | 1.12** | 0.049 | 1.01, 1.23 | 0.025 | 1.11*** | 0.035 | 1.04, 1.19 | 0.003 |
| COHORT * white | ||||||||
| Middle * Yes | 0.82 | 0.387 | 0.39, 1.76 | 0.6 | ||||
| Older * Yes | 1.04 | 0.438 | 0.44, 2.46 | >0.9 | ||||
| lesbian_gay * white | ||||||||
| No * Yes | 1.41 | 0.328 | 0.74, 2.68 | 0.3 | ||||
| 1 *p<0.1; **p<0.05; ***p<0.01 | ||||||||
| 2 OR = Odds Ratio, SE = Standard Error, CI = Confidence Interval | ||||||||
tbl_merge %>%
as_flex_table() %>%
flextable::save_as_docx(path="log_model_table.docx")