Data
load("merged_data.RData")
merged_data <- merged_data %>%
mutate(new_class = case_when(is.na(featurew_class) ~ 1,
featurew_class == "1" ~ 2,
featurew_class == "2" ~ 3,
featurew_class == "3" ~ 0,
TRUE ~ 99),
new_class2 = case_when(totalw_class == 0 ~ 1,
totalw_class == "1" ~ 0,
totalw_class == "2" ~ 2,
TRUE ~ 99))
table(merged_data$new_class)
##
## 0 1 2 3
## 633 417 147 30
table(merged_data$new_class2)
##
## 0 1 2
## 575 417 235
f_class <- merged_data %>%
group_by(new_class) %>%
reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
learn_vac = mean(learnvac_durW4, na.rm = TRUE),
get_vac = mean(getvac_durW4, na.rm = TRUE),
get_ans = mean(getans_durW4, na.rm = TRUE))
f_class
## # A tibble: 4 × 5
## new_class learn_hpv learn_vac get_vac get_ans
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.670 0.386 0.435 0.210
## 2 1 NaN NaN NaN NaN
## 3 2 2.50 1.49 1.46 0.736
## 4 3 2.83 1.83 1.37 3.06
t_class <- merged_data %>%
group_by(new_class2) %>%
reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
learn_vac = mean(learnvac_durW4, na.rm = TRUE),
get_vac = mean(getvac_durW4, na.rm = TRUE),
get_ans = mean(getans_durW4, na.rm = TRUE))
t_class
## # A tibble: 3 × 5
## new_class2 learn_hpv learn_vac get_vac get_ans
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.652 0.374 0.444 0.229
## 2 1 NaN NaN NaN NaN
## 3 2 2.13 1.29 1.17 0.857
vac_comp_all
model <- glm(vac_comp_all ~factor(new_class)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_comp_all ~ factor(new_class) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.94841 0.19217 -4.935 8e-07 ***
## factor(new_class)1 -0.49500 0.13969 -3.544 0.000395 ***
## factor(new_class)2 0.15584 0.15206 1.025 0.305423
## factor(new_class)3 -0.16951 0.35878 -0.472 0.636601
## sexident3 -0.10478 0.09266 -1.131 0.258162
## race4 -0.15558 0.05156 -3.018 0.002547 **
## age2 0.09023 0.11688 0.772 0.440128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 647.77 on 1221 degrees of freedom
## Residual deviance: 616.80 on 1215 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 630.8
##
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3
## 0.3873552 0.6095703 1.1686428 0.8440792
## sexident3 race4 age2
## 0.9005245 0.8559174 1.0944268
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3873552 0.2673784 0.5609716
## factor(new_class)1 0.6095703 0.4595286 0.7962753
## factor(new_class)2 1.1686428 0.8627322 1.5665341
## factor(new_class)3 0.8440792 0.3863537 1.6102561
## sexident3 0.9005245 0.7470016 1.0759206
## race4 0.8559174 0.7730772 0.9447353
## age2 1.0944268 0.8721888 1.3807046
model <- glm(vac_comp_all ~factor(new_class2)+sexident3+race4+age2,
family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_comp_all ~ factor(new_class2) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.92128 0.19340 -4.764 1.9e-06 ***
## factor(new_class2)1 -0.51179 0.14124 -3.623 0.000291 ***
## factor(new_class2)2 0.02946 0.13431 0.219 0.826387
## sexident3 -0.10548 0.09268 -1.138 0.255034
## race4 -0.15747 0.05149 -3.058 0.002229 **
## age2 0.08165 0.11652 0.701 0.483436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 647.77 on 1221 degrees of freedom
## Residual deviance: 618.12 on 1216 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 630.12
##
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class2)1 factor(new_class2)2 sexident3
## 0.3980094 0.5994230 1.0298978 0.8998885
## race4 age2
## 0.8543059 1.0850800
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3980094 0.2743705 0.5775961
## factor(new_class2)1 0.5994230 0.4505794 0.7856465
## factor(new_class2)2 1.0298978 0.7887949 1.3359026
## sexident3 0.8998885 0.7464318 1.0751971
## race4 0.8543059 0.7716783 0.9428610
## age2 1.0850800 0.8653883 1.3677212
vac_init_all
model <- glm(vac_init_all ~factor(new_class)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_init_all ~ factor(new_class) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.331986 0.131172 -2.531 0.0114 *
## factor(new_class)1 -0.202034 0.083490 -2.420 0.0155 *
## factor(new_class)2 0.123773 0.116877 1.059 0.2896
## factor(new_class)3 -0.461093 0.266367 -1.731 0.0834 .
## sexident3 -0.062732 0.059903 -1.047 0.2950
## race4 0.005215 0.032921 0.158 0.8741
## age2 0.028233 0.078264 0.361 0.7183
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1542.4 on 1221 degrees of freedom
## Residual deviance: 1529.7 on 1215 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 1543.7
##
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3
## 0.7174972 0.8170673 1.1317592 0.6305939
## sexident3 race4 age2
## 0.9391954 1.0052285 1.0286350
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7174972 0.5547860 0.9277447
## factor(new_class)1 0.8170673 0.6934844 0.9619474
## factor(new_class)2 1.1317592 0.8995123 1.4221768
## factor(new_class)3 0.6305939 0.3663138 1.0468029
## sexident3 0.9391954 0.8344121 1.0560157
## race4 1.0052285 0.9423840 1.0721693
## age2 1.0286350 0.8824766 1.1996526
model <- glm(vac_init_all ~factor(new_class2)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_init_all ~ factor(new_class2) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.323718 0.131800 -2.456 0.014 *
## factor(new_class2)1 -0.203016 0.085074 -2.386 0.017 *
## factor(new_class2)2 0.022626 0.099744 0.227 0.821
## sexident3 -0.060961 0.059856 -1.018 0.308
## race4 0.003644 0.032859 0.111 0.912
## age2 0.018435 0.078082 0.236 0.813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1542.4 on 1221 degrees of freedom
## Residual deviance: 1534.3 on 1216 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 1546.3
##
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class2)1 factor(new_class2)2 sexident3
## 0.7234541 0.8162651 1.0228835 0.9408597
## race4 age2
## 1.0036508 1.0186057
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7234541 0.5588832 0.9363643
## factor(new_class2)1 0.8162651 0.6906758 0.9640354
## factor(new_class2)2 1.0228835 0.8408688 1.2431896
## sexident3 0.9408597 0.8360309 1.0577233
## race4 1.0036508 0.9409878 1.0703862
## age2 1.0186057 0.8742393 1.1874188