Data
load("merged_data.RData")
merged_data <- merged_data %>%
mutate(new_class = case_when(is.na(featurew_class) ~ 1,
featurew_class == "1" ~ 0,
featurew_class == "2" ~ 2,
featurew_class == "3" ~ 3,
TRUE ~ 99),
new_class2 = case_when(totalw_class == 0 ~ 1,
totalw_class == "1" ~ 0,
totalw_class == "2" ~ 2,
TRUE ~ 99))
table(merged_data$new_class)
##
## 0 1 2 3
## 147 417 30 633
table(merged_data$new_class2)
##
## 0 1 2
## 575 417 235
f_class <- merged_data %>%
group_by(new_class) %>%
reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
learn_vac = mean(learnvac_durW4, na.rm = TRUE),
get_vac = mean(getvac_durW4, na.rm = TRUE),
get_ans = mean(getans_durW4, na.rm = TRUE))
f_class
## # A tibble: 4 × 5
## new_class learn_hpv learn_vac get_vac get_ans
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 2.50 1.49 1.46 0.736
## 2 1 NaN NaN NaN NaN
## 3 2 2.83 1.83 1.37 3.06
## 4 3 0.670 0.386 0.435 0.210
t_class <- merged_data %>%
group_by(new_class2) %>%
reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
learn_vac = mean(learnvac_durW4, na.rm = TRUE),
get_vac = mean(getvac_durW4, na.rm = TRUE),
get_ans = mean(getans_durW4, na.rm = TRUE))
t_class
## # A tibble: 3 × 5
## new_class2 learn_hpv learn_vac get_vac get_ans
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.652 0.374 0.444 0.229
## 2 1 NaN NaN NaN NaN
## 3 2 2.13 1.29 1.17 0.857
vac_comp_all
model <- glm(vac_comp_all ~factor(new_class)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_comp_all ~ factor(new_class) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.79257 0.21646 -3.661 0.000251 ***
## factor(new_class)1 -0.65084 0.18051 -3.606 0.000311 ***
## factor(new_class)2 -0.32535 0.37665 -0.864 0.387689
## factor(new_class)3 -0.15584 0.15206 -1.025 0.305423
## sexident3 -0.10478 0.09266 -1.131 0.258162
## race4 -0.15558 0.05156 -3.018 0.002547 **
## age2 0.09023 0.11688 0.772 0.440128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 647.77 on 1221 degrees of freedom
## Residual deviance: 616.80 on 1215 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 630.8
##
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3
## 0.4526799 0.5216053 0.7222730 0.8556934
## sexident3 race4 age2
## 0.9005245 0.8559174 1.0944268
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4526799 0.2967246 0.6878864
## factor(new_class)1 0.5216053 0.3654582 0.7431215
## factor(new_class)2 0.7222730 0.3213510 1.4370137
## factor(new_class)3 0.8556934 0.6383519 1.1591082
## sexident3 0.9005245 0.7470016 1.0759206
## race4 0.8559174 0.7730772 0.9447353
## age2 1.0944268 0.8721888 1.3807046
model <- glm(vac_comp_all ~factor(new_class2)+sexident3+race4+age2,
family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_comp_all ~ factor(new_class2) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.92128 0.19340 -4.764 1.9e-06 ***
## factor(new_class2)1 -0.51179 0.14124 -3.623 0.000291 ***
## factor(new_class2)2 0.02946 0.13431 0.219 0.826387
## sexident3 -0.10548 0.09268 -1.138 0.255034
## race4 -0.15747 0.05149 -3.058 0.002229 **
## age2 0.08165 0.11652 0.701 0.483436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 647.77 on 1221 degrees of freedom
## Residual deviance: 618.12 on 1216 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 630.12
##
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class2)1 factor(new_class2)2 sexident3
## 0.3980094 0.5994230 1.0298978 0.8998885
## race4 age2
## 0.8543059 1.0850800
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3980094 0.2743705 0.5775961
## factor(new_class2)1 0.5994230 0.4505794 0.7856465
## factor(new_class2)2 1.0298978 0.7887949 1.3359026
## sexident3 0.8998885 0.7464318 1.0751971
## race4 0.8543059 0.7716783 0.9428610
## age2 1.0850800 0.8653883 1.3677212
vac_init_all
model <- glm(vac_init_all ~factor(new_class)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_init_all ~ factor(new_class) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.208213 0.154612 -1.347 0.17808
## factor(new_class)1 -0.325807 0.124069 -2.626 0.00864 **
## factor(new_class)2 -0.584866 0.281708 -2.076 0.03788 *
## factor(new_class)3 -0.123773 0.116877 -1.059 0.28960
## sexident3 -0.062732 0.059903 -1.047 0.29500
## race4 0.005215 0.032921 0.158 0.87413
## age2 0.028233 0.078264 0.361 0.71830
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1542.4 on 1221 degrees of freedom
## Residual deviance: 1529.7 on 1215 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 1543.7
##
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3
## 0.8120341 0.7219445 0.5571803 0.8835802
## sexident3 race4 age2
## 0.9391954 1.0052285 1.0286350
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.8120341 0.5992725 1.099387
## factor(new_class)1 0.7219445 0.5663724 0.920966
## factor(new_class)2 0.5571803 0.3147795 0.954839
## factor(new_class)3 0.8835802 0.7031474 1.111714
## sexident3 0.9391954 0.8344121 1.056016
## race4 1.0052285 0.9423840 1.072169
## age2 1.0286350 0.8824766 1.199653
model <- glm(vac_init_all ~factor(new_class2)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_init_all ~ factor(new_class2) + sexident3 +
## race4 + age2, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.323718 0.131800 -2.456 0.014 *
## factor(new_class2)1 -0.203016 0.085074 -2.386 0.017 *
## factor(new_class2)2 0.022626 0.099744 0.227 0.821
## sexident3 -0.060961 0.059856 -1.018 0.308
## race4 0.003644 0.032859 0.111 0.912
## age2 0.018435 0.078082 0.236 0.813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1542.4 on 1221 degrees of freedom
## Residual deviance: 1534.3 on 1216 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 1546.3
##
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class2)1 factor(new_class2)2 sexident3
## 0.7234541 0.8162651 1.0228835 0.9408597
## race4 age2
## 1.0036508 1.0186057
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7234541 0.5588832 0.9363643
## factor(new_class2)1 0.8162651 0.6906758 0.9640354
## factor(new_class2)2 1.0228835 0.8408688 1.2431896
## sexident3 0.9408597 0.8360309 1.0577233
## race4 1.0036508 0.9409878 1.0703862
## age2 1.0186057 0.8742393 1.1874188