Data

load("merged_data.RData")

merged_data <- merged_data %>%
    mutate(new_class = case_when(is.na(featurew_class) ~ 1,
            featurew_class == "1" ~ 0,
            featurew_class == "2" ~ 2,
            featurew_class == "3" ~ 3,
            TRUE ~ 99),
        new_class2 = case_when(totalw_class == 0 ~ 1,
            totalw_class == "1" ~ 0,
            totalw_class == "2" ~ 2,
            TRUE ~ 99))

table(merged_data$new_class)
## 
##   0   1   2   3 
## 147 417  30 633
table(merged_data$new_class2)
## 
##   0   1   2 
## 575 417 235
f_class <- merged_data %>%
    group_by(new_class) %>%
    reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
        learn_vac = mean(learnvac_durW4, na.rm = TRUE),
        get_vac = mean(getvac_durW4, na.rm = TRUE),
        get_ans = mean(getans_durW4, na.rm = TRUE))
f_class
## # A tibble: 4 × 5
##   new_class learn_hpv learn_vac get_vac get_ans
##       <dbl>     <dbl>     <dbl>   <dbl>   <dbl>
## 1         0     2.50      1.49    1.46    0.736
## 2         1   NaN       NaN     NaN     NaN    
## 3         2     2.83      1.83    1.37    3.06 
## 4         3     0.670     0.386   0.435   0.210
t_class <- merged_data %>%
    group_by(new_class2) %>%
    reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
        learn_vac = mean(learnvac_durW4, na.rm = TRUE),
        get_vac = mean(getvac_durW4, na.rm = TRUE),
        get_ans = mean(getans_durW4, na.rm = TRUE))
t_class
## # A tibble: 3 × 5
##   new_class2 learn_hpv learn_vac get_vac get_ans
##        <dbl>     <dbl>     <dbl>   <dbl>   <dbl>
## 1          0     0.652     0.374   0.444   0.229
## 2          1   NaN       NaN     NaN     NaN    
## 3          2     2.13      1.29    1.17    0.857

vac_comp_all

model <- glm(vac_comp_all ~factor(new_class)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
## 
## Call:
## glm(formula = vac_comp_all ~ factor(new_class) + sexident3 + 
##     race4 + age2, family = binomial(link = "probit"), data = merged_data)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.79257    0.21646  -3.661 0.000251 ***
## factor(new_class)1 -0.65084    0.18051  -3.606 0.000311 ***
## factor(new_class)2 -0.32535    0.37665  -0.864 0.387689    
## factor(new_class)3 -0.15584    0.15206  -1.025 0.305423    
## sexident3          -0.10478    0.09266  -1.131 0.258162    
## race4              -0.15558    0.05156  -3.018 0.002547 ** 
## age2                0.09023    0.11688   0.772 0.440128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 647.77  on 1221  degrees of freedom
## Residual deviance: 616.80  on 1215  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 630.8
## 
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
##        (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3 
##          0.4526799          0.5216053          0.7222730          0.8556934 
##          sexident3              race4               age2 
##          0.9005245          0.8559174          1.0944268
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
##                           OR     2.5 %    97.5 %
## (Intercept)        0.4526799 0.2967246 0.6878864
## factor(new_class)1 0.5216053 0.3654582 0.7431215
## factor(new_class)2 0.7222730 0.3213510 1.4370137
## factor(new_class)3 0.8556934 0.6383519 1.1591082
## sexident3          0.9005245 0.7470016 1.0759206
## race4              0.8559174 0.7730772 0.9447353
## age2               1.0944268 0.8721888 1.3807046
model <- glm(vac_comp_all ~factor(new_class2)+sexident3+race4+age2,
    family = binomial(link = "probit"), data = merged_data)
summary(model)
## 
## Call:
## glm(formula = vac_comp_all ~ factor(new_class2) + sexident3 + 
##     race4 + age2, family = binomial(link = "probit"), data = merged_data)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.92128    0.19340  -4.764  1.9e-06 ***
## factor(new_class2)1 -0.51179    0.14124  -3.623 0.000291 ***
## factor(new_class2)2  0.02946    0.13431   0.219 0.826387    
## sexident3           -0.10548    0.09268  -1.138 0.255034    
## race4               -0.15747    0.05149  -3.058 0.002229 ** 
## age2                 0.08165    0.11652   0.701 0.483436    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 647.77  on 1221  degrees of freedom
## Residual deviance: 618.12  on 1216  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 630.12
## 
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
##         (Intercept) factor(new_class2)1 factor(new_class2)2           sexident3 
##           0.3980094           0.5994230           1.0298978           0.8998885 
##               race4                age2 
##           0.8543059           1.0850800
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
##                            OR     2.5 %    97.5 %
## (Intercept)         0.3980094 0.2743705 0.5775961
## factor(new_class2)1 0.5994230 0.4505794 0.7856465
## factor(new_class2)2 1.0298978 0.7887949 1.3359026
## sexident3           0.8998885 0.7464318 1.0751971
## race4               0.8543059 0.7716783 0.9428610
## age2                1.0850800 0.8653883 1.3677212

vac_init_all

model <- glm(vac_init_all ~factor(new_class)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
## 
## Call:
## glm(formula = vac_init_all ~ factor(new_class) + sexident3 + 
##     race4 + age2, family = binomial(link = "probit"), data = merged_data)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        -0.208213   0.154612  -1.347  0.17808   
## factor(new_class)1 -0.325807   0.124069  -2.626  0.00864 **
## factor(new_class)2 -0.584866   0.281708  -2.076  0.03788 * 
## factor(new_class)3 -0.123773   0.116877  -1.059  0.28960   
## sexident3          -0.062732   0.059903  -1.047  0.29500   
## race4               0.005215   0.032921   0.158  0.87413   
## age2                0.028233   0.078264   0.361  0.71830   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1542.4  on 1221  degrees of freedom
## Residual deviance: 1529.7  on 1215  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 1543.7
## 
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
##        (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3 
##          0.8120341          0.7219445          0.5571803          0.8835802 
##          sexident3              race4               age2 
##          0.9391954          1.0052285          1.0286350
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
##                           OR     2.5 %   97.5 %
## (Intercept)        0.8120341 0.5992725 1.099387
## factor(new_class)1 0.7219445 0.5663724 0.920966
## factor(new_class)2 0.5571803 0.3147795 0.954839
## factor(new_class)3 0.8835802 0.7031474 1.111714
## sexident3          0.9391954 0.8344121 1.056016
## race4              1.0052285 0.9423840 1.072169
## age2               1.0286350 0.8824766 1.199653
model <- glm(vac_init_all ~factor(new_class2)+sexident3+race4+age2, family = binomial(link = "probit"), data = merged_data)
summary(model)
## 
## Call:
## glm(formula = vac_init_all ~ factor(new_class2) + sexident3 + 
##     race4 + age2, family = binomial(link = "probit"), data = merged_data)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -0.323718   0.131800  -2.456    0.014 *
## factor(new_class2)1 -0.203016   0.085074  -2.386    0.017 *
## factor(new_class2)2  0.022626   0.099744   0.227    0.821  
## sexident3           -0.060961   0.059856  -1.018    0.308  
## race4                0.003644   0.032859   0.111    0.912  
## age2                 0.018435   0.078082   0.236    0.813  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1542.4  on 1221  degrees of freedom
## Residual deviance: 1534.3  on 1216  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 1546.3
## 
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
##         (Intercept) factor(new_class2)1 factor(new_class2)2           sexident3 
##           0.7234541           0.8162651           1.0228835           0.9408597 
##               race4                age2 
##           1.0036508           1.0186057
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
##                            OR     2.5 %    97.5 %
## (Intercept)         0.7234541 0.5588832 0.9363643
## factor(new_class2)1 0.8162651 0.6906758 0.9640354
## factor(new_class2)2 1.0228835 0.8408688 1.2431896
## sexident3           0.9408597 0.8360309 1.0577233
## race4               1.0036508 0.9409878 1.0703862
## age2                1.0186057 0.8742393 1.1874188