Data
load("merged_data.RData")
merged_data <- merged_data %>%
mutate(new_class = case_when(is.na(featurew_class) ~ 1,
featurew_class == "1" ~ 2,
featurew_class == "2" ~ 3,
featurew_class == "3" ~ 0,
TRUE ~ 99),
new_class2 = case_when(totalw_class == 0 ~ 1,
totalw_class == "1" ~ 0,
totalw_class == "2" ~ 2,
TRUE ~ 99))
table(merged_data$new_class)
##
## 0 1 2 3
## 633 417 147 30
table(merged_data$new_class2)
##
## 0 1 2
## 575 417 235
f_class <- merged_data %>%
group_by(new_class) %>%
reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
learn_vac = mean(learnvac_durW4, na.rm = TRUE),
get_vac = mean(getvac_durW4, na.rm = TRUE),
get_ans = mean(getans_durW4, na.rm = TRUE))
f_class
## # A tibble: 4 × 5
## new_class learn_hpv learn_vac get_vac get_ans
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.670 0.386 0.435 0.210
## 2 1 NaN NaN NaN NaN
## 3 2 2.50 1.49 1.46 0.736
## 4 3 2.83 1.83 1.37 3.06
t_class <- merged_data %>%
group_by(new_class2) %>%
reframe(learn_hpv = mean(learnhpv_durW5, na.rm = TRUE),
learn_vac = mean(learnvac_durW4, na.rm = TRUE),
get_vac = mean(getvac_durW4, na.rm = TRUE),
get_ans = mean(getans_durW4, na.rm = TRUE))
t_class
## # A tibble: 3 × 5
## new_class2 learn_hpv learn_vac get_vac get_ans
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.652 0.374 0.444 0.229
## 2 1 NaN NaN NaN NaN
## 3 2 2.13 1.29 1.17 0.857
vac_comp_all
model <- glm(vac_comp_all ~factor(new_class)+sexident3+race+age, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_comp_all ~ factor(new_class) + sexident3 +
## race + age, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.72833 0.58920 -2.933 0.003353 **
## factor(new_class)1 -0.49288 0.13876 -3.552 0.000382 ***
## factor(new_class)2 0.15965 0.15168 1.053 0.292559
## factor(new_class)3 -0.19203 0.35975 -0.534 0.593488
## sexident3 -0.11792 0.09268 -1.272 0.203238
## race -0.06161 0.03028 -2.034 0.041924 *
## age 0.03073 0.02527 1.216 0.223956
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 647.77 on 1221 degrees of freedom
## Residual deviance: 621.46 on 1215 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 635.46
##
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3
## 0.1775815 0.6108657 1.1730991 0.8252798
## sexident3 race age
## 0.8887632 0.9402535 1.0312042
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.1775815 0.05467233 0.5606379
## factor(new_class)1 0.6108657 0.46148841 0.7965344
## factor(new_class)2 1.1730991 0.86638565 1.5718245
## factor(new_class)3 0.8252798 0.37628472 1.5765633
## sexident3 0.8887632 0.73723757 1.0614893
## race 0.9402535 0.88409909 0.9961046
## age 1.0312042 0.98146154 1.0846031
model <- glm(vac_comp_all ~factor(new_class2)+sexident3+race+age,
family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_comp_all ~ factor(new_class2) + sexident3 +
## race + age, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.66429 0.58553 -2.842 0.00448 **
## factor(new_class2)1 -0.50724 0.14030 -3.615 0.00030 ***
## factor(new_class2)2 0.03792 0.13379 0.283 0.77685
## sexident3 -0.11904 0.09270 -1.284 0.19907
## race -0.06288 0.03022 -2.080 0.03750 *
## age 0.02869 0.02517 1.140 0.25434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 647.77 on 1221 degrees of freedom
## Residual deviance: 622.88 on 1216 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 634.88
##
## Number of Fisher Scoring iterations: 6
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class2)1 factor(new_class2)2 sexident3
## 0.1893253 0.6021577 1.0386468 0.8877727
## race age
## 0.9390589 1.0291097
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.1893253 0.05886933 0.5926005
## factor(new_class2)1 0.6021577 0.45366267 0.7877403
## factor(new_class2)2 1.0386468 0.79628276 1.3460388
## sexident3 0.8877727 0.73637578 1.0603393
## race 0.9390589 0.88306612 0.9947287
## age 1.0291097 0.97966908 1.0821213
vac_init_all
model <- glm(vac_init_all ~factor(new_class)+sexident3+race+age, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_init_all ~ factor(new_class) + sexident3 +
## race + age, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.406617 0.394595 -1.030 0.3028
## factor(new_class)1 -0.203339 0.083531 -2.434 0.0149 *
## factor(new_class)2 0.112255 0.116906 0.960 0.3369
## factor(new_class)3 -0.467867 0.266368 -1.756 0.0790 .
## sexident3 -0.057956 0.059964 -0.967 0.3338
## race -0.027697 0.018840 -1.470 0.1415
## age 0.007293 0.017000 0.429 0.6679
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1542.4 on 1221 degrees of freedom
## Residual deviance: 1527.5 on 1215 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 1541.5
##
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class)1 factor(new_class)2 factor(new_class)3
## 0.6658994 0.8160013 1.1187984 0.6263368
## sexident3 race age
## 0.9436913 0.9726831 1.0073195
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.6658994 0.3068255 1.4413913
## factor(new_class)1 0.8160013 0.6924812 0.9608168
## factor(new_class)2 1.1187984 0.8889997 1.4061514
## factor(new_class)3 0.6263368 0.3640141 1.0393747
## sexident3 0.9436913 0.8383069 1.0612050
## race 0.9726831 0.9370776 1.0092623
## age 1.0073195 0.9743462 1.0415124
model <- glm(vac_init_all ~factor(new_class2)+sexident3+race+age, family = binomial(link = "probit"), data = merged_data)
summary(model)
##
## Call:
## glm(formula = vac_init_all ~ factor(new_class2) + sexident3 +
## race + age, family = binomial(link = "probit"), data = merged_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.365280 0.392773 -0.930 0.3524
## factor(new_class2)1 -0.203705 0.085109 -2.393 0.0167 *
## factor(new_class2)2 0.017456 0.099813 0.175 0.8612
## sexident3 -0.056179 0.059920 -0.938 0.3485
## race -0.028282 0.018788 -1.505 0.1322
## age 0.005398 0.016969 0.318 0.7504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1542.4 on 1221 degrees of freedom
## Residual deviance: 1532.0 on 1216 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: 1544
##
## Number of Fisher Scoring iterations: 4
odds_ratios <- exp(coef(model))
print(odds_ratios)
## (Intercept) factor(new_class2)1 factor(new_class2)2 sexident3
## 0.6940020 0.8157031 1.0176090 0.9453696
## race age
## 0.9721141 1.0054130
exp(cbind(OR = coef(model), confint(model)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.6940020 0.3211401 1.4961515
## factor(new_class2)1 0.8157031 0.6901300 0.9634608
## factor(new_class2)2 1.0176090 0.8363856 1.2369705
## sexident3 0.9453696 0.8399326 1.0629357
## race 0.9721141 0.9366107 1.0085860
## age 1.0054130 0.9725670 1.0394620