The following R packages were used for this analysis:
tidymodels
tidyverse
plotly
pacman
skimr
ggplot2
forcats
themis
readxl
janitor
klaR
tune
discrim
vip
AMR
party
# Load the study data; the literal string "NA" is read as missing and
# cols() leaves every column type to be guessed by readr.
msc <- read_csv(
  file = "mscph.csv",
  na = "NA",
  col_types = cols()
)
dim(msc)
[1] 600 17
# Class balance of the outcome in the raw data
count(msc, final_outcome_result)
# A tibble: 2 × 2
final_outcome_result n
<chr> <int>
1 HIV Infected 210
2 HIV Uninfected 390
# Working copy of the data without the first PCR test result column,
# followed by a per-column summary (types, missingness, distribution).
new_msc <- select(msc, -first_pcr_test_result)
skimr::skim(new_msc)
| Name | new_msc |
| Number of rows | 600 |
| Number of columns | 16 |
| _______________________ | |
| Column type frequency: | |
| character | 12 |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| pep_id_sex | 0 | 1.00 | 4 | 6 | 0 | 2 | 0 |
| pep_id_deliv_type | 0 | 1.00 | 16 | 18 | 0 | 4 | 0 |
| pep_id_hiv_serostatus_mo | 0 | 1.00 | 14 | 22 | 0 | 3 | 0 |
| second_pcr_test | 0 | 1.00 | 8 | 8 | 0 | 2 | 0 |
| final_outcome_result | 0 | 1.00 | 12 | 14 | 0 | 2 | 0 |
| mother_received_intervetion_y_n | 0 | 1.00 | 2 | 3 | 0 | 2 | 0 |
| place_site_of_delivery | 0 | 1.00 | 15 | 16 | 0 | 2 | 0 |
| brst_feeding_method | 0 | 1.00 | 9 | 13 | 0 | 2 | 0 |
| type_of_avr_prophy_for_baby | 0 | 1.00 | 3 | 9 | 0 | 2 | 0 |
| cotrimoxazole_administered_y_n | 0 | 1.00 | 2 | 3 | 0 | 2 | 0 |
| education | 0 | 1.00 | 7 | 9 | 0 | 3 | 0 |
| marital_st | 85 | 0.86 | 6 | 9 | 0 | 5 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| pep_id_gest_age_birth_wks | 0 | 1 | 38.05 | 0.51 | 36 | 38 | 38 | 38 | 40 | ▁▁▇▁▁ |
| pep_id_birth_weight_grams | 0 | 1 | 3043.15 | 581.61 | 2000 | 2600 | 3000 | 3400 | 5700 | ▆▇▃▁▁ |
| mothers_recent_viral_load | 0 | 1 | 3620.24 | 21581.18 | 0 | 20 | 20 | 45 | 234542 | ▇▁▁▁▁ |
| age_mo | 0 | 1 | 37.38 | 6.06 | 22 | 33 | 38 | 41 | 69 | ▂▇▃▁▁ |
# Change variables from character to factor.
# Every character column of new_msc (the 12 listed by skim() above) is
# converted in one step instead of one hand-written assignment per column,
# so no column can be forgotten or misspelled. Numeric columns are untouched.
new_msc <- new_msc %>%
  mutate(across(where(is.character), factor))
# Frequency tables for each categorical variable ----
# Sex of infants
count(new_msc, pep_id_sex)
## # A tibble: 2 × 2
## pep_id_sex n
## <fct> <int>
## 1 Female 289
## 2 Male 311
# HIV serostatus of the mother
count(new_msc, pep_id_hiv_serostatus_mo)
## # A tibble: 3 × 2
## pep_id_hiv_serostatus_mo n
## <fct> <int>
## 1 HIV-1 & HIV-2 positive 69
## 2 HIV-1 positive 521
## 3 HIV-2 positive 10
# Final outcome of infants (the modelling target)
count(new_msc, final_outcome_result)
## # A tibble: 2 × 2
## final_outcome_result n
## <fct> <int>
## 1 HIV Infected 210
## 2 HIV Uninfected 390
# Mode of delivery
count(new_msc, pep_id_deliv_type)
## # A tibble: 4 × 2
## pep_id_deliv_type n
## <fct> <int>
## 1 Assisted vaginal 12
## 2 Elective cesarean 137
## 3 Emergency cesarean 44
## 4 Standard vaginal 407
# Second PCR test result of infants
count(new_msc, second_pcr_test)
## # A tibble: 2 × 2
## second_pcr_test n
## <fct> <int>
## 1 Negative 577
## 2 Positive 23
# Whether the mother received an intervention (yes/no)
count(new_msc, mother_received_intervetion_y_n)
## # A tibble: 2 × 2
## mother_received_intervetion_y_n n
## <fct> <int>
## 1 No 34
## 2 Yes 566
# Delivery site
count(new_msc, place_site_of_delivery)
## # A tibble: 2 × 2
## place_site_of_delivery n
## <fct> <int>
## 1 Inside Facility 571
## 2 Outside Facility 29
# Mode of feeding
count(new_msc, brst_feeding_method)
## # A tibble: 2 × 2
## brst_feeding_method n
## <fct> <int>
## 1 Exclusive 332
## 2 Mixed Feeding 268
# ARV prophylaxis given to the baby
count(new_msc, type_of_avr_prophy_for_baby)
## # A tibble: 2 × 2
## type_of_avr_prophy_for_baby n
## <fct> <int>
## 1 AZT + NVP 54
## 2 NVP 546
# Cotrimoxazole administered (yes/no)
count(new_msc, cotrimoxazole_administered_y_n)
## # A tibble: 2 × 2
## cotrimoxazole_administered_y_n n
## <fct> <int>
## 1 No 44
## 2 Yes 556
# Mothers education level
count(new_msc, education)
## # A tibble: 3 × 2
## education n
## <fct> <int>
## 1 Primary 66
## 2 Secondary 367
## 3 Tertiary 167
# Marital status (the only variable with missing values)
count(new_msc, marital_st)
## # A tibble: 6 × 2
## marital_st n
## <fct> <int>
## 1 Divorced 14
## 2 Married 285
## 3 Separated 21
## 4 Single 191
## 5 Widowed 4
## 6 <NA> 85
# Infant sex (same table as the first one in this section)
count(new_msc, pep_id_sex)
## # A tibble: 2 × 2
## pep_id_sex n
## <fct> <int>
## 1 Female 289
## 2 Male 311
# Age of mothers binned into ten-year groups
mothers <- msc[["age_mo"]]
group_mothers <- age_groups(x = mothers, split_at = "tens")
table(group_mothers)
## group_mothers
## 0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99 100+
## 0 0 50 323 217 4 6 0 0 0 0
# Histogram of mothers' most recent viral load.
# No bins/binwidth is given, so geom_histogram() falls back to its default.
ggplot(data = msc, mapping = aes(x = mothers_recent_viral_load)) +
  geom_histogram(fill = "red") +
  labs(
    x = "Viral load of mothers (copies/ml)",
    caption = "Histogram of mothers viral load. Most of the mothers have very low viral load but some have very high viral load"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of gestational age at birth.
# No bins/binwidth is given, so geom_histogram() falls back to its default.
ggplot(data = msc, mapping = aes(x = pep_id_gest_age_birth_wks)) +
  geom_histogram(fill = "purple") +
  labs(
    x = "Gestational age of infants (weeks)",
    caption = "Histogram of infants gestational age at birth. The average age was 38 weeks"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of infant birth weight.
# No bins/binwidth is given, so geom_histogram() falls back to its default.
ggplot(data = msc, mapping = aes(x = pep_id_birth_weight_grams)) +
  geom_histogram(fill = "violet") +
  labs(
    x = "Weight of infants (grams)",
    caption = "Histogram showing the distribution of infants weight at birth. The majority of infants weighed between 2 to 4.5kg"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of mothers' age.
# No bins/binwidth is given, so geom_histogram() falls back to its default.
ggplot(data = msc, mapping = aes(x = age_mo)) +
  geom_histogram(fill = "yellow") +
  labs(
    x = "Age of mothers (years)",
    caption = "Distribution of mothers age. The histogram is multimodal"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Viral load boxplot (single box; x = "" is a dummy category).
# na.rm is an argument of the geom, not an aesthetic: inside aes() it only
# created a constant mapping called "na.rm" and never controlled NA handling.
vload_spread <- ggplot(msc, aes(x = "", y = mothers_recent_viral_load)) +
  geom_boxplot(fill = "maroon", na.rm = TRUE) +
  labs(
    y = "Viral load (copies/ml)",
    caption = "Box plot of mothers viral load showing the spiking up of some of the viral load"
  )
# Render as an interactive plotly widget
ggplotly(vload_spread)
# Box plot for infant birth weight (single box; x = "" is a dummy category).
# na.rm is an argument of the geom, not an aesthetic: inside aes() it only
# created a constant mapping called "na.rm" and never controlled NA handling.
infant_wt <- ggplot(msc, aes(x = "", y = pep_id_birth_weight_grams)) +
  geom_boxplot(fill = "navy", na.rm = TRUE) +
  labs(
    y = "Birth weight (grams)",
    caption = "Box plot of infants weight. Maximum weight was 5.7kg and minimum 2kg"
  )
# Render as an interactive plotly widget
ggplotly(infant_wt)
# Box plot of gestational age at birth (single box; x = "" is a dummy category).
# na.rm is an argument of the geom, not an aesthetic: inside aes() it only
# created a constant mapping called "na.rm" and never controlled NA handling.
age_wk <- ggplot(msc, aes(x = "", y = pep_id_gest_age_birth_wks)) +
  geom_boxplot(fill = "limegreen", na.rm = TRUE) +
  labs(
    y = "Gestational age (weeks)",
    caption = "Box plot of gestational age of infants at birth. Minimum 36 weeks, maximum 40 weeks"
  )
# Render as an interactive plotly widget
ggplotly(age_wk)
# Box plots of infant birth weight, one box per final outcome (fill colour).
# na.rm is an argument of the geom, not an aesthetic: inside aes() it only
# created a constant mapping called "na.rm" and never controlled NA handling.
# NOTE(review): both plots below use the same data and mapping, so each one
# shows BOTH outcome groups; only the caption differs. If one plot per
# outcome was intended, the data would need to be filtered first - confirm.
infant_out <- ggplot(
  msc,
  aes(x = "", y = pep_id_birth_weight_grams, fill = final_outcome_result)
) +
  geom_boxplot(na.rm = TRUE) +
  labs(
    y = "Birth weight per outcome",
    caption = "Box plot showing weight distribution of infants with final outcome of HIV infected "
  )
ggplotly(infant_out)
# Same plot again with the caption for the HIV uninfected group;
# infant_out is overwritten.
infant_out <- ggplot(
  msc,
  aes(x = "", y = pep_id_birth_weight_grams, fill = final_outcome_result)
) +
  geom_boxplot(na.rm = TRUE) +
  labs(
    y = "Birth weight per outcome",
    caption = "Box plot showing weight distribution of infants with final outcome of HIV uninfected "
  )
ggplotly(infant_out)
# Cross-tabulation: infant ARV prophylaxis (rows) by final outcome (columns)
table(
  msc[["type_of_avr_prophy_for_baby"]],
  msc[["final_outcome_result"]]
)
HIV Infected HIV Uninfected
AZT + NVP 42 12
NVP 168 378
# Cross-tabulation: delivery mode (rows) by final outcome (columns)
table(
  msc[["pep_id_deliv_type"]],
  msc[["final_outcome_result"]]
)
HIV Infected HIV Uninfected
Assisted vaginal 6 6
Elective cesarean 39 98
Emergency cesarean 12 32
Standard vaginal 153 254
# Drop marital_st, the only column with missing values (85 NAs)
new_msc <- select(new_msc, -marital_st)
# 80/20 train/test split, stratified on the outcome; seeded so the
# partition is reproducible
set.seed(111)
data_split <- new_msc %>%
  initial_split(prop = 0.8, strata = final_outcome_result)
data_split
<Training/Testing/Total>
<480/120/600>
# Extract the two partitions from the split object
m_train <- training(data_split)
m_test <- testing(data_split)
# Recipe: model the outcome on all other columns and rebalance the outcome
# classes with ROSE (themis::step_rose)
reci_m <- m_train %>%
  recipe(final_outcome_result ~ ., data = .) %>%
  step_rose(final_outcome_result)
# Estimate the recipe on the training data, then extract the processed
# TRAINING set (not the test set) for inspection.
# bake(new_data = NULL) is the current replacement for the superseded juice()
# and returns the same retained training data.
reci_prep <- reci_m %>% prep()
reci_juice <- reci_prep %>% bake(new_data = NULL)
# Random forest specification: ranger engine with fixed (untuned)
# hyper-parameters
rf <- rand_forest(
  mode = "classification",
  mtry = 4,
  trees = 1000,
  min_n = 1
) %>%
  set_engine("ranger")
# Workflow bundling the shared recipe with the random forest model
rf_wf <- workflow() %>%
  add_model(rf) %>%
  add_recipe(reci_m)
# Cross-validation folds on the training set (vfold_cv defaults).
# NOTE(review): unlike initial_split() above, no strata is passed here,
# so folds are not stratified on the outcome - confirm this is intended.
set.seed(222)
m_folds <- m_train %>% vfold_cv()
# Metrics computed for every resampled model
ev_metrics <- metric_set(
  accuracy,
  sensitivity,
  specificity,
  precision,
  roc_auc,
  npv
)
# Evaluate the random forest workflow across the CV folds.
# A parallel backend is registered first; it stays registered for the
# resampling calls that follow in this script.
doParallel::registerDoParallel()
set.seed(333)
rf_rs <- rf_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)
# Metrics averaged over the folds
collect_metrics(rf_rs)
# A tibble: 6 × 6
.metric .estimator mean n std_err .config
<chr> <chr> <dbl> <int> <dbl> <chr>
1 accuracy binary 0.733 10 0.0191 Preprocessor1_Model1
2 npv binary 0.743 10 0.0290 Preprocessor1_Model1
3 precision binary 0.729 10 0.0523 Preprocessor1_Model1
4 roc_auc binary 0.767 10 0.0230 Preprocessor1_Model1
5 sensitivity binary 0.407 10 0.0619 Preprocessor1_Model1
6 specificity binary 0.915 10 0.0219 Preprocessor1_Model1
# Logistic regression specification (glm engine)
lr <- logistic_reg() %>%
  set_mode("classification") %>%
  set_engine(engine = "glm")
# Workflow bundling the shared recipe with the logistic regression model
lr_wf <- workflow() %>%
  add_model(lr) %>%
  add_recipe(reci_m)
# Evaluate on the same CV folds and metrics as the other models
set.seed(444)
lr_rs <- lr_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)
# Metrics averaged over the folds
collect_metrics(lr_rs)
# A tibble: 6 × 6
.metric .estimator mean n std_err .config
<chr> <chr> <dbl> <int> <dbl> <chr>
1 accuracy binary 0.740 10 0.0174 Preprocessor1_Model1
2 npv binary 0.837 10 0.0282 Preprocessor1_Model1
3 precision binary 0.602 10 0.0190 Preprocessor1_Model1
4 roc_auc binary 0.778 10 0.0158 Preprocessor1_Model1
5 sensitivity binary 0.754 10 0.0263 Preprocessor1_Model1
6 specificity binary 0.732 10 0.0206 Preprocessor1_Model1
# Plain glm logistic regression on the (un-resampled) training data, used
# for coefficient interpretation.
# With a factor response, the binomial family treats the first level as
# "failure"; the factor levels here are "HIV Infected", "HIV Uninfected",
# so the coefficients describe the log-odds of "HIV Uninfected".
Logistic_m <- glm(
  formula = final_outcome_result ~ .,
  family = "binomial",
  data = m_train
)
summary(Logistic_m)
Call:
glm(formula = final_outcome_result ~ ., family = "binomial",
data = m_train)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.229e+01 9.210e+00 1.335 0.18192
pep_id_sexMale 2.365e-01 2.404e-01 0.984 0.32510
pep_id_deliv_typeElective cesarean 7.328e-01 1.008e+00 0.727 0.46710
pep_id_deliv_typeEmergency cesarean 1.165e+00 1.077e+00 1.082 0.27941
pep_id_deliv_typeStandard vaginal 2.764e-01 9.851e-01 0.281 0.77904
pep_id_gest_age_birth_wks -1.761e-01 2.356e-01 -0.748 0.45469
pep_id_hiv_serostatus_moHIV-1 positive -3.014e+00 1.036e+00 -2.909 0.00363
pep_id_hiv_serostatus_moHIV-2 positive 1.365e+01 1.338e+03 0.010 0.99186
pep_id_birth_weight_grams -1.162e-03 2.306e-04 -5.040 4.65e-07
mothers_recent_viral_load -8.160e-06 9.375e-06 -0.870 0.38408
second_pcr_testPositive -1.811e+01 7.571e+02 -0.024 0.98091
mother_received_intervetion_y_nYes -1.782e+00 1.337e+00 -1.332 0.18285
place_site_of_deliveryOutside Facility -2.486e-01 1.092e+00 -0.228 0.82001
brst_feeding_methodMixed Feeding 1.056e+00 2.556e-01 4.131 3.61e-05
type_of_avr_prophy_for_babyNVP 2.620e+00 1.259e+00 2.081 0.03743
cotrimoxazole_administered_y_nYes 5.235e-02 4.537e-01 0.115 0.90814
age_mo 1.533e-03 1.984e-02 0.077 0.93841
educationSecondary -2.512e-01 3.928e-01 -0.640 0.52244
educationTertiary -1.376e-01 4.223e-01 -0.326 0.74450
(Intercept)
pep_id_sexMale
pep_id_deliv_typeElective cesarean
pep_id_deliv_typeEmergency cesarean
pep_id_deliv_typeStandard vaginal
pep_id_gest_age_birth_wks
pep_id_hiv_serostatus_moHIV-1 positive **
pep_id_hiv_serostatus_moHIV-2 positive
pep_id_birth_weight_grams ***
mothers_recent_viral_load
second_pcr_testPositive
mother_received_intervetion_y_nYes
place_site_of_deliveryOutside Facility
brst_feeding_methodMixed Feeding ***
type_of_avr_prophy_for_babyNVP *
cotrimoxazole_administered_y_nYes
age_mo
educationSecondary
educationTertiary
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 621.55 on 479 degrees of freedom
Residual deviance: 459.08 on 461 degrees of freedom
AIC: 497.08
Number of Fisher Scoring iterations: 16
# Odds ratios with confidence limits from confint(): exponentiate the
# coefficients and their interval bounds together.
# NOTE(review): the fit emits "fitted probabilities numerically 0 or 1" and
# "algorithm did not converge" warnings; intervals for the affected terms
# (very large standard errors in the summary) are likely unreliable.
odds_ratio_table <- cbind(
  Odds_ratio = coef(Logistic_m),
  confint(Logistic_m)
)
exp(odds_ratio_table)
Waiting for profiling to be done...
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: algorithm did not converge
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: algorithm did not converge
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
collapsing to unique 'x' values
Odds_ratio 2.5 % 97.5 %
(Intercept) 2.184062e+05 3.101120e-03 1.794986e+13
pep_id_sexMale 1.266852e+00 7.920845e-01 2.035661e+00
pep_id_deliv_typeElective cesarean 2.080926e+00 2.217325e-01 1.388428e+01
pep_id_deliv_typeEmergency cesarean 3.206970e+00 3.115883e-01 2.513343e+01
pep_id_deliv_typeStandard vaginal 1.318363e+00 1.454594e-01 8.402283e+00
pep_id_gest_age_birth_wks 8.385166e-01 5.269806e-01 1.333996e+00
pep_id_hiv_serostatus_moHIV-1 positive 4.907288e-02 2.701280e-03 2.421520e-01
pep_id_hiv_serostatus_moHIV-2 positive 8.437007e+05 7.306479e+14 2.889471e+235
pep_id_birth_weight_grams 9.988382e-01 9.983719e-01 9.992768e-01
mothers_recent_viral_load 9.999918e-01 9.999625e-01 1.000006e+00
second_pcr_testPositive 1.359223e-08 6.301266e-140 1.141085e-42
mother_received_intervetion_y_nYes 1.683693e-01 6.657543e-03 1.881591e+00
place_site_of_deliveryOutside Facility 7.799266e-01 8.026426e-02 6.668719e+00
brst_feeding_methodMixed Feeding 2.874948e+00 1.758467e+00 4.800733e+00
type_of_avr_prophy_for_babyNVP 1.374059e+01 1.525412e+00 3.265184e+02
cotrimoxazole_administered_y_nYes 1.053746e+00 4.239856e-01 2.543926e+00
age_mo 1.001534e+00 9.632457e-01 1.041533e+00
educationSecondary 7.778291e-01 3.494402e-01 1.647425e+00
educationTertiary 8.714253e-01 3.717511e-01 1.965514e+00
# Decision tree specification (rpart engine, default hyper-parameters)
dt <- decision_tree() %>%
  set_mode("classification") %>%
  set_engine(engine = "rpart")
# Workflow bundling the shared recipe with the decision tree model
dt_wf <- workflow() %>%
  add_model(dt) %>%
  add_recipe(reci_m)
# Evaluate on the same CV folds and metrics as the other models
set.seed(555)
dt_rs <- dt_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)
# Metrics averaged over the folds
collect_metrics(dt_rs)
# A tibble: 6 × 6
.metric .estimator mean n std_err .config
<chr> <chr> <dbl> <int> <dbl> <chr>
1 accuracy binary 0.7 10 0.0255 Preprocessor1_Model1
2 npv binary 0.709 10 0.0327 Preprocessor1_Model1
3 precision binary 0.722 10 0.0635 Preprocessor1_Model1
4 roc_auc binary 0.637 10 0.0324 Preprocessor1_Model1
5 sensitivity binary 0.278 10 0.0659 Preprocessor1_Model1
6 specificity binary 0.940 10 0.0201 Preprocessor1_Model1
# Naive Bayes specification (klaR engine, default hyper-parameters)
nb <- naive_Bayes() %>%
  set_mode("classification") %>%
  set_engine(engine = "klaR")
# Workflow bundling the shared recipe with the naive Bayes model
nb_wf <- workflow() %>%
  add_model(nb) %>%
  add_recipe(reci_m)
# Evaluate on the same CV folds and metrics as the other models
set.seed(777)
nb_rs <- nb_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)
# Metrics averaged over the folds
collect_metrics(nb_rs)
# A tibble: 6 × 6
.metric .estimator mean n std_err .config
<chr> <chr> <dbl> <int> <dbl> <chr>
1 accuracy binary 0.721 10 0.0206 Preprocessor1_Model1
2 npv binary 0.708 10 0.0244 Preprocessor1_Model1
3 precision binary 0.822 10 0.0478 Preprocessor1_Model1
4 roc_auc binary 0.804 10 0.0151 Preprocessor1_Model1
5 sensitivity binary 0.259 10 0.0197 Preprocessor1_Model1
6 specificity binary 0.972 10 0.00712 Preprocessor1_Model1
# Conditional inference tree and forest.
# NOTE(review): both are fitted on the full new_msc data (all 600 rows),
# not on m_train, and no seed is set before cforest() - confirm intended.
x <- ctree(formula = final_outcome_result ~ ., data = new_msc)
plot(x, type = "extended")
# The fitted forest is only printed, not stored
cforest(
  formula = final_outcome_result ~ .,
  data = new_msc,
  controls = cforest_control(mtry = 4, mincriterion = 0)
)
Random Forest using Conditional Inference Trees
Number of trees: 500
Response: final_outcome_result
Inputs: pep_id_sex, pep_id_deliv_type, pep_id_gest_age_birth_wks, pep_id_hiv_serostatus_mo, pep_id_birth_weight_grams, mothers_recent_viral_load, second_pcr_test, mother_received_intervetion_y_n, place_site_of_delivery, brst_feeding_method, type_of_avr_prophy_for_baby, cotrimoxazole_administered_y_n, age_mo, education
Number of observations: 600