Load packages

The following R packages were used for this analysis:

  1. tidymodels
  2. tidyverse
  3. plotly
  4. pacman
  5. skimr
  6. ggplot2
  7. forcats
  8. themis
  9. readxl
  10. janitor
  11. klaR
  12. tune
  13. discrim
  14. vip
  15. AMR
  16. party
# Import the study data; cells holding the string "NA" are read as missing
msc <- read_csv(
  file = "mscph.csv",
  col_types = cols(),
  na = "NA"
)
# Dimensions (rows, columns) of the raw data
dim(msc)
[1] 600  17
msc %>% count(final_outcome_result)
# A tibble: 2 × 2
  final_outcome_result     n
  <chr>                <int>
1 HIV Infected           210
2 HIV Uninfected         390
# Exclude the first PCR test result, then profile the remaining columns
new_msc <- select(msc, -first_pcr_test_result)
skimr::skim(new_msc)
Data summary
Name new_msc
Number of rows 600
Number of columns 16
_______________________
Column type frequency:
character 12
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
pep_id_sex 0 1.00 4 6 0 2 0
pep_id_deliv_type 0 1.00 16 18 0 4 0
pep_id_hiv_serostatus_mo 0 1.00 14 22 0 3 0
second_pcr_test 0 1.00 8 8 0 2 0
final_outcome_result 0 1.00 12 14 0 2 0
mother_received_intervetion_y_n 0 1.00 2 3 0 2 0
place_site_of_delivery 0 1.00 15 16 0 2 0
brst_feeding_method 0 1.00 9 13 0 2 0
type_of_avr_prophy_for_baby 0 1.00 3 9 0 2 0
cotrimoxazole_administered_y_n 0 1.00 2 3 0 2 0
education 0 1.00 7 9 0 3 0
marital_st 85 0.86 6 9 0 5 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
pep_id_gest_age_birth_wks 0 1 38.05 0.51 36 38 38 38 40 ▁▁▇▁▁
pep_id_birth_weight_grams 0 1 3043.15 581.61 2000 2600 3000 3400 5700 ▆▇▃▁▁
mothers_recent_viral_load 0 1 3620.24 21581.18 0 20 20 45 234542 ▇▁▁▁▁
age_mo 0 1 37.38 6.06 22 33 38 41 69 ▂▇▃▁▁

Exploratory analysis

# Change variables from character to factor.
# Every character column in new_msc is categorical (the skim() summary above
# lists exactly the twelve columns that were previously converted one by one),
# so they are selected by type and converted in a single step.
new_msc <- new_msc %>%
  mutate(across(where(is.character), factor))

# Variable sex
new_msc %>% count(pep_id_sex)
## # A tibble: 2 × 2
##   pep_id_sex     n
##   <fct>      <int>
## 1 Female       289
## 2 Male         311
# HIV status of mother
new_msc %>% count(pep_id_hiv_serostatus_mo)
## # A tibble: 3 × 2
##   pep_id_hiv_serostatus_mo     n
##   <fct>                    <int>
## 1 HIV-1 & HIV-2 positive      69
## 2 HIV-1 positive             521
## 3 HIV-2 positive              10
# Final outcome of infants
new_msc %>% count(final_outcome_result)
## # A tibble: 2 × 2
##   final_outcome_result     n
##   <fct>                <int>
## 1 HIV Infected           210
## 2 HIV Uninfected         390
# Mode of delivery
new_msc %>%  count(pep_id_deliv_type)
## # A tibble: 4 × 2
##   pep_id_deliv_type      n
##   <fct>              <int>
## 1 Assisted vaginal      12
## 2 Elective cesarean    137
## 3 Emergency cesarean    44
## 4 Standard vaginal     407
# Second PCR test result of infants
new_msc %>% count(second_pcr_test)
## # A tibble: 2 × 2
##   second_pcr_test     n
##   <fct>           <int>
## 1 Negative          577
## 2 Positive           23
# Mothers treatment
new_msc %>% count(mother_received_intervetion_y_n)
## # A tibble: 2 × 2
##   mother_received_intervetion_y_n     n
##   <fct>                           <int>
## 1 No                                 34
## 2 Yes                               566
# Delivery site
new_msc %>% count(place_site_of_delivery)
## # A tibble: 2 × 2
##   place_site_of_delivery     n
##   <fct>                  <int>
## 1 Inside Facility          571
## 2 Outside Facility          29
# Mode of feeding
new_msc %>% count(brst_feeding_method)
## # A tibble: 2 × 2
##   brst_feeding_method     n
##   <fct>               <int>
## 1 Exclusive             332
## 2 Mixed Feeding         268
# ARV prophylaxis for infants
new_msc %>% count(type_of_avr_prophy_for_baby)
## # A tibble: 2 × 2
##   type_of_avr_prophy_for_baby     n
##   <fct>                       <int>
## 1 AZT + NVP                      54
## 2 NVP                           546
# Antibiotic therapy for mothers
new_msc %>% count(cotrimoxazole_administered_y_n)
## # A tibble: 2 × 2
##   cotrimoxazole_administered_y_n     n
##   <fct>                          <int>
## 1 No                                44
## 2 Yes                              556
# Mothers education level
new_msc %>% count(education)
## # A tibble: 3 × 2
##   education     n
##   <fct>     <int>
## 1 Primary      66
## 2 Secondary   367
## 3 Tertiary    167
# Marital status
new_msc %>% count(marital_st)
## # A tibble: 6 × 2
##   marital_st     n
##   <fct>      <int>
## 1 Divorced      14
## 2 Married      285
## 3 Separated     21
## 4 Single       191
## 5 Widowed        4
## 6 <NA>          85
# Infant sex
new_msc %>% count(pep_id_sex)
## # A tibble: 2 × 2
##   pep_id_sex     n
##   <fct>      <int>
## 1 Female       289
## 2 Male         311
# Mothers' ages binned into ten-year groups
mothers <- msc[["age_mo"]]
group_mothers <- age_groups(x = mothers, split_at = "tens")
table(group_mothers)
## group_mothers
##   0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99  100+ 
##     0     0    50   323   217     4     6     0     0     0     0

Graphics

# Histogram of mothers' most recent viral load
ggplot(msc, aes(x = mothers_recent_viral_load)) +
  geom_histogram(fill = "red") +
  labs(
    x = "Viral load of mothers (copies/ml)",
    caption = "Histogram of mothers viral load. Most of the mothers have very low viral load but some have very high viral load"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of infants' gestational age at birth
ggplot(msc, aes(x = pep_id_gest_age_birth_wks)) +
  geom_histogram(fill = "purple") +
  labs(
    x = "Gestational age of infants (weeks)",
    caption = "Histogram of infants gestational age at birth. The average age was 38 weeks"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of infants' birth weight
ggplot(msc, aes(x = pep_id_birth_weight_grams)) +
  geom_histogram(fill = "violet") +
  labs(
    x = "Weight of infants (grams)",
    caption = "Histogram showing the distribution of infants weight at birth. The majority of infants weighed between 2 to 4.5kg"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of mothers' age
ggplot(msc, aes(x = age_mo)) +
  geom_histogram(fill = "yellow") +
  labs(
    x = "Age of mothers (years)",
    caption = "Distribution of mothers age. The histogram is multimodal"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Viral load boxplot
# na.rm is an argument of the geom, not an aesthetic, so it is passed to
# geom_boxplot() instead of aes().
vload_spread <- ggplot(msc, aes(x = "", y = mothers_recent_viral_load)) +
  geom_boxplot(fill = "maroon", na.rm = TRUE) +
  labs(
    y = "Viral load (copies/ml)",
    caption = "Box plot of mothers viral load showing the spiking up of some of the viral load"
  )
ggplotly(vload_spread)
# Box plot for infant weight
# na.rm is an argument of the geom, not an aesthetic, so it is passed to
# geom_boxplot() instead of aes().
infant_wt <- ggplot(msc, aes(x = "", y = pep_id_birth_weight_grams)) +
  geom_boxplot(fill = "navy", na.rm = TRUE) +
  labs(
    y = "Birth weight (grams)",
    caption = "Box plot of infants weight. Maximum weight was 5.7kg and minimum 2kg"
  )
ggplotly(infant_wt)
# Box plot of gestational age at birth
# na.rm is an argument of the geom, not an aesthetic, so it is passed to
# geom_boxplot() instead of aes().
age_wk <- ggplot(msc, aes(x = "", y = pep_id_gest_age_birth_wks)) +
  geom_boxplot(fill = "limegreen", na.rm = TRUE) +
  labs(
    y = "Gestational age (weeks)",
    caption = "Box plot of gestational age of infants at birth. Minimum 36 weeks, maximum 40 weeks"
  )
ggplotly(age_wk)
# Box plots of infant weight by outcome
# na.rm is an argument of the geom, not an aesthetic, so it is passed to
# geom_boxplot() instead of aes().
# NOTE(review): the two plots below use identical data and mappings (both
# outcome groups are drawn in each); only the caption differs. Confirm whether
# one plot per outcome group was intended.
infant_out <- ggplot(
  msc,
  aes(x = "", y = pep_id_birth_weight_grams, fill = final_outcome_result)
) +
  geom_boxplot(na.rm = TRUE) +
  labs(
    y = "Birth weight per outcome",
    caption = "Box plot showing weight distribution of infants with final outcome of HIV infected "
  )
ggplotly(infant_out)
# Same plot, captioned for the HIV uninfected group
infant_out <- ggplot(
  msc,
  aes(x = "", y = pep_id_birth_weight_grams, fill = final_outcome_result)
) +
  geom_boxplot(na.rm = TRUE) +
  labs(
    y = "Birth weight per outcome",
    caption = "Box plot showing weight distribution of infants with final outcome of HIV uninfected "
  )
ggplotly(infant_out)
# Cross-tabulate infant ARV prophylaxis against final outcome
table(msc[["type_of_avr_prophy_for_baby"]], msc[["final_outcome_result"]])
           
            HIV Infected HIV Uninfected
  AZT + NVP           42             12
  NVP                168            378
# Cross-tabulate delivery mode against final outcome
table(msc[["pep_id_deliv_type"]], msc[["final_outcome_result"]])
                    
                     HIV Infected HIV Uninfected
  Assisted vaginal              6              6
  Elective cesarean            39             98
  Emergency cesarean           12             32
  Standard vaginal            153            254

Modeling

# marital_st is the only column with missing values, so it is dropped
new_msc <- select(new_msc, -marital_st)

# 80/20 train/test split, stratified on the outcome
set.seed(111)
data_split <- initial_split(
  data = new_msc,
  prop = 0.8,
  strata = final_outcome_result
)
data_split
<Training/Testing/Total>
<480/120/600>
# Extract the training and testing partitions from the split
m_train <- training(data_split)
m_test <- testing(data_split)

# Recipe: model the final outcome on all other columns of the training set
# and apply step_rose() to the outcome
reci_m <- recipe(final_outcome_result ~ ., data = m_train) %>%
  step_rose(final_outcome_result)

# Estimate the recipe on the training data and extract the processed
# training set. bake(new_data = NULL) returns the same data as the superseded
# juice(); note this is the processed training data, not the test set.
reci_prep <- prep(reci_m)
reci_juice <- bake(reci_prep, new_data = NULL)

Random forest

# Random forest specification: ranger engine, classification mode,
# fixed hyperparameters (no tuning)
rf <- rand_forest(mtry = 4, trees = 1000, min_n = 1) %>%
  set_engine("ranger") %>%
  set_mode("classification")

# Bundle the shared recipe with the random forest model
rf_wf <- workflow() %>%
  add_model(rf) %>%
  add_recipe(reci_m)

# Cross-validation folds built from the training set (vfold_cv() defaults)
set.seed(222)
m_folds <- vfold_cv(data = m_train)

# Metrics reported for every model
ev_metrics <- metric_set(
  accuracy,
  sensitivity,
  specificity,
  precision,
  roc_auc,
  npv
)

# Fit the random forest workflow on each fold and summarise the metrics
doParallel::registerDoParallel()
set.seed(333)
rf_rs <- rf_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)

collect_metrics(rf_rs)
# A tibble: 6 × 6
  .metric     .estimator  mean     n std_err .config             
  <chr>       <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy    binary     0.733    10  0.0191 Preprocessor1_Model1
2 npv         binary     0.743    10  0.0290 Preprocessor1_Model1
3 precision   binary     0.729    10  0.0523 Preprocessor1_Model1
4 roc_auc     binary     0.767    10  0.0230 Preprocessor1_Model1
5 sensitivity binary     0.407    10  0.0619 Preprocessor1_Model1
6 specificity binary     0.915    10  0.0219 Preprocessor1_Model1

Logistic regression

# Logistic regression specification using the glm engine
lr <- logistic_reg() %>%
  set_mode("classification") %>%
  set_engine("glm")

# Workflow: shared recipe plus the logistic regression model
lr_wf <- workflow() %>%
  add_model(lr) %>%
  add_recipe(reci_m)

# Resampled evaluation on the same folds and metrics as the random forest
set.seed(444)
lr_rs <- lr_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)

collect_metrics(lr_rs)
# A tibble: 6 × 6
  .metric     .estimator  mean     n std_err .config             
  <chr>       <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy    binary     0.740    10  0.0174 Preprocessor1_Model1
2 npv         binary     0.837    10  0.0282 Preprocessor1_Model1
3 precision   binary     0.602    10  0.0190 Preprocessor1_Model1
4 roc_auc     binary     0.778    10  0.0158 Preprocessor1_Model1
5 sensitivity binary     0.754    10  0.0263 Preprocessor1_Model1
6 specificity binary     0.732    10  0.0206 Preprocessor1_Model1
# Logistic regression fitted directly on the training set (without the
# recipe) to obtain coefficient estimates.
# With a factor response, glm() treats the first level as failure, so the
# coefficients describe the log-odds of the second level of
# final_outcome_result ("HIV Uninfected" in the counts above).
Logistic_m <- glm(
  formula = final_outcome_result ~ .,
  family = "binomial",
  data = m_train
)
summary(Logistic_m)

Call:
glm(formula = final_outcome_result ~ ., family = "binomial", 
    data = m_train)

Coefficients:
                                         Estimate Std. Error z value Pr(>|z|)
(Intercept)                             1.229e+01  9.210e+00   1.335  0.18192
pep_id_sexMale                          2.365e-01  2.404e-01   0.984  0.32510
pep_id_deliv_typeElective cesarean      7.328e-01  1.008e+00   0.727  0.46710
pep_id_deliv_typeEmergency cesarean     1.165e+00  1.077e+00   1.082  0.27941
pep_id_deliv_typeStandard vaginal       2.764e-01  9.851e-01   0.281  0.77904
pep_id_gest_age_birth_wks              -1.761e-01  2.356e-01  -0.748  0.45469
pep_id_hiv_serostatus_moHIV-1 positive -3.014e+00  1.036e+00  -2.909  0.00363
pep_id_hiv_serostatus_moHIV-2 positive  1.365e+01  1.338e+03   0.010  0.99186
pep_id_birth_weight_grams              -1.162e-03  2.306e-04  -5.040 4.65e-07
mothers_recent_viral_load              -8.160e-06  9.375e-06  -0.870  0.38408
second_pcr_testPositive                -1.811e+01  7.571e+02  -0.024  0.98091
mother_received_intervetion_y_nYes     -1.782e+00  1.337e+00  -1.332  0.18285
place_site_of_deliveryOutside Facility -2.486e-01  1.092e+00  -0.228  0.82001
brst_feeding_methodMixed Feeding        1.056e+00  2.556e-01   4.131 3.61e-05
type_of_avr_prophy_for_babyNVP          2.620e+00  1.259e+00   2.081  0.03743
cotrimoxazole_administered_y_nYes       5.235e-02  4.537e-01   0.115  0.90814
age_mo                                  1.533e-03  1.984e-02   0.077  0.93841
educationSecondary                     -2.512e-01  3.928e-01  -0.640  0.52244
educationTertiary                      -1.376e-01  4.223e-01  -0.326  0.74450
                                          
(Intercept)                               
pep_id_sexMale                            
pep_id_deliv_typeElective cesarean        
pep_id_deliv_typeEmergency cesarean       
pep_id_deliv_typeStandard vaginal         
pep_id_gest_age_birth_wks                 
pep_id_hiv_serostatus_moHIV-1 positive ** 
pep_id_hiv_serostatus_moHIV-2 positive    
pep_id_birth_weight_grams              ***
mothers_recent_viral_load                 
second_pcr_testPositive                   
mother_received_intervetion_y_nYes        
place_site_of_deliveryOutside Facility    
brst_feeding_methodMixed Feeding       ***
type_of_avr_prophy_for_babyNVP         *  
cotrimoxazole_administered_y_nYes         
age_mo                                    
educationSecondary                        
educationTertiary                         
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 621.55  on 479  degrees of freedom
Residual deviance: 459.08  on 461  degrees of freedom
AIC: 497.08

Number of Fisher Scoring iterations: 16
# Odds ratios with the confidence limits returned by confint(), both
# exponentiated from the log-odds scale
odds_ratio_est <- coef(Logistic_m)
odds_ratio_ci <- confint(Logistic_m)
exp(cbind(Odds_ratio = odds_ratio_est, odds_ratio_ci))
Waiting for profiling to be done...
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: algorithm did not converge
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: algorithm did not converge
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
collapsing to unique 'x' values
                                         Odds_ratio         2.5 %        97.5 %
(Intercept)                            2.184062e+05  3.101120e-03  1.794986e+13
pep_id_sexMale                         1.266852e+00  7.920845e-01  2.035661e+00
pep_id_deliv_typeElective cesarean     2.080926e+00  2.217325e-01  1.388428e+01
pep_id_deliv_typeEmergency cesarean    3.206970e+00  3.115883e-01  2.513343e+01
pep_id_deliv_typeStandard vaginal      1.318363e+00  1.454594e-01  8.402283e+00
pep_id_gest_age_birth_wks              8.385166e-01  5.269806e-01  1.333996e+00
pep_id_hiv_serostatus_moHIV-1 positive 4.907288e-02  2.701280e-03  2.421520e-01
pep_id_hiv_serostatus_moHIV-2 positive 8.437007e+05  7.306479e+14 2.889471e+235
pep_id_birth_weight_grams              9.988382e-01  9.983719e-01  9.992768e-01
mothers_recent_viral_load              9.999918e-01  9.999625e-01  1.000006e+00
second_pcr_testPositive                1.359223e-08 6.301266e-140  1.141085e-42
mother_received_intervetion_y_nYes     1.683693e-01  6.657543e-03  1.881591e+00
place_site_of_deliveryOutside Facility 7.799266e-01  8.026426e-02  6.668719e+00
brst_feeding_methodMixed Feeding       2.874948e+00  1.758467e+00  4.800733e+00
type_of_avr_prophy_for_babyNVP         1.374059e+01  1.525412e+00  3.265184e+02
cotrimoxazole_administered_y_nYes      1.053746e+00  4.239856e-01  2.543926e+00
age_mo                                 1.001534e+00  9.632457e-01  1.041533e+00
educationSecondary                     7.778291e-01  3.494402e-01  1.647425e+00
educationTertiary                      8.714253e-01  3.717511e-01  1.965514e+00

Decision trees

# Decision tree specification using the rpart engine
dt <- decision_tree() %>%
  set_mode("classification") %>%
  set_engine("rpart")

# Workflow: shared recipe plus the decision tree model
dt_wf <- workflow() %>%
  add_model(dt) %>%
  add_recipe(reci_m)

# Resampled evaluation on the shared folds and metric set
set.seed(555)
dt_rs <- dt_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)

collect_metrics(dt_rs)
# A tibble: 6 × 6
  .metric     .estimator  mean     n std_err .config             
  <chr>       <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy    binary     0.7      10  0.0255 Preprocessor1_Model1
2 npv         binary     0.709    10  0.0327 Preprocessor1_Model1
3 precision   binary     0.722    10  0.0635 Preprocessor1_Model1
4 roc_auc     binary     0.637    10  0.0324 Preprocessor1_Model1
5 sensitivity binary     0.278    10  0.0659 Preprocessor1_Model1
6 specificity binary     0.940    10  0.0201 Preprocessor1_Model1

Naive Bayes

# Naive Bayes specification using the klaR engine
nb <- naive_Bayes() %>%
  set_mode("classification") %>%
  set_engine("klaR")

# Workflow: shared recipe plus the naive Bayes model
nb_wf <- workflow() %>%
  add_model(nb) %>%
  add_recipe(reci_m)

# Resampled evaluation on the shared folds and metric set
set.seed(777)
nb_rs <- nb_wf %>%
  fit_resamples(resamples = m_folds, metrics = ev_metrics)

collect_metrics(nb_rs)
# A tibble: 6 × 6
  .metric     .estimator  mean     n std_err .config             
  <chr>       <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy    binary     0.721    10 0.0206  Preprocessor1_Model1
2 npv         binary     0.708    10 0.0244  Preprocessor1_Model1
3 precision   binary     0.822    10 0.0478  Preprocessor1_Model1
4 roc_auc     binary     0.804    10 0.0151  Preprocessor1_Model1
5 sensitivity binary     0.259    10 0.0197  Preprocessor1_Model1
6 specificity binary     0.972    10 0.00712 Preprocessor1_Model1
# Conditional inference tree fitted on the full data set new_msc
# (not only the training partition)
x <- ctree(formula = final_outcome_result ~ ., data = new_msc)
plot(x, type = "extended")

# Conditional inference forest on the same data; the fit is printed, not stored
cforest(
  formula = final_outcome_result ~ .,
  data = new_msc,
  controls = cforest_control(mtry = 4, mincriterion = 0)
)

     Random Forest using Conditional Inference Trees

Number of trees:  500 

Response:  final_outcome_result 
Inputs:  pep_id_sex, pep_id_deliv_type, pep_id_gest_age_birth_wks, pep_id_hiv_serostatus_mo, pep_id_birth_weight_grams, mothers_recent_viral_load, second_pcr_test, mother_received_intervetion_y_n, place_site_of_delivery, brst_feeding_method, type_of_avr_prophy_for_baby, cotrimoxazole_administered_y_n, age_mo, education 
Number of observations:  600