Stage 1: Background & Research Questions

Load in data

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the myocardial infarction complications data set into a data frame.
# NOTE(review): the path below is absolute and machine-specific, so the
# script only runs where this exact directory exists.
mi_comp <- read.csv(
  file = 'D:/NCSU/Summer2022/SIBS/SIBS_HackAThon/2022 Hack-a-Thon Data-20220708/Myocardial infarction complications Database.csv'
)

# Print per-column summary statistics (quartiles, mean, and NA counts).
summary(object = mi_comp)
##        ID              AGE             SEX            INF_ANAM     
##  Min.   :   1.0   Min.   :26.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 425.8   1st Qu.:54.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median : 850.5   Median :63.00   Median :1.0000   Median :0.0000  
##  Mean   : 850.5   Mean   :61.86   Mean   :0.6265   Mean   :0.5548  
##  3rd Qu.:1275.2   3rd Qu.:70.00   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1700.0   Max.   :92.00   Max.   :1.0000   Max.   :3.0000  
##                   NA's   :8                        NA's   :4       
##    STENOK_AN       FK_STENOK        IBS_POST        IBS_NASL    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :1.000   Median :2.000   Median :1.000   Median :0.000  
##  Mean   :2.316   Mean   :1.205   Mean   :1.161   Mean   :0.375  
##  3rd Qu.:5.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :6.000   Max.   :4.000   Max.   :2.000   Max.   :1.000  
##  NA's   :106     NA's   :73      NA's   :51      NA's   :1628   
##        GB          SIM_GIPERT         DLIT_AG         ZSN_A       
##  Min.   :0.000   Min.   :0.00000   Min.   :0.00   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.00   1st Qu.:0.0000  
##  Median :2.000   Median :0.00000   Median :3.00   Median :0.0000  
##  Mean   :1.393   Mean   :0.03369   Mean   :3.34   Mean   :0.1944  
##  3rd Qu.:2.000   3rd Qu.:0.00000   3rd Qu.:7.00   3rd Qu.:0.0000  
##  Max.   :3.000   Max.   :1.00000   Max.   :7.00   Max.   :4.0000  
##  NA's   :9       NA's   :8         NA's   :248    NA's   :54      
##      nr_11             nr_01              nr_02             nr_03        
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.02501   Mean   :0.002382   Mean   :0.01132   Mean   :0.02085  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.000000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :21        NA's   :21         NA's   :21        NA's   :21       
##      nr_04             nr_07              nr_08              np_01         
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.000000   Median :0.000000   Median :0.000000  
##  Mean   :0.01727   Mean   :0.000596   Mean   :0.002382   Mean   :0.001189  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.000000   Max.   :1.000000   Max.   :1.000000  
##  NA's   :21        NA's   :21         NA's   :21         NA's   :18        
##      np_04              np_05             np_07              np_08         
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.00000   Median :0.000000   Median :0.000000  
##  Mean   :0.001784   Mean   :0.00654   Mean   :0.000595   Mean   :0.003567  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.000000   Max.   :1.00000   Max.   :1.000000   Max.   :1.000000  
##  NA's   :18         NA's   :18        NA's   :18         NA's   :18        
##      np_09              np_10            endocr_01       endocr_02      
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.000000   Median :0.000   Median :0.00000  
##  Mean   :0.001189   Mean   :0.001784   Mean   :0.135   Mean   :0.02485  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000   3rd Qu.:0.00000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.000   Max.   :1.00000  
##  NA's   :18         NA's   :18         NA's   :11      NA's   :10       
##    endocr_03          zab_leg_01        zab_leg_02        zab_leg_03     
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.007692   Mean   :0.07915   Mean   :0.07147   Mean   :0.02185  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.000000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :10         NA's   :7         NA's   :7         NA's   :7        
##    zab_leg_04         zab_leg_06        S_AD_KBRIG      D_AD_KBRIG    
##  Min.   :0.000000   Min.   :0.00000   Min.   :  0.0   Min.   :  0.00  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:120.0   1st Qu.: 70.00  
##  Median :0.000000   Median :0.00000   Median :140.0   Median : 80.00  
##  Mean   :0.005316   Mean   :0.01299   Mean   :136.9   Mean   : 81.39  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:160.0   3rd Qu.: 90.00  
##  Max.   :1.000000   Max.   :1.00000   Max.   :260.0   Max.   :190.00  
##  NA's   :7          NA's   :7         NA's   :1076    NA's   :1076    
##    S_AD_ORIT       D_AD_ORIT         O_L_POST         K_SH_POST     
##  Min.   :  0.0   Min.   :  0.00   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:120.0   1st Qu.: 80.00   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :130.0   Median : 80.00   Median :0.00000   Median :0.0000  
##  Mean   :134.6   Mean   : 82.75   Mean   :0.06517   Mean   :0.0273  
##  3rd Qu.:150.0   3rd Qu.: 90.00   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :260.0   Max.   :190.00   Max.   :1.00000   Max.   :1.0000  
##  NA's   :267     NA's   :267      NA's   :12        NA's   :15      
##    MP_TP_POST         SVT_POST           GT_POST           FIB_G_POST      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.000000   Median :0.000000   Median :0.000000  
##  Mean   :0.06762   Mean   :0.004739   Mean   :0.004739   Mean   :0.008886  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.000000   Max.   :1.000000   Max.   :1.000000  
##  NA's   :14        NA's   :12         NA's   :12         NA's   :12        
##      ant_im          lat_im           inf_im         post_im      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :1.000   Median :1.0000   Median :0.000   Median :0.0000  
##  Mean   :1.571   Mean   :0.8617   Mean   :1.015   Mean   :0.2592  
##  3rd Qu.:4.000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :4.000   Max.   :4.0000   Max.   :4.000   Max.   :4.0000  
##  NA's   :83      NA's   :80       NA's   :80      NA's   :72      
##     IM_PG_P        ritm_ecg_p_01    ritm_ecg_p_02     ritm_ecg_p_04    
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :1.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.02943   Mean   :0.6647   Mean   :0.06137   Mean   :0.01486  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :1         NA's   :152      NA's   :152       NA's   :152      
##  ritm_ecg_p_06     ritm_ecg_p_07   ritm_ecg_p_08      n_r_ecg_p_01    
##  Min.   :0.00000   Min.   :0.000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000   Median :0.00000   Median :0.00000  
##  Mean   :0.00065   Mean   :0.228   Mean   :0.02972   Mean   :0.03659  
##  3rd Qu.:0.00000   3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :152       NA's   :152     NA's   :152       NA's   :115      
##   n_r_ecg_p_02      n_r_ecg_p_03     n_r_ecg_p_04      n_r_ecg_p_05    
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.00505   Mean   :0.1287   Mean   :0.04353   Mean   :0.04416  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :115       NA's   :115      NA's   :115       NA's   :115      
##   n_r_ecg_p_06      n_r_ecg_p_08      n_r_ecg_p_09      n_r_ecg_p_10    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02019   Mean   :0.00252   Mean   :0.00126   Mean   :0.00126  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :115       NA's   :115       NA's   :115       NA's   :115      
##   n_p_ecg_p_01      n_p_ecg_p_03      n_p_ecg_p_04      n_p_ecg_p_05    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.00126   Mean   :0.02019   Mean   :0.00315   Mean   :0.00126  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :115       NA's   :115       NA's   :115       NA's   :115      
##   n_p_ecg_p_06      n_p_ecg_p_07      n_p_ecg_p_08      n_p_ecg_p_09    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01703   Mean   :0.06435   Mean   :0.00442   Mean   :0.00631  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  NA's   :115       NA's   :115       NA's   :115       NA's   :115      
##   n_p_ecg_p_10      n_p_ecg_p_11      n_p_ecg_p_12      fibr_ter_01      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.02145   Mean   :0.01767   Mean   :0.04921   Mean   :0.007692  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.000000  
##  NA's   :115       NA's   :115       NA's   :115       NA's   :10        
##   fibr_ter_02        fibr_ter_03       fibr_ter_05        fibr_ter_06      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.00000   Median :0.000000   Median :0.000000  
##  Mean   :0.009467   Mean   :0.04024   Mean   :0.002367   Mean   :0.005325  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.000000   Max.   :1.00000   Max.   :1.000000   Max.   :1.000000  
##  NA's   :10         NA's   :10        NA's   :10         NA's   :10        
##   fibr_ter_07       fibr_ter_08           GIPO_K          K_BLOOD     
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.0000   Min.   :2.300  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:3.700  
##  Median :0.00000   Median :0.000000   Median :0.0000   Median :4.100  
##  Mean   :0.00355   Mean   :0.001183   Mean   :0.4012   Mean   :4.191  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:1.0000   3rd Qu.:4.600  
##  Max.   :1.00000   Max.   :1.000000   Max.   :1.0000   Max.   :8.200  
##  NA's   :10        NA's   :10         NA's   :369      NA's   :371    
##     GIPER_NA         NA_BLOOD       ALT_BLOOD        AST_BLOOD     
##  Min.   :0.0000   Min.   :117.0   Min.   :0.0300   Min.   :0.0400  
##  1st Qu.:0.0000   1st Qu.:133.0   1st Qu.:0.2300   1st Qu.:0.1500  
##  Median :0.0000   Median :136.0   Median :0.3800   Median :0.2200  
##  Mean   :0.0226   Mean   :136.6   Mean   :0.4814   Mean   :0.2637  
##  3rd Qu.:0.0000   3rd Qu.:140.0   3rd Qu.:0.6100   3rd Qu.:0.3300  
##  Max.   :1.0000   Max.   :169.0   Max.   :3.0000   Max.   :2.1500  
##  NA's   :375      NA's   :375     NA's   :284      NA's   :285     
##    KFK_BLOOD       L_BLOOD            ROE            TIME_B_S    
##  Min.   :1.20   Min.   : 2.000   Min.   :  1.00   Min.   :1.000  
##  1st Qu.:1.35   1st Qu.: 6.400   1st Qu.:  5.00   1st Qu.:2.000  
##  Median :1.60   Median : 8.000   Median : 10.00   Median :4.000  
##  Mean   :2.00   Mean   : 8.783   Mean   : 13.44   Mean   :4.684  
##  3rd Qu.:2.25   3rd Qu.:10.450   3rd Qu.: 18.00   3rd Qu.:7.000  
##  Max.   :3.60   Max.   :27.900   Max.   :140.00   Max.   :9.000  
##  NA's   :1696   NA's   :125      NA's   :203      NA's   :126    
##     R_AB_1_n         R_AB_2_n         R_AB_3_n           NA_KB       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.00000   Median :1.0000  
##  Mean   :0.3159   Mean   :0.1407   Mean   :0.07761   Mean   :0.5925  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :3.0000   Max.   :3.0000   Max.   :3.00000   Max.   :1.0000  
##  NA's   :16       NA's   :108      NA's   :128       NA's   :657     
##    NOT_NA_KB          LID_KB           NITR_S          NA_R_1_n    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.6913   Mean   :0.3871   Mean   :0.1153   Mean   :0.485  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :4.000  
##  NA's   :686      NA's   :677      NA's   :9        NA's   :5      
##     NA_R_2_n          NA_R_3_n         NOT_NA_1_n      NOT_NA_2_n    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.000   Median :0.0000  
##  Mean   :0.09422   Mean   :0.05864   Mean   :0.332   Mean   :0.1132  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:0.0000  
##  Max.   :3.00000   Max.   :2.00000   Max.   :4.000   Max.   :3.0000  
##  NA's   :108       NA's   :131       NA's   :10      NA's   :110     
##    NOT_NA_3_n         LID_S_n         B_BLOK_S_n       ANT_CA_S_n    
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.08477   Mean   :0.2834   Mean   :0.1273   Mean   :0.6669  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :2.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  NA's   :131       NA's   :10       NA's   :11       NA's   :13      
##    GEPAR_S_n         ASP_S_n          TIKL_S_n         TRENT_S_n     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.7148   Mean   :0.7439   Mean   :0.01782   Mean   :0.2025  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##  NA's   :17       NA's   :17       NA's   :16        NA's   :16      
##    FIBR_PREDS    PREDS_TAH         JELUD_TAH         FIBR_JELUD     
##  Min.   :0.0   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.1   Mean   :0.01176   Mean   :0.02471   Mean   :0.04176  
##  3rd Qu.:0.0   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.0   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##                                                                     
##     A_V_BLOK         OTEK_LANC           RAZRIV           DRESSLER      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.03353   Mean   :0.09353   Mean   :0.03176   Mean   :0.04412  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##                                                                         
##       ZSN             REC_IM          P_IM_STEN           LET_IS      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.2318   Mean   :0.09353   Mean   :0.08706   Mean   :0.4771  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.00000   Max.   :7.0000  
## 
#when doing lm(), ADD na.action=na.omit --> exclude from the analysis any subject who does not have all the variables necessary to fit a model
#Logistic regression model would work better since a lot of categorical var.
#head(mi_comp)
#tail(mi_comp)
#str(mi_comp) #Display the structure of a dataset. Gives data type, dimensions, column names, and data type within columns

###need to change to correct type???????? Some categorical some binary
#mi_compNEW <- for(i in 49:85){
#  as.factor(mi_comp)
#}

#mi_compNEW <- as.binary()

# Columns intended to be converted to factors (binary / categorical codes).
# Fix: "lat_im" and "inf_im" were fused into the single string
# "lat_im inf_im", which is not a column of mi_comp and makes
# `mi_comp[names]` fail with an undefined-column error.
# NOTE(review): endocr_01 is not listed although endocr_02 and endocr_03
# are -- confirm whether that omission is intentional.
# NOTE(review): this variable shares its name with base::names(); calls such
# as names(x) still resolve to the function, but a rename would be clearer.
names <- c(
  "ID", "SEX", "FK_STENOK", "IBS_NASL", "IBS_POST", "GB", "SIM_GIPERT",
  "DLIT_AG", "ZSN_A",
  "nr_11", "nr_01", "nr_02", "nr_03", "nr_04", "nr_07", "nr_08",
  "np_01", "np_04", "np_05", "np_07", "np_08", "np_09", "np_10",
  "endocr_02", "endocr_03",
  "zab_leg_01", "zab_leg_02", "zab_leg_03", "zab_leg_04", "zab_leg_06",
  "O_L_POST", "K_SH_POST", "MP_TP_POST", "SVT_POST", "GT_POST",
  "FIB_G_POST",
  "ant_im", "lat_im", "inf_im", "post_im", "IM_PG_P",
  "ritm_ecg_p_01", "ritm_ecg_p_02", "ritm_ecg_p_04", "ritm_ecg_p_06",
  "ritm_ecg_p_07", "ritm_ecg_p_08",
  "n_r_ecg_p_01", "n_r_ecg_p_02", "n_r_ecg_p_03", "n_r_ecg_p_04",
  "n_r_ecg_p_05", "n_r_ecg_p_06", "n_r_ecg_p_08", "n_r_ecg_p_09",
  "n_r_ecg_p_10",
  "n_p_ecg_p_01", "n_p_ecg_p_03", "n_p_ecg_p_04", "n_p_ecg_p_05",
  "n_p_ecg_p_06", "n_p_ecg_p_07", "n_p_ecg_p_08", "n_p_ecg_p_09",
  "n_p_ecg_p_10", "n_p_ecg_p_11", "n_p_ecg_p_12",
  "fibr_ter_01", "fibr_ter_02", "fibr_ter_03", "fibr_ter_05",
  "fibr_ter_06", "fibr_ter_07", "fibr_ter_08",
  "GIPO_K", "GIPER_NA", "TIME_B_S",
  "R_AB_1_n", "R_AB_2_n", "R_AB_3_n",
  "NA_KB", "NOT_NA_KB", "LID_KB", "NITR_S",
  "NA_R_1_n", "NA_R_2_n", "NA_R_3_n",
  "NOT_NA_1_n", "NOT_NA_2_n", "NOT_NA_3_n",
  "LID_S_n", "B_BLOK_S_n", "ANT_CA_S_n", "GEPAR_S_n", "ASP_S_n",
  "TIKL_S_n", "TRENT_S_n",
  "FIBR_PREDS", "PREDS_TAH", "JELUD_TAH", "FIBR_JELUD", "A_V_BLOK",
  "OTEK_LANC", "RAZRIV", "DRESSLER", "ZSN", "REC_IM", "P_IM_STEN",
  "LET_IS"
)

#' Convert selected columns of a data frame to factors
#'
#' The earlier version used `mi_comp$data`, which looks for a column that is
#' literally called "data" instead of the column(s) named by the `data`
#' argument; that produced the "replacement has 0 rows" error. It also only
#' changed a local copy, so nothing was ever returned to the caller.
#'
#' @param data Character vector of column names to convert.
#' @param df Data frame to convert. Defaults to the global `mi_comp`.
#' @return A copy of `df` in which every column named in `data` has been
#'   passed through `as.factor()`. Assign the result to keep it, e.g.
#'   `mi_comp <- namesNEW(names)`.
namesNEW <- function(data, df = mi_comp) {
  stopifnot(is.character(data), is.data.frame(df))

  # Nothing to convert: hand the data frame back untouched.
  if (length(data) == 0) {
    return(df)
  }

  # Fail with a readable message instead of a cryptic subsetting error.
  missing_cols <- setdiff(data, colnames(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in data frame: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Index with the character vector itself so the names stored in `data`
  # are used (`$` never evaluates its right-hand side).
  df[data] <- lapply(df[data], as.factor)
  df
}

#for (item in names){
#  namesNEW(item)
#}
#Error in `$<-.data.frame`(`*tmp*`, data, value = integer(0)) : 
#  replacement has 0 rows, data has 1700


#variable that can be numerical OR cumulative dummy coding:
#INF_ANAM   STENOK_AN FK_STENOK IBS_POST GB 
#ant_im lat_im  inf_im  post_im TIME_B_S
####Possible source of error!!! Ordinal attributes are not accurate as numeric values; they only show order!!!

#numerical variables:
#S_AD_KBRIG D_AD_KBRIG  S_AD_ORIT   D_AD_ORIT K_BLOOD NA_BLOOD  ALT_BLOOD AST_BLOOD KFK_BLOOD   L_BLOOD ROE
#######################mi_comp[names] <- lapply(mi_comp[names], factor)
# Show the structure of the data frame: dimensions, column names, and the
# storage type plus first few values of each column.
utils::str(object = mi_comp)
## 'data.frame':    1700 obs. of  124 variables:
##  $ ID           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ AGE          : int  77 55 52 68 60 64 70 65 60 77 ...
##  $ SEX          : int  1 1 1 0 1 1 1 1 1 0 ...
##  $ INF_ANAM     : int  2 1 0 0 0 0 1 0 0 2 ...
##  $ STENOK_AN    : int  1 0 0 0 0 1 1 1 0 0 ...
##  $ FK_STENOK    : int  1 0 0 0 0 2 2 1 0 0 ...
##  $ IBS_POST     : int  2 0 2 2 2 1 1 2 2 0 ...
##  $ IBS_NASL     : int  NA 0 NA NA NA NA NA NA NA NA ...
##  $ GB           : int  3 0 2 2 3 0 2 2 2 3 ...
##  $ SIM_GIPERT   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ DLIT_AG      : int  7 0 2 3 7 0 7 7 6 6 ...
##  $ ZSN_A        : int  0 0 0 1 0 0 1 0 0 1 ...
##  $ nr_11        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nr_01        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nr_02        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nr_03        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nr_04        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nr_07        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nr_08        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_01        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_04        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_05        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_07        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_08        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_09        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ np_10        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ endocr_01    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ endocr_02    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ endocr_03    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ zab_leg_01   : int  0 0 0 1 0 0 1 0 0 0 ...
##  $ zab_leg_02   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ zab_leg_03   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ zab_leg_04   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ zab_leg_06   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ S_AD_KBRIG   : int  NA NA 150 NA 190 NA 120 NA 200 NA ...
##  $ D_AD_KBRIG   : int  NA NA 100 NA 100 NA 80 NA 120 NA ...
##  $ S_AD_ORIT    : int  180 120 180 120 160 140 120 145 195 200 ...
##  $ D_AD_ORIT    : int  100 90 100 70 90 90 80 95 120 100 ...
##  $ O_L_POST     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ K_SH_POST    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ MP_TP_POST   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SVT_POST     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ GT_POST      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FIB_G_POST   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ant_im       : int  1 4 4 0 4 1 0 0 0 4 ...
##  $ lat_im       : int  0 1 1 1 1 1 0 0 0 1 ...
##  $ inf_im       : int  0 0 0 1 0 0 3 2 3 0 ...
##  $ post_im      : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ IM_PG_P      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ritm_ecg_p_01: int  0 1 1 1 0 0 1 1 1 0 ...
##  $ ritm_ecg_p_02: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ritm_ecg_p_04: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ritm_ecg_p_06: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ritm_ecg_p_07: int  1 0 0 0 1 1 0 0 0 1 ...
##  $ ritm_ecg_p_08: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_01 : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ n_r_ecg_p_02 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_03 : int  0 0 1 0 0 0 0 0 0 1 ...
##  $ n_r_ecg_p_04 : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_05 : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_06 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_08 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_09 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_r_ecg_p_10 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_01 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_03 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_04 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_05 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_06 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_07 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_08 : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_09 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_10 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_11 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_p_ecg_p_12 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_01  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_02  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_03  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_05  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_06  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_07  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fibr_ter_08  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ GIPO_K       : int  0 1 0 1 1 NA NA 0 NA NA ...
##  $ K_BLOOD      : num  4.7 3.5 4 3.9 3.5 NA NA 4.5 NA NA ...
##  $ GIPER_NA     : int  0 0 0 0 0 NA NA 0 NA NA ...
##  $ NA_BLOOD     : int  138 132 132 146 132 NA NA 136 NA NA ...
##  $ ALT_BLOOD    : num  NA 0.38 0.3 0.75 0.45 0.45 0.3 NA 0.3 0.38 ...
##  $ AST_BLOOD    : num  NA 0.18 0.11 0.37 0.22 0.22 0.11 NA 0.37 0.11 ...
##  $ KFK_BLOOD    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ L_BLOOD      : num  8 7.8 10.8 NA 8.3 7.2 11.1 6.2 6.2 6.9 ...
##  $ ROE          : int  16 3 NA NA NA 2 5 20 3 30 ...
##  $ TIME_B_S     : int  4 2 3 2 9 2 1 7 3 3 ...
##  $ R_AB_1_n     : int  0 0 3 0 0 0 0 3 0 0 ...
##  $ R_AB_2_n     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ R_AB_3_n     : int  1 0 0 1 0 0 0 0 0 0 ...
##  $ NA_KB        : int  NA 1 1 NA 0 0 0 0 0 NA ...
##  $ NOT_NA_KB    : int  NA 0 1 NA 0 1 1 0 1 NA ...
##  $ LID_KB       : int  NA 1 1 NA 0 0 0 0 0 NA ...
##  $ NITR_S       : int  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
  • Still need more understanding of the variables.
  • Assume all explanatory/covariates measured after 3rd day of hospitalization.
  • Need to change variables to easier writing. Ex. NOT mi_comp$age –> age. Since there are 124 variables (111 covariates, 12 complications, 1 ID), need a FOR LOOP to do that? ANSWER: NOPE! in lm(), just add data=mi_comp option!
  • Change each variable to its correct type so the analysis treats it properly.
  • Maybe would help to create own variable?
#ID AGE SEX INF_ANAM    STENOK_AN   FK_STENOK   IBS_POST    IBS_NASL    GB  SIM_GIPERT  DLIT_AG ZSN_A   nr_11   nr_01   nr_02   nr_03   nr_04   nr_07   nr_08   np_01   np_04   np_05   np_07   np_08   np_09   np_10   endocr_01   endocr_02   endocr_03   zab_leg_01  zab_leg_02  zab_leg_03  zab_leg_04  zab_leg_06  S_AD_KBRIG  D_AD_KBRIG  S_AD_ORIT   D_AD_ORIT   O_L_POST    K_SH_POST   MP_TP_POST  SVT_POST    GT_POST FIB_G_POST  ant_im  lat_im  inf_im  post_im IM_PG_P ritm_ecg_p_01   ritm_ecg_p_02   ritm_ecg_p_04   ritm_ecg_p_06   ritm_ecg_p_07   ritm_ecg_p_08   n_r_ecg_p_01    n_r_ecg_p_02    n_r_ecg_p_03    n_r_ecg_p_04    n_r_ecg_p_05    n_r_ecg_p_06    n_r_ecg_p_08    n_r_ecg_p_09    n_r_ecg_p_10    n_p_ecg_p_01    n_p_ecg_p_03    n_p_ecg_p_04    n_p_ecg_p_05    n_p_ecg_p_06    n_p_ecg_p_07    n_p_ecg_p_08    n_p_ecg_p_09    n_p_ecg_p_10    n_p_ecg_p_11    n_p_ecg_p_12    fibr_ter_01 fibr_ter_02 fibr_ter_03 fibr_ter_05 fibr_ter_06 fibr_ter_07 fibr_ter_08 GIPO_K  K_BLOOD GIPER_NA    NA_BLOOD    ALT_BLOOD   AST_BLOOD   KFK_BLOOD   L_BLOOD ROE TIME_B_S    R_AB_1_n    R_AB_2_n    R_AB_3_n    NA_KB   NOT_NA_KB   LID_KB  NITR_S  NA_R_1_n    NA_R_2_n    NA_R_3_n    NOT_NA_1_n  NOT_NA_2_n  NOT_NA_3_n  LID_S_n B_BLOK_S_n  ANT_CA_S_n  GEPAR_S_n   ASP_S_n TIKL_S_n    TRENT_S_n   FIBR_PREDS  PREDS_TAH   JELUD_TAH   FIBR_JELUD  A_V_BLOK    OTEK_LANC   RAZRIV  DRESSLER    ZSN REC_IM  P_IM_STEN   LET_IS

Stage 2: Descriptive Analysis

EDA

#library(GGally)
#Problem is how to deal with missing data before do all pairs
#ggpairs(mi_comp,upper = list(continuous = wrap("cor", size = 2.5)),lower = list(continuous = "smooth"))
#also based on Descriptive Statistics.pdf, the Chronic heart failure (ZSN) has highest prob of complication!!! (focus on this response var most?!)
  • How should the data be split so that one part is used to train the model, and what is the other part used for (testing/validation)?
  • This is where visualization happens
  • Remember dealing with missing data.
  • Rare complication, rare event → THEN less likely to be a useful response variable????
  • Think for some BOMB Visualization

Stage 3: Inferential Analysis

# Simple linear regression of ZSN on AGE. Rows with a missing value in either
# variable are dropped before fitting (na.action = na.omit).
# NOTE(review): the summary above shows ZSN ranging from 0 to 1, so it looks
# like a binary outcome; a logistic model, e.g.
# glm(ZSN ~ AGE, family = binomial, data = mi_comp), may be more appropriate
# -- confirm before interpreting these coefficients.
ageZSN <- lm(
  formula = ZSN ~ AGE,
  data = mi_comp,
  na.action = na.omit
)

# Print coefficient estimates, standard errors, and fit statistics.
summary(object = ageZSN)
## 
## Call:
## lm(formula = ZSN ~ AGE, data = mi_comp, na.action = na.omit)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39675 -0.25437 -0.20508 -0.09556  0.90444 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.1070610  0.0567076  -1.888   0.0592 .  
## AGE          0.0054762  0.0009019   6.072 1.56e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4176 on 1690 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.02135,    Adjusted R-squared:  0.02077 
## F-statistic: 36.86 on 1 and 1690 DF,  p-value: 1.561e-09

Logistic Regression/MLR

USE LOGISTIC REGRESSION FOR SURE (NOT LINEAR REGRESSION), B/C ALL COMPLICATIONS (RESPONSE VARS) ARE CATEGORICAL/BINARY VARIABLES

  • Think of some Q&A questions that people might ask