library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the myocardial infarction complications data set into `mi_comp`, then
# print per-column summary statistics (quartiles, mean, NA counts).
# NOTE(review): the absolute D:/ path only resolves on the original machine.
mi_comp <- read.csv(
  file = "D:/NCSU/Summer2022/SIBS/SIBS_HackAThon/2022 Hack-a-Thon Data-20220708/Myocardial infarction complications Database.csv"
)
summary(object = mi_comp)
## ID AGE SEX INF_ANAM
## Min. : 1.0 Min. :26.00 Min. :0.0000 Min. :0.0000
## 1st Qu.: 425.8 1st Qu.:54.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 850.5 Median :63.00 Median :1.0000 Median :0.0000
## Mean : 850.5 Mean :61.86 Mean :0.6265 Mean :0.5548
## 3rd Qu.:1275.2 3rd Qu.:70.00 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1700.0 Max. :92.00 Max. :1.0000 Max. :3.0000
## NA's :8 NA's :4
## STENOK_AN FK_STENOK IBS_POST IBS_NASL
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :1.000 Median :2.000 Median :1.000 Median :0.000
## Mean :2.316 Mean :1.205 Mean :1.161 Mean :0.375
## 3rd Qu.:5.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :6.000 Max. :4.000 Max. :2.000 Max. :1.000
## NA's :106 NA's :73 NA's :51 NA's :1628
## GB SIM_GIPERT DLIT_AG ZSN_A
## Min. :0.000 Min. :0.00000 Min. :0.00 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00 1st Qu.:0.0000
## Median :2.000 Median :0.00000 Median :3.00 Median :0.0000
## Mean :1.393 Mean :0.03369 Mean :3.34 Mean :0.1944
## 3rd Qu.:2.000 3rd Qu.:0.00000 3rd Qu.:7.00 3rd Qu.:0.0000
## Max. :3.000 Max. :1.00000 Max. :7.00 Max. :4.0000
## NA's :9 NA's :8 NA's :248 NA's :54
## nr_11 nr_01 nr_02 nr_03
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.02501 Mean :0.002382 Mean :0.01132 Mean :0.02085
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.000000 Max. :1.00000 Max. :1.00000
## NA's :21 NA's :21 NA's :21 NA's :21
## nr_04 nr_07 nr_08 np_01
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0.000000 Median :0.000000 Median :0.000000
## Mean :0.01727 Mean :0.000596 Mean :0.002382 Mean :0.001189
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.000000 Max. :1.000000 Max. :1.000000
## NA's :21 NA's :21 NA's :21 NA's :18
## np_04 np_05 np_07 np_08
## Min. :0.000000 Min. :0.00000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.000000 Median :0.00000 Median :0.000000 Median :0.000000
## Mean :0.001784 Mean :0.00654 Mean :0.000595 Mean :0.003567
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.000000 Max. :1.00000 Max. :1.000000 Max. :1.000000
## NA's :18 NA's :18 NA's :18 NA's :18
## np_09 np_10 endocr_01 endocr_02
## Min. :0.000000 Min. :0.000000 Min. :0.000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000 1st Qu.:0.00000
## Median :0.000000 Median :0.000000 Median :0.000 Median :0.00000
## Mean :0.001189 Mean :0.001784 Mean :0.135 Mean :0.02485
## 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :1.000000 Max. :1.000000 Max. :1.000 Max. :1.00000
## NA's :18 NA's :18 NA's :11 NA's :10
## endocr_03 zab_leg_01 zab_leg_02 zab_leg_03
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.007692 Mean :0.07915 Mean :0.07147 Mean :0.02185
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.000000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## NA's :10 NA's :7 NA's :7 NA's :7
## zab_leg_04 zab_leg_06 S_AD_KBRIG D_AD_KBRIG
## Min. :0.000000 Min. :0.00000 Min. : 0.0 Min. : 0.00
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:120.0 1st Qu.: 70.00
## Median :0.000000 Median :0.00000 Median :140.0 Median : 80.00
## Mean :0.005316 Mean :0.01299 Mean :136.9 Mean : 81.39
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:160.0 3rd Qu.: 90.00
## Max. :1.000000 Max. :1.00000 Max. :260.0 Max. :190.00
## NA's :7 NA's :7 NA's :1076 NA's :1076
## S_AD_ORIT D_AD_ORIT O_L_POST K_SH_POST
## Min. : 0.0 Min. : 0.00 Min. :0.00000 Min. :0.0000
## 1st Qu.:120.0 1st Qu.: 80.00 1st Qu.:0.00000 1st Qu.:0.0000
## Median :130.0 Median : 80.00 Median :0.00000 Median :0.0000
## Mean :134.6 Mean : 82.75 Mean :0.06517 Mean :0.0273
## 3rd Qu.:150.0 3rd Qu.: 90.00 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :260.0 Max. :190.00 Max. :1.00000 Max. :1.0000
## NA's :267 NA's :267 NA's :12 NA's :15
## MP_TP_POST SVT_POST GT_POST FIB_G_POST
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0.000000 Median :0.000000 Median :0.000000
## Mean :0.06762 Mean :0.004739 Mean :0.004739 Mean :0.008886
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.000000 Max. :1.000000 Max. :1.000000
## NA's :14 NA's :12 NA's :12 NA's :12
## ant_im lat_im inf_im post_im
## Min. :0.000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.000 Median :1.0000 Median :0.000 Median :0.0000
## Mean :1.571 Mean :0.8617 Mean :1.015 Mean :0.2592
## 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :4.0000 Max. :4.000 Max. :4.0000
## NA's :83 NA's :80 NA's :80 NA's :72
## IM_PG_P ritm_ecg_p_01 ritm_ecg_p_02 ritm_ecg_p_04
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :1.0000 Median :0.00000 Median :0.00000
## Mean :0.02943 Mean :0.6647 Mean :0.06137 Mean :0.01486
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## NA's :1 NA's :152 NA's :152 NA's :152
## ritm_ecg_p_06 ritm_ecg_p_07 ritm_ecg_p_08 n_r_ecg_p_01
## Min. :0.00000 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000 Median :0.00000 Median :0.00000
## Mean :0.00065 Mean :0.228 Mean :0.02972 Mean :0.03659
## 3rd Qu.:0.00000 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.000 Max. :1.00000 Max. :1.00000
## NA's :152 NA's :152 NA's :152 NA's :115
## n_r_ecg_p_02 n_r_ecg_p_03 n_r_ecg_p_04 n_r_ecg_p_05
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.00505 Mean :0.1287 Mean :0.04353 Mean :0.04416
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## NA's :115 NA's :115 NA's :115 NA's :115
## n_r_ecg_p_06 n_r_ecg_p_08 n_r_ecg_p_09 n_r_ecg_p_10
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02019 Mean :0.00252 Mean :0.00126 Mean :0.00126
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## NA's :115 NA's :115 NA's :115 NA's :115
## n_p_ecg_p_01 n_p_ecg_p_03 n_p_ecg_p_04 n_p_ecg_p_05
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.00126 Mean :0.02019 Mean :0.00315 Mean :0.00126
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## NA's :115 NA's :115 NA's :115 NA's :115
## n_p_ecg_p_06 n_p_ecg_p_07 n_p_ecg_p_08 n_p_ecg_p_09
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01703 Mean :0.06435 Mean :0.00442 Mean :0.00631
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## NA's :115 NA's :115 NA's :115 NA's :115
## n_p_ecg_p_10 n_p_ecg_p_11 n_p_ecg_p_12 fibr_ter_01
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.02145 Mean :0.01767 Mean :0.04921 Mean :0.007692
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.000000
## NA's :115 NA's :115 NA's :115 NA's :10
## fibr_ter_02 fibr_ter_03 fibr_ter_05 fibr_ter_06
## Min. :0.000000 Min. :0.00000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.000000 Median :0.00000 Median :0.000000 Median :0.000000
## Mean :0.009467 Mean :0.04024 Mean :0.002367 Mean :0.005325
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.000000 Max. :1.00000 Max. :1.000000 Max. :1.000000
## NA's :10 NA's :10 NA's :10 NA's :10
## fibr_ter_07 fibr_ter_08 GIPO_K K_BLOOD
## Min. :0.00000 Min. :0.000000 Min. :0.0000 Min. :2.300
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:3.700
## Median :0.00000 Median :0.000000 Median :0.0000 Median :4.100
## Mean :0.00355 Mean :0.001183 Mean :0.4012 Mean :4.191
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:1.0000 3rd Qu.:4.600
## Max. :1.00000 Max. :1.000000 Max. :1.0000 Max. :8.200
## NA's :10 NA's :10 NA's :369 NA's :371
## GIPER_NA NA_BLOOD ALT_BLOOD AST_BLOOD
## Min. :0.0000 Min. :117.0 Min. :0.0300 Min. :0.0400
## 1st Qu.:0.0000 1st Qu.:133.0 1st Qu.:0.2300 1st Qu.:0.1500
## Median :0.0000 Median :136.0 Median :0.3800 Median :0.2200
## Mean :0.0226 Mean :136.6 Mean :0.4814 Mean :0.2637
## 3rd Qu.:0.0000 3rd Qu.:140.0 3rd Qu.:0.6100 3rd Qu.:0.3300
## Max. :1.0000 Max. :169.0 Max. :3.0000 Max. :2.1500
## NA's :375 NA's :375 NA's :284 NA's :285
## KFK_BLOOD L_BLOOD ROE TIME_B_S
## Min. :1.20 Min. : 2.000 Min. : 1.00 Min. :1.000
## 1st Qu.:1.35 1st Qu.: 6.400 1st Qu.: 5.00 1st Qu.:2.000
## Median :1.60 Median : 8.000 Median : 10.00 Median :4.000
## Mean :2.00 Mean : 8.783 Mean : 13.44 Mean :4.684
## 3rd Qu.:2.25 3rd Qu.:10.450 3rd Qu.: 18.00 3rd Qu.:7.000
## Max. :3.60 Max. :27.900 Max. :140.00 Max. :9.000
## NA's :1696 NA's :125 NA's :203 NA's :126
## R_AB_1_n R_AB_2_n R_AB_3_n NA_KB
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :1.0000
## Mean :0.3159 Mean :0.1407 Mean :0.07761 Mean :0.5925
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :3.0000 Max. :3.0000 Max. :3.00000 Max. :1.0000
## NA's :16 NA's :108 NA's :128 NA's :657
## NOT_NA_KB LID_KB NITR_S NA_R_1_n
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.6913 Mean :0.3871 Mean :0.1153 Mean :0.485
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :4.000
## NA's :686 NA's :677 NA's :9 NA's :5
## NA_R_2_n NA_R_3_n NOT_NA_1_n NOT_NA_2_n
## Min. :0.00000 Min. :0.00000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.000 Median :0.0000
## Mean :0.09422 Mean :0.05864 Mean :0.332 Mean :0.1132
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :3.00000 Max. :2.00000 Max. :4.000 Max. :3.0000
## NA's :108 NA's :131 NA's :10 NA's :110
## NOT_NA_3_n LID_S_n B_BLOK_S_n ANT_CA_S_n
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.08477 Mean :0.2834 Mean :0.1273 Mean :0.6669
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :2.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :131 NA's :10 NA's :11 NA's :13
## GEPAR_S_n ASP_S_n TIKL_S_n TRENT_S_n
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.00000 Median :0.0000
## Mean :0.7148 Mean :0.7439 Mean :0.01782 Mean :0.2025
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## NA's :17 NA's :17 NA's :16 NA's :16
## FIBR_PREDS PREDS_TAH JELUD_TAH FIBR_JELUD
## Min. :0.0 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1 Mean :0.01176 Mean :0.02471 Mean :0.04176
## 3rd Qu.:0.0 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0 Max. :1.00000 Max. :1.00000 Max. :1.00000
##
## A_V_BLOK OTEK_LANC RAZRIV DRESSLER
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03353 Mean :0.09353 Mean :0.03176 Mean :0.04412
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
##
## ZSN REC_IM P_IM_STEN LET_IS
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.2318 Mean :0.09353 Mean :0.08706 Mean :0.4771
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :7.0000
##
#when doing lm(), ADD na.action=na.omit --> exclude from the analysis any subject who does not have all the variables necessary to fit a model
# A logistic regression model would work better, since many of the variables are categorical.
#head(mi_comp)
#tail(mi_comp)
#str(mi_comp) #Display the structure of a dataset. Gives data type, dimensions, column names, and data type within columns
### Need to convert columns to the correct type? Some are categorical, some are binary.
#mi_compNEW <- for(i in 49:85){
# as.factor(mi_comp)
#}
#mi_compNEW <- as.binary()
# Names of the columns of `mi_comp` that are to be treated as categorical
# (factor) variables rather than numeric ones.
# Fix: "lat_im" and "inf_im" were fused into the single string "lat_im inf_im",
# which matches no column and makes `mi_comp[names]` fail.
# NOTE(review): `endocr_01` is absent although `endocr_02`/`endocr_03` are
# listed; confirm whether that omission is intentional.
# NOTE(review): the variable is called `names`, the same as base::names();
# the name is kept so existing references keep working.
names <- c(
  "ID", "SEX", "FK_STENOK", "IBS_NASL", "IBS_POST", "GB", "SIM_GIPERT",
  "DLIT_AG", "ZSN_A",
  "nr_11", "nr_01", "nr_02", "nr_03", "nr_04", "nr_07", "nr_08",
  "np_01", "np_04", "np_05", "np_07", "np_08", "np_09", "np_10",
  "endocr_02", "endocr_03",
  "zab_leg_01", "zab_leg_02", "zab_leg_03", "zab_leg_04", "zab_leg_06",
  "O_L_POST", "K_SH_POST", "MP_TP_POST", "SVT_POST", "GT_POST", "FIB_G_POST",
  "ant_im", "lat_im", "inf_im", "post_im", "IM_PG_P",
  "ritm_ecg_p_01", "ritm_ecg_p_02", "ritm_ecg_p_04", "ritm_ecg_p_06",
  "ritm_ecg_p_07", "ritm_ecg_p_08",
  "n_r_ecg_p_01", "n_r_ecg_p_02", "n_r_ecg_p_03", "n_r_ecg_p_04",
  "n_r_ecg_p_05", "n_r_ecg_p_06", "n_r_ecg_p_08", "n_r_ecg_p_09",
  "n_r_ecg_p_10",
  "n_p_ecg_p_01", "n_p_ecg_p_03", "n_p_ecg_p_04", "n_p_ecg_p_05",
  "n_p_ecg_p_06", "n_p_ecg_p_07", "n_p_ecg_p_08", "n_p_ecg_p_09",
  "n_p_ecg_p_10", "n_p_ecg_p_11", "n_p_ecg_p_12",
  "fibr_ter_01", "fibr_ter_02", "fibr_ter_03", "fibr_ter_05", "fibr_ter_06",
  "fibr_ter_07", "fibr_ter_08",
  "GIPO_K", "GIPER_NA", "TIME_B_S",
  "R_AB_1_n", "R_AB_2_n", "R_AB_3_n",
  "NA_KB", "NOT_NA_KB", "LID_KB", "NITR_S",
  "NA_R_1_n", "NA_R_2_n", "NA_R_3_n",
  "NOT_NA_1_n", "NOT_NA_2_n", "NOT_NA_3_n",
  "LID_S_n", "B_BLOK_S_n", "ANT_CA_S_n", "GEPAR_S_n", "ASP_S_n", "TIKL_S_n",
  "TRENT_S_n",
  "FIBR_PREDS", "PREDS_TAH", "JELUD_TAH", "FIBR_JELUD", "A_V_BLOK",
  "OTEK_LANC", "RAZRIV", "DRESSLER", "ZSN", "REC_IM", "P_IM_STEN", "LET_IS"
)
# Convert one or more columns of a data frame to factors.
#
# The earlier body used `mi_comp$data`, which refers to a column literally
# called "data" instead of the column named by the `data` argument, so it
# failed with "replacement has 0 rows". It also only touched a local copy of
# the data frame and returned nothing usable.
#
# Arguments:
#   data  Character vector of column names to convert.
#   df    Data frame holding those columns; defaults to the global `mi_comp`.
#
# Returns a copy of `df` in which the requested columns have been passed
# through as.factor(). The input is not modified in place, so assign the
# result back, e.g. `mi_comp <- namesNEW(names)`.
#
# Stops with an error if any requested column is not present in `df`.
namesNEW <- function(data, df = mi_comp) {
  stopifnot(is.character(data), is.data.frame(df))

  missing_cols <- setdiff(data, colnames(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Nothing to convert: hand the data frame back untouched.
  if (length(data) == 0) {
    return(df)
  }

  # `[` with a character vector selects columns by the *value* of `data`;
  # `$` would treat `data` as a literal column name.
  df[data] <- lapply(df[data], as.factor)
  df
}
#for (item in names){
# namesNEW(item)
#}
#Error in `$<-.data.frame`(`*tmp*`, data, value = integer(0)) :
# replacement has 0 rows, data has 1700
#variable that can be numerical OR cumulative dummy coding:
#INF_ANAM STENOK_AN FK_STENOK IBS_POST GB
#ant_im lat_im inf_im post_im TIME_B_S
#### Possible source of error: ordinal attributes are not accurate as numbers; they only encode order, not magnitude.
#numerical variables:
#S_AD_KBRIG D_AD_KBRIG S_AD_ORIT D_AD_ORIT K_BLOOD NA_BLOOD ALT_BLOOD AST_BLOOD KFK_BLOOD L_BLOOD ROE
#######################mi_comp[names] <- lapply(mi_comp[names], factor)
# Display the structure (type and first values) of every column of `mi_comp`.
# str() stops after `list.len` elements (default 99), which cut the listing
# off with "[list output truncated]" before the last 25 of the 124 columns
# (e.g. ZSN and LET_IS); lift the limit to the actual column count.
str(mi_comp, list.len = ncol(mi_comp))
## 'data.frame': 1700 obs. of 124 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ AGE : int 77 55 52 68 60 64 70 65 60 77 ...
## $ SEX : int 1 1 1 0 1 1 1 1 1 0 ...
## $ INF_ANAM : int 2 1 0 0 0 0 1 0 0 2 ...
## $ STENOK_AN : int 1 0 0 0 0 1 1 1 0 0 ...
## $ FK_STENOK : int 1 0 0 0 0 2 2 1 0 0 ...
## $ IBS_POST : int 2 0 2 2 2 1 1 2 2 0 ...
## $ IBS_NASL : int NA 0 NA NA NA NA NA NA NA NA ...
## $ GB : int 3 0 2 2 3 0 2 2 2 3 ...
## $ SIM_GIPERT : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DLIT_AG : int 7 0 2 3 7 0 7 7 6 6 ...
## $ ZSN_A : int 0 0 0 1 0 0 1 0 0 1 ...
## $ nr_11 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nr_01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nr_02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nr_03 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nr_04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nr_07 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nr_08 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_07 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_08 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_09 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ np_10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ endocr_01 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ endocr_02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ endocr_03 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ zab_leg_01 : int 0 0 0 1 0 0 1 0 0 0 ...
## $ zab_leg_02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ zab_leg_03 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ zab_leg_04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ zab_leg_06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ S_AD_KBRIG : int NA NA 150 NA 190 NA 120 NA 200 NA ...
## $ D_AD_KBRIG : int NA NA 100 NA 100 NA 80 NA 120 NA ...
## $ S_AD_ORIT : int 180 120 180 120 160 140 120 145 195 200 ...
## $ D_AD_ORIT : int 100 90 100 70 90 90 80 95 120 100 ...
## $ O_L_POST : int 0 0 0 0 0 0 0 0 0 0 ...
## $ K_SH_POST : int 0 0 0 0 0 0 0 0 0 0 ...
## $ MP_TP_POST : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SVT_POST : int 0 0 0 0 0 0 0 0 0 0 ...
## $ GT_POST : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FIB_G_POST : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ant_im : int 1 4 4 0 4 1 0 0 0 4 ...
## $ lat_im : int 0 1 1 1 1 1 0 0 0 1 ...
## $ inf_im : int 0 0 0 1 0 0 3 2 3 0 ...
## $ post_im : int 0 0 0 0 0 0 0 0 2 0 ...
## $ IM_PG_P : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ritm_ecg_p_01: int 0 1 1 1 0 0 1 1 1 0 ...
## $ ritm_ecg_p_02: int 0 0 0 0 0 0 0 0 0 0 ...
## $ ritm_ecg_p_04: int 0 0 0 0 0 0 0 0 0 0 ...
## $ ritm_ecg_p_06: int 0 0 0 0 0 0 0 0 0 0 ...
## $ ritm_ecg_p_07: int 1 0 0 0 1 1 0 0 0 1 ...
## $ ritm_ecg_p_08: int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_01 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ n_r_ecg_p_02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_03 : int 0 0 1 0 0 0 0 0 0 1 ...
## $ n_r_ecg_p_04 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_05 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_08 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_09 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_r_ecg_p_10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_03 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_04 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_07 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_08 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_09 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_11 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_p_ecg_p_12 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_02 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_03 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_05 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_06 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_07 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ fibr_ter_08 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ GIPO_K : int 0 1 0 1 1 NA NA 0 NA NA ...
## $ K_BLOOD : num 4.7 3.5 4 3.9 3.5 NA NA 4.5 NA NA ...
## $ GIPER_NA : int 0 0 0 0 0 NA NA 0 NA NA ...
## $ NA_BLOOD : int 138 132 132 146 132 NA NA 136 NA NA ...
## $ ALT_BLOOD : num NA 0.38 0.3 0.75 0.45 0.45 0.3 NA 0.3 0.38 ...
## $ AST_BLOOD : num NA 0.18 0.11 0.37 0.22 0.22 0.11 NA 0.37 0.11 ...
## $ KFK_BLOOD : num NA NA NA NA NA NA NA NA NA NA ...
## $ L_BLOOD : num 8 7.8 10.8 NA 8.3 7.2 11.1 6.2 6.2 6.9 ...
## $ ROE : int 16 3 NA NA NA 2 5 20 3 30 ...
## $ TIME_B_S : int 4 2 3 2 9 2 1 7 3 3 ...
## $ R_AB_1_n : int 0 0 3 0 0 0 0 3 0 0 ...
## $ R_AB_2_n : int 0 0 0 0 0 0 0 0 0 0 ...
## $ R_AB_3_n : int 1 0 0 1 0 0 0 0 0 0 ...
## $ NA_KB : int NA 1 1 NA 0 0 0 0 0 NA ...
## $ NOT_NA_KB : int NA 0 1 NA 0 1 1 0 1 NA ...
## $ LID_KB : int NA 1 1 NA 0 0 0 0 0 NA ...
## $ NITR_S : int 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
#ID AGE SEX INF_ANAM STENOK_AN FK_STENOK IBS_POST IBS_NASL GB SIM_GIPERT DLIT_AG ZSN_A nr_11 nr_01 nr_02 nr_03 nr_04 nr_07 nr_08 np_01 np_04 np_05 np_07 np_08 np_09 np_10 endocr_01 endocr_02 endocr_03 zab_leg_01 zab_leg_02 zab_leg_03 zab_leg_04 zab_leg_06 S_AD_KBRIG D_AD_KBRIG S_AD_ORIT D_AD_ORIT O_L_POST K_SH_POST MP_TP_POST SVT_POST GT_POST FIB_G_POST ant_im lat_im inf_im post_im IM_PG_P ritm_ecg_p_01 ritm_ecg_p_02 ritm_ecg_p_04 ritm_ecg_p_06 ritm_ecg_p_07 ritm_ecg_p_08 n_r_ecg_p_01 n_r_ecg_p_02 n_r_ecg_p_03 n_r_ecg_p_04 n_r_ecg_p_05 n_r_ecg_p_06 n_r_ecg_p_08 n_r_ecg_p_09 n_r_ecg_p_10 n_p_ecg_p_01 n_p_ecg_p_03 n_p_ecg_p_04 n_p_ecg_p_05 n_p_ecg_p_06 n_p_ecg_p_07 n_p_ecg_p_08 n_p_ecg_p_09 n_p_ecg_p_10 n_p_ecg_p_11 n_p_ecg_p_12 fibr_ter_01 fibr_ter_02 fibr_ter_03 fibr_ter_05 fibr_ter_06 fibr_ter_07 fibr_ter_08 GIPO_K K_BLOOD GIPER_NA NA_BLOOD ALT_BLOOD AST_BLOOD KFK_BLOOD L_BLOOD ROE TIME_B_S R_AB_1_n R_AB_2_n R_AB_3_n NA_KB NOT_NA_KB LID_KB NITR_S NA_R_1_n NA_R_2_n NA_R_3_n NOT_NA_1_n NOT_NA_2_n NOT_NA_3_n LID_S_n B_BLOK_S_n ANT_CA_S_n GEPAR_S_n ASP_S_n TIKL_S_n TRENT_S_n FIBR_PREDS PREDS_TAH JELUD_TAH FIBR_JELUD A_V_BLOK OTEK_LANC RAZRIV DRESSLER ZSN REC_IM P_IM_STEN LET_IS
#library(GGally)
# The problem is how to deal with missing data before plotting all pairs.
#ggpairs(mi_comp,upper = list(continuous = wrap("cor", size = 2.5)),lower = list(continuous = "smooth"))
# Also, based on Descriptive Statistics.pdf, chronic heart failure (ZSN) has the highest probability among the complications (focus on this response variable the most?).
# Ordinary least-squares fit of ZSN on AGE. Rows with a missing value in a
# model variable are dropped (na.omit) before fitting; the coefficient table
# and fit statistics are then printed.
# NOTE(review): ZSN appears to be a 0/1 indicator, so a logistic model
# (glm with family = binomial) may be more appropriate — confirm.
ageZSN <- lm(
  formula = ZSN ~ AGE,
  data = mi_comp,
  na.action = na.omit
)
summary(object = ageZSN)
##
## Call:
## lm(formula = ZSN ~ AGE, data = mi_comp, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39675 -0.25437 -0.20508 -0.09556 0.90444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1070610 0.0567076 -1.888 0.0592 .
## AGE 0.0054762 0.0009019 6.072 1.56e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4176 on 1690 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.02135, Adjusted R-squared: 0.02077
## F-statistic: 36.86 on 1 and 1690 DF, p-value: 1.561e-09
# No: linear regression is not suitable here. Use logistic regression for sure, because all complication (response) variables are categorical/binary.