Installing the packages

# Install (if missing) and attach every package used in this analysis.
# NOTE(review): MASS is listed after tidyverse, so MASS::select() presumably
# masks dplyr::select(); call dplyr::select() with an explicit namespace.
pacman::p_load(
  tidyverse, caret, corrplot, e1071, interplot, caTools, car, ROCR,
  IRdisplay, xlsx, ggmap, ggpubr, broom, relaimpo, MASS, MLmetrics
)
## package 'corrplot' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'proxy' successfully unpacked and MD5 sums checked
## package 'e1071' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'checkmate' successfully unpacked and MD5 sums checked
## package 'htmlwidgets' successfully unpacked and MD5 sums checked
## package 'minqa' successfully unpacked and MD5 sums checked
## package 'nloptr' successfully unpacked and MD5 sums checked
## package 'RcppEigen' successfully unpacked and MD5 sums checked
## package 'Formula' successfully unpacked and MD5 sums checked
## package 'latticeExtra' successfully unpacked and MD5 sums checked
## package 'gridExtra' successfully unpacked and MD5 sums checked
## package 'htmlTable' successfully unpacked and MD5 sums checked
## package 'viridis' successfully unpacked and MD5 sums checked
## package 'lme4' successfully unpacked and MD5 sums checked
## package 'coda' successfully unpacked and MD5 sums checked
## package 'Hmisc' successfully unpacked and MD5 sums checked
## package 'abind' successfully unpacked and MD5 sums checked
## package 'arm' successfully unpacked and MD5 sums checked
## package 'interactionTest' successfully unpacked and MD5 sums checked
## package 'interplot' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'caTools' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'matrixStats' successfully unpacked and MD5 sums checked
## package 'RcppArmadillo' successfully unpacked and MD5 sums checked
## package 'SparseM' successfully unpacked and MD5 sums checked
## package 'MatrixModels' successfully unpacked and MD5 sums checked
## package 'conquer' successfully unpacked and MD5 sums checked
## package 'openxlsx' successfully unpacked and MD5 sums checked
## package 'carData' successfully unpacked and MD5 sums checked
## package 'pbkrtest' successfully unpacked and MD5 sums checked
## package 'quantreg' successfully unpacked and MD5 sums checked
## package 'maptools' successfully unpacked and MD5 sums checked
## package 'rio' successfully unpacked and MD5 sums checked
## package 'car' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'gtools' successfully unpacked and MD5 sums checked
## package 'gplots' successfully unpacked and MD5 sums checked
## package 'ROCR' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'repr' successfully unpacked and MD5 sums checked
## package 'IRdisplay' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'ggrepel' successfully unpacked and MD5 sums checked
## package 'ggsci' successfully unpacked and MD5 sums checked
## package 'cowplot' successfully unpacked and MD5 sums checked
## package 'ggsignif' successfully unpacked and MD5 sums checked
## package 'polynom' successfully unpacked and MD5 sums checked
## package 'rstatix' successfully unpacked and MD5 sums checked
## package 'ggpubr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'survey' successfully unpacked and MD5 sums checked
## package 'mitools' successfully unpacked and MD5 sums checked
## package 'corpcor' successfully unpacked and MD5 sums checked
## package 'relaimpo' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'MLmetrics' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages

Set Directory

# Show the current working directory before changing it.
getwd()
## [1] "C:/Users/ngsook/Desktop/Data Scienc Project/dataset_diabetes/dataset_diabetes"
# NOTE(review): hard-coded absolute path; the script only runs on a machine
# with this exact folder layout. Consider a project-relative path instead.
setwd("C:/Users/ngsook/Desktop/Data Scienc Project/dataset_diabetes/dataset_diabetes")

Read and explore the data

# Load the raw data set from the working directory.
hospital <- read.csv(file = "diabetic_data.csv")
# Number of rows and columns that were read.
dim(hospital)
## [1] 101766     50
# Per-column overview: quartiles for integer columns, length/class/mode for
# character columns.
summary(hospital)
##   encounter_id        patient_nbr            race              gender         
##  Min.   :    12522   Min.   :      135   Length:101766      Length:101766     
##  1st Qu.: 84961194   1st Qu.: 23413221   Class :character   Class :character  
##  Median :152388987   Median : 45505143   Mode  :character   Mode  :character  
##  Mean   :165201646   Mean   : 54330401                                        
##  3rd Qu.:230270888   3rd Qu.: 87545950                                        
##  Max.   :443867222   Max.   :189502619                                        
##      age               weight          admission_type_id
##  Length:101766      Length:101766      Min.   :1.000    
##  Class :character   Class :character   1st Qu.:1.000    
##  Mode  :character   Mode  :character   Median :1.000    
##                                        Mean   :2.024    
##                                        3rd Qu.:3.000    
##                                        Max.   :8.000    
##  discharge_disposition_id admission_source_id time_in_hospital
##  Min.   : 1.000           Min.   : 1.000      Min.   : 1.000  
##  1st Qu.: 1.000           1st Qu.: 1.000      1st Qu.: 2.000  
##  Median : 1.000           Median : 7.000      Median : 4.000  
##  Mean   : 3.716           Mean   : 5.754      Mean   : 4.396  
##  3rd Qu.: 4.000           3rd Qu.: 7.000      3rd Qu.: 6.000  
##  Max.   :28.000           Max.   :25.000      Max.   :14.000  
##   payer_code        medical_specialty  num_lab_procedures num_procedures
##  Length:101766      Length:101766      Min.   :  1.0      Min.   :0.00  
##  Class :character   Class :character   1st Qu.: 31.0      1st Qu.:0.00  
##  Mode  :character   Mode  :character   Median : 44.0      Median :1.00  
##                                        Mean   : 43.1      Mean   :1.34  
##                                        3rd Qu.: 57.0      3rd Qu.:2.00  
##                                        Max.   :132.0      Max.   :6.00  
##  num_medications number_outpatient number_emergency  number_inpatient 
##  Min.   : 1.00   Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.0000  
##  1st Qu.:10.00   1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.0000  
##  Median :15.00   Median : 0.0000   Median : 0.0000   Median : 0.0000  
##  Mean   :16.02   Mean   : 0.3694   Mean   : 0.1978   Mean   : 0.6356  
##  3rd Qu.:20.00   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.0000  
##  Max.   :81.00   Max.   :42.0000   Max.   :76.0000   Max.   :21.0000  
##     diag_1             diag_2             diag_3          number_diagnoses
##  Length:101766      Length:101766      Length:101766      Min.   : 1.000  
##  Class :character   Class :character   Class :character   1st Qu.: 6.000  
##  Mode  :character   Mode  :character   Mode  :character   Median : 8.000  
##                                                           Mean   : 7.423  
##                                                           3rd Qu.: 9.000  
##                                                           Max.   :16.000  
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride        acetohexamide     
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   glipizide          glyburide         tolbutamide        pioglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  rosiglitazone        acarbose           miglitol         troglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   tolazamide          examide          citoglipton          insulin         
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin glipizide.metformin glimepiride.pioglitazone
##  Length:101766       Length:101766       Length:101766           
##  Class :character    Class :character    Class :character        
##  Mode  :character    Mode  :character    Mode  :character        
##                                                                  
##                                                                  
##                                                                  
##  metformin.rosiglitazone metformin.pioglitazone    change         
##  Length:101766           Length:101766          Length:101766     
##  Class :character        Class :character       Class :character  
##  Mode  :character        Mode  :character       Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##  diabetesMed         readmitted       
##  Length:101766      Length:101766     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Class of every column as a named character vector. vapply() pins the result
# type; a column with several classes is shown as "class1/class2" instead of
# silently turning the whole result into a list, as sapply() would.
vapply(
  hospital,
  function(column) paste(class(column), collapse = "/"),
  character(1)
)
##             encounter_id              patient_nbr                     race 
##                "integer"                "integer"              "character" 
##                   gender                      age                   weight 
##              "character"              "character"              "character" 
##        admission_type_id discharge_disposition_id      admission_source_id 
##                "integer"                "integer"                "integer" 
##         time_in_hospital               payer_code        medical_specialty 
##                "integer"              "character"              "character" 
##       num_lab_procedures           num_procedures          num_medications 
##                "integer"                "integer"                "integer" 
##        number_outpatient         number_emergency         number_inpatient 
##                "integer"                "integer"                "integer" 
##                   diag_1                   diag_2                   diag_3 
##              "character"              "character"              "character" 
##         number_diagnoses            max_glu_serum                A1Cresult 
##                "integer"              "character"              "character" 
##                metformin              repaglinide              nateglinide 
##              "character"              "character"              "character" 
##           chlorpropamide              glimepiride            acetohexamide 
##              "character"              "character"              "character" 
##                glipizide                glyburide              tolbutamide 
##              "character"              "character"              "character" 
##             pioglitazone            rosiglitazone                 acarbose 
##              "character"              "character"              "character" 
##                 miglitol             troglitazone               tolazamide 
##              "character"              "character"              "character" 
##                  examide              citoglipton                  insulin 
##              "character"              "character"              "character" 
##      glyburide.metformin      glipizide.metformin glimepiride.pioglitazone 
##              "character"              "character"              "character" 
##  metformin.rosiglitazone   metformin.pioglitazone                   change 
##              "character"              "character"              "character" 
##              diabetesMed               readmitted 
##              "character"              "character"
# Compact structure listing: column name, type and the first few values.
str(hospital)
## 'data.frame':    101766 obs. of  50 variables:
##  $ encounter_id            : int  2278392 149190 64410 500364 16680 35754 55842 63768 12522 15738 ...
##  $ patient_nbr             : int  8222157 55629189 86047875 82442376 42519267 82637451 84259809 114882984 48330783 63555939 ...
##  $ race                    : chr  "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
##  $ weight                  : chr  "?" "?" "?" "?" ...
##  $ admission_type_id       : int  6 1 1 1 1 2 3 1 2 3 ...
##  $ discharge_disposition_id: int  25 1 1 1 1 1 1 1 1 3 ...
##  $ admission_source_id     : int  1 7 7 7 7 2 2 7 4 4 ...
##  $ time_in_hospital        : int  1 3 2 2 1 3 4 5 13 12 ...
##  $ payer_code              : chr  "?" "?" "?" "?" ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" "?" "?" "?" ...
##  $ num_lab_procedures      : int  41 59 11 44 51 31 70 73 68 33 ...
##  $ num_procedures          : int  0 0 5 1 0 6 1 0 2 3 ...
##  $ num_medications         : int  1 18 13 16 8 16 21 12 28 18 ...
##  $ number_outpatient       : int  0 0 2 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ diag_1                  : chr  "250.83" "276" "648" "8" ...
##  $ diag_2                  : chr  "?" "250.01" "250" "250.43" ...
##  $ diag_3                  : chr  "?" "255" "V27" "403" ...
##  $ number_diagnoses        : int  1 9 6 7 5 9 7 8 8 8 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "No" "No" ...
##  $ repaglinide             : chr  "No" "No" "No" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ acetohexamide           : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "Steady" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "No" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ troglitazone            : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ examide                 : chr  "No" "No" "No" "No" ...
##  $ citoglipton             : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Up" "No" "Up" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glipizide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glimepiride.pioglitazone: chr  "No" "No" "No" "No" ...
##  $ metformin.rosiglitazone : chr  "No" "No" "No" "No" ...
##  $ metformin.pioglitazone  : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "No" "Ch" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : chr  "NO" ">30" "NO" "NO" ...

Remove the variables that are not going to contribute to the modelling

1. Remove the identifiers “encounter_id” and “patient_nbr”

2. Remove “weight” because it has too many missing values and no imputation is available

3. Remove “payer_code” because it is not going to influence re-admission

4. Remove “diag_1”, “diag_2”, and “diag_3” because their metadata is not clear

# Columns excluded from modelling: the two identifiers, `weight`,
# `payer_code`, and the three diagnosis columns.
col <- c(
  "encounter_id", "patient_nbr", "weight", "payer_code",
  "diag_1", "diag_2", "diag_3"
)
# all_of() marks `col` as an external character vector of column names.
# Passing the bare vector is ambiguous (a data column named `col` would win)
# and is deprecated by tidyselect; all_of() also errors if a name is missing.
hospital1 <- dplyr::select(hospital, -dplyr::all_of(col))
# List the columns that remain.
colnames(hospital1)
##  [1] "race"                     "gender"                  
##  [3] "age"                      "admission_type_id"       
##  [5] "discharge_disposition_id" "admission_source_id"     
##  [7] "time_in_hospital"         "medical_specialty"       
##  [9] "num_lab_procedures"       "num_procedures"          
## [11] "num_medications"          "number_outpatient"       
## [13] "number_emergency"         "number_inpatient"        
## [15] "number_diagnoses"         "max_glu_serum"           
## [17] "A1Cresult"                "metformin"               
## [19] "repaglinide"              "nateglinide"             
## [21] "chlorpropamide"           "glimepiride"             
## [23] "acetohexamide"            "glipizide"               
## [25] "glyburide"                "tolbutamide"             
## [27] "pioglitazone"             "rosiglitazone"           
## [29] "acarbose"                 "miglitol"                
## [31] "troglitazone"             "tolazamide"              
## [33] "examide"                  "citoglipton"             
## [35] "insulin"                  "glyburide.metformin"     
## [37] "glipizide.metformin"      "glimepiride.pioglitazone"
## [39] "metformin.rosiglitazone"  "metformin.pioglitazone"  
## [41] "change"                   "diabetesMed"             
## [43] "readmitted"
# Per-column overview of the reduced data frame.
summary(hospital1)
##      race              gender              age            admission_type_id
##  Length:101766      Length:101766      Length:101766      Min.   :1.000    
##  Class :character   Class :character   Class :character   1st Qu.:1.000    
##  Mode  :character   Mode  :character   Mode  :character   Median :1.000    
##                                                           Mean   :2.024    
##                                                           3rd Qu.:3.000    
##                                                           Max.   :8.000    
##  discharge_disposition_id admission_source_id time_in_hospital
##  Min.   : 1.000           Min.   : 1.000      Min.   : 1.000  
##  1st Qu.: 1.000           1st Qu.: 1.000      1st Qu.: 2.000  
##  Median : 1.000           Median : 7.000      Median : 4.000  
##  Mean   : 3.716           Mean   : 5.754      Mean   : 4.396  
##  3rd Qu.: 4.000           3rd Qu.: 7.000      3rd Qu.: 6.000  
##  Max.   :28.000           Max.   :25.000      Max.   :14.000  
##  medical_specialty  num_lab_procedures num_procedures num_medications
##  Length:101766      Min.   :  1.0      Min.   :0.00   Min.   : 1.00  
##  Class :character   1st Qu.: 31.0      1st Qu.:0.00   1st Qu.:10.00  
##  Mode  :character   Median : 44.0      Median :1.00   Median :15.00  
##                     Mean   : 43.1      Mean   :1.34   Mean   :16.02  
##                     3rd Qu.: 57.0      3rd Qu.:2.00   3rd Qu.:20.00  
##                     Max.   :132.0      Max.   :6.00   Max.   :81.00  
##  number_outpatient number_emergency  number_inpatient  number_diagnoses
##  Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.0000   Min.   : 1.000  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 6.000  
##  Median : 0.0000   Median : 0.0000   Median : 0.0000   Median : 8.000  
##  Mean   : 0.3694   Mean   : 0.1978   Mean   : 0.6356   Mean   : 7.423  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.0000   3rd Qu.: 9.000  
##  Max.   :42.0000   Max.   :76.0000   Max.   :21.0000   Max.   :16.000  
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride        acetohexamide     
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   glipizide          glyburide         tolbutamide        pioglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  rosiglitazone        acarbose           miglitol         troglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   tolazamide          examide          citoglipton          insulin         
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin glipizide.metformin glimepiride.pioglitazone
##  Length:101766       Length:101766       Length:101766           
##  Class :character    Class :character    Class :character        
##  Mode  :character    Mode  :character    Mode  :character        
##                                                                  
##                                                                  
##                                                                  
##  metformin.rosiglitazone metformin.pioglitazone    change         
##  Length:101766           Length:101766          Length:101766     
##  Class :character        Class :character       Class :character  
##  Mode  :character        Mode  :character       Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##  diabetesMed         readmitted       
##  Length:101766      Length:101766     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Change the data type

“admission_type_id”, “discharge_disposition_id”, “admission_source_id” ==> factor

# The three *_id columns are converted from integer to factor so they are
# treated as categorical.
col2 <- c(
  "admission_type_id",
  "discharge_disposition_id",
  "admission_source_id"
)
hospital1[col2] <- lapply(hospital1[col2], as.factor)
# Confirm the new column types.
str(hospital1)
## 'data.frame':    101766 obs. of  43 variables:
##  $ race                    : chr  "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
##  $ admission_type_id       : Factor w/ 8 levels "1","2","3","4",..: 6 1 1 1 1 2 3 1 2 3 ...
##  $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 1 1 1 1 1 1 1 1 3 ...
##  $ admission_source_id     : Factor w/ 17 levels "1","2","3","4",..: 1 7 7 7 7 2 2 7 4 4 ...
##  $ time_in_hospital        : int  1 3 2 2 1 3 4 5 13 12 ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" "?" "?" "?" ...
##  $ num_lab_procedures      : int  41 59 11 44 51 31 70 73 68 33 ...
##  $ num_procedures          : int  0 0 5 1 0 6 1 0 2 3 ...
##  $ num_medications         : int  1 18 13 16 8 16 21 12 28 18 ...
##  $ number_outpatient       : int  0 0 2 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ number_diagnoses        : int  1 9 6 7 5 9 7 8 8 8 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "No" "No" ...
##  $ repaglinide             : chr  "No" "No" "No" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ acetohexamide           : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "Steady" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "No" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ troglitazone            : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ examide                 : chr  "No" "No" "No" "No" ...
##  $ citoglipton             : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Up" "No" "Up" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glipizide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glimepiride.pioglitazone: chr  "No" "No" "No" "No" ...
##  $ metformin.rosiglitazone : chr  "No" "No" "No" "No" ...
##  $ metformin.pioglitazone  : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "No" "Ch" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : chr  "NO" ">30" "NO" "NO" ...

Recode the missing-value marker “?” in “medical_specialty” as NA

# Entries equal to "?" are treated as missing and overwritten with NA; every
# other value is left untouched.
hospital1$medical_specialty[hospital1$medical_specialty == "?"] <- NA
# Re-inspect the data frame after the recode.
summary(hospital1)
##      race              gender              age            admission_type_id
##  Length:101766      Length:101766      Length:101766      1      :53990    
##  Class :character   Class :character   Class :character   3      :18869    
##  Mode  :character   Mode  :character   Mode  :character   2      :18480    
##                                                           6      : 5291    
##                                                           5      : 4785    
##                                                           8      :  320    
##                                                           (Other):   31    
##  discharge_disposition_id admission_source_id time_in_hospital
##  1      :60234            7      :57494       Min.   : 1.000  
##  3      :13954            1      :29565       1st Qu.: 2.000  
##  6      :12902            17     : 6781       Median : 4.000  
##  18     : 3691            4      : 3187       Mean   : 4.396  
##  2      : 2128            6      : 2264       3rd Qu.: 6.000  
##  22     : 1993            2      : 1104       Max.   :14.000  
##  (Other): 6864            (Other): 1371                       
##  medical_specialty  num_lab_procedures num_procedures num_medications
##  Length:101766      Min.   :  1.0      Min.   :0.00   Min.   : 1.00  
##  Class :character   1st Qu.: 31.0      1st Qu.:0.00   1st Qu.:10.00  
##  Mode  :character   Median : 44.0      Median :1.00   Median :15.00  
##                     Mean   : 43.1      Mean   :1.34   Mean   :16.02  
##                     3rd Qu.: 57.0      3rd Qu.:2.00   3rd Qu.:20.00  
##                     Max.   :132.0      Max.   :6.00   Max.   :81.00  
##                                                                      
##  number_outpatient number_emergency  number_inpatient  number_diagnoses
##  Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.0000   Min.   : 1.000  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 6.000  
##  Median : 0.0000   Median : 0.0000   Median : 0.0000   Median : 8.000  
##  Mean   : 0.3694   Mean   : 0.1978   Mean   : 0.6356   Mean   : 7.423  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.0000   3rd Qu.: 9.000  
##  Max.   :42.0000   Max.   :76.0000   Max.   :21.0000   Max.   :16.000  
##                                                                        
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride        acetohexamide     
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   glipizide          glyburide         tolbutamide        pioglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  rosiglitazone        acarbose           miglitol         troglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   tolazamide          examide          citoglipton          insulin         
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin glipizide.metformin glimepiride.pioglitazone
##  Length:101766       Length:101766       Length:101766           
##  Class :character    Class :character    Class :character        
##  Mode  :character    Mode  :character    Mode  :character        
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  metformin.rosiglitazone metformin.pioglitazone    change         
##  Length:101766           Length:101766          Length:101766     
##  Class :character        Class :character       Class :character  
##  Mode  :character        Mode  :character       Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  diabetesMed         readmitted       
##  Length:101766      Length:101766     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Show the type and the first few values of every column of hospital1.
utils::str(hospital1)
## 'data.frame':    101766 obs. of  43 variables:
##  $ race                    : chr  "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
##  $ admission_type_id       : Factor w/ 8 levels "1","2","3","4",..: 6 1 1 1 1 2 3 1 2 3 ...
##  $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 1 1 1 1 1 1 1 1 3 ...
##  $ admission_source_id     : Factor w/ 17 levels "1","2","3","4",..: 1 7 7 7 7 2 2 7 4 4 ...
##  $ time_in_hospital        : int  1 3 2 2 1 3 4 5 13 12 ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" NA NA NA ...
##  $ num_lab_procedures      : int  41 59 11 44 51 31 70 73 68 33 ...
##  $ num_procedures          : int  0 0 5 1 0 6 1 0 2 3 ...
##  $ num_medications         : int  1 18 13 16 8 16 21 12 28 18 ...
##  $ number_outpatient       : int  0 0 2 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ number_diagnoses        : int  1 9 6 7 5 9 7 8 8 8 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "No" "No" ...
##  $ repaglinide             : chr  "No" "No" "No" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ acetohexamide           : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "Steady" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "No" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ troglitazone            : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ examide                 : chr  "No" "No" "No" "No" ...
##  $ citoglipton             : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Up" "No" "Up" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glipizide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glimepiride.pioglitazone: chr  "No" "No" "No" "No" ...
##  $ metformin.rosiglitazone : chr  "No" "No" "No" "No" ...
##  $ metformin.pioglitazone  : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "No" "Ch" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : chr  "NO" ">30" "NO" "NO" ...

Recode the missing-value marker "?" in the race variable as NA

# Recode the "?" placeholder in `race` as NA, then re-summarise the data.
hospital1$race[hospital1$race == "?"] <- NA
summary(hospital1)
##      race              gender              age            admission_type_id
##  Length:101766      Length:101766      Length:101766      1      :53990    
##  Class :character   Class :character   Class :character   3      :18869    
##  Mode  :character   Mode  :character   Mode  :character   2      :18480    
##                                                           6      : 5291    
##                                                           5      : 4785    
##                                                           8      :  320    
##                                                           (Other):   31    
##  discharge_disposition_id admission_source_id time_in_hospital
##  1      :60234            7      :57494       Min.   : 1.000  
##  3      :13954            1      :29565       1st Qu.: 2.000  
##  6      :12902            17     : 6781       Median : 4.000  
##  18     : 3691            4      : 3187       Mean   : 4.396  
##  2      : 2128            6      : 2264       3rd Qu.: 6.000  
##  22     : 1993            2      : 1104       Max.   :14.000  
##  (Other): 6864            (Other): 1371                       
##  medical_specialty  num_lab_procedures num_procedures num_medications
##  Length:101766      Min.   :  1.0      Min.   :0.00   Min.   : 1.00  
##  Class :character   1st Qu.: 31.0      1st Qu.:0.00   1st Qu.:10.00  
##  Mode  :character   Median : 44.0      Median :1.00   Median :15.00  
##                     Mean   : 43.1      Mean   :1.34   Mean   :16.02  
##                     3rd Qu.: 57.0      3rd Qu.:2.00   3rd Qu.:20.00  
##                     Max.   :132.0      Max.   :6.00   Max.   :81.00  
##                                                                      
##  number_outpatient number_emergency  number_inpatient  number_diagnoses
##  Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.0000   Min.   : 1.000  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 6.000  
##  Median : 0.0000   Median : 0.0000   Median : 0.0000   Median : 8.000  
##  Mean   : 0.3694   Mean   : 0.1978   Mean   : 0.6356   Mean   : 7.423  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.0000   3rd Qu.: 9.000  
##  Max.   :42.0000   Max.   :76.0000   Max.   :21.0000   Max.   :16.000  
##                                                                        
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride        acetohexamide     
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   glipizide          glyburide         tolbutamide        pioglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  rosiglitazone        acarbose           miglitol         troglitazone      
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   tolazamide          examide          citoglipton          insulin         
##  Length:101766      Length:101766      Length:101766      Length:101766     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin glipizide.metformin glimepiride.pioglitazone
##  Length:101766       Length:101766       Length:101766           
##  Class :character    Class :character    Class :character        
##  Mode  :character    Mode  :character    Mode  :character        
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  metformin.rosiglitazone metformin.pioglitazone    change         
##  Length:101766           Length:101766          Length:101766     
##  Class :character        Class :character       Class :character  
##  Mode  :character        Mode  :character       Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  diabetesMed         readmitted       
##  Length:101766      Length:101766     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Re-inspect the column types and leading values of hospital1.
utils::str(hospital1)
## 'data.frame':    101766 obs. of  43 variables:
##  $ race                    : chr  "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
##  $ admission_type_id       : Factor w/ 8 levels "1","2","3","4",..: 6 1 1 1 1 2 3 1 2 3 ...
##  $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 1 1 1 1 1 1 1 1 3 ...
##  $ admission_source_id     : Factor w/ 17 levels "1","2","3","4",..: 1 7 7 7 7 2 2 7 4 4 ...
##  $ time_in_hospital        : int  1 3 2 2 1 3 4 5 13 12 ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" NA NA NA ...
##  $ num_lab_procedures      : int  41 59 11 44 51 31 70 73 68 33 ...
##  $ num_procedures          : int  0 0 5 1 0 6 1 0 2 3 ...
##  $ num_medications         : int  1 18 13 16 8 16 21 12 28 18 ...
##  $ number_outpatient       : int  0 0 2 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ number_diagnoses        : int  1 9 6 7 5 9 7 8 8 8 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "No" "No" ...
##  $ repaglinide             : chr  "No" "No" "No" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ acetohexamide           : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "Steady" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "No" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ troglitazone            : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ examide                 : chr  "No" "No" "No" "No" ...
##  $ citoglipton             : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Up" "No" "Up" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glipizide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glimepiride.pioglitazone: chr  "No" "No" "No" "No" ...
##  $ metformin.rosiglitazone : chr  "No" "No" "No" "No" ...
##  $ metformin.pioglitazone  : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "No" "Ch" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : chr  "NO" ">30" "NO" "NO" ...

Check for missing values and remove the rows that contain them

# Count the missing values in each column of hospital1.
# vapply() walks the columns directly, so the data frame is not coerced to a
# character matrix the way apply() would do, and the result is guaranteed to
# be exactly one integer per column (named after the column).
vapply(hospital1, function(col) sum(is.na(col)), integer(1))
##                     race                   gender                      age 
##                     2273                        0                        0 
##        admission_type_id discharge_disposition_id      admission_source_id 
##                        0                        0                        0 
##         time_in_hospital        medical_specialty       num_lab_procedures 
##                        0                    49949                        0 
##           num_procedures          num_medications        number_outpatient 
##                        0                        0                        0 
##         number_emergency         number_inpatient         number_diagnoses 
##                        0                        0                        0 
##            max_glu_serum                A1Cresult                metformin 
##                        0                        0                        0 
##              repaglinide              nateglinide           chlorpropamide 
##                        0                        0                        0 
##              glimepiride            acetohexamide                glipizide 
##                        0                        0                        0 
##                glyburide              tolbutamide             pioglitazone 
##                        0                        0                        0 
##            rosiglitazone                 acarbose                 miglitol 
##                        0                        0                        0 
##             troglitazone               tolazamide                  examide 
##                        0                        0                        0 
##              citoglipton                  insulin      glyburide.metformin 
##                        0                        0                        0 
##      glipizide.metformin glimepiride.pioglitazone  metformin.rosiglitazone 
##                        0                        0                        0 
##   metformin.pioglitazone                   change              diabetesMed 
##                        0                        0                        0 
##               readmitted 
##                        0
# Keep only the rows with no missing value in any column, then confirm that
# no NA is left in the filtered data.
# complete.cases() flags the fully observed rows; vapply() counts NAs column
# by column without coercing the data frame to a matrix as apply() would.
hospital2 <- hospital1[complete.cases(hospital1), ]
vapply(hospital2, function(col) sum(is.na(col)), integer(1))
##                     race                   gender                      age 
##                        0                        0                        0 
##        admission_type_id discharge_disposition_id      admission_source_id 
##                        0                        0                        0 
##         time_in_hospital        medical_specialty       num_lab_procedures 
##                        0                        0                        0 
##           num_procedures          num_medications        number_outpatient 
##                        0                        0                        0 
##         number_emergency         number_inpatient         number_diagnoses 
##                        0                        0                        0 
##            max_glu_serum                A1Cresult                metformin 
##                        0                        0                        0 
##              repaglinide              nateglinide           chlorpropamide 
##                        0                        0                        0 
##              glimepiride            acetohexamide                glipizide 
##                        0                        0                        0 
##                glyburide              tolbutamide             pioglitazone 
##                        0                        0                        0 
##            rosiglitazone                 acarbose                 miglitol 
##                        0                        0                        0 
##             troglitazone               tolazamide                  examide 
##                        0                        0                        0 
##              citoglipton                  insulin      glyburide.metformin 
##                        0                        0                        0 
##      glipizide.metformin glimepiride.pioglitazone  metformin.rosiglitazone 
##                        0                        0                        0 
##   metformin.pioglitazone                   change              diabetesMed 
##                        0                        0                        0 
##               readmitted 
##                        0

Check for duplicate rows and remove them

# Compare the dimensions before and after dropping exact duplicate rows.
dim(hospital2)
## [1] 50727    43
# De-duplicate once and reuse the result instead of calling unique() twice.
hospital3 <- unique(hospital2)
dim(hospital3)
## [1] 50725    43

Explore Readmission

# Tabulate how many encounters fall into each readmission class.
table(hospital3[["readmitted"]])
## 
##   <30   >30    NO 
##  5477 17118 28130
# Bar chart of the share of encounters in each readmission class; each bar is
# labelled with its proportion rounded to two decimals.
hospital3 %>%
  group_by(readmitted) %>%
  summarise(per = n() / nrow(hospital3)) %>%
  ggplot(mapping = aes(x = readmitted, y = per, fill = readmitted)) +
  geom_col() +
  geom_text(mapping = aes(label = round(per, digits = 2)), vjust = 2)

# Readmission class counts, shown again for reference.
table(hospital3[["readmitted"]])
## 
##   <30   >30    NO 
##  5477 17118 28130

Recode the outcome as binary: <30 and >30 become 1 (“readmitted”) and NO becomes 0

# Make sure the outcome is a plain character vector before it is recoded.
# as.character() is applied to the whole column at once: lapply() would return
# a list and turn `readmitted` into a list column of one-element items.
hospital3$readmitted <- as.character(hospital3$readmitted)
str(hospital3)
## 'data.frame':    50725 obs. of  43 variables:
##  $ race                    : chr  "Caucasian" "Caucasian" "Caucasian" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[90-100)" "[40-50)" "[80-90)" ...
##  $ admission_type_id       : Factor w/ 8 levels "1","2","3","4",..: 6 3 1 1 1 1 1 1 1 1 ...
##  $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 3 3 6 1 3 1 2 1 1 ...
##  $ admission_source_id     : Factor w/ 17 levels "1","2","3","4",..: 1 4 7 7 7 7 1 7 7 7 ...
##  $ time_in_hospital        : int  1 12 7 10 3 6 2 5 6 2 ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" "InternalMedicine" "Family/GeneralPractice" "Family/GeneralPractice" ...
##  $ num_lab_procedures      : int  41 33 60 55 29 64 25 52 27 41 ...
##  $ num_procedures          : int  0 3 0 1 0 3 2 0 0 0 ...
##  $ num_medications         : int  1 18 15 31 11 18 11 14 16 11 ...
##  $ number_outpatient       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_diagnoses        : int  1 8 8 8 3 7 3 8 8 6 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "Steady" "No" ...
##  $ repaglinide             : chr  "No" "No" "Up" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ acetohexamide           : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "No" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "Steady" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ troglitazone            : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ examide                 : chr  "No" "No" "No" "No" ...
##  $ citoglipton             : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Steady" "Down" "Steady" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glipizide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glimepiride.pioglitazone: chr  "No" "No" "No" "No" ...
##  $ metformin.rosiglitazone : chr  "No" "No" "No" "No" ...
##  $ metformin.pioglitazone  : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "Ch" "No" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : chr  "NO" "NO" "<30" "NO" ...
# Collapse both readmission windows ("<30" and ">30") into "YES", then encode
# the outcome as a factor whose labels are 0 for "NO" and 1 for "YES".
hospital3$readmitted[hospital3$readmitted %in% c("<30", ">30")] <- "YES"
hospital3$readmitted <- factor(
  hospital3$readmitted,
  levels = c("NO", "YES"),
  labels = c(0, 1)
)
summary(hospital3)
##      race              gender              age            admission_type_id
##  Length:50725       Length:50725       Length:50725       1      :20338    
##  Class :character   Class :character   Class :character   2      :12493    
##  Mode  :character   Mode  :character   Mode  :character   3      :11985    
##                                                           6      : 3885    
##                                                           5      : 1738    
##                                                           8      :  284    
##                                                           (Other):    2    
##  discharge_disposition_id admission_source_id time_in_hospital
##  1      :32189            7      :25206       Min.   : 1.000  
##  3      : 6561            1      :17489       1st Qu.: 2.000  
##  6      : 5259            17     : 4241       Median : 4.000  
##  22     : 1134            6      : 1592       Mean   : 4.393  
##  2      : 1088            4      : 1400       3rd Qu.: 6.000  
##  18     :  894            5      :  500       Max.   :14.000  
##  (Other): 3600            (Other):  297                       
##  medical_specialty  num_lab_procedures num_procedures  num_medications
##  Length:50725       Min.   :  1.00     Min.   :0.000   Min.   : 1.00  
##  Class :character   1st Qu.: 32.00     1st Qu.:0.000   1st Qu.:10.00  
##  Mode  :character   Median : 44.00     Median :1.000   Median :14.00  
##                     Mean   : 42.89     Mean   :1.442   Mean   :15.77  
##                     3rd Qu.: 56.00     3rd Qu.:2.000   3rd Qu.:20.00  
##                     Max.   :132.00     Max.   :6.000   Max.   :81.00  
##                                                                       
##  number_outpatient number_emergency  number_inpatient number_diagnoses
##  Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.000   Min.   : 1.00   
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.000   1st Qu.: 5.00   
##  Median : 0.0000   Median : 0.0000   Median : 0.000   Median : 8.00   
##  Mean   : 0.2515   Mean   : 0.2089   Mean   : 0.638   Mean   : 7.14   
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.000   3rd Qu.: 9.00   
##  Max.   :38.0000   Max.   :76.0000   Max.   :16.000   Max.   :16.00   
##                                                                       
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride        acetohexamide     
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   glipizide          glyburide         tolbutamide        pioglitazone      
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  rosiglitazone        acarbose           miglitol         troglitazone      
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   tolazamide          examide          citoglipton          insulin         
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin glipizide.metformin glimepiride.pioglitazone
##  Length:50725        Length:50725        Length:50725            
##  Class :character    Class :character    Class :character        
##  Mode  :character    Mode  :character    Mode  :character        
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  metformin.rosiglitazone metformin.pioglitazone    change         
##  Length:50725            Length:50725           Length:50725      
##  Class :character        Class :character       Class :character  
##  Mode  :character        Mode  :character       Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  diabetesMed        readmitted
##  Length:50725       0:28130   
##  Class :character   1:22595   
##  Mode  :character             
##                               
##                               
##                               
## 
# Count how many rows fall into each class of the readmitted outcome.
table(hospital3$readmitted)
## 
##     0     1 
## 28130 22595
# Bar chart of the share of rows in each readmitted class, with the
# rounded proportion printed inside each bar.
hospital3 %>%
  dplyr::count(readmitted) %>%
  mutate(per = n / nrow(hospital3)) %>%
  ggplot(aes(x = readmitted, y = per, fill = readmitted)) +
  geom_col() +
  geom_text(aes(label = round(per, 2)), vjust = 2)

## Visualization

# Dodged bar chart of row counts per diabetesMed value, split by the
# readmitted class; each bar is labelled with its count in white text.
hospital3 %>%
  dplyr::count(diabetesMed, readmitted, name = "total") %>%
  ggplot(aes(x = diabetesMed, y = total, fill = readmitted)) +
  geom_col(position = position_dodge()) +
  geom_text(
    aes(label = total),
    vjust = 1.6,
    color = "white",
    position = position_dodge(0.9),
    size = 3.5
  )

Remove the columns that have only one level or are heavily imbalanced:

“examide”, “citoglipton”, “glimepiride.pioglitazone”, “metformin.rosiglitazone”, “acetohexamide”

“glipizide.metformin”, “metformin.pioglitazone”,“troglitazone”

# Print per-column summaries of hospital3 before dropping columns.
# NOTE(review): character columns only report Length/Class/Mode in this
# output, so their level counts are not visible here.
summary(hospital3)
##      race              gender              age            admission_type_id
##  Length:50725       Length:50725       Length:50725       1      :20338    
##  Class :character   Class :character   Class :character   2      :12493    
##  Mode  :character   Mode  :character   Mode  :character   3      :11985    
##                                                           6      : 3885    
##                                                           5      : 1738    
##                                                           8      :  284    
##                                                           (Other):    2    
##  discharge_disposition_id admission_source_id time_in_hospital
##  1      :32189            7      :25206       Min.   : 1.000  
##  3      : 6561            1      :17489       1st Qu.: 2.000  
##  6      : 5259            17     : 4241       Median : 4.000  
##  22     : 1134            6      : 1592       Mean   : 4.393  
##  2      : 1088            4      : 1400       3rd Qu.: 6.000  
##  18     :  894            5      :  500       Max.   :14.000  
##  (Other): 3600            (Other):  297                       
##  medical_specialty  num_lab_procedures num_procedures  num_medications
##  Length:50725       Min.   :  1.00     Min.   :0.000   Min.   : 1.00  
##  Class :character   1st Qu.: 32.00     1st Qu.:0.000   1st Qu.:10.00  
##  Mode  :character   Median : 44.00     Median :1.000   Median :14.00  
##                     Mean   : 42.89     Mean   :1.442   Mean   :15.77  
##                     3rd Qu.: 56.00     3rd Qu.:2.000   3rd Qu.:20.00  
##                     Max.   :132.00     Max.   :6.000   Max.   :81.00  
##                                                                       
##  number_outpatient number_emergency  number_inpatient number_diagnoses
##  Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.000   Min.   : 1.00   
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.000   1st Qu.: 5.00   
##  Median : 0.0000   Median : 0.0000   Median : 0.000   Median : 8.00   
##  Mean   : 0.2515   Mean   : 0.2089   Mean   : 0.638   Mean   : 7.14   
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.000   3rd Qu.: 9.00   
##  Max.   :38.0000   Max.   :76.0000   Max.   :16.000   Max.   :16.00   
##                                                                       
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride        acetohexamide     
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   glipizide          glyburide         tolbutamide        pioglitazone      
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  rosiglitazone        acarbose           miglitol         troglitazone      
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   tolazamide          examide          citoglipton          insulin         
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin glipizide.metformin glimepiride.pioglitazone
##  Length:50725        Length:50725        Length:50725            
##  Class :character    Class :character    Class :character        
##  Mode  :character    Mode  :character    Mode  :character        
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  metformin.rosiglitazone metformin.pioglitazone    change         
##  Length:50725            Length:50725           Length:50725      
##  Class :character        Class :character       Class :character  
##  Mode  :character        Mode  :character       Mode  :character  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##  diabetesMed        readmitted
##  Length:50725       0:28130   
##  Class :character   1:22595   
##  Mode  :character             
##                               
##                               
##                               
## 
# Medication columns to drop: the ones listed above as having a single
# level or being heavily imbalanced.
col2 <- c(
  "examide", "citoglipton", "glimepiride.pioglitazone",
  "metformin.rosiglitazone", "acetohexamide",
  "glipizide.metformin", "metformin.pioglitazone", "troglitazone"
)
# all_of() marks col2 as an external character vector of column names.
# A bare `-col2` is ambiguous (it would pick a column literally named
# "col2" if one existed) and is deprecated by tidyselect. all_of() also
# errors if any listed column is missing instead of silently ignoring it.
hospital3 <- dplyr::select(hospital3, -dplyr::all_of(col2))
# List the remaining column names to confirm the drop.
colnames(hospital3)
##  [1] "race"                     "gender"                  
##  [3] "age"                      "admission_type_id"       
##  [5] "discharge_disposition_id" "admission_source_id"     
##  [7] "time_in_hospital"         "medical_specialty"       
##  [9] "num_lab_procedures"       "num_procedures"          
## [11] "num_medications"          "number_outpatient"       
## [13] "number_emergency"         "number_inpatient"        
## [15] "number_diagnoses"         "max_glu_serum"           
## [17] "A1Cresult"                "metformin"               
## [19] "repaglinide"              "nateglinide"             
## [21] "chlorpropamide"           "glimepiride"             
## [23] "glipizide"                "glyburide"               
## [25] "tolbutamide"              "pioglitazone"            
## [27] "rosiglitazone"            "acarbose"                
## [29] "miglitol"                 "tolazamide"              
## [31] "insulin"                  "glyburide.metformin"     
## [33] "change"                   "diabetesMed"             
## [35] "readmitted"
# Print per-column summaries again now that the col2 columns are removed.
summary(hospital3)
##      race              gender              age            admission_type_id
##  Length:50725       Length:50725       Length:50725       1      :20338    
##  Class :character   Class :character   Class :character   2      :12493    
##  Mode  :character   Mode  :character   Mode  :character   3      :11985    
##                                                           6      : 3885    
##                                                           5      : 1738    
##                                                           8      :  284    
##                                                           (Other):    2    
##  discharge_disposition_id admission_source_id time_in_hospital
##  1      :32189            7      :25206       Min.   : 1.000  
##  3      : 6561            1      :17489       1st Qu.: 2.000  
##  6      : 5259            17     : 4241       Median : 4.000  
##  22     : 1134            6      : 1592       Mean   : 4.393  
##  2      : 1088            4      : 1400       3rd Qu.: 6.000  
##  18     :  894            5      :  500       Max.   :14.000  
##  (Other): 3600            (Other):  297                       
##  medical_specialty  num_lab_procedures num_procedures  num_medications
##  Length:50725       Min.   :  1.00     Min.   :0.000   Min.   : 1.00  
##  Class :character   1st Qu.: 32.00     1st Qu.:0.000   1st Qu.:10.00  
##  Mode  :character   Median : 44.00     Median :1.000   Median :14.00  
##                     Mean   : 42.89     Mean   :1.442   Mean   :15.77  
##                     3rd Qu.: 56.00     3rd Qu.:2.000   3rd Qu.:20.00  
##                     Max.   :132.00     Max.   :6.000   Max.   :81.00  
##                                                                       
##  number_outpatient number_emergency  number_inpatient number_diagnoses
##  Min.   : 0.0000   Min.   : 0.0000   Min.   : 0.000   Min.   : 1.00   
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.: 0.000   1st Qu.: 5.00   
##  Median : 0.0000   Median : 0.0000   Median : 0.000   Median : 8.00   
##  Mean   : 0.2515   Mean   : 0.2089   Mean   : 0.638   Mean   : 7.14   
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 1.000   3rd Qu.: 9.00   
##  Max.   :38.0000   Max.   :76.0000   Max.   :16.000   Max.   :16.00   
##                                                                       
##  max_glu_serum       A1Cresult          metformin         repaglinide       
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  nateglinide        chlorpropamide     glimepiride         glipizide        
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   glyburide         tolbutamide        pioglitazone       rosiglitazone     
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    acarbose           miglitol          tolazamide          insulin         
##  Length:50725       Length:50725       Length:50725       Length:50725      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  glyburide.metformin    change          diabetesMed        readmitted
##  Length:50725        Length:50725       Length:50725       0:28130   
##  Class :character    Class :character   Class :character   1:22595   
##  Mode  :character    Mode  :character   Mode  :character             
##                                                                      
##                                                                      
##                                                                      
## 

Split the data

# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(123)

Create a logical vector that flags which rows go into the training set

# Logical mask over the rows of hospital3: TRUE for about 70% of rows,
# drawn by caTools::sample.split() on the readmitted labels.
splitData <- sample.split(hospital3$readmitted, SplitRatio = 0.7)
# Rows flagged TRUE form the training set.
train_set <- hospital3[splitData, ]
# Fraction of all rows that ended up in the training set.
nrow(train_set) / nrow(hospital3)
## [1] 0.6999901
# Rows not flagged by splitData form the held-out test set.
test_set <- hospital3[!splitData, ]
# Fraction of all rows that ended up in the test set.
nrow(test_set) / nrow(hospital3)
## [1] 0.3000099

Modelling (Logistic Regression)

# Fit a logistic regression of readmitted on every other column of the
# training set, then print the coefficient table.
# NOTE(review): several sparse factor levels show very large standard
# errors (about 1.455e+03) in the summary output, which may indicate
# separation; consider collapsing rare levels.
model <- glm(readmitted ~ ., data = train_set, family = binomial)
summary(model)
## 
## Call:
## glm(formula = readmitted ~ ., family = binomial, data = train_set)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3399  -1.0246  -0.6968   1.1656   2.6079  
## 
## Coefficients:
##                                                         Estimate Std. Error
## (Intercept)                                           -2.586e+00  2.716e+03
## raceAsian                                             -3.881e-01  1.305e-01
## raceCaucasian                                          7.406e-03  2.961e-02
## raceHispanic                                          -3.894e-02  7.898e-02
## raceOther                                             -2.593e-01  9.435e-02
## genderMale                                            -7.165e-02  2.346e-02
## genderUnknown/Invalid                                 -1.485e+01  1.455e+03
## age[10-20)                                             7.014e-01  3.367e-01
## age[20-30)                                             5.675e-01  3.730e-01
## age[30-40)                                             6.189e-01  3.656e-01
## age[40-50)                                             7.031e-01  3.626e-01
## age[50-60)                                             6.662e-01  3.622e-01
## age[60-70)                                             6.997e-01  3.622e-01
## age[70-80)                                             8.265e-01  3.622e-01
## age[80-90)                                             7.293e-01  3.628e-01
## age[90-100)                                            5.333e-01  3.690e-01
## admission_type_id2                                     1.640e-01  4.045e-02
## admission_type_id3                                     1.991e-01  4.923e-02
## admission_type_id4                                    -1.403e+01  1.455e+03
## admission_type_id5                                     7.960e-02  1.133e-01
## admission_type_id6                                     7.073e-01  6.891e-02
## admission_type_id8                                    -1.021e-01  1.621e-01
## discharge_disposition_id2                              2.806e-02  7.830e-02
## discharge_disposition_id3                             -3.168e-03  3.906e-02
## discharge_disposition_id4                              1.609e-01  1.278e-01
## discharge_disposition_id5                              2.711e-01  9.615e-02
## discharge_disposition_id6                              1.558e-01  3.953e-02
## discharge_disposition_id7                             -1.382e-01  1.636e-01
## discharge_disposition_id8                              2.820e-01  3.882e-01
## discharge_disposition_id9                              1.591e+01  1.455e+03
## discharge_disposition_id10                             3.325e-02  1.424e+00
## discharge_disposition_id11                            -1.594e+01  5.807e+01
## discharge_disposition_id12                             7.559e-01  1.427e+00
## discharge_disposition_id13                            -2.711e+00  3.643e-01
## discharge_disposition_id14                            -2.484e+00  4.217e-01
## discharge_disposition_id15                             1.427e+00  6.498e-01
## discharge_disposition_id16                             4.888e-01  6.960e-01
## discharge_disposition_id17                            -2.968e-01  7.470e-01
## discharge_disposition_id18                             2.826e-01  9.058e-02
## discharge_disposition_id19                            -1.493e+01  8.327e+02
## discharge_disposition_id20                            -1.601e+01  1.455e+03
## discharge_disposition_id22                             1.537e-01  7.908e-02
## discharge_disposition_id23                            -5.538e-01  1.792e-01
## discharge_disposition_id24                            -2.928e-01  8.722e-01
## discharge_disposition_id25                            -3.084e-01  1.043e-01
## discharge_disposition_id27                            -1.524e+01  1.455e+03
## discharge_disposition_id28                             3.668e-01  5.629e-01
## admission_source_id2                                  -1.164e-01  1.861e-01
## admission_source_id3                                  -4.602e-02  4.208e-01
## admission_source_id4                                  -5.083e-01  8.071e-02
## admission_source_id5                                  -2.471e-01  1.258e-01
## admission_source_id6                                  -5.127e-01  8.352e-02
## admission_source_id7                                   1.455e-01  4.445e-02
## admission_source_id8                                   6.663e-01  8.256e-01
## admission_source_id9                                  -2.947e-01  3.914e-01
## admission_source_id10                                  3.366e-01  1.416e+00
## admission_source_id14                                 -5.764e-01  2.058e+03
## admission_source_id17                                 -1.280e-01  7.835e-02
## admission_source_id22                                 -1.573e+01  1.455e+03
## time_in_hospital                                       1.651e-02  4.845e-03
## medical_specialtyAnesthesiology                       -1.380e+00  1.497e+00
## medical_specialtyAnesthesiology-Pediatric             -8.233e-01  1.402e+00
## medical_specialtyCardiology                           -7.146e-01  1.249e+00
## medical_specialtyCardiology-Pediatric                  1.042e+00  1.549e+00
## medical_specialtyDCPTEAM                              -1.579e+01  7.570e+02
## medical_specialtyDentistry                             1.472e+01  8.172e+02
## medical_specialtyDermatology                           1.517e+01  1.455e+03
## medical_specialtyEmergency/Trauma                     -6.731e-01  1.249e+00
## medical_specialtyEndocrinology                        -8.538e-01  1.269e+00
## medical_specialtyEndocrinology-Metabolism             -1.610e+01  5.974e+02
## medical_specialtyFamily/GeneralPractice               -6.065e-01  1.249e+00
## medical_specialtyGastroenterology                     -5.442e-01  1.253e+00
## medical_specialtyGynecology                           -1.891e+00  1.328e+00
## medical_specialtyHematology                           -2.976e-01  1.283e+00
## medical_specialtyHematology/Oncology                  -6.771e-01  1.262e+00
## medical_specialtyHospitalist                          -8.908e-01  1.291e+00
## medical_specialtyInfectiousDiseases                   -1.447e-02  1.331e+00
## medical_specialtyInternalMedicine                     -7.451e-01  1.249e+00
## medical_specialtyNephrology                           -3.115e-01  1.250e+00
## medical_specialtyNeurology                            -1.212e+00  1.263e+00
## medical_specialtyObsterics&Gynecology-GynecologicOnco -1.607e+00  1.373e+00
## medical_specialtyObstetrics                           -2.614e+00  1.629e+00
## medical_specialtyObstetricsandGynecology              -1.654e+00  1.255e+00
## medical_specialtyOncology                             -6.281e-01  1.257e+00
## medical_specialtyOphthalmology                        -1.132e+00  1.325e+00
## medical_specialtyOrthopedics                          -1.039e+00  1.251e+00
## medical_specialtyOrthopedics-Reconstructive           -1.213e+00  1.251e+00
## medical_specialtyOsteopath                            -8.127e-01  1.309e+00
## medical_specialtyOtolaryngology                       -1.401e+00  1.274e+00
## medical_specialtyOutreachServices                     -6.091e-01  1.422e+00
## medical_specialtyPathology                             8.351e-02  1.427e+00
## medical_specialtyPediatrics                           -8.516e-01  1.262e+00
## medical_specialtyPediatrics-AllergyandImmunology       1.431e+01  1.455e+03
## medical_specialtyPediatrics-CriticalCare              -4.500e-01  1.290e+00
## medical_specialtyPediatrics-EmergencyMedicine          1.569e+01  1.455e+03
## medical_specialtyPediatrics-Endocrinology             -1.235e+00  1.291e+00
## medical_specialtyPediatrics-Hematology-Oncology       -1.030e+00  1.770e+00
## medical_specialtyPediatrics-Neurology                 -6.397e-01  1.481e+00
## medical_specialtyPediatrics-Pulmonology                7.569e-01  1.420e+00
## medical_specialtyPerinatology                         -1.603e+01  1.455e+03
## medical_specialtyPhysicalMedicineandRehabilitation    -8.830e-01  1.257e+00
## medical_specialtyPhysicianNotFound                    -4.071e-01  1.454e+00
## medical_specialtyPodiatry                             -3.907e-02  1.279e+00
## medical_specialtyPsychiatry                           -8.182e-01  1.252e+00
## medical_specialtyPsychiatry-Addictive                 -1.652e+01  1.455e+03
## medical_specialtyPsychiatry-Child/Adolescent          -1.073e+00  1.527e+00
## medical_specialtyPsychology                           -8.316e-01  1.274e+00
## medical_specialtyPulmonology                          -5.766e-01  1.251e+00
## medical_specialtyRadiologist                          -7.600e-01  1.251e+00
## medical_specialtyRadiology                            -7.499e-01  1.300e+00
## medical_specialtyResident                              1.418e+01  1.455e+03
## medical_specialtyRheumatology                         -1.275e+00  1.376e+00
## medical_specialtySpeech                               -1.643e+01  1.455e+03
## medical_specialtySportsMedicine                        1.524e+01  1.455e+03
## medical_specialtySurgeon                              -1.279e+00  1.308e+00
## medical_specialtySurgery-Cardiovascular               -1.091e+00  1.275e+00
## medical_specialtySurgery-Cardiovascular/Thoracic      -1.410e+00  1.254e+00
## medical_specialtySurgery-Colon&Rectal                 -2.753e-01  1.608e+00
## medical_specialtySurgery-General                      -8.236e-01  1.250e+00
## medical_specialtySurgery-Maxillofacial                -2.316e+00  1.652e+00
## medical_specialtySurgery-Neuro                        -1.338e+00  1.255e+00
## medical_specialtySurgery-Pediatric                    -1.506e+00  1.692e+00
## medical_specialtySurgery-Plastic                      -9.050e-01  1.315e+00
## medical_specialtySurgery-PlasticwithinHeadandNeck      1.490e+01  1.455e+03
## medical_specialtySurgery-Thoracic                     -6.634e-01  1.274e+00
## medical_specialtySurgery-Vascular                     -5.523e-01  1.254e+00
## medical_specialtySurgicalSpecialty                    -1.554e+00  1.323e+00
## medical_specialtyUrology                              -9.738e-01  1.253e+00
## num_lab_procedures                                    -6.787e-04  7.350e-04
## num_procedures                                        -1.891e-02  7.889e-03
## num_medications                                        1.225e-03  1.936e-03
## number_outpatient                                      1.123e-01  1.394e-02
## number_emergency                                       1.579e-01  1.985e-02
## number_inpatient                                       3.661e-01  1.215e-02
## number_diagnoses                                       6.372e-02  6.735e-03
## max_glu_serum>300                                      4.996e-02  1.422e-01
## max_glu_serumNone                                     -1.254e-01  1.075e-01
## max_glu_serumNorm                                     -2.880e-02  1.108e-01
## A1Cresult>8                                            3.178e-02  6.854e-02
## A1CresultNone                                          1.304e-01  5.884e-02
## A1CresultNorm                                         -1.197e-01  7.943e-02
## metforminNo                                            2.157e-02  1.585e-01
## metforminSteady                                       -1.024e-01  1.587e-01
## metforminUp                                           -2.045e-02  1.889e-01
## repaglinideNo                                         -3.695e-01  5.094e-01
## repaglinideSteady                                     -1.951e-01  5.152e-01
## repaglinideUp                                         -2.200e-01  5.899e-01
## nateglinideNo                                         -8.560e-01  1.256e+00
## nateglinideSteady                                     -7.025e-01  1.263e+00
## nateglinideUp                                         -3.066e+00  1.606e+00
## chlorpropamideNo                                       1.442e+01  1.455e+03
## chlorpropamideSteady                                   1.470e+01  1.455e+03
## chlorpropamideUp                                       1.402e+01  1.455e+03
## glimepirideNo                                          2.544e-02  2.364e-01
## glimepirideSteady                                      3.124e-02  2.395e-01
## glimepirideUp                                         -1.433e-01  2.895e-01
## glipizideNo                                           -3.152e-01  1.653e-01
## glipizideSteady                                       -2.923e-01  1.655e-01
## glipizideUp                                           -3.141e-01  2.008e-01
## glyburideNo                                           -1.002e-01  1.629e-01
## glyburideSteady                                       -1.425e-01  1.640e-01
## glyburideUp                                           -2.326e-01  2.096e-01
## tolbutamideSteady                                     -7.962e-01  1.173e+00
## pioglitazoneNo                                        -3.905e-01  3.457e-01
## pioglitazoneSteady                                    -4.101e-01  3.477e-01
## pioglitazoneUp                                         3.745e-02  4.179e-01
## rosiglitazoneNo                                        1.658e+00  5.620e-01
## rosiglitazoneSteady                                    1.755e+00  5.633e-01
## rosiglitazoneUp                                        1.189e+00  6.129e-01
## acarboseNo                                            -1.438e+01  1.010e+03
## acarboseSteady                                        -1.384e+01  1.010e+03
## acarboseUp                                             2.036e+00  1.439e+03
## miglitolNo                                            -1.359e+01  1.455e+03
## miglitolSteady                                        -1.320e+01  1.455e+03
## miglitolUp                                            -2.915e+01  2.058e+03
## tolazamideSteady                                      -1.165e+00  6.529e-01
## insulinNo                                             -7.410e-02  5.993e-02
## insulinSteady                                         -1.544e-01  4.634e-02
## insulinUp                                             -5.348e-02  4.674e-02
## glyburide.metforminNo                                  1.525e+01  1.455e+03
## glyburide.metforminSteady                              1.541e+01  1.455e+03
## glyburide.metforminUp                                 -8.332e-01  2.058e+03
## changeNo                                              -7.866e-03  4.308e-02
## diabetesMedYes                                         3.129e-01  4.198e-02
##                                                       z value Pr(>|z|)    
## (Intercept)                                            -0.001 0.999240    
## raceAsian                                              -2.974 0.002938 ** 
## raceCaucasian                                           0.250 0.802518    
## raceHispanic                                           -0.493 0.622030    
## raceOther                                              -2.749 0.005985 ** 
## genderMale                                             -3.054 0.002257 ** 
## genderUnknown/Invalid                                  -0.010 0.991857    
## age[10-20)                                              2.083 0.037260 *  
## age[20-30)                                              1.522 0.128117    
## age[30-40)                                              1.693 0.090538 .  
## age[40-50)                                              1.939 0.052462 .  
## age[50-60)                                              1.839 0.065860 .  
## age[60-70)                                              1.932 0.053382 .  
## age[70-80)                                              2.282 0.022493 *  
## age[80-90)                                              2.010 0.044449 *  
## age[90-100)                                             1.445 0.148374    
## admission_type_id2                                      4.056 5.00e-05 ***
## admission_type_id3                                      4.044 5.26e-05 ***
## admission_type_id4                                     -0.010 0.992308    
## admission_type_id5                                      0.703 0.482354    
## admission_type_id6                                     10.265  < 2e-16 ***
## admission_type_id8                                     -0.630 0.528949    
## discharge_disposition_id2                               0.358 0.720121    
## discharge_disposition_id3                              -0.081 0.935354    
## discharge_disposition_id4                               1.260 0.207775    
## discharge_disposition_id5                               2.820 0.004802 ** 
## discharge_disposition_id6                               3.942 8.09e-05 ***
## discharge_disposition_id7                              -0.845 0.398215    
## discharge_disposition_id8                               0.726 0.467556    
## discharge_disposition_id9                               0.011 0.991276    
## discharge_disposition_id10                              0.023 0.981374    
## discharge_disposition_id11                             -0.274 0.783720    
## discharge_disposition_id12                              0.530 0.596336    
## discharge_disposition_id13                             -7.440 1.00e-13 ***
## discharge_disposition_id14                             -5.890 3.85e-09 ***
## discharge_disposition_id15                              2.196 0.028105 *  
## discharge_disposition_id16                              0.702 0.482504    
## discharge_disposition_id17                             -0.397 0.691108    
## discharge_disposition_id18                              3.120 0.001810 ** 
## discharge_disposition_id19                             -0.018 0.985696    
## discharge_disposition_id20                             -0.011 0.991221    
## discharge_disposition_id22                              1.943 0.051976 .  
## discharge_disposition_id23                             -3.091 0.001996 ** 
## discharge_disposition_id24                             -0.336 0.737068    
## discharge_disposition_id25                             -2.956 0.003115 ** 
## discharge_disposition_id27                             -0.010 0.991644    
## discharge_disposition_id28                              0.652 0.514604    
## admission_source_id2                                   -0.626 0.531584    
## admission_source_id3                                   -0.109 0.912903    
## admission_source_id4                                   -6.297 3.03e-10 ***
## admission_source_id5                                   -1.964 0.049555 *  
## admission_source_id6                                   -6.139 8.33e-10 ***
## admission_source_id7                                    3.273 0.001064 ** 
## admission_source_id8                                    0.807 0.419629    
## admission_source_id9                                   -0.753 0.451572    
## admission_source_id10                                   0.238 0.812119    
## admission_source_id14                                   0.000 0.999777    
## admission_source_id17                                  -1.634 0.102303    
## admission_source_id22                                  -0.011 0.991378    
## time_in_hospital                                        3.407 0.000656 ***
## medical_specialtyAnesthesiology                        -0.922 0.356462    
## medical_specialtyAnesthesiology-Pediatric              -0.587 0.556967    
## medical_specialtyCardiology                            -0.572 0.567260    
## medical_specialtyCardiology-Pediatric                   0.673 0.501196    
## medical_specialtyDCPTEAM                               -0.021 0.983361    
## medical_specialtyDentistry                              0.018 0.985634    
## medical_specialtyDermatology                            0.010 0.991685    
## medical_specialtyEmergency/Trauma                      -0.539 0.589968    
## medical_specialtyEndocrinology                         -0.673 0.501249    
## medical_specialtyEndocrinology-Metabolism              -0.027 0.978498    
## medical_specialtyFamily/GeneralPractice                -0.486 0.627296    
## medical_specialtyGastroenterology                      -0.434 0.664177    
## medical_specialtyGynecology                            -1.425 0.154298    
## medical_specialtyHematology                            -0.232 0.816537    
## medical_specialtyHematology/Oncology                   -0.537 0.591609    
## medical_specialtyHospitalist                           -0.690 0.490274    
## medical_specialtyInfectiousDiseases                    -0.011 0.991323    
## medical_specialtyInternalMedicine                      -0.597 0.550685    
## medical_specialtyNephrology                            -0.249 0.803291    
## medical_specialtyNeurology                             -0.960 0.337238    
## medical_specialtyObsterics&Gynecology-GynecologicOnco  -1.170 0.242011    
## medical_specialtyObstetrics                            -1.605 0.108546    
## medical_specialtyObstetricsandGynecology               -1.318 0.187467    
## medical_specialtyOncology                              -0.500 0.617238    
## medical_specialtyOphthalmology                         -0.855 0.392823    
## medical_specialtyOrthopedics                           -0.830 0.406325    
## medical_specialtyOrthopedics-Reconstructive            -0.969 0.332339    
## medical_specialtyOsteopath                             -0.621 0.534628    
## medical_specialtyOtolaryngology                        -1.100 0.271220    
## medical_specialtyOutreachServices                      -0.429 0.668280    
## medical_specialtyPathology                              0.059 0.953336    
## medical_specialtyPediatrics                            -0.675 0.499902    
## medical_specialtyPediatrics-AllergyandImmunology        0.010 0.992155    
## medical_specialtyPediatrics-CriticalCare               -0.349 0.727166    
## medical_specialtyPediatrics-EmergencyMedicine           0.011 0.991397    
## medical_specialtyPediatrics-Endocrinology              -0.956 0.338826    
## medical_specialtyPediatrics-Hematology-Oncology        -0.582 0.560893    
## medical_specialtyPediatrics-Neurology                  -0.432 0.665772    
## medical_specialtyPediatrics-Pulmonology                 0.533 0.594059    
## medical_specialtyPerinatology                          -0.011 0.991213    
## medical_specialtyPhysicalMedicineandRehabilitation     -0.702 0.482371    
## medical_specialtyPhysicianNotFound                     -0.280 0.779532    
## medical_specialtyPodiatry                              -0.031 0.975632    
## medical_specialtyPsychiatry                            -0.653 0.513521    
## medical_specialtyPsychiatry-Addictive                  -0.011 0.990945    
## medical_specialtyPsychiatry-Child/Adolescent           -0.703 0.482261    
## medical_specialtyPsychology                            -0.653 0.513955    
## medical_specialtyPulmonology                           -0.461 0.644908    
## medical_specialtyRadiologist                           -0.607 0.543607    
## medical_specialtyRadiology                             -0.577 0.564101    
## medical_specialtyResident                               0.010 0.992227    
## medical_specialtyRheumatology                          -0.926 0.354391    
## medical_specialtySpeech                                -0.011 0.990994    
## medical_specialtySportsMedicine                         0.010 0.991645    
## medical_specialtySurgeon                               -0.977 0.328448    
## medical_specialtySurgery-Cardiovascular                -0.856 0.392263    
## medical_specialtySurgery-Cardiovascular/Thoracic       -1.125 0.260645    
## medical_specialtySurgery-Colon&Rectal                  -0.171 0.864039    
## medical_specialtySurgery-General                       -0.659 0.509814    
## medical_specialtySurgery-Maxillofacial                 -1.401 0.161077    
## medical_specialtySurgery-Neuro                         -1.066 0.286505    
## medical_specialtySurgery-Pediatric                     -0.890 0.373392    
## medical_specialtySurgery-Plastic                       -0.688 0.491186    
## medical_specialtySurgery-PlasticwithinHeadandNeck       0.010 0.991834    
## medical_specialtySurgery-Thoracic                      -0.521 0.602454    
## medical_specialtySurgery-Vascular                      -0.441 0.659499    
## medical_specialtySurgicalSpecialty                     -1.175 0.240179    
## medical_specialtyUrology                               -0.777 0.436938    
## num_lab_procedures                                     -0.923 0.355775    
## num_procedures                                         -2.397 0.016540 *  
## num_medications                                         0.633 0.526947    
## number_outpatient                                       8.056 7.87e-16 ***
## number_emergency                                        7.953 1.82e-15 ***
## number_inpatient                                       30.123  < 2e-16 ***
## number_diagnoses                                        9.462  < 2e-16 ***
## max_glu_serum>300                                       0.351 0.725298    
## max_glu_serumNone                                      -1.166 0.243570    
## max_glu_serumNorm                                      -0.260 0.794884    
## A1Cresult>8                                             0.464 0.642859    
## A1CresultNone                                           2.216 0.026694 *  
## A1CresultNorm                                          -1.508 0.131650    
## metforminNo                                             0.136 0.891727    
## metforminSteady                                        -0.646 0.518599    
## metforminUp                                            -0.108 0.913762    
## repaglinideNo                                          -0.725 0.468215    
## repaglinideSteady                                      -0.379 0.704980    
## repaglinideUp                                          -0.373 0.709127    
## nateglinideNo                                          -0.681 0.495671    
## nateglinideSteady                                      -0.556 0.578019    
## nateglinideUp                                          -1.909 0.056295 .  
## chlorpropamideNo                                        0.010 0.992094    
## chlorpropamideSteady                                    0.010 0.991943    
## chlorpropamideUp                                        0.010 0.992314    
## glimepirideNo                                           0.108 0.914297    
## glimepirideSteady                                       0.130 0.896224    
## glimepirideUp                                          -0.495 0.620742    
## glipizideNo                                            -1.906 0.056592 .  
## glipizideSteady                                        -1.767 0.077277 .  
## glipizideUp                                            -1.564 0.117725    
## glyburideNo                                            -0.615 0.538537    
## glyburideSteady                                        -0.869 0.385004    
## glyburideUp                                            -1.110 0.267084    
## tolbutamideSteady                                      -0.679 0.497215    
## pioglitazoneNo                                         -1.130 0.258684    
## pioglitazoneSteady                                     -1.180 0.238190    
## pioglitazoneUp                                          0.090 0.928608    
## rosiglitazoneNo                                         2.949 0.003183 ** 
## rosiglitazoneSteady                                     3.116 0.001834 ** 
## rosiglitazoneUp                                         1.939 0.052441 .  
## acarboseNo                                             -0.014 0.988641    
## acarboseSteady                                         -0.014 0.989064    
## acarboseUp                                              0.001 0.998871    
## miglitolNo                                             -0.009 0.992548    
## miglitolSteady                                         -0.009 0.992761    
## miglitolUp                                             -0.014 0.988700    
## tolazamideSteady                                       -1.784 0.074461 .  
## insulinNo                                              -1.236 0.216293    
## insulinSteady                                          -3.333 0.000860 ***
## insulinUp                                              -1.144 0.252577    
## glyburide.metforminNo                                   0.010 0.991641    
## glyburide.metforminSteady                               0.011 0.991553    
## glyburide.metforminUp                                   0.000 0.999677    
## changeNo                                               -0.183 0.855130    
## diabetesMedYes                                          7.453 9.11e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 48799  on 35506  degrees of freedom
## Residual deviance: 44334  on 35323  degrees of freedom
## AIC: 44702
## 
## Number of Fisher Scoring iterations: 14

Check the multicollinearity

==> admission_type_id GVIF = 44.4

==> admission_source_id GVIF = 29.8

==> medical_specialty GVIF = 21.4

# Generalised variance inflation factors (GVIF) for each term in `model`.
vif(model)
##                               GVIF Df GVIF^(1/(2*Df))
## race                      1.197494  4        1.022785
## gender                    1.063025  2        1.015397
## age                       4.013282  9        1.080259
## admission_type_id        44.414202  6        1.371808
## discharge_disposition_id  2.741557 25        1.020375
## admission_source_id      29.819556 12        1.151960
## time_in_hospital          1.637884  1        1.279798
## medical_specialty        21.377699 68        1.022773
## num_lab_procedures        1.605224  1        1.266974
## num_procedures            1.408844  1        1.186947
## num_medications           1.956667  1        1.398809
## number_outpatient         1.072492  1        1.035612
## number_emergency          1.093574  1        1.045741
## number_inpatient          1.099747  1        1.048688
## number_diagnoses          1.377824  1        1.173807
## max_glu_serum             2.455822  3        1.161536
## A1Cresult                 1.252679  3        1.038261
## metformin                 1.566838  3        1.077715
## repaglinide               1.078690  3        1.012705
## nateglinide               1.039902  3        1.006542
## chlorpropamide            1.015443  3        1.002557
## glimepiride               1.231944  3        1.035377
## glipizide                 1.515734  3        1.071776
## glyburide                 1.475243  3        1.066950
## tolbutamide               1.002485  1        1.001242
## pioglitazone              1.187738  3        1.029090
## rosiglitazone             1.188497  3        1.029200
## acarbose                  1.009084  3        1.001508
## miglitol                  1.005541  3        1.000921
## tolazamide                1.004064  1        1.002030
## insulin                   4.766724  3        1.297289
## glyburide.metformin       1.031464  3        1.005177
## change                    3.600169  1        1.897411
## diabetesMed               2.297392  1        1.515715

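Remove admission_type_id and medical_specialty since they contribute to multicollinearity. admission_source_id is kept: once the other two are removed, its GVIF drops from 29.8 to 2.29 (see vif(model1) below).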

# Refit the binomial GLM on every predictor except admission_type_id and
# medical_specialty, then print the coefficient table.
model1 <- glm(
  readmitted ~ . - admission_type_id - medical_specialty,
  family = binomial,
  data = train_set
)
summary(model1)
## 
## Call:
## glm(formula = readmitted ~ . - admission_type_id - medical_specialty, 
##     family = binomial, data = train_set)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.5187  -1.0239  -0.7621   1.1942   2.5944  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -3.710e+00  2.712e+03  -0.001 0.998908    
## raceAsian                  -3.177e-01  1.292e-01  -2.459 0.013916 *  
## raceCaucasian               1.232e-02  2.870e-02   0.429 0.667831    
## raceHispanic                2.185e-02  7.777e-02   0.281 0.778739    
## raceOther                  -2.252e-01  9.321e-02  -2.416 0.015711 *  
## genderMale                 -5.734e-02  2.309e-02  -2.484 0.013003 *  
## genderUnknown/Invalid      -1.467e+01  1.455e+03  -0.010 0.991957    
## age[10-20)                  7.925e-01  3.088e-01   2.566 0.010275 *  
## age[20-30)                  4.901e-01  3.007e-01   1.630 0.103115    
## age[30-40)                  6.749e-01  2.923e-01   2.309 0.020954 *  
## age[40-50)                  8.119e-01  2.891e-01   2.808 0.004979 ** 
## age[50-60)                  8.006e-01  2.884e-01   2.776 0.005497 ** 
## age[60-70)                  8.461e-01  2.883e-01   2.934 0.003342 ** 
## age[70-80)                  9.639e-01  2.883e-01   3.343 0.000830 ***
## age[80-90)                  8.797e-01  2.891e-01   3.043 0.002342 ** 
## age[90-100)                 6.822e-01  2.966e-01   2.300 0.021432 *  
## discharge_disposition_id2   4.450e-02  7.774e-02   0.572 0.567043    
## discharge_disposition_id3  -4.297e-02  3.805e-02  -1.129 0.258789    
## discharge_disposition_id4   1.504e-01  1.271e-01   1.183 0.236643    
## discharge_disposition_id5   1.909e-01  9.454e-02   2.019 0.043445 *  
## discharge_disposition_id6   1.412e-01  3.878e-02   3.640 0.000272 ***
## discharge_disposition_id7  -1.290e-01  1.631e-01  -0.791 0.428984    
## discharge_disposition_id8   1.744e-01  3.845e-01   0.453 0.650232    
## discharge_disposition_id9   1.623e+01  1.455e+03   0.011 0.991101    
## discharge_disposition_id10  5.575e-01  1.420e+00   0.393 0.694565    
## discharge_disposition_id11 -1.601e+01  5.829e+01  -0.275 0.783544    
## discharge_disposition_id12  6.497e-01  1.421e+00   0.457 0.647502    
## discharge_disposition_id13 -2.729e+00  3.663e-01  -7.450 9.33e-14 ***
## discharge_disposition_id14 -2.528e+00  4.169e-01  -6.062 1.34e-09 ***
## discharge_disposition_id15  1.452e+00  6.514e-01   2.230 0.025763 *  
## discharge_disposition_id16  2.086e-01  6.886e-01   0.303 0.761923    
## discharge_disposition_id17 -6.116e-01  7.451e-01  -0.821 0.411772    
## discharge_disposition_id18  1.748e-01  8.560e-02   2.042 0.041133 *  
## discharge_disposition_id19 -1.492e+01  8.383e+02  -0.018 0.985799    
## discharge_disposition_id20 -1.540e+01  1.455e+03  -0.011 0.991556    
## discharge_disposition_id22  7.837e-02  7.720e-02   1.015 0.310055    
## discharge_disposition_id23 -5.424e-01  1.777e-01  -3.053 0.002268 ** 
## discharge_disposition_id24  2.735e-02  8.686e-01   0.031 0.974879    
## discharge_disposition_id25  1.685e-01  9.007e-02   1.871 0.061315 .  
## discharge_disposition_id27 -1.537e+01  1.455e+03  -0.011 0.991574    
## discharge_disposition_id28  3.370e-01  5.594e-01   0.602 0.546885    
## admission_source_id2       -5.316e-02  1.831e-01  -0.290 0.771537    
## admission_source_id3       -2.041e-01  4.145e-01  -0.493 0.622348    
## admission_source_id4       -4.916e-01  7.379e-02  -6.662 2.69e-11 ***
## admission_source_id5       -2.710e-01  1.223e-01  -2.215 0.026742 *  
## admission_source_id6       -6.071e-01  7.567e-02  -8.024 1.02e-15 ***
## admission_source_id7        1.010e-01  2.767e-02   3.648 0.000264 ***
## admission_source_id8        6.478e-01  8.244e-01   0.786 0.431965    
## admission_source_id9       -2.536e-01  3.885e-01  -0.653 0.513998    
## admission_source_id10       4.403e-01  1.416e+00   0.311 0.755789    
## admission_source_id14      -1.477e+01  1.455e+03  -0.010 0.991905    
## admission_source_id17       1.573e-01  5.295e-02   2.971 0.002967 ** 
## admission_source_id22      -1.585e+01  1.455e+03  -0.011 0.991313    
## time_in_hospital            2.160e-02  4.593e-03   4.703 2.56e-06 ***
## num_lab_procedures          1.357e-03  6.866e-04   1.976 0.048123 *  
## num_procedures             -1.516e-02  7.453e-03  -2.034 0.041970 *  
## num_medications            -4.223e-03  1.796e-03  -2.351 0.018706 *  
## number_outpatient           1.006e-01  1.351e-02   7.447 9.54e-14 ***
## number_emergency            1.633e-01  1.975e-02   8.267  < 2e-16 ***
## number_inpatient            3.847e-01  1.208e-02  31.847  < 2e-16 ***
## number_diagnoses            6.909e-02  6.462e-03  10.692  < 2e-16 ***
## max_glu_serum>300           9.402e-02  1.409e-01   0.667 0.504532    
## max_glu_serumNone          -1.118e-01  9.672e-02  -1.156 0.247693    
## max_glu_serumNorm          -7.308e-02  1.096e-01  -0.667 0.504784    
## A1Cresult>8                 4.466e-02  6.786e-02   0.658 0.510488    
## A1CresultNone               1.277e-01  5.827e-02   2.192 0.028394 *  
## A1CresultNorm              -1.171e-01  7.880e-02  -1.486 0.137356    
## metforminNo                 8.649e-03  1.569e-01   0.055 0.956048    
## metforminSteady            -1.293e-01  1.571e-01  -0.823 0.410739    
## metforminUp                -5.736e-02  1.873e-01  -0.306 0.759439    
## repaglinideNo              -3.799e-01  5.053e-01  -0.752 0.452228    
## repaglinideSteady          -1.404e-01  5.112e-01  -0.275 0.783602    
## repaglinideUp              -1.975e-01  5.856e-01  -0.337 0.735922    
## nateglinideNo              -9.574e-01  1.258e+00  -0.761 0.446780    
## nateglinideSteady          -8.170e-01  1.265e+00  -0.646 0.518266    
## nateglinideUp              -3.123e+00  1.621e+00  -1.926 0.054088 .  
## chlorpropamideNo            1.498e+01  1.455e+03   0.010 0.991788    
## chlorpropamideSteady        1.522e+01  1.455e+03   0.010 0.991655    
## chlorpropamideUp            1.467e+01  1.455e+03   0.010 0.991959    
## glimepirideNo              -3.276e-02  2.339e-01  -0.140 0.888601    
## glimepirideSteady          -2.956e-03  2.370e-01  -0.012 0.990051    
## glimepirideUp              -1.816e-01  2.871e-01  -0.632 0.527138    
## glipizideNo                -3.080e-01  1.641e-01  -1.877 0.060474 .  
## glipizideSteady            -2.860e-01  1.642e-01  -1.742 0.081578 .  
## glipizideUp                -3.188e-01  1.995e-01  -1.599 0.109910    
## glyburideNo                -6.855e-02  1.619e-01  -0.423 0.671976    
## glyburideSteady            -1.209e-01  1.631e-01  -0.741 0.458609    
## glyburideUp                -2.254e-01  2.082e-01  -1.083 0.278986    
## tolbutamideSteady          -1.003e+00  1.164e+00  -0.862 0.388621    
## pioglitazoneNo             -4.147e-01  3.438e-01  -1.206 0.227812    
## pioglitazoneSteady         -4.123e-01  3.458e-01  -1.192 0.233137    
## pioglitazoneUp             -8.993e-03  4.150e-01  -0.022 0.982712    
## rosiglitazoneNo             1.649e+00  5.669e-01   2.909 0.003624 ** 
## rosiglitazoneSteady         1.755e+00  5.681e-01   3.090 0.002002 ** 
## rosiglitazoneUp             1.138e+00  6.162e-01   1.847 0.064742 .  
## acarboseNo                 -1.442e+01  9.990e+02  -0.014 0.988480    
## acarboseSteady             -1.386e+01  9.990e+02  -0.014 0.988931    
## acarboseUp                  1.624e+00  1.408e+03   0.001 0.999080    
## miglitolNo                 -1.355e+01  1.455e+03  -0.009 0.992570    
## miglitolSteady             -1.300e+01  1.455e+03  -0.009 0.992874    
## miglitolUp                 -2.913e+01  2.058e+03  -0.014 0.988709    
## tolazamideSteady           -1.053e+00  6.484e-01  -1.624 0.104447    
## insulinNo                  -9.093e-02  5.903e-02  -1.540 0.123501    
## insulinSteady              -1.836e-01  4.557e-02  -4.028 5.62e-05 ***
## insulinUp                  -3.572e-02  4.640e-02  -0.770 0.441423    
## glyburide.metforminNo       1.518e+01  1.455e+03   0.010 0.991678    
## glyburide.metforminSteady   1.535e+01  1.455e+03   0.011 0.991587    
## glyburide.metforminUp      -9.244e-01  2.058e+03   0.000 0.999642    
## changeNo                    2.109e-03  4.274e-02   0.049 0.960649    
## diabetesMedYes              3.115e-01  4.160e-02   7.490 6.90e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 48799  on 35506  degrees of freedom
## Residual deviance: 44819  on 35397  degrees of freedom
## AIC: 45039
## 
## Number of Fisher Scoring iterations: 14
# Recompute the GVIFs for the reduced model `model1`.
vif(model1)
##                              GVIF Df GVIF^(1/(2*Df))
## race                     1.112673  4        1.013435
## gender                   1.043237  2        1.010638
## age                      1.532131  9        1.023986
## discharge_disposition_id 1.617130 25        1.009659
## admission_source_id      2.288851 12        1.035104
## time_in_hospital         1.491617  1        1.221318
## num_lab_procedures       1.420194  1        1.191719
## num_procedures           1.270811  1        1.127302
## num_medications          1.726409  1        1.313929
## number_outpatient        1.051710  1        1.025529
## number_emergency         1.082024  1        1.040204
## number_inpatient         1.086830  1        1.042511
## number_diagnoses         1.293270  1        1.137220
## max_glu_serum            1.657352  3        1.087850
## A1Cresult                1.229636  3        1.035053
## metformin                1.547351  3        1.075470
## repaglinide              1.063043  3        1.010241
## nateglinide              1.036839  3        1.006048
## chlorpropamide           1.011201  3        1.001858
## glimepiride              1.216255  3        1.033168
## glipizide                1.507541  3        1.070808
## glyburide                1.462808  3        1.065445
## tolbutamide              1.002080  1        1.001040
## pioglitazone             1.181287  3        1.028157
## rosiglitazone            1.176252  3        1.027425
## acarbose                 1.006716  3        1.001116
## miglitol                 1.004580  3        1.000762
## tolazamide               1.001970  1        1.000984
## insulin                  4.559680  3        1.287723
## glyburide.metformin      1.024788  3        1.004089
## change                   3.592061  1        1.895273
## diabetesMed              2.293810  1        1.514533

Evaluate the trained model on the training set

# Predicted probabilities from model1 for the training rows.
trainpredict <- predict(model1, newdata = train_set, type = "response")
# Label a row "1" when its predicted probability exceeds 0.5, else "0".
p_class <- ifelse(trainpredict > 0.5, "1", "0")

# Confusion matrix: rows = observed readmitted, columns = predicted class.
# Both predicted levels are forced so the table stays 2 x 2 (and diag()
# keeps meaning "correct predictions") even if every prediction lands on
# the same side of the cut-off. p_class itself is left as character.
matrix_table <- table(
  train_set$readmitted,
  p_class = factor(p_class, levels = c("0", "1"))
)
matrix_table
##    p_class
##         0     1
##   0 15938  3753
##   1  9256  6560
# Accuracy = share of cases on the diagonal of the confusion matrix.
accuracy <- sum(diag(matrix_table)) / sum(matrix_table)
round(accuracy, digits = 3)
## [1] 0.634

Evaluate the trained model on the test set

Drop the test rows with admission_source_id == 13, because this level did not appear in the data used to train the model.

# Count the test-set rows at each admission_source_id level.
summary(test_set$admission_source_id)
##    1    2    3    4    5    6    7    8    9   10   11   13   14   17   20   22 
## 5287   51   17  394  154  461 7526    6   16    0    0    1    0 1303    0    2 
##   25 
##    0
# Keep every test row whose admission_source_id is not 13.
test_set1 <- dplyr::filter(test_set, admission_source_id != 13)

Drop the test rows whose medical_specialty is "Neurophysiology", "Pediatrics-InfectiousDiseases" or "Proctology", because these values did not appear in the data used to train the model.

# Inspect medical_specialty in the test set.
# NOTE(review): the column is character here, so summary() only reports
# length/class/mode; table() would be needed to see per-value counts.
summary(test_set$medical_specialty)
##    Length     Class      Mode 
##     15218 character character
# Drop the test rows whose medical_specialty is one of the three values
# listed below, then score the remaining rows with model1.
test_set2 <- test_set1 %>%
  filter(!medical_specialty %in% c(
    "Neurophysiology", "Pediatrics-InfectiousDiseases", "Proctology"
  ))

# Predicted probabilities for the filtered test rows, cut at 0.5.
testpredict <- predict(model1, newdata = test_set2, type = "response")
p_class <- ifelse(testpredict > 0.5, "1", "0")

# Confusion matrix: rows = observed readmitted, columns = predicted class.
# Both predicted levels are forced so the table stays 2 x 2 (and diag()
# keeps meaning "correct predictions") even if every prediction lands on
# the same side of the cut-off. p_class itself is left as character.
matrix_table <- table(
  test_set2$readmitted,
  p_class = factor(p_class, levels = c("0", "1"))
)
matrix_table
##    p_class
##        0    1
##   0 6815 1621
##   1 3932 2846
# Accuracy = share of cases on the diagonal of the confusion matrix.
accuracy <- sum(diag(matrix_table)) / sum(matrix_table)
round(accuracy, digits = 3)
## [1] 0.635

Plot performance chart

# Lift and gain curves built with ROCR from the training-set predicted
# probabilities (trainpredict) and the observed readmitted outcome.
pred <- prediction(trainpredict, train_set$readmitted)

# Lift against the rate of positive predictions. The rows are patient
# encounters, so the x-axis is labelled accordingly.
perf <- performance(pred, "lift", "rpp")
plot(perf, main = "lift curve", xlab = "Proportion of Patients (sorted prob)")

# Gain chart: true positive rate against the rate of positive predictions.
gain <- performance(pred, "tpr", "rpp")
plot(gain, col = "orange", lwd = 2)

Try the machine learning method

library(xgboost)

Convert the categorical variables to numeric dummy (one-hot) columns

# Show the structure (column names and types) of hospital3.
str(hospital3)
## 'data.frame':    50725 obs. of  35 variables:
##  $ race                    : chr  "Caucasian" "Caucasian" "Caucasian" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[90-100)" "[40-50)" "[80-90)" ...
##  $ admission_type_id       : Factor w/ 8 levels "1","2","3","4",..: 6 3 1 1 1 1 1 1 1 1 ...
##  $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 3 3 6 1 3 1 2 1 1 ...
##  $ admission_source_id     : Factor w/ 17 levels "1","2","3","4",..: 1 4 7 7 7 7 1 7 7 7 ...
##  $ time_in_hospital        : int  1 12 7 10 3 6 2 5 6 2 ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" "InternalMedicine" "Family/GeneralPractice" "Family/GeneralPractice" ...
##  $ num_lab_procedures      : int  41 33 60 55 29 64 25 52 27 41 ...
##  $ num_procedures          : int  0 3 0 1 0 3 2 0 0 0 ...
##  $ num_medications         : int  1 18 15 31 11 18 11 14 16 11 ...
##  $ number_outpatient       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_diagnoses        : int  1 8 8 8 3 7 3 8 8 6 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "Steady" "No" ...
##  $ repaglinide             : chr  "No" "No" "Up" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "No" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "Steady" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Steady" "Down" "Steady" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "Ch" "No" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 2 2 2 2 ...
# One-hot encode the categorical predictors ----

# Columns treated as categorical, in the order they are encoded.
category <- c(
  # patient attributes
  "race", "gender", "age",
  # admission / discharge codes and treating specialty
  "admission_type_id", "discharge_disposition_id", "admission_source_id",
  "medical_specialty",
  # test-result columns
  "max_glu_serum", "A1Cresult",
  # per-drug columns (values such as "No" / "Steady" / "Up" / "Down")
  "metformin", "repaglinide", "nateglinide", "chlorpropamide",
  "glimepiride", "glipizide", "glyburide", "tolbutamide",
  "pioglitazone", "rosiglitazone", "acarbose", "miglitol",
  "tolazamide", "insulin", "glyburide.metformin",
  # overall medication flags
  "change", "diabetesMed"
)

# Restrict the data to those columns only.
hospital_cat <- hospital3[category]

# Build the encoder from every column of hospital_cat, then apply it to the
# same data. Wrapping the prediction in data.frame() appears to be what
# sanitises the generated names (e.g. "age.0.10." and
# "genderUnknown.Invalid" in the printed head) -- keep it if later code
# refers to columns by those names.
dmy <- dummyVars(" ~ .", data = hospital_cat)
trsf <- predict(dmy, newdata = hospital_cat) %>%
  data.frame()

# NOTE(review): every level gets its own indicator (the printed head shows
# all five race columns), so the indicators of one variable always sum to 1.
# Confirm the downstream models tolerate this, or consider fullRank = TRUE.
head(trsf, n = 6L)
##    raceAfricanAmerican raceAsian raceCaucasian raceHispanic raceOther
## 1                    0         0             1            0         0
## 10                   0         0             1            0         0
## 13                   0         0             1            0         0
## 14                   0         0             1            0         0
## 18                   0         0             1            0         0
## 27                   0         0             1            0         0
##    genderFemale genderMale genderUnknown.Invalid age.0.10. age.10.20.
## 1             1          0                     0         1          0
## 10            1          0                     0         0          0
## 13            1          0                     0         0          0
## 14            0          1                     0         0          0
## 18            1          0                     0         0          0
## 27            0          1                     0         0          0
##    age.20.30. age.30.40. age.40.50. age.50.60. age.60.70. age.70.80. age.80.90.
## 1           0          0          0          0          0          0          0
## 10          0          0          0          0          0          0          0
## 13          0          0          1          0          0          0          0
## 14          0          0          0          0          0          0          1
## 18          0          0          0          1          0          0          0
## 27          0          0          0          0          0          0          1
##    age.90.100. admission_type_id.1 admission_type_id.2 admission_type_id.3
## 1            0                   0                   0                   0
## 10           1                   0                   0                   1
## 13           0                   1                   0                   0
## 14           0                   1                   0                   0
## 18           0                   1                   0                   0
## 27           0                   1                   0                   0
##    admission_type_id.4 admission_type_id.5 admission_type_id.6
## 1                    0                   0                   1
## 10                   0                   0                   0
## 13                   0                   0                   0
## 14                   0                   0                   0
## 18                   0                   0                   0
## 27                   0                   0                   0
##    admission_type_id.7 admission_type_id.8 discharge_disposition_id.1
## 1                    0                   0                          0
## 10                   0                   0                          0
## 13                   0                   0                          0
## 14                   0                   0                          0
## 18                   0                   0                          1
## 27                   0                   0                          0
##    discharge_disposition_id.2 discharge_disposition_id.3
## 1                           0                          0
## 10                          0                          1
## 13                          0                          1
## 14                          0                          0
## 18                          0                          0
## 27                          0                          1
##    discharge_disposition_id.4 discharge_disposition_id.5
## 1                           0                          0
## 10                          0                          0
## 13                          0                          0
## 14                          0                          0
## 18                          0                          0
## 27                          0                          0
##    discharge_disposition_id.6 discharge_disposition_id.7
## 1                           0                          0
## 10                          0                          0
## 13                          0                          0
## 14                          1                          0
## 18                          0                          0
## 27                          0                          0
##    discharge_disposition_id.8 discharge_disposition_id.9
## 1                           0                          0
## 10                          0                          0
## 13                          0                          0
## 14                          0                          0
## 18                          0                          0
## 27                          0                          0
##    discharge_disposition_id.10 discharge_disposition_id.11
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.12 discharge_disposition_id.13
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.14 discharge_disposition_id.15
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.16 discharge_disposition_id.17
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.18 discharge_disposition_id.19
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.20 discharge_disposition_id.22
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.23 discharge_disposition_id.24
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.25 discharge_disposition_id.27
## 1                            1                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.28 admission_source_id.1 admission_source_id.2
## 1                            0                     1                     0
## 10                           0                     0                     0
## 13                           0                     0                     0
## 14                           0                     0                     0
## 18                           0                     0                     0
## 27                           0                     0                     0
##    admission_source_id.3 admission_source_id.4 admission_source_id.5
## 1                      0                     0                     0
## 10                     0                     1                     0
## 13                     0                     0                     0
## 14                     0                     0                     0
## 18                     0                     0                     0
## 27                     0                     0                     0
##    admission_source_id.6 admission_source_id.7 admission_source_id.8
## 1                      0                     0                     0
## 10                     0                     0                     0
## 13                     0                     1                     0
## 14                     0                     1                     0
## 18                     0                     1                     0
## 27                     0                     1                     0
##    admission_source_id.9 admission_source_id.10 admission_source_id.11
## 1                      0                      0                      0
## 10                     0                      0                      0
## 13                     0                      0                      0
## 14                     0                      0                      0
## 18                     0                      0                      0
## 27                     0                      0                      0
##    admission_source_id.13 admission_source_id.14 admission_source_id.17
## 1                       0                      0                      0
## 10                      0                      0                      0
## 13                      0                      0                      0
## 14                      0                      0                      0
## 18                      0                      0                      0
## 27                      0                      0                      0
##    admission_source_id.20 admission_source_id.22 admission_source_id.25
## 1                       0                      0                      0
## 10                      0                      0                      0
## 13                      0                      0                      0
## 14                      0                      0                      0
## 18                      0                      0                      0
## 27                      0                      0                      0
##    medical_specialtyAllergyandImmunology medical_specialtyAnesthesiology
## 1                                      0                               0
## 10                                     0                               0
## 13                                     0                               0
## 14                                     0                               0
## 18                                     0                               0
## 27                                     0                               0
##    medical_specialtyAnesthesiology.Pediatric medical_specialtyCardiology
## 1                                          0                           0
## 10                                         0                           0
## 13                                         0                           0
## 14                                         0                           0
## 18                                         0                           1
## 27                                         0                           1
##    medical_specialtyCardiology.Pediatric medical_specialtyDCPTEAM
## 1                                      0                        0
## 10                                     0                        0
## 13                                     0                        0
## 14                                     0                        0
## 18                                     0                        0
## 27                                     0                        0
##    medical_specialtyDentistry medical_specialtyDermatology
## 1                           0                            0
## 10                          0                            0
## 13                          0                            0
## 14                          0                            0
## 18                          0                            0
## 27                          0                            0
##    medical_specialtyEmergency.Trauma medical_specialtyEndocrinology
## 1                                  0                              0
## 10                                 0                              0
## 13                                 0                              0
## 14                                 0                              0
## 18                                 0                              0
## 27                                 0                              0
##    medical_specialtyEndocrinology.Metabolism
## 1                                          0
## 10                                         0
## 13                                         0
## 14                                         0
## 18                                         0
## 27                                         0
##    medical_specialtyFamily.GeneralPractice medical_specialtyGastroenterology
## 1                                        0                                 0
## 10                                       0                                 0
## 13                                       1                                 0
## 14                                       1                                 0
## 18                                       0                                 0
## 27                                       0                                 0
##    medical_specialtyGynecology medical_specialtyHematology
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    medical_specialtyHematology.Oncology medical_specialtyHospitalist
## 1                                     0                            0
## 10                                    0                            0
## 13                                    0                            0
## 14                                    0                            0
## 18                                    0                            0
## 27                                    0                            0
##    medical_specialtyInfectiousDiseases medical_specialtyInternalMedicine
## 1                                    0                                 0
## 10                                   0                                 1
## 13                                   0                                 0
## 14                                   0                                 0
## 18                                   0                                 0
## 27                                   0                                 0
##    medical_specialtyNephrology medical_specialtyNeurology
## 1                            0                          0
## 10                           0                          0
## 13                           0                          0
## 14                           0                          0
## 18                           0                          0
## 27                           0                          0
##    medical_specialtyNeurophysiology
## 1                                 0
## 10                                0
## 13                                0
## 14                                0
## 18                                0
## 27                                0
##    medical_specialtyObsterics.Gynecology.GynecologicOnco
## 1                                                      0
## 10                                                     0
## 13                                                     0
## 14                                                     0
## 18                                                     0
## 27                                                     0
##    medical_specialtyObstetrics medical_specialtyObstetricsandGynecology
## 1                            0                                        0
## 10                           0                                        0
## 13                           0                                        0
## 14                           0                                        0
## 18                           0                                        0
## 27                           0                                        0
##    medical_specialtyOncology medical_specialtyOphthalmology
## 1                          0                              0
## 10                         0                              0
## 13                         0                              0
## 14                         0                              0
## 18                         0                              0
## 27                         0                              0
##    medical_specialtyOrthopedics medical_specialtyOrthopedics.Reconstructive
## 1                             0                                           0
## 10                            0                                           0
## 13                            0                                           0
## 14                            0                                           0
## 18                            0                                           0
## 27                            0                                           0
##    medical_specialtyOsteopath medical_specialtyOtolaryngology
## 1                           0                               0
## 10                          0                               0
## 13                          0                               0
## 14                          0                               0
## 18                          0                               0
## 27                          0                               0
##    medical_specialtyOutreachServices medical_specialtyPathology
## 1                                  0                          0
## 10                                 0                          0
## 13                                 0                          0
## 14                                 0                          0
## 18                                 0                          0
## 27                                 0                          0
##    medical_specialtyPediatrics medical_specialtyPediatrics.AllergyandImmunology
## 1                            0                                                0
## 10                           0                                                0
## 13                           0                                                0
## 14                           0                                                0
## 18                           0                                                0
## 27                           0                                                0
##    medical_specialtyPediatrics.CriticalCare
## 1                                         0
## 10                                        0
## 13                                        0
## 14                                        0
## 18                                        0
## 27                                        0
##    medical_specialtyPediatrics.EmergencyMedicine
## 1                                              0
## 10                                             0
## 13                                             0
## 14                                             0
## 18                                             0
## 27                                             0
##    medical_specialtyPediatrics.Endocrinology
## 1                                          1
## 10                                         0
## 13                                         0
## 14                                         0
## 18                                         0
## 27                                         0
##    medical_specialtyPediatrics.Hematology.Oncology
## 1                                                0
## 10                                               0
## 13                                               0
## 14                                               0
## 18                                               0
## 27                                               0
##    medical_specialtyPediatrics.InfectiousDiseases
## 1                                               0
## 10                                              0
## 13                                              0
## 14                                              0
## 18                                              0
## 27                                              0
##    medical_specialtyPediatrics.Neurology
## 1                                      0
## 10                                     0
## 13                                     0
## 14                                     0
## 18                                     0
## 27                                     0
##    medical_specialtyPediatrics.Pulmonology medical_specialtyPerinatology
## 1                                        0                             0
## 10                                       0                             0
## 13                                       0                             0
## 14                                       0                             0
## 18                                       0                             0
## 27                                       0                             0
##    medical_specialtyPhysicalMedicineandRehabilitation
## 1                                                   0
## 10                                                  0
## 13                                                  0
## 14                                                  0
## 18                                                  0
## 27                                                  0
##    medical_specialtyPhysicianNotFound medical_specialtyPodiatry
## 1                                   0                         0
## 10                                  0                         0
## 13                                  0                         0
## 14                                  0                         0
## 18                                  0                         0
## 27                                  0                         0
##    medical_specialtyProctology medical_specialtyPsychiatry
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    medical_specialtyPsychiatry.Addictive
## 1                                      0
## 10                                     0
## 13                                     0
## 14                                     0
## 18                                     0
## 27                                     0
##    medical_specialtyPsychiatry.Child.Adolescent medical_specialtyPsychology
## 1                                             0                           0
## 10                                            0                           0
## 13                                            0                           0
## 14                                            0                           0
## 18                                            0                           0
## 27                                            0                           0
##    medical_specialtyPulmonology medical_specialtyRadiologist
## 1                             0                            0
## 10                            0                            0
## 13                            0                            0
## 14                            0                            0
## 18                            0                            0
## 27                            0                            0
##    medical_specialtyRadiology medical_specialtyResident
## 1                           0                         0
## 10                          0                         0
## 13                          0                         0
## 14                          0                         0
## 18                          0                         0
## 27                          0                         0
##    medical_specialtyRheumatology medical_specialtySpeech
## 1                              0                       0
## 10                             0                       0
## 13                             0                       0
## 14                             0                       0
## 18                             0                       0
## 27                             0                       0
##    medical_specialtySportsMedicine medical_specialtySurgeon
## 1                                0                        0
## 10                               0                        0
## 13                               0                        0
## 14                               0                        0
## 18                               0                        0
## 27                               0                        0
##    medical_specialtySurgery.Cardiovascular
## 1                                        0
## 10                                       0
## 13                                       0
## 14                                       0
## 18                                       0
## 27                                       0
##    medical_specialtySurgery.Cardiovascular.Thoracic
## 1                                                 0
## 10                                                0
## 13                                                0
## 14                                                0
## 18                                                0
## 27                                                0
##    medical_specialtySurgery.Colon.Rectal medical_specialtySurgery.General
## 1                                      0                                0
## 10                                     0                                0
## 13                                     0                                0
## 14                                     0                                0
## 18                                     0                                0
## 27                                     0                                0
##    medical_specialtySurgery.Maxillofacial medical_specialtySurgery.Neuro
## 1                                       0                              0
## 10                                      0                              0
## 13                                      0                              0
## 14                                      0                              0
## 18                                      0                              0
## 27                                      0                              0
##    medical_specialtySurgery.Pediatric medical_specialtySurgery.Plastic
## 1                                   0                                0
## 10                                  0                                0
## 13                                  0                                0
## 14                                  0                                0
## 18                                  0                                0
## 27                                  0                                0
##    medical_specialtySurgery.PlasticwithinHeadandNeck
## 1                                                  0
## 10                                                 0
## 13                                                 0
## 14                                                 0
## 18                                                 0
## 27                                                 0
##    medical_specialtySurgery.Thoracic medical_specialtySurgery.Vascular
## 1                                  0                                 0
## 10                                 0                                 0
## 13                                 0                                 0
## 14                                 0                                 0
## 18                                 0                                 0
## 27                                 0                                 0
##    medical_specialtySurgicalSpecialty medical_specialtyUrology
## 1                                   0                        0
## 10                                  0                        0
## 13                                  0                        0
## 14                                  0                        0
## 18                                  0                        0
## 27                                  0                        0
##    max_glu_serum.200 max_glu_serum.300 max_glu_serumNone max_glu_serumNorm
## 1                  0                 0                 1                 0
## 10                 0                 0                 1                 0
## 13                 0                 0                 1                 0
## 14                 0                 0                 1                 0
## 18                 0                 0                 1                 0
## 27                 0                 0                 1                 0
##    A1Cresult.7 A1Cresult.8 A1CresultNone A1CresultNorm metforminDown
## 1            0           0             1             0             0
## 10           0           0             1             0             0
## 13           0           0             1             0             0
## 14           0           0             1             0             0
## 18           0           0             1             0             0
## 27           1           0             0             0             0
##    metforminNo metforminSteady metforminUp repaglinideDown repaglinideNo
## 1            1               0           0               0             1
## 10           1               0           0               0             1
## 13           0               1           0               0             0
## 14           1               0           0               0             1
## 18           1               0           0               0             1
## 27           0               1           0               0             1
##    repaglinideSteady repaglinideUp nateglinideDown nateglinideNo
## 1                  0             0               0             1
## 10                 0             0               0             1
## 13                 0             1               0             1
## 14                 0             0               0             1
## 18                 0             0               0             1
## 27                 0             0               0             1
##    nateglinideSteady nateglinideUp chlorpropamideDown chlorpropamideNo
## 1                  0             0                  0                1
## 10                 0             0                  0                1
## 13                 0             0                  0                1
## 14                 0             0                  0                1
## 18                 0             0                  0                1
## 27                 0             0                  0                1
##    chlorpropamideSteady chlorpropamideUp glimepirideDown glimepirideNo
## 1                     0                0               0             1
## 10                    0                0               0             1
## 13                    0                0               0             1
## 14                    0                0               0             1
## 18                    0                0               0             1
## 27                    0                0               0             1
##    glimepirideSteady glimepirideUp glipizideDown glipizideNo glipizideSteady
## 1                  0             0             0           1               0
## 10                 0             0             0           1               0
## 13                 0             0             0           1               0
## 14                 0             0             0           1               0
## 18                 0             0             0           1               0
## 27                 0             0             0           1               0
##    glipizideUp glyburideDown glyburideNo glyburideSteady glyburideUp
## 1            0             0           1               0           0
## 10           0             0           1               0           0
## 13           0             0           1               0           0
## 14           0             0           1               0           0
## 18           0             0           0               1           0
## 27           0             0           0               1           0
##    tolbutamideNo tolbutamideSteady pioglitazoneDown pioglitazoneNo
## 1              1                 0                0              1
## 10             1                 0                0              1
## 13             1                 0                0              1
## 14             1                 0                0              1
## 18             1                 0                0              1
## 27             1                 0                0              1
##    pioglitazoneSteady pioglitazoneUp rosiglitazoneDown rosiglitazoneNo
## 1                   0              0                 0               1
## 10                  0              0                 0               0
## 13                  0              0                 0               1
## 14                  0              0                 0               1
## 18                  0              0                 0               1
## 27                  0              0                 0               1
##    rosiglitazoneSteady rosiglitazoneUp acarboseDown acarboseNo acarboseSteady
## 1                    0               0            0          1              0
## 10                   1               0            0          1              0
## 13                   0               0            0          1              0
## 14                   0               0            0          1              0
## 18                   0               0            0          1              0
## 27                   0               0            0          1              0
##    acarboseUp miglitolDown miglitolNo miglitolSteady miglitolUp tolazamideNo
## 1           0            0          1              0          0            1
## 10          0            0          1              0          0            1
## 13          0            0          1              0          0            1
## 14          0            0          1              0          0            1
## 18          0            0          1              0          0            1
## 27          0            0          1              0          0            1
##    tolazamideSteady insulinDown insulinNo insulinSteady insulinUp
## 1                 0           0         1             0         0
## 10                0           0         0             1         0
## 13                0           1         0             0         0
## 14                0           0         0             1         0
## 18                0           0         1             0         0
## 27                0           0         1             0         0
##    glyburide.metforminDown glyburide.metforminNo glyburide.metforminSteady
## 1                        0                     1                         0
## 10                       0                     1                         0
## 13                       0                     1                         0
## 14                       0                     1                         0
## 18                       0                     1                         0
## 27                       0                     1                         0
##    glyburide.metforminUp changeCh changeNo diabetesMedNo diabetesMedYes
## 1                      0        0        1             1              0
## 10                     0        1        0             0              1
## 13                     0        1        0             0              1
## 14                     0        0        1             0              1
## 18                     0        0        1             0              1
## 27                     0        1        0             0              1

Select the numeric variables

# Keep only the columns of `hospital3` for which is.numeric() is TRUE.
hosp_numeric <- select_if(hospital3, is.numeric)

# Coerce `readmitted` to numeric and shift it down by one.
# NOTE(review): presumably `readmitted` is a two-level factor, so this yields
# 0/1 from the 1-based level codes -- confirm against the earlier recoding.
hosp_readmitted <- as.numeric(hospital3[["readmitted"]]) - 1

Combine the numeric variables with the converted categorical data

# Bind the pieces column-wise: the numeric columns first, then the indicator
# columns held in `trsf`, then `hosp_readmitted` as the final column.
hospital4 <- cbind(
  hosp_numeric,
  trsf,
  hosp_readmitted
)

# Preview the first six rows of the combined table.
head(hospital4, n = 6L)
##    time_in_hospital num_lab_procedures num_procedures num_medications
## 1                 1                 41              0               1
## 10               12                 33              3              18
## 13                7                 60              0              15
## 14               10                 55              1              31
## 18                3                 29              0              11
## 27                6                 64              3              18
##    number_outpatient number_emergency number_inpatient number_diagnoses
## 1                  0                0                0                1
## 10                 0                0                0                8
## 13                 0                1                0                8
## 14                 0                0                0                8
## 18                 0                0                0                3
## 27                 0                0                0                7
##    raceAfricanAmerican raceAsian raceCaucasian raceHispanic raceOther
## 1                    0         0             1            0         0
## 10                   0         0             1            0         0
## 13                   0         0             1            0         0
## 14                   0         0             1            0         0
## 18                   0         0             1            0         0
## 27                   0         0             1            0         0
##    genderFemale genderMale genderUnknown.Invalid age.0.10. age.10.20.
## 1             1          0                     0         1          0
## 10            1          0                     0         0          0
## 13            1          0                     0         0          0
## 14            0          1                     0         0          0
## 18            1          0                     0         0          0
## 27            0          1                     0         0          0
##    age.20.30. age.30.40. age.40.50. age.50.60. age.60.70. age.70.80. age.80.90.
## 1           0          0          0          0          0          0          0
## 10          0          0          0          0          0          0          0
## 13          0          0          1          0          0          0          0
## 14          0          0          0          0          0          0          1
## 18          0          0          0          1          0          0          0
## 27          0          0          0          0          0          0          1
##    age.90.100. admission_type_id.1 admission_type_id.2 admission_type_id.3
## 1            0                   0                   0                   0
## 10           1                   0                   0                   1
## 13           0                   1                   0                   0
## 14           0                   1                   0                   0
## 18           0                   1                   0                   0
## 27           0                   1                   0                   0
##    admission_type_id.4 admission_type_id.5 admission_type_id.6
## 1                    0                   0                   1
## 10                   0                   0                   0
## 13                   0                   0                   0
## 14                   0                   0                   0
## 18                   0                   0                   0
## 27                   0                   0                   0
##    admission_type_id.7 admission_type_id.8 discharge_disposition_id.1
## 1                    0                   0                          0
## 10                   0                   0                          0
## 13                   0                   0                          0
## 14                   0                   0                          0
## 18                   0                   0                          1
## 27                   0                   0                          0
##    discharge_disposition_id.2 discharge_disposition_id.3
## 1                           0                          0
## 10                          0                          1
## 13                          0                          1
## 14                          0                          0
## 18                          0                          0
## 27                          0                          1
##    discharge_disposition_id.4 discharge_disposition_id.5
## 1                           0                          0
## 10                          0                          0
## 13                          0                          0
## 14                          0                          0
## 18                          0                          0
## 27                          0                          0
##    discharge_disposition_id.6 discharge_disposition_id.7
## 1                           0                          0
## 10                          0                          0
## 13                          0                          0
## 14                          1                          0
## 18                          0                          0
## 27                          0                          0
##    discharge_disposition_id.8 discharge_disposition_id.9
## 1                           0                          0
## 10                          0                          0
## 13                          0                          0
## 14                          0                          0
## 18                          0                          0
## 27                          0                          0
##    discharge_disposition_id.10 discharge_disposition_id.11
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.12 discharge_disposition_id.13
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.14 discharge_disposition_id.15
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.16 discharge_disposition_id.17
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.18 discharge_disposition_id.19
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.20 discharge_disposition_id.22
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.23 discharge_disposition_id.24
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.25 discharge_disposition_id.27
## 1                            1                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    discharge_disposition_id.28 admission_source_id.1 admission_source_id.2
## 1                            0                     1                     0
## 10                           0                     0                     0
## 13                           0                     0                     0
## 14                           0                     0                     0
## 18                           0                     0                     0
## 27                           0                     0                     0
##    admission_source_id.3 admission_source_id.4 admission_source_id.5
## 1                      0                     0                     0
## 10                     0                     1                     0
## 13                     0                     0                     0
## 14                     0                     0                     0
## 18                     0                     0                     0
## 27                     0                     0                     0
##    admission_source_id.6 admission_source_id.7 admission_source_id.8
## 1                      0                     0                     0
## 10                     0                     0                     0
## 13                     0                     1                     0
## 14                     0                     1                     0
## 18                     0                     1                     0
## 27                     0                     1                     0
##    admission_source_id.9 admission_source_id.10 admission_source_id.11
## 1                      0                      0                      0
## 10                     0                      0                      0
## 13                     0                      0                      0
## 14                     0                      0                      0
## 18                     0                      0                      0
## 27                     0                      0                      0
##    admission_source_id.13 admission_source_id.14 admission_source_id.17
## 1                       0                      0                      0
## 10                      0                      0                      0
## 13                      0                      0                      0
## 14                      0                      0                      0
## 18                      0                      0                      0
## 27                      0                      0                      0
##    admission_source_id.20 admission_source_id.22 admission_source_id.25
## 1                       0                      0                      0
## 10                      0                      0                      0
## 13                      0                      0                      0
## 14                      0                      0                      0
## 18                      0                      0                      0
## 27                      0                      0                      0
##    medical_specialtyAllergyandImmunology medical_specialtyAnesthesiology
## 1                                      0                               0
## 10                                     0                               0
## 13                                     0                               0
## 14                                     0                               0
## 18                                     0                               0
## 27                                     0                               0
##    medical_specialtyAnesthesiology.Pediatric medical_specialtyCardiology
## 1                                          0                           0
## 10                                         0                           0
## 13                                         0                           0
## 14                                         0                           0
## 18                                         0                           1
## 27                                         0                           1
##    medical_specialtyCardiology.Pediatric medical_specialtyDCPTEAM
## 1                                      0                        0
## 10                                     0                        0
## 13                                     0                        0
## 14                                     0                        0
## 18                                     0                        0
## 27                                     0                        0
##    medical_specialtyDentistry medical_specialtyDermatology
## 1                           0                            0
## 10                          0                            0
## 13                          0                            0
## 14                          0                            0
## 18                          0                            0
## 27                          0                            0
##    medical_specialtyEmergency.Trauma medical_specialtyEndocrinology
## 1                                  0                              0
## 10                                 0                              0
## 13                                 0                              0
## 14                                 0                              0
## 18                                 0                              0
## 27                                 0                              0
##    medical_specialtyEndocrinology.Metabolism
## 1                                          0
## 10                                         0
## 13                                         0
## 14                                         0
## 18                                         0
## 27                                         0
##    medical_specialtyFamily.GeneralPractice medical_specialtyGastroenterology
## 1                                        0                                 0
## 10                                       0                                 0
## 13                                       1                                 0
## 14                                       1                                 0
## 18                                       0                                 0
## 27                                       0                                 0
##    medical_specialtyGynecology medical_specialtyHematology
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    medical_specialtyHematology.Oncology medical_specialtyHospitalist
## 1                                     0                            0
## 10                                    0                            0
## 13                                    0                            0
## 14                                    0                            0
## 18                                    0                            0
## 27                                    0                            0
##    medical_specialtyInfectiousDiseases medical_specialtyInternalMedicine
## 1                                    0                                 0
## 10                                   0                                 1
## 13                                   0                                 0
## 14                                   0                                 0
## 18                                   0                                 0
## 27                                   0                                 0
##    medical_specialtyNephrology medical_specialtyNeurology
## 1                            0                          0
## 10                           0                          0
## 13                           0                          0
## 14                           0                          0
## 18                           0                          0
## 27                           0                          0
##    medical_specialtyNeurophysiology
## 1                                 0
## 10                                0
## 13                                0
## 14                                0
## 18                                0
## 27                                0
##    medical_specialtyObsterics.Gynecology.GynecologicOnco
## 1                                                      0
## 10                                                     0
## 13                                                     0
## 14                                                     0
## 18                                                     0
## 27                                                     0
##    medical_specialtyObstetrics medical_specialtyObstetricsandGynecology
## 1                            0                                        0
## 10                           0                                        0
## 13                           0                                        0
## 14                           0                                        0
## 18                           0                                        0
## 27                           0                                        0
##    medical_specialtyOncology medical_specialtyOphthalmology
## 1                          0                              0
## 10                         0                              0
## 13                         0                              0
## 14                         0                              0
## 18                         0                              0
## 27                         0                              0
##    medical_specialtyOrthopedics medical_specialtyOrthopedics.Reconstructive
## 1                             0                                           0
## 10                            0                                           0
## 13                            0                                           0
## 14                            0                                           0
## 18                            0                                           0
## 27                            0                                           0
##    medical_specialtyOsteopath medical_specialtyOtolaryngology
## 1                           0                               0
## 10                          0                               0
## 13                          0                               0
## 14                          0                               0
## 18                          0                               0
## 27                          0                               0
##    medical_specialtyOutreachServices medical_specialtyPathology
## 1                                  0                          0
## 10                                 0                          0
## 13                                 0                          0
## 14                                 0                          0
## 18                                 0                          0
## 27                                 0                          0
##    medical_specialtyPediatrics medical_specialtyPediatrics.AllergyandImmunology
## 1                            0                                                0
## 10                           0                                                0
## 13                           0                                                0
## 14                           0                                                0
## 18                           0                                                0
## 27                           0                                                0
##    medical_specialtyPediatrics.CriticalCare
## 1                                         0
## 10                                        0
## 13                                        0
## 14                                        0
## 18                                        0
## 27                                        0
##    medical_specialtyPediatrics.EmergencyMedicine
## 1                                              0
## 10                                             0
## 13                                             0
## 14                                             0
## 18                                             0
## 27                                             0
##    medical_specialtyPediatrics.Endocrinology
## 1                                          1
## 10                                         0
## 13                                         0
## 14                                         0
## 18                                         0
## 27                                         0
##    medical_specialtyPediatrics.Hematology.Oncology
## 1                                                0
## 10                                               0
## 13                                               0
## 14                                               0
## 18                                               0
## 27                                               0
##    medical_specialtyPediatrics.InfectiousDiseases
## 1                                               0
## 10                                              0
## 13                                              0
## 14                                              0
## 18                                              0
## 27                                              0
##    medical_specialtyPediatrics.Neurology
## 1                                      0
## 10                                     0
## 13                                     0
## 14                                     0
## 18                                     0
## 27                                     0
##    medical_specialtyPediatrics.Pulmonology medical_specialtyPerinatology
## 1                                        0                             0
## 10                                       0                             0
## 13                                       0                             0
## 14                                       0                             0
## 18                                       0                             0
## 27                                       0                             0
##    medical_specialtyPhysicalMedicineandRehabilitation
## 1                                                   0
## 10                                                  0
## 13                                                  0
## 14                                                  0
## 18                                                  0
## 27                                                  0
##    medical_specialtyPhysicianNotFound medical_specialtyPodiatry
## 1                                   0                         0
## 10                                  0                         0
## 13                                  0                         0
## 14                                  0                         0
## 18                                  0                         0
## 27                                  0                         0
##    medical_specialtyProctology medical_specialtyPsychiatry
## 1                            0                           0
## 10                           0                           0
## 13                           0                           0
## 14                           0                           0
## 18                           0                           0
## 27                           0                           0
##    medical_specialtyPsychiatry.Addictive
## 1                                      0
## 10                                     0
## 13                                     0
## 14                                     0
## 18                                     0
## 27                                     0
##    medical_specialtyPsychiatry.Child.Adolescent medical_specialtyPsychology
## 1                                             0                           0
## 10                                            0                           0
## 13                                            0                           0
## 14                                            0                           0
## 18                                            0                           0
## 27                                            0                           0
##    medical_specialtyPulmonology medical_specialtyRadiologist
## 1                             0                            0
## 10                            0                            0
## 13                            0                            0
## 14                            0                            0
## 18                            0                            0
## 27                            0                            0
##    medical_specialtyRadiology medical_specialtyResident
## 1                           0                         0
## 10                          0                         0
## 13                          0                         0
## 14                          0                         0
## 18                          0                         0
## 27                          0                         0
##    medical_specialtyRheumatology medical_specialtySpeech
## 1                              0                       0
## 10                             0                       0
## 13                             0                       0
## 14                             0                       0
## 18                             0                       0
## 27                             0                       0
##    medical_specialtySportsMedicine medical_specialtySurgeon
## 1                                0                        0
## 10                               0                        0
## 13                               0                        0
## 14                               0                        0
## 18                               0                        0
## 27                               0                        0
##    medical_specialtySurgery.Cardiovascular
## 1                                        0
## 10                                       0
## 13                                       0
## 14                                       0
## 18                                       0
## 27                                       0
##    medical_specialtySurgery.Cardiovascular.Thoracic
## 1                                                 0
## 10                                                0
## 13                                                0
## 14                                                0
## 18                                                0
## 27                                                0
##    medical_specialtySurgery.Colon.Rectal medical_specialtySurgery.General
## 1                                      0                                0
## 10                                     0                                0
## 13                                     0                                0
## 14                                     0                                0
## 18                                     0                                0
## 27                                     0                                0
##    medical_specialtySurgery.Maxillofacial medical_specialtySurgery.Neuro
## 1                                       0                              0
## 10                                      0                              0
## 13                                      0                              0
## 14                                      0                              0
## 18                                      0                              0
## 27                                      0                              0
##    medical_specialtySurgery.Pediatric medical_specialtySurgery.Plastic
## 1                                   0                                0
## 10                                  0                                0
## 13                                  0                                0
## 14                                  0                                0
## 18                                  0                                0
## 27                                  0                                0
##    medical_specialtySurgery.PlasticwithinHeadandNeck
## 1                                                  0
## 10                                                 0
## 13                                                 0
## 14                                                 0
## 18                                                 0
## 27                                                 0
##    medical_specialtySurgery.Thoracic medical_specialtySurgery.Vascular
## 1                                  0                                 0
## 10                                 0                                 0
## 13                                 0                                 0
## 14                                 0                                 0
## 18                                 0                                 0
## 27                                 0                                 0
##    medical_specialtySurgicalSpecialty medical_specialtyUrology
## 1                                   0                        0
## 10                                  0                        0
## 13                                  0                        0
## 14                                  0                        0
## 18                                  0                        0
## 27                                  0                        0
##    max_glu_serum.200 max_glu_serum.300 max_glu_serumNone max_glu_serumNorm
## 1                  0                 0                 1                 0
## 10                 0                 0                 1                 0
## 13                 0                 0                 1                 0
## 14                 0                 0                 1                 0
## 18                 0                 0                 1                 0
## 27                 0                 0                 1                 0
##    A1Cresult.7 A1Cresult.8 A1CresultNone A1CresultNorm metforminDown
## 1            0           0             1             0             0
## 10           0           0             1             0             0
## 13           0           0             1             0             0
## 14           0           0             1             0             0
## 18           0           0             1             0             0
## 27           1           0             0             0             0
##    metforminNo metforminSteady metforminUp repaglinideDown repaglinideNo
## 1            1               0           0               0             1
## 10           1               0           0               0             1
## 13           0               1           0               0             0
## 14           1               0           0               0             1
## 18           1               0           0               0             1
## 27           0               1           0               0             1
##    repaglinideSteady repaglinideUp nateglinideDown nateglinideNo
## 1                  0             0               0             1
## 10                 0             0               0             1
## 13                 0             1               0             1
## 14                 0             0               0             1
## 18                 0             0               0             1
## 27                 0             0               0             1
##    nateglinideSteady nateglinideUp chlorpropamideDown chlorpropamideNo
## 1                  0             0                  0                1
## 10                 0             0                  0                1
## 13                 0             0                  0                1
## 14                 0             0                  0                1
## 18                 0             0                  0                1
## 27                 0             0                  0                1
##    chlorpropamideSteady chlorpropamideUp glimepirideDown glimepirideNo
## 1                     0                0               0             1
## 10                    0                0               0             1
## 13                    0                0               0             1
## 14                    0                0               0             1
## 18                    0                0               0             1
## 27                    0                0               0             1
##    glimepirideSteady glimepirideUp glipizideDown glipizideNo glipizideSteady
## 1                  0             0             0           1               0
## 10                 0             0             0           1               0
## 13                 0             0             0           1               0
## 14                 0             0             0           1               0
## 18                 0             0             0           1               0
## 27                 0             0             0           1               0
##    glipizideUp glyburideDown glyburideNo glyburideSteady glyburideUp
## 1            0             0           1               0           0
## 10           0             0           1               0           0
## 13           0             0           1               0           0
## 14           0             0           1               0           0
## 18           0             0           0               1           0
## 27           0             0           0               1           0
##    tolbutamideNo tolbutamideSteady pioglitazoneDown pioglitazoneNo
## 1              1                 0                0              1
## 10             1                 0                0              1
## 13             1                 0                0              1
## 14             1                 0                0              1
## 18             1                 0                0              1
## 27             1                 0                0              1
##    pioglitazoneSteady pioglitazoneUp rosiglitazoneDown rosiglitazoneNo
## 1                   0              0                 0               1
## 10                  0              0                 0               0
## 13                  0              0                 0               1
## 14                  0              0                 0               1
## 18                  0              0                 0               1
## 27                  0              0                 0               1
##    rosiglitazoneSteady rosiglitazoneUp acarboseDown acarboseNo acarboseSteady
## 1                    0               0            0          1              0
## 10                   1               0            0          1              0
## 13                   0               0            0          1              0
## 14                   0               0            0          1              0
## 18                   0               0            0          1              0
## 27                   0               0            0          1              0
##    acarboseUp miglitolDown miglitolNo miglitolSteady miglitolUp tolazamideNo
## 1           0            0          1              0          0            1
## 10          0            0          1              0          0            1
## 13          0            0          1              0          0            1
## 14          0            0          1              0          0            1
## 18          0            0          1              0          0            1
## 27          0            0          1              0          0            1
##    tolazamideSteady insulinDown insulinNo insulinSteady insulinUp
## 1                 0           0         1             0         0
## 10                0           0         0             1         0
## 13                0           1         0             0         0
## 14                0           0         0             1         0
## 18                0           0         1             0         0
## 27                0           0         1             0         0
##    glyburide.metforminDown glyburide.metforminNo glyburide.metforminSteady
## 1                        0                     1                         0
## 10                       0                     1                         0
## 13                       0                     1                         0
## 14                       0                     1                         0
## 18                       0                     1                         0
## 27                       0                     1                         0
##    glyburide.metforminUp changeCh changeNo diabetesMedNo diabetesMedYes
## 1                      0        0        1             1              0
## 10                     0        1        0             0              1
## 13                     0        1        0             0              1
## 14                     0        0        1             0              1
## 18                     0        0        1             0              1
## 27                     0        1        0             0              1
##    hosp_readmitted
## 1                0
## 10               0
## 13               1
## 14               0
## 18               0
## 27               0
# Build the numeric feature matrix handed to xgboost.
# If `hospital4` still carries the outcome column `hosp_readmitted`, it must
# be removed from the predictors: otherwise the trees can split on the label
# itself and the reported test error (exactly 0) is meaningless.
# `setdiff()` is a no-op when the column is absent.
feature_columns <- setdiff(colnames(hospital4), "hosp_readmitted")
hospital_matrix <- data.matrix(hospital4[, feature_columns, drop = FALSE])

Get the number of training samples for a 70/30 train/test split

# Size of the training partition: 70% of the number of labels, rounded to a
# whole count.
numberOfTrainingSamples <- round(0.7 * length(hosp_readmitted))

training data

# The first `numberOfTrainingSamples` rows (and their labels) form the
# training partition. `seq_len()` yields an empty index when the count is 0
# (whereas `1:0` would yield c(1, 0)), and `drop = FALSE` keeps the result a
# matrix even if only a single row is selected.
train_rows <- seq_len(numberOfTrainingSamples)
train_data <- hospital_matrix[train_rows, , drop = FALSE]
train_labels <- hosp_readmitted[train_rows]
dim(train_data)
## [1] 35508   218

testing data

# Everything after the first `numberOfTrainingSamples` rows is held out.
# A logical mask is used instead of `-(1:n)`: negative indexing silently
# misbehaves when n is 0 (`-(1:0)` is c(-1, 0) and drops row 1), while the
# mask simply selects every row past the training partition.
is_test_row <- seq_len(nrow(hospital_matrix)) > numberOfTrainingSamples
is_test_label <- seq_along(hosp_readmitted) > numberOfTrainingSamples
test_data <- hospital_matrix[is_test_row, , drop = FALSE]
test_labels <- hosp_readmitted[is_test_label]

# Wrap both partitions (features + labels) in xgboost's DMatrix container.
dtrain <- xgb.DMatrix(data = train_data, label = train_labels)
dtest <- xgb.DMatrix(data = test_data, label = test_labels)

train a model using our training data

# Fit a small gradient-boosted tree classifier on the training partition.
# `nrounds` is spelled out in full (the previous `nround` only worked through
# partial argument matching), `max_depth` uses the documented parameter name,
# and `eval_metric` is set explicitly because xgboost >= 1.3.0 warns when the
# metric for "binary:logistic" is left to the default.
model <- xgboost(
  data = train_data, # the data
  label = train_labels,
  max_depth = 3, # the maximum depth of each decision tree
  nrounds = 2, # max number of boosting iterations
  objective = "binary:logistic", # the objective function
  eval_metric = "logloss" # metric printed for each boosting round
)
## [22:46:40] WARNING: amalgamation/../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [1]  train-logloss:0.437525 
## [2]  train-logloss:0.296345

generate predictions for our held-out testing data

# Score the held-out rows. With objective "binary:logistic" the predictions
# are probabilities, so the class cut-off is 0.5 (the same threshold used
# for the error computation); comparing against 5 is always FALSE.
pred <- predict(model, dtest)
head(pred > 0.5)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
# Show the first six true labels for comparison with the predictions.
head(test_labels, n = 6L)
## [1] 1 0 0 0 1 0

get & print the classification error

# Misclassification rate on the held-out rows: threshold the predictions at
# 0.5, convert to 0/1, and take the share that disagrees with the labels.
err <- mean(test_labels != as.numeric(pred > 0.5))
print(paste("test-error=", err))
## [1] "test-error= 0"