pacman::p_load(tidyverse, caret, corrplot, e1071, interplot, caTools, car, ROCR, IRdisplay, xlsx, ggmap, ggpubr, broom, relaimpo, ggpubr, MASS, MLmetrics)
## package 'corrplot' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'proxy' successfully unpacked and MD5 sums checked
## package 'e1071' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'checkmate' successfully unpacked and MD5 sums checked
## package 'htmlwidgets' successfully unpacked and MD5 sums checked
## package 'minqa' successfully unpacked and MD5 sums checked
## package 'nloptr' successfully unpacked and MD5 sums checked
## package 'RcppEigen' successfully unpacked and MD5 sums checked
## package 'Formula' successfully unpacked and MD5 sums checked
## package 'latticeExtra' successfully unpacked and MD5 sums checked
## package 'gridExtra' successfully unpacked and MD5 sums checked
## package 'htmlTable' successfully unpacked and MD5 sums checked
## package 'viridis' successfully unpacked and MD5 sums checked
## package 'lme4' successfully unpacked and MD5 sums checked
## package 'coda' successfully unpacked and MD5 sums checked
## package 'Hmisc' successfully unpacked and MD5 sums checked
## package 'abind' successfully unpacked and MD5 sums checked
## package 'arm' successfully unpacked and MD5 sums checked
## package 'interactionTest' successfully unpacked and MD5 sums checked
## package 'interplot' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'caTools' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'matrixStats' successfully unpacked and MD5 sums checked
## package 'RcppArmadillo' successfully unpacked and MD5 sums checked
## package 'SparseM' successfully unpacked and MD5 sums checked
## package 'MatrixModels' successfully unpacked and MD5 sums checked
## package 'conquer' successfully unpacked and MD5 sums checked
## package 'openxlsx' successfully unpacked and MD5 sums checked
## package 'carData' successfully unpacked and MD5 sums checked
## package 'pbkrtest' successfully unpacked and MD5 sums checked
## package 'quantreg' successfully unpacked and MD5 sums checked
## package 'maptools' successfully unpacked and MD5 sums checked
## package 'rio' successfully unpacked and MD5 sums checked
## package 'car' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'gtools' successfully unpacked and MD5 sums checked
## package 'gplots' successfully unpacked and MD5 sums checked
## package 'ROCR' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'repr' successfully unpacked and MD5 sums checked
## package 'IRdisplay' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'ggrepel' successfully unpacked and MD5 sums checked
## package 'ggsci' successfully unpacked and MD5 sums checked
## package 'cowplot' successfully unpacked and MD5 sums checked
## package 'ggsignif' successfully unpacked and MD5 sums checked
## package 'polynom' successfully unpacked and MD5 sums checked
## package 'rstatix' successfully unpacked and MD5 sums checked
## package 'ggpubr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'survey' successfully unpacked and MD5 sums checked
## package 'mitools' successfully unpacked and MD5 sums checked
## package 'corpcor' successfully unpacked and MD5 sums checked
## package 'relaimpo' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
## package 'MLmetrics' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ngsook\AppData\Local\Temp\RtmpYJslPG\downloaded_packages
# Print the directory the session is currently in, for the record.
getwd()
## [1] "C:/Users/ngsook/Desktop/Data Scienc Project/dataset_diabetes/dataset_diabetes"
# Switch into the dataset folder, then load the encounter table by its bare
# file name (resolved relative to the directory set on the previous line).
# NOTE(review): the path is absolute and machine-specific; anyone running this
# elsewhere has to edit it first.
setwd(
  dir = "C:/Users/ngsook/Desktop/Data Scienc Project/dataset_diabetes/dataset_diabetes"
)
hospital <- read.csv(file = "diabetic_data.csv")
# Size of the raw table as (rows, columns).
c(nrow(hospital), ncol(hospital))
## [1] 101766 50
# Per-column overview: quartiles for numeric columns, length/class/mode for
# character columns.
summary(object = hospital)
## encounter_id patient_nbr race gender
## Min. : 12522 Min. : 135 Length:101766 Length:101766
## 1st Qu.: 84961194 1st Qu.: 23413221 Class :character Class :character
## Median :152388987 Median : 45505143 Mode :character Mode :character
## Mean :165201646 Mean : 54330401
## 3rd Qu.:230270888 3rd Qu.: 87545950
## Max. :443867222 Max. :189502619
## age weight admission_type_id
## Length:101766 Length:101766 Min. :1.000
## Class :character Class :character 1st Qu.:1.000
## Mode :character Mode :character Median :1.000
## Mean :2.024
## 3rd Qu.:3.000
## Max. :8.000
## discharge_disposition_id admission_source_id time_in_hospital
## Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 2.000
## Median : 1.000 Median : 7.000 Median : 4.000
## Mean : 3.716 Mean : 5.754 Mean : 4.396
## 3rd Qu.: 4.000 3rd Qu.: 7.000 3rd Qu.: 6.000
## Max. :28.000 Max. :25.000 Max. :14.000
## payer_code medical_specialty num_lab_procedures num_procedures
## Length:101766 Length:101766 Min. : 1.0 Min. :0.00
## Class :character Class :character 1st Qu.: 31.0 1st Qu.:0.00
## Mode :character Mode :character Median : 44.0 Median :1.00
## Mean : 43.1 Mean :1.34
## 3rd Qu.: 57.0 3rd Qu.:2.00
## Max. :132.0 Max. :6.00
## num_medications number_outpatient number_emergency number_inpatient
## Min. : 1.00 Min. : 0.0000 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:10.00 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :15.00 Median : 0.0000 Median : 0.0000 Median : 0.0000
## Mean :16.02 Mean : 0.3694 Mean : 0.1978 Mean : 0.6356
## 3rd Qu.:20.00 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.0000
## Max. :81.00 Max. :42.0000 Max. :76.0000 Max. :21.0000
## diag_1 diag_2 diag_3 number_diagnoses
## Length:101766 Length:101766 Length:101766 Min. : 1.000
## Class :character Class :character Class :character 1st Qu.: 6.000
## Mode :character Mode :character Mode :character Median : 8.000
## Mean : 7.423
## 3rd Qu.: 9.000
## Max. :16.000
## max_glu_serum A1Cresult metformin repaglinide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## nateglinide chlorpropamide glimepiride acetohexamide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## glipizide glyburide tolbutamide pioglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## rosiglitazone acarbose miglitol troglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## tolazamide examide citoglipton insulin
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## metformin.rosiglitazone metformin.pioglitazone change
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## diabetesMed readmitted
## Length:101766 Length:101766
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Report the class of every column as a named character vector.
# vapply() pins the result to exactly one string per column; with sapply() a
# single column carrying several classes would silently turn the whole result
# into a list. Multi-class columns are shown joined with "/".
vapply(
  hospital,
  function(column) paste(class(column), collapse = "/"),
  character(1)
)
## encounter_id patient_nbr race
## "integer" "integer" "character"
## gender age weight
## "character" "character" "character"
## admission_type_id discharge_disposition_id admission_source_id
## "integer" "integer" "integer"
## time_in_hospital payer_code medical_specialty
## "integer" "character" "character"
## num_lab_procedures num_procedures num_medications
## "integer" "integer" "integer"
## number_outpatient number_emergency number_inpatient
## "integer" "integer" "integer"
## diag_1 diag_2 diag_3
## "character" "character" "character"
## number_diagnoses max_glu_serum A1Cresult
## "integer" "character" "character"
## metformin repaglinide nateglinide
## "character" "character" "character"
## chlorpropamide glimepiride acetohexamide
## "character" "character" "character"
## glipizide glyburide tolbutamide
## "character" "character" "character"
## pioglitazone rosiglitazone acarbose
## "character" "character" "character"
## miglitol troglitazone tolazamide
## "character" "character" "character"
## examide citoglipton insulin
## "character" "character" "character"
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## "character" "character" "character"
## metformin.rosiglitazone metformin.pioglitazone change
## "character" "character" "character"
## diabetesMed readmitted
## "character" "character"
# Compact structural dump of the raw table: one line per column with its type
# and the first few values.
utils::str(object = hospital)
## 'data.frame': 101766 obs. of 50 variables:
## $ encounter_id : int 2278392 149190 64410 500364 16680 35754 55842 63768 12522 15738 ...
## $ patient_nbr : int 8222157 55629189 86047875 82442376 42519267 82637451 84259809 114882984 48330783 63555939 ...
## $ race : chr "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
## $ weight : chr "?" "?" "?" "?" ...
## $ admission_type_id : int 6 1 1 1 1 2 3 1 2 3 ...
## $ discharge_disposition_id: int 25 1 1 1 1 1 1 1 1 3 ...
## $ admission_source_id : int 1 7 7 7 7 2 2 7 4 4 ...
## $ time_in_hospital : int 1 3 2 2 1 3 4 5 13 12 ...
## $ payer_code : chr "?" "?" "?" "?" ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" "?" "?" "?" ...
## $ num_lab_procedures : int 41 59 11 44 51 31 70 73 68 33 ...
## $ num_procedures : int 0 0 5 1 0 6 1 0 2 3 ...
## $ num_medications : int 1 18 13 16 8 16 21 12 28 18 ...
## $ number_outpatient : int 0 0 2 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 1 0 0 0 0 0 0 0 ...
## $ diag_1 : chr "250.83" "276" "648" "8" ...
## $ diag_2 : chr "?" "250.01" "250" "250.43" ...
## $ diag_3 : chr "?" "255" "V27" "403" ...
## $ number_diagnoses : int 1 9 6 7 5 9 7 8 8 8 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "No" "No" ...
## $ repaglinide : chr "No" "No" "No" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ acetohexamide : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "Steady" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "No" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ troglitazone : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ examide : chr "No" "No" "No" "No" ...
## $ citoglipton : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Up" "No" "Up" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ glipizide.metformin : chr "No" "No" "No" "No" ...
## $ glimepiride.pioglitazone: chr "No" "No" "No" "No" ...
## $ metformin.rosiglitazone : chr "No" "No" "No" "No" ...
## $ metformin.pioglitazone : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "No" "Ch" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : chr "NO" ">30" "NO" "NO" ...
# Columns removed before any further processing: the two identifier columns,
# weight, payer_code and the three diagnosis-code columns.
col <- c(
  "encounter_id", "patient_nbr", "weight", "payer_code",
  "diag_1", "diag_2", "diag_3"
)
# all_of() marks `col` as an external character vector of column names.
# Passing the bare vector is deprecated in tidyselect and would be ambiguous if
# the data ever gained a column that is itself called "col". As before, a name
# that is missing from `hospital` raises an error.
hospital1 <- dplyr::select(hospital, -dplyr::all_of(col))
# List the columns that remain.
colnames(hospital1)
## [1] "race" "gender"
## [3] "age" "admission_type_id"
## [5] "discharge_disposition_id" "admission_source_id"
## [7] "time_in_hospital" "medical_specialty"
## [9] "num_lab_procedures" "num_procedures"
## [11] "num_medications" "number_outpatient"
## [13] "number_emergency" "number_inpatient"
## [15] "number_diagnoses" "max_glu_serum"
## [17] "A1Cresult" "metformin"
## [19] "repaglinide" "nateglinide"
## [21] "chlorpropamide" "glimepiride"
## [23] "acetohexamide" "glipizide"
## [25] "glyburide" "tolbutamide"
## [27] "pioglitazone" "rosiglitazone"
## [29] "acarbose" "miglitol"
## [31] "troglitazone" "tolazamide"
## [33] "examide" "citoglipton"
## [35] "insulin" "glyburide.metformin"
## [37] "glipizide.metformin" "glimepiride.pioglitazone"
## [39] "metformin.rosiglitazone" "metformin.pioglitazone"
## [41] "change" "diabetesMed"
## [43] "readmitted"
# Per-column overview of the reduced table.
summary(object = hospital1)
## race gender age admission_type_id
## Length:101766 Length:101766 Length:101766 Min. :1.000
## Class :character Class :character Class :character 1st Qu.:1.000
## Mode :character Mode :character Mode :character Median :1.000
## Mean :2.024
## 3rd Qu.:3.000
## Max. :8.000
## discharge_disposition_id admission_source_id time_in_hospital
## Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 2.000
## Median : 1.000 Median : 7.000 Median : 4.000
## Mean : 3.716 Mean : 5.754 Mean : 4.396
## 3rd Qu.: 4.000 3rd Qu.: 7.000 3rd Qu.: 6.000
## Max. :28.000 Max. :25.000 Max. :14.000
## medical_specialty num_lab_procedures num_procedures num_medications
## Length:101766 Min. : 1.0 Min. :0.00 Min. : 1.00
## Class :character 1st Qu.: 31.0 1st Qu.:0.00 1st Qu.:10.00
## Mode :character Median : 44.0 Median :1.00 Median :15.00
## Mean : 43.1 Mean :1.34 Mean :16.02
## 3rd Qu.: 57.0 3rd Qu.:2.00 3rd Qu.:20.00
## Max. :132.0 Max. :6.00 Max. :81.00
## number_outpatient number_emergency number_inpatient number_diagnoses
## Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 1.000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 6.000
## Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 8.000
## Mean : 0.3694 Mean : 0.1978 Mean : 0.6356 Mean : 7.423
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.0000 3rd Qu.: 9.000
## Max. :42.0000 Max. :76.0000 Max. :21.0000 Max. :16.000
## max_glu_serum A1Cresult metformin repaglinide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## nateglinide chlorpropamide glimepiride acetohexamide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## glipizide glyburide tolbutamide pioglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## rosiglitazone acarbose miglitol troglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## tolazamide examide citoglipton insulin
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## metformin.rosiglitazone metformin.pioglitazone change
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## diabetesMed readmitted
## Length:101766 Length:101766
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# The three *_id columns are read in as integers; convert each one to a factor
# so its values are handled as category labels rather than as numbers.
col2 <- c(
  "admission_type_id",
  "discharge_disposition_id",
  "admission_source_id"
)
hospital1[col2] <- lapply(X = hospital1[col2], FUN = as.factor)
# Confirm the new column types.
str(object = hospital1)
## 'data.frame': 101766 obs. of 43 variables:
## $ race : chr "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
## $ admission_type_id : Factor w/ 8 levels "1","2","3","4",..: 6 1 1 1 1 2 3 1 2 3 ...
## $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 1 1 1 1 1 1 1 1 3 ...
## $ admission_source_id : Factor w/ 17 levels "1","2","3","4",..: 1 7 7 7 7 2 2 7 4 4 ...
## $ time_in_hospital : int 1 3 2 2 1 3 4 5 13 12 ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" "?" "?" "?" ...
## $ num_lab_procedures : int 41 59 11 44 51 31 70 73 68 33 ...
## $ num_procedures : int 0 0 5 1 0 6 1 0 2 3 ...
## $ num_medications : int 1 18 13 16 8 16 21 12 28 18 ...
## $ number_outpatient : int 0 0 2 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 1 0 0 0 0 0 0 0 ...
## $ number_diagnoses : int 1 9 6 7 5 9 7 8 8 8 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "No" "No" ...
## $ repaglinide : chr "No" "No" "No" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ acetohexamide : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "Steady" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "No" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ troglitazone : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ examide : chr "No" "No" "No" "No" ...
## $ citoglipton : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Up" "No" "Up" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ glipizide.metformin : chr "No" "No" "No" "No" ...
## $ glimepiride.pioglitazone: chr "No" "No" "No" "No" ...
## $ metformin.rosiglitazone : chr "No" "No" "No" "No" ...
## $ metformin.pioglitazone : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "No" "Ch" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : chr "NO" ">30" "NO" "NO" ...
# Turn the "?" placeholder in medical_specialty into a real NA so the
# missing-value tools below can see it.
hospital1$medical_specialty[hospital1$medical_specialty == "?"] <- NA
# Re-inspect the table after the recode.
summary(object = hospital1)
## race gender age admission_type_id
## Length:101766 Length:101766 Length:101766 1 :53990
## Class :character Class :character Class :character 3 :18869
## Mode :character Mode :character Mode :character 2 :18480
## 6 : 5291
## 5 : 4785
## 8 : 320
## (Other): 31
## discharge_disposition_id admission_source_id time_in_hospital
## 1 :60234 7 :57494 Min. : 1.000
## 3 :13954 1 :29565 1st Qu.: 2.000
## 6 :12902 17 : 6781 Median : 4.000
## 18 : 3691 4 : 3187 Mean : 4.396
## 2 : 2128 6 : 2264 3rd Qu.: 6.000
## 22 : 1993 2 : 1104 Max. :14.000
## (Other): 6864 (Other): 1371
## medical_specialty num_lab_procedures num_procedures num_medications
## Length:101766 Min. : 1.0 Min. :0.00 Min. : 1.00
## Class :character 1st Qu.: 31.0 1st Qu.:0.00 1st Qu.:10.00
## Mode :character Median : 44.0 Median :1.00 Median :15.00
## Mean : 43.1 Mean :1.34 Mean :16.02
## 3rd Qu.: 57.0 3rd Qu.:2.00 3rd Qu.:20.00
## Max. :132.0 Max. :6.00 Max. :81.00
##
## number_outpatient number_emergency number_inpatient number_diagnoses
## Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 1.000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 6.000
## Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 8.000
## Mean : 0.3694 Mean : 0.1978 Mean : 0.6356 Mean : 7.423
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.0000 3rd Qu.: 9.000
## Max. :42.0000 Max. :76.0000 Max. :21.0000 Max. :16.000
##
## max_glu_serum A1Cresult metformin repaglinide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## nateglinide chlorpropamide glimepiride acetohexamide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glipizide glyburide tolbutamide pioglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rosiglitazone acarbose miglitol troglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## tolazamide examide citoglipton insulin
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## metformin.rosiglitazone metformin.pioglitazone change
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## diabetesMed readmitted
## Length:101766 Length:101766
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Structural dump of the table at this point in the script.
utils::str(object = hospital1)
## 'data.frame': 101766 obs. of 43 variables:
## $ race : chr "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
## $ admission_type_id : Factor w/ 8 levels "1","2","3","4",..: 6 1 1 1 1 2 3 1 2 3 ...
## $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 1 1 1 1 1 1 1 1 3 ...
## $ admission_source_id : Factor w/ 17 levels "1","2","3","4",..: 1 7 7 7 7 2 2 7 4 4 ...
## $ time_in_hospital : int 1 3 2 2 1 3 4 5 13 12 ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" NA NA NA ...
## $ num_lab_procedures : int 41 59 11 44 51 31 70 73 68 33 ...
## $ num_procedures : int 0 0 5 1 0 6 1 0 2 3 ...
## $ num_medications : int 1 18 13 16 8 16 21 12 28 18 ...
## $ number_outpatient : int 0 0 2 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 1 0 0 0 0 0 0 0 ...
## $ number_diagnoses : int 1 9 6 7 5 9 7 8 8 8 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "No" "No" ...
## $ repaglinide : chr "No" "No" "No" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ acetohexamide : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "Steady" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "No" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ troglitazone : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ examide : chr "No" "No" "No" "No" ...
## $ citoglipton : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Up" "No" "Up" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ glipizide.metformin : chr "No" "No" "No" "No" ...
## $ glimepiride.pioglitazone: chr "No" "No" "No" "No" ...
## $ metformin.rosiglitazone : chr "No" "No" "No" "No" ...
## $ metformin.pioglitazone : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "No" "Ch" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : chr "NO" ">30" "NO" "NO" ...
# Turn the "?" placeholder in race into a real NA.
hospital1$race[hospital1$race == "?"] <- NA
# Re-inspect the table after the recode.
summary(object = hospital1)
## race gender age admission_type_id
## Length:101766 Length:101766 Length:101766 1 :53990
## Class :character Class :character Class :character 3 :18869
## Mode :character Mode :character Mode :character 2 :18480
## 6 : 5291
## 5 : 4785
## 8 : 320
## (Other): 31
## discharge_disposition_id admission_source_id time_in_hospital
## 1 :60234 7 :57494 Min. : 1.000
## 3 :13954 1 :29565 1st Qu.: 2.000
## 6 :12902 17 : 6781 Median : 4.000
## 18 : 3691 4 : 3187 Mean : 4.396
## 2 : 2128 6 : 2264 3rd Qu.: 6.000
## 22 : 1993 2 : 1104 Max. :14.000
## (Other): 6864 (Other): 1371
## medical_specialty num_lab_procedures num_procedures num_medications
## Length:101766 Min. : 1.0 Min. :0.00 Min. : 1.00
## Class :character 1st Qu.: 31.0 1st Qu.:0.00 1st Qu.:10.00
## Mode :character Median : 44.0 Median :1.00 Median :15.00
## Mean : 43.1 Mean :1.34 Mean :16.02
## 3rd Qu.: 57.0 3rd Qu.:2.00 3rd Qu.:20.00
## Max. :132.0 Max. :6.00 Max. :81.00
##
## number_outpatient number_emergency number_inpatient number_diagnoses
## Min. : 0.0000 Min. : 0.0000 Min. : 0.0000 Min. : 1.000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 6.000
## Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 8.000
## Mean : 0.3694 Mean : 0.1978 Mean : 0.6356 Mean : 7.423
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.0000 3rd Qu.: 9.000
## Max. :42.0000 Max. :76.0000 Max. :21.0000 Max. :16.000
##
## max_glu_serum A1Cresult metformin repaglinide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## nateglinide chlorpropamide glimepiride acetohexamide
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glipizide glyburide tolbutamide pioglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rosiglitazone acarbose miglitol troglitazone
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## tolazamide examide citoglipton insulin
## Length:101766 Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## metformin.rosiglitazone metformin.pioglitazone change
## Length:101766 Length:101766 Length:101766
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## diabetesMed readmitted
## Length:101766 Length:101766
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Structural dump of the table at this point in the script.
utils::str(object = hospital1)
## 'data.frame': 101766 obs. of 43 variables:
## $ race : chr "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
## $ admission_type_id : Factor w/ 8 levels "1","2","3","4",..: 6 1 1 1 1 2 3 1 2 3 ...
## $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 1 1 1 1 1 1 1 1 3 ...
## $ admission_source_id : Factor w/ 17 levels "1","2","3","4",..: 1 7 7 7 7 2 2 7 4 4 ...
## $ time_in_hospital : int 1 3 2 2 1 3 4 5 13 12 ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" NA NA NA ...
## $ num_lab_procedures : int 41 59 11 44 51 31 70 73 68 33 ...
## $ num_procedures : int 0 0 5 1 0 6 1 0 2 3 ...
## $ num_medications : int 1 18 13 16 8 16 21 12 28 18 ...
## $ number_outpatient : int 0 0 2 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 1 0 0 0 0 0 0 0 ...
## $ number_diagnoses : int 1 9 6 7 5 9 7 8 8 8 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "No" "No" ...
## $ repaglinide : chr "No" "No" "No" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ acetohexamide : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "Steady" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "No" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ troglitazone : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ examide : chr "No" "No" "No" "No" ...
## $ citoglipton : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Up" "No" "Up" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ glipizide.metformin : chr "No" "No" "No" "No" ...
## $ glimepiride.pioglitazone: chr "No" "No" "No" "No" ...
## $ metformin.rosiglitazone : chr "No" "No" "No" "No" ...
## $ metformin.pioglitazone : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "No" "Ch" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : chr "NO" ">30" "NO" "NO" ...
# Count the missing values in each column (named integer vector).
# Walking the columns with vapply() avoids apply() first coercing the whole
# mixed-type data frame into one large character matrix, and fixes the result
# type to one integer per column.
vapply(hospital1, function(column) sum(is.na(column)), integer(1))
## race gender age
## 2273 0 0
## admission_type_id discharge_disposition_id admission_source_id
## 0 0 0
## time_in_hospital medical_specialty num_lab_procedures
## 0 49949 0
## num_procedures num_medications number_outpatient
## 0 0 0
## number_emergency number_inpatient number_diagnoses
## 0 0 0
## max_glu_serum A1Cresult metformin
## 0 0 0
## repaglinide nateglinide chlorpropamide
## 0 0 0
## glimepiride acetohexamide glipizide
## 0 0 0
## glyburide tolbutamide pioglitazone
## 0 0 0
## rosiglitazone acarbose miglitol
## 0 0 0
## troglitazone tolazamide examide
## 0 0 0
## citoglipton insulin glyburide.metformin
## 0 0 0
## glipizide.metformin glimepiride.pioglitazone metformin.rosiglitazone
## 0 0 0
## metformin.pioglitazone change diabetesMed
## 0 0 0
## readmitted
## 0
# Keep only the rows that have no NA in any column.
hospital2 <- hospital1[complete.cases(hospital1), ]
# Verify: every per-column NA count should now be zero. vapply() walks the
# columns directly instead of letting apply() coerce the data frame to a
# character matrix.
vapply(hospital2, function(column) sum(is.na(column)), integer(1))
## race gender age
## 0 0 0
## admission_type_id discharge_disposition_id admission_source_id
## 0 0 0
## time_in_hospital medical_specialty num_lab_procedures
## 0 0 0
## num_procedures num_medications number_outpatient
## 0 0 0
## number_emergency number_inpatient number_diagnoses
## 0 0 0
## max_glu_serum A1Cresult metformin
## 0 0 0
## repaglinide nateglinide chlorpropamide
## 0 0 0
## glimepiride acetohexamide glipizide
## 0 0 0
## glyburide tolbutamide pioglitazone
## 0 0 0
## rosiglitazone acarbose miglitol
## 0 0 0
## troglitazone tolazamide examide
## 0 0 0
## citoglipton insulin glyburide.metformin
## 0 0 0
## glipizide.metformin glimepiride.pioglitazone metformin.rosiglitazone
## 0 0 0
## metformin.pioglitazone change diabetesMed
## 0 0 0
## readmitted
## 0
# Dimensions before removing exact duplicate rows.
dim(hospital2)
## [1] 50727 43
# De-duplicate once and reuse the result, rather than calling unique() on the
# full data frame a second time just to print its dimensions.
hospital3 <- unique(hospital2)
dim(hospital3)
## [1] 50725 43
# Row counts for each value of the readmitted outcome.
table(hospital3$readmitted)
##
## <30 >30 NO
## 5477 17118 28130
# Bar chart of the share of rows falling in each readmitted value; each bar is
# labelled with its proportion rounded to two decimals (vjust = 2 shifts the
# label down from the top of the bar).
hospital3 %>%
  group_by(readmitted) %>%
  summarise(per = n() / nrow(hospital3)) %>%
  ggplot(mapping = aes(readmitted, per, fill = readmitted)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = round(per, digits = 2)), vjust = 2)

# Raw counts behind the proportions plotted above.
table(hospital3[["readmitted"]])
##
## <30 >30 NO
## 5477 17118 28130
# Coerce the outcome to a plain character vector. as.character() is already
# vectorised; wrapping it in lapply() returned a list and turned the column
# into a 50725-element list column, which str() then printed element by
# element.
hospital3$readmitted <- as.character(hospital3$readmitted)
str(hospital3)
## 'data.frame': 50725 obs. of 43 variables:
## $ race : chr "Caucasian" "Caucasian" "Caucasian" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[90-100)" "[40-50)" "[80-90)" ...
## $ admission_type_id : Factor w/ 8 levels "1","2","3","4",..: 6 3 1 1 1 1 1 1 1 1 ...
## $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 3 3 6 1 3 1 2 1 1 ...
## $ admission_source_id : Factor w/ 17 levels "1","2","3","4",..: 1 4 7 7 7 7 1 7 7 7 ...
## $ time_in_hospital : int 1 12 7 10 3 6 2 5 6 2 ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" "InternalMedicine" "Family/GeneralPractice" "Family/GeneralPractice" ...
## $ num_lab_procedures : int 41 33 60 55 29 64 25 52 27 41 ...
## $ num_procedures : int 0 3 0 1 0 3 2 0 0 0 ...
## $ num_medications : int 1 18 15 31 11 18 11 14 16 11 ...
## $ number_outpatient : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 1 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_diagnoses : int 1 8 8 8 3 7 3 8 8 6 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "Steady" "No" ...
## $ repaglinide : chr "No" "No" "Up" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ acetohexamide : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "No" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "Steady" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ troglitazone : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ examide : chr "No" "No" "No" "No" ...
## $ citoglipton : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Steady" "Down" "Steady" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ glipizide.metformin : chr "No" "No" "No" "No" ...
## $ glimepiride.pioglitazone: chr "No" "No" "No" "No" ...
## $ metformin.rosiglitazone : chr "No" "No" "No" "No" ...
## $ metformin.pioglitazone : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "Ch" "No" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : chr "NO" "NO" "<30" "NO" ...
# Collapse both readmission windows ("<30" and ">30") into a single "YES".
hospital3$readmitted[hospital3$readmitted %in% c("<30", ">30")] <- "YES"
# Encode the outcome as a two-level factor: "NO" -> 0, "YES" -> 1.
hospital3$readmitted <- factor(
  hospital3$readmitted,
  levels = c("NO", "YES"),
  labels = c(0, 1)
)
# Overview of every column, including the recoded outcome.
summary(hospital3)
## race gender age admission_type_id
## Length:50725 Length:50725 Length:50725 1 :20338
## Class :character Class :character Class :character 2 :12493
## Mode :character Mode :character Mode :character 3 :11985
## 6 : 3885
## 5 : 1738
## 8 : 284
## (Other): 2
## discharge_disposition_id admission_source_id time_in_hospital
## 1 :32189 7 :25206 Min. : 1.000
## 3 : 6561 1 :17489 1st Qu.: 2.000
## 6 : 5259 17 : 4241 Median : 4.000
## 22 : 1134 6 : 1592 Mean : 4.393
## 2 : 1088 4 : 1400 3rd Qu.: 6.000
## 18 : 894 5 : 500 Max. :14.000
## (Other): 3600 (Other): 297
## medical_specialty num_lab_procedures num_procedures num_medications
## Length:50725 Min. : 1.00 Min. :0.000 Min. : 1.00
## Class :character 1st Qu.: 32.00 1st Qu.:0.000 1st Qu.:10.00
## Mode :character Median : 44.00 Median :1.000 Median :14.00
## Mean : 42.89 Mean :1.442 Mean :15.77
## 3rd Qu.: 56.00 3rd Qu.:2.000 3rd Qu.:20.00
## Max. :132.00 Max. :6.000 Max. :81.00
##
## number_outpatient number_emergency number_inpatient number_diagnoses
## Min. : 0.0000 Min. : 0.0000 Min. : 0.000 Min. : 1.00
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 5.00
## Median : 0.0000 Median : 0.0000 Median : 0.000 Median : 8.00
## Mean : 0.2515 Mean : 0.2089 Mean : 0.638 Mean : 7.14
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 9.00
## Max. :38.0000 Max. :76.0000 Max. :16.000 Max. :16.00
##
## max_glu_serum A1Cresult metformin repaglinide
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## nateglinide chlorpropamide glimepiride acetohexamide
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glipizide glyburide tolbutamide pioglitazone
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rosiglitazone acarbose miglitol troglitazone
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## tolazamide examide citoglipton insulin
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## metformin.rosiglitazone metformin.pioglitazone change
## Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## diabetesMed readmitted
## Length:50725 0:28130
## Class :character 1:22595
## Mode :character
##
##
##
##
# Row counts for each level of the readmitted outcome.
table(hospital3[["readmitted"]])
##
## 0 1
## 28130 22595
# Bar chart of the share of rows in each readmitted level, labelled with the
# proportion rounded to two decimals.
hospital3 %>%
  group_by(readmitted) %>%
  summarise(per = n() / nrow(hospital3)) %>%
  ggplot(mapping = aes(readmitted, per, fill = readmitted)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = round(per, digits = 2)), vjust = 2)
## Visualization
# Side-by-side bars: number of rows for each diabetesMed value, split by the
# readmitted outcome, with the count written in white on each bar.
hospital3 %>%
  group_by(diabetesMed, readmitted) %>%
  summarise(total = n()) %>%
  ggplot(mapping = aes(diabetesMed, total, fill = readmitted)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(
    mapping = aes(label = total),
    position = position_dodge(width = 0.9),
    vjust = 1.6,
    colour = "white",
    size = 3.5
  )

# Column-by-column overview of the current data frame.
summary(hospital3)
## race gender age admission_type_id
## Length:50725 Length:50725 Length:50725 1 :20338
## Class :character Class :character Class :character 2 :12493
## Mode :character Mode :character Mode :character 3 :11985
## 6 : 3885
## 5 : 1738
## 8 : 284
## (Other): 2
## discharge_disposition_id admission_source_id time_in_hospital
## 1 :32189 7 :25206 Min. : 1.000
## 3 : 6561 1 :17489 1st Qu.: 2.000
## 6 : 5259 17 : 4241 Median : 4.000
## 22 : 1134 6 : 1592 Mean : 4.393
## 2 : 1088 4 : 1400 3rd Qu.: 6.000
## 18 : 894 5 : 500 Max. :14.000
## (Other): 3600 (Other): 297
## medical_specialty num_lab_procedures num_procedures num_medications
## Length:50725 Min. : 1.00 Min. :0.000 Min. : 1.00
## Class :character 1st Qu.: 32.00 1st Qu.:0.000 1st Qu.:10.00
## Mode :character Median : 44.00 Median :1.000 Median :14.00
## Mean : 42.89 Mean :1.442 Mean :15.77
## 3rd Qu.: 56.00 3rd Qu.:2.000 3rd Qu.:20.00
## Max. :132.00 Max. :6.000 Max. :81.00
##
## number_outpatient number_emergency number_inpatient number_diagnoses
## Min. : 0.0000 Min. : 0.0000 Min. : 0.000 Min. : 1.00
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 5.00
## Median : 0.0000 Median : 0.0000 Median : 0.000 Median : 8.00
## Mean : 0.2515 Mean : 0.2089 Mean : 0.638 Mean : 7.14
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 9.00
## Max. :38.0000 Max. :76.0000 Max. :16.000 Max. :16.00
##
## max_glu_serum A1Cresult metformin repaglinide
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## nateglinide chlorpropamide glimepiride acetohexamide
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glipizide glyburide tolbutamide pioglitazone
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## rosiglitazone acarbose miglitol troglitazone
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## tolazamide examide citoglipton insulin
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glyburide.metformin glipizide.metformin glimepiride.pioglitazone
## Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## metformin.rosiglitazone metformin.pioglitazone change
## Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## diabetesMed readmitted
## Length:50725 0:28130
## Class :character 1:22595
## Mode :character
##
##
##
##
# Medication columns to remove before modelling.
# NOTE(review): the reason for dropping exactly these eight columns is not
# stated here -- presumably they carry almost no variation; confirm.
col2 <- c(
  "examide", "citoglipton", "glimepiride.pioglitazone",
  "metformin.rosiglitazone", "acetohexamide", "glipizide.metformin",
  "metformin.pioglitazone", "troglitazone"
)
# Fail loudly if a listed column is absent, as dplyr::select() would have.
stopifnot(all(col2 %in% names(hospital3)))
# Drop the columns by name. Passing the bare vector `col2` to select() is
# ambiguous: tidyselect looks for a column called "col2" first and only then
# falls back to the variable, and newer versions flag that as deprecated.
# Subsetting by name gives the same columns in the same order on any version.
hospital3 <- hospital3[, setdiff(names(hospital3), col2), drop = FALSE]
colnames(hospital3)
## [1] "race" "gender"
## [3] "age" "admission_type_id"
## [5] "discharge_disposition_id" "admission_source_id"
## [7] "time_in_hospital" "medical_specialty"
## [9] "num_lab_procedures" "num_procedures"
## [11] "num_medications" "number_outpatient"
## [13] "number_emergency" "number_inpatient"
## [15] "number_diagnoses" "max_glu_serum"
## [17] "A1Cresult" "metformin"
## [19] "repaglinide" "nateglinide"
## [21] "chlorpropamide" "glimepiride"
## [23] "glipizide" "glyburide"
## [25] "tolbutamide" "pioglitazone"
## [27] "rosiglitazone" "acarbose"
## [29] "miglitol" "tolazamide"
## [31] "insulin" "glyburide.metformin"
## [33] "change" "diabetesMed"
## [35] "readmitted"
# Summarise every column that remains after the medication columns listed in
# col2 were removed.
summary(hospital3)
## race gender age admission_type_id
## Length:50725 Length:50725 Length:50725 1 :20338
## Class :character Class :character Class :character 2 :12493
## Mode :character Mode :character Mode :character 3 :11985
## 6 : 3885
## 5 : 1738
## 8 : 284
## (Other): 2
## discharge_disposition_id admission_source_id time_in_hospital
## 1 :32189 7 :25206 Min. : 1.000
## 3 : 6561 1 :17489 1st Qu.: 2.000
## 6 : 5259 17 : 4241 Median : 4.000
## 22 : 1134 6 : 1592 Mean : 4.393
## 2 : 1088 4 : 1400 3rd Qu.: 6.000
## 18 : 894 5 : 500 Max. :14.000
## (Other): 3600 (Other): 297
## medical_specialty num_lab_procedures num_procedures num_medications
## Length:50725 Min. : 1.00 Min. :0.000 Min. : 1.00
## Class :character 1st Qu.: 32.00 1st Qu.:0.000 1st Qu.:10.00
## Mode :character Median : 44.00 Median :1.000 Median :14.00
## Mean : 42.89 Mean :1.442 Mean :15.77
## 3rd Qu.: 56.00 3rd Qu.:2.000 3rd Qu.:20.00
## Max. :132.00 Max. :6.000 Max. :81.00
##
## number_outpatient number_emergency number_inpatient number_diagnoses
## Min. : 0.0000 Min. : 0.0000 Min. : 0.000 Min. : 1.00
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 5.00
## Median : 0.0000 Median : 0.0000 Median : 0.000 Median : 8.00
## Mean : 0.2515 Mean : 0.2089 Mean : 0.638 Mean : 7.14
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 9.00
## Max. :38.0000 Max. :76.0000 Max. :16.000 Max. :16.00
##
## max_glu_serum A1Cresult metformin repaglinide
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## nateglinide chlorpropamide glimepiride glipizide
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glyburide tolbutamide pioglitazone rosiglitazone
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## acarbose miglitol tolazamide insulin
## Length:50725 Length:50725 Length:50725 Length:50725
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## glyburide.metformin change diabetesMed readmitted
## Length:50725 Length:50725 Length:50725 0:28130
## Class :character Class :character Class :character 1:22595
## Mode :character Mode :character Mode :character
##
##
##
##
# Fix the RNG seed so the split below is reproducible.
set.seed(123)
# sample.split() yields a logical vector over the rows, used below as the
# training-row mask (SplitRatio = 0.7).
splitData <- sample.split(hospital3[["readmitted"]], SplitRatio = 0.7)

# Rows flagged TRUE form the training set; the ratio printed checks its size.
train_set <- hospital3[splitData, ]
nrow(train_set) / nrow(hospital3)
## [1] 0.6999901

# The remaining rows are held out as the test set.
test_set <- hospital3[!splitData, ]
nrow(test_set) / nrow(hospital3)
## [1] 0.3000099

# Logistic regression of readmitted on every other column of the training set.
model <- glm(readmitted ~ ., family = binomial, data = train_set)
summary(model)
##
## Call:
## glm(formula = readmitted ~ ., family = binomial, data = train_set)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3399 -1.0246 -0.6968 1.1656 2.6079
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.586e+00 2.716e+03
## raceAsian -3.881e-01 1.305e-01
## raceCaucasian 7.406e-03 2.961e-02
## raceHispanic -3.894e-02 7.898e-02
## raceOther -2.593e-01 9.435e-02
## genderMale -7.165e-02 2.346e-02
## genderUnknown/Invalid -1.485e+01 1.455e+03
## age[10-20) 7.014e-01 3.367e-01
## age[20-30) 5.675e-01 3.730e-01
## age[30-40) 6.189e-01 3.656e-01
## age[40-50) 7.031e-01 3.626e-01
## age[50-60) 6.662e-01 3.622e-01
## age[60-70) 6.997e-01 3.622e-01
## age[70-80) 8.265e-01 3.622e-01
## age[80-90) 7.293e-01 3.628e-01
## age[90-100) 5.333e-01 3.690e-01
## admission_type_id2 1.640e-01 4.045e-02
## admission_type_id3 1.991e-01 4.923e-02
## admission_type_id4 -1.403e+01 1.455e+03
## admission_type_id5 7.960e-02 1.133e-01
## admission_type_id6 7.073e-01 6.891e-02
## admission_type_id8 -1.021e-01 1.621e-01
## discharge_disposition_id2 2.806e-02 7.830e-02
## discharge_disposition_id3 -3.168e-03 3.906e-02
## discharge_disposition_id4 1.609e-01 1.278e-01
## discharge_disposition_id5 2.711e-01 9.615e-02
## discharge_disposition_id6 1.558e-01 3.953e-02
## discharge_disposition_id7 -1.382e-01 1.636e-01
## discharge_disposition_id8 2.820e-01 3.882e-01
## discharge_disposition_id9 1.591e+01 1.455e+03
## discharge_disposition_id10 3.325e-02 1.424e+00
## discharge_disposition_id11 -1.594e+01 5.807e+01
## discharge_disposition_id12 7.559e-01 1.427e+00
## discharge_disposition_id13 -2.711e+00 3.643e-01
## discharge_disposition_id14 -2.484e+00 4.217e-01
## discharge_disposition_id15 1.427e+00 6.498e-01
## discharge_disposition_id16 4.888e-01 6.960e-01
## discharge_disposition_id17 -2.968e-01 7.470e-01
## discharge_disposition_id18 2.826e-01 9.058e-02
## discharge_disposition_id19 -1.493e+01 8.327e+02
## discharge_disposition_id20 -1.601e+01 1.455e+03
## discharge_disposition_id22 1.537e-01 7.908e-02
## discharge_disposition_id23 -5.538e-01 1.792e-01
## discharge_disposition_id24 -2.928e-01 8.722e-01
## discharge_disposition_id25 -3.084e-01 1.043e-01
## discharge_disposition_id27 -1.524e+01 1.455e+03
## discharge_disposition_id28 3.668e-01 5.629e-01
## admission_source_id2 -1.164e-01 1.861e-01
## admission_source_id3 -4.602e-02 4.208e-01
## admission_source_id4 -5.083e-01 8.071e-02
## admission_source_id5 -2.471e-01 1.258e-01
## admission_source_id6 -5.127e-01 8.352e-02
## admission_source_id7 1.455e-01 4.445e-02
## admission_source_id8 6.663e-01 8.256e-01
## admission_source_id9 -2.947e-01 3.914e-01
## admission_source_id10 3.366e-01 1.416e+00
## admission_source_id14 -5.764e-01 2.058e+03
## admission_source_id17 -1.280e-01 7.835e-02
## admission_source_id22 -1.573e+01 1.455e+03
## time_in_hospital 1.651e-02 4.845e-03
## medical_specialtyAnesthesiology -1.380e+00 1.497e+00
## medical_specialtyAnesthesiology-Pediatric -8.233e-01 1.402e+00
## medical_specialtyCardiology -7.146e-01 1.249e+00
## medical_specialtyCardiology-Pediatric 1.042e+00 1.549e+00
## medical_specialtyDCPTEAM -1.579e+01 7.570e+02
## medical_specialtyDentistry 1.472e+01 8.172e+02
## medical_specialtyDermatology 1.517e+01 1.455e+03
## medical_specialtyEmergency/Trauma -6.731e-01 1.249e+00
## medical_specialtyEndocrinology -8.538e-01 1.269e+00
## medical_specialtyEndocrinology-Metabolism -1.610e+01 5.974e+02
## medical_specialtyFamily/GeneralPractice -6.065e-01 1.249e+00
## medical_specialtyGastroenterology -5.442e-01 1.253e+00
## medical_specialtyGynecology -1.891e+00 1.328e+00
## medical_specialtyHematology -2.976e-01 1.283e+00
## medical_specialtyHematology/Oncology -6.771e-01 1.262e+00
## medical_specialtyHospitalist -8.908e-01 1.291e+00
## medical_specialtyInfectiousDiseases -1.447e-02 1.331e+00
## medical_specialtyInternalMedicine -7.451e-01 1.249e+00
## medical_specialtyNephrology -3.115e-01 1.250e+00
## medical_specialtyNeurology -1.212e+00 1.263e+00
## medical_specialtyObsterics&Gynecology-GynecologicOnco -1.607e+00 1.373e+00
## medical_specialtyObstetrics -2.614e+00 1.629e+00
## medical_specialtyObstetricsandGynecology -1.654e+00 1.255e+00
## medical_specialtyOncology -6.281e-01 1.257e+00
## medical_specialtyOphthalmology -1.132e+00 1.325e+00
## medical_specialtyOrthopedics -1.039e+00 1.251e+00
## medical_specialtyOrthopedics-Reconstructive -1.213e+00 1.251e+00
## medical_specialtyOsteopath -8.127e-01 1.309e+00
## medical_specialtyOtolaryngology -1.401e+00 1.274e+00
## medical_specialtyOutreachServices -6.091e-01 1.422e+00
## medical_specialtyPathology 8.351e-02 1.427e+00
## medical_specialtyPediatrics -8.516e-01 1.262e+00
## medical_specialtyPediatrics-AllergyandImmunology 1.431e+01 1.455e+03
## medical_specialtyPediatrics-CriticalCare -4.500e-01 1.290e+00
## medical_specialtyPediatrics-EmergencyMedicine 1.569e+01 1.455e+03
## medical_specialtyPediatrics-Endocrinology -1.235e+00 1.291e+00
## medical_specialtyPediatrics-Hematology-Oncology -1.030e+00 1.770e+00
## medical_specialtyPediatrics-Neurology -6.397e-01 1.481e+00
## medical_specialtyPediatrics-Pulmonology 7.569e-01 1.420e+00
## medical_specialtyPerinatology -1.603e+01 1.455e+03
## medical_specialtyPhysicalMedicineandRehabilitation -8.830e-01 1.257e+00
## medical_specialtyPhysicianNotFound -4.071e-01 1.454e+00
## medical_specialtyPodiatry -3.907e-02 1.279e+00
## medical_specialtyPsychiatry -8.182e-01 1.252e+00
## medical_specialtyPsychiatry-Addictive -1.652e+01 1.455e+03
## medical_specialtyPsychiatry-Child/Adolescent -1.073e+00 1.527e+00
## medical_specialtyPsychology -8.316e-01 1.274e+00
## medical_specialtyPulmonology -5.766e-01 1.251e+00
## medical_specialtyRadiologist -7.600e-01 1.251e+00
## medical_specialtyRadiology -7.499e-01 1.300e+00
## medical_specialtyResident 1.418e+01 1.455e+03
## medical_specialtyRheumatology -1.275e+00 1.376e+00
## medical_specialtySpeech -1.643e+01 1.455e+03
## medical_specialtySportsMedicine 1.524e+01 1.455e+03
## medical_specialtySurgeon -1.279e+00 1.308e+00
## medical_specialtySurgery-Cardiovascular -1.091e+00 1.275e+00
## medical_specialtySurgery-Cardiovascular/Thoracic -1.410e+00 1.254e+00
## medical_specialtySurgery-Colon&Rectal -2.753e-01 1.608e+00
## medical_specialtySurgery-General -8.236e-01 1.250e+00
## medical_specialtySurgery-Maxillofacial -2.316e+00 1.652e+00
## medical_specialtySurgery-Neuro -1.338e+00 1.255e+00
## medical_specialtySurgery-Pediatric -1.506e+00 1.692e+00
## medical_specialtySurgery-Plastic -9.050e-01 1.315e+00
## medical_specialtySurgery-PlasticwithinHeadandNeck 1.490e+01 1.455e+03
## medical_specialtySurgery-Thoracic -6.634e-01 1.274e+00
## medical_specialtySurgery-Vascular -5.523e-01 1.254e+00
## medical_specialtySurgicalSpecialty -1.554e+00 1.323e+00
## medical_specialtyUrology -9.738e-01 1.253e+00
## num_lab_procedures -6.787e-04 7.350e-04
## num_procedures -1.891e-02 7.889e-03
## num_medications 1.225e-03 1.936e-03
## number_outpatient 1.123e-01 1.394e-02
## number_emergency 1.579e-01 1.985e-02
## number_inpatient 3.661e-01 1.215e-02
## number_diagnoses 6.372e-02 6.735e-03
## max_glu_serum>300 4.996e-02 1.422e-01
## max_glu_serumNone -1.254e-01 1.075e-01
## max_glu_serumNorm -2.880e-02 1.108e-01
## A1Cresult>8 3.178e-02 6.854e-02
## A1CresultNone 1.304e-01 5.884e-02
## A1CresultNorm -1.197e-01 7.943e-02
## metforminNo 2.157e-02 1.585e-01
## metforminSteady -1.024e-01 1.587e-01
## metforminUp -2.045e-02 1.889e-01
## repaglinideNo -3.695e-01 5.094e-01
## repaglinideSteady -1.951e-01 5.152e-01
## repaglinideUp -2.200e-01 5.899e-01
## nateglinideNo -8.560e-01 1.256e+00
## nateglinideSteady -7.025e-01 1.263e+00
## nateglinideUp -3.066e+00 1.606e+00
## chlorpropamideNo 1.442e+01 1.455e+03
## chlorpropamideSteady 1.470e+01 1.455e+03
## chlorpropamideUp 1.402e+01 1.455e+03
## glimepirideNo 2.544e-02 2.364e-01
## glimepirideSteady 3.124e-02 2.395e-01
## glimepirideUp -1.433e-01 2.895e-01
## glipizideNo -3.152e-01 1.653e-01
## glipizideSteady -2.923e-01 1.655e-01
## glipizideUp -3.141e-01 2.008e-01
## glyburideNo -1.002e-01 1.629e-01
## glyburideSteady -1.425e-01 1.640e-01
## glyburideUp -2.326e-01 2.096e-01
## tolbutamideSteady -7.962e-01 1.173e+00
## pioglitazoneNo -3.905e-01 3.457e-01
## pioglitazoneSteady -4.101e-01 3.477e-01
## pioglitazoneUp 3.745e-02 4.179e-01
## rosiglitazoneNo 1.658e+00 5.620e-01
## rosiglitazoneSteady 1.755e+00 5.633e-01
## rosiglitazoneUp 1.189e+00 6.129e-01
## acarboseNo -1.438e+01 1.010e+03
## acarboseSteady -1.384e+01 1.010e+03
## acarboseUp 2.036e+00 1.439e+03
## miglitolNo -1.359e+01 1.455e+03
## miglitolSteady -1.320e+01 1.455e+03
## miglitolUp -2.915e+01 2.058e+03
## tolazamideSteady -1.165e+00 6.529e-01
## insulinNo -7.410e-02 5.993e-02
## insulinSteady -1.544e-01 4.634e-02
## insulinUp -5.348e-02 4.674e-02
## glyburide.metforminNo 1.525e+01 1.455e+03
## glyburide.metforminSteady 1.541e+01 1.455e+03
## glyburide.metforminUp -8.332e-01 2.058e+03
## changeNo -7.866e-03 4.308e-02
## diabetesMedYes 3.129e-01 4.198e-02
## z value Pr(>|z|)
## (Intercept) -0.001 0.999240
## raceAsian -2.974 0.002938 **
## raceCaucasian 0.250 0.802518
## raceHispanic -0.493 0.622030
## raceOther -2.749 0.005985 **
## genderMale -3.054 0.002257 **
## genderUnknown/Invalid -0.010 0.991857
## age[10-20) 2.083 0.037260 *
## age[20-30) 1.522 0.128117
## age[30-40) 1.693 0.090538 .
## age[40-50) 1.939 0.052462 .
## age[50-60) 1.839 0.065860 .
## age[60-70) 1.932 0.053382 .
## age[70-80) 2.282 0.022493 *
## age[80-90) 2.010 0.044449 *
## age[90-100) 1.445 0.148374
## admission_type_id2 4.056 5.00e-05 ***
## admission_type_id3 4.044 5.26e-05 ***
## admission_type_id4 -0.010 0.992308
## admission_type_id5 0.703 0.482354
## admission_type_id6 10.265 < 2e-16 ***
## admission_type_id8 -0.630 0.528949
## discharge_disposition_id2 0.358 0.720121
## discharge_disposition_id3 -0.081 0.935354
## discharge_disposition_id4 1.260 0.207775
## discharge_disposition_id5 2.820 0.004802 **
## discharge_disposition_id6 3.942 8.09e-05 ***
## discharge_disposition_id7 -0.845 0.398215
## discharge_disposition_id8 0.726 0.467556
## discharge_disposition_id9 0.011 0.991276
## discharge_disposition_id10 0.023 0.981374
## discharge_disposition_id11 -0.274 0.783720
## discharge_disposition_id12 0.530 0.596336
## discharge_disposition_id13 -7.440 1.00e-13 ***
## discharge_disposition_id14 -5.890 3.85e-09 ***
## discharge_disposition_id15 2.196 0.028105 *
## discharge_disposition_id16 0.702 0.482504
## discharge_disposition_id17 -0.397 0.691108
## discharge_disposition_id18 3.120 0.001810 **
## discharge_disposition_id19 -0.018 0.985696
## discharge_disposition_id20 -0.011 0.991221
## discharge_disposition_id22 1.943 0.051976 .
## discharge_disposition_id23 -3.091 0.001996 **
## discharge_disposition_id24 -0.336 0.737068
## discharge_disposition_id25 -2.956 0.003115 **
## discharge_disposition_id27 -0.010 0.991644
## discharge_disposition_id28 0.652 0.514604
## admission_source_id2 -0.626 0.531584
## admission_source_id3 -0.109 0.912903
## admission_source_id4 -6.297 3.03e-10 ***
## admission_source_id5 -1.964 0.049555 *
## admission_source_id6 -6.139 8.33e-10 ***
## admission_source_id7 3.273 0.001064 **
## admission_source_id8 0.807 0.419629
## admission_source_id9 -0.753 0.451572
## admission_source_id10 0.238 0.812119
## admission_source_id14 0.000 0.999777
## admission_source_id17 -1.634 0.102303
## admission_source_id22 -0.011 0.991378
## time_in_hospital 3.407 0.000656 ***
## medical_specialtyAnesthesiology -0.922 0.356462
## medical_specialtyAnesthesiology-Pediatric -0.587 0.556967
## medical_specialtyCardiology -0.572 0.567260
## medical_specialtyCardiology-Pediatric 0.673 0.501196
## medical_specialtyDCPTEAM -0.021 0.983361
## medical_specialtyDentistry 0.018 0.985634
## medical_specialtyDermatology 0.010 0.991685
## medical_specialtyEmergency/Trauma -0.539 0.589968
## medical_specialtyEndocrinology -0.673 0.501249
## medical_specialtyEndocrinology-Metabolism -0.027 0.978498
## medical_specialtyFamily/GeneralPractice -0.486 0.627296
## medical_specialtyGastroenterology -0.434 0.664177
## medical_specialtyGynecology -1.425 0.154298
## medical_specialtyHematology -0.232 0.816537
## medical_specialtyHematology/Oncology -0.537 0.591609
## medical_specialtyHospitalist -0.690 0.490274
## medical_specialtyInfectiousDiseases -0.011 0.991323
## medical_specialtyInternalMedicine -0.597 0.550685
## medical_specialtyNephrology -0.249 0.803291
## medical_specialtyNeurology -0.960 0.337238
## medical_specialtyObsterics&Gynecology-GynecologicOnco -1.170 0.242011
## medical_specialtyObstetrics -1.605 0.108546
## medical_specialtyObstetricsandGynecology -1.318 0.187467
## medical_specialtyOncology -0.500 0.617238
## medical_specialtyOphthalmology -0.855 0.392823
## medical_specialtyOrthopedics -0.830 0.406325
## medical_specialtyOrthopedics-Reconstructive -0.969 0.332339
## medical_specialtyOsteopath -0.621 0.534628
## medical_specialtyOtolaryngology -1.100 0.271220
## medical_specialtyOutreachServices -0.429 0.668280
## medical_specialtyPathology 0.059 0.953336
## medical_specialtyPediatrics -0.675 0.499902
## medical_specialtyPediatrics-AllergyandImmunology 0.010 0.992155
## medical_specialtyPediatrics-CriticalCare -0.349 0.727166
## medical_specialtyPediatrics-EmergencyMedicine 0.011 0.991397
## medical_specialtyPediatrics-Endocrinology -0.956 0.338826
## medical_specialtyPediatrics-Hematology-Oncology -0.582 0.560893
## medical_specialtyPediatrics-Neurology -0.432 0.665772
## medical_specialtyPediatrics-Pulmonology 0.533 0.594059
## medical_specialtyPerinatology -0.011 0.991213
## medical_specialtyPhysicalMedicineandRehabilitation -0.702 0.482371
## medical_specialtyPhysicianNotFound -0.280 0.779532
## medical_specialtyPodiatry -0.031 0.975632
## medical_specialtyPsychiatry -0.653 0.513521
## medical_specialtyPsychiatry-Addictive -0.011 0.990945
## medical_specialtyPsychiatry-Child/Adolescent -0.703 0.482261
## medical_specialtyPsychology -0.653 0.513955
## medical_specialtyPulmonology -0.461 0.644908
## medical_specialtyRadiologist -0.607 0.543607
## medical_specialtyRadiology -0.577 0.564101
## medical_specialtyResident 0.010 0.992227
## medical_specialtyRheumatology -0.926 0.354391
## medical_specialtySpeech -0.011 0.990994
## medical_specialtySportsMedicine 0.010 0.991645
## medical_specialtySurgeon -0.977 0.328448
## medical_specialtySurgery-Cardiovascular -0.856 0.392263
## medical_specialtySurgery-Cardiovascular/Thoracic -1.125 0.260645
## medical_specialtySurgery-Colon&Rectal -0.171 0.864039
## medical_specialtySurgery-General -0.659 0.509814
## medical_specialtySurgery-Maxillofacial -1.401 0.161077
## medical_specialtySurgery-Neuro -1.066 0.286505
## medical_specialtySurgery-Pediatric -0.890 0.373392
## medical_specialtySurgery-Plastic -0.688 0.491186
## medical_specialtySurgery-PlasticwithinHeadandNeck 0.010 0.991834
## medical_specialtySurgery-Thoracic -0.521 0.602454
## medical_specialtySurgery-Vascular -0.441 0.659499
## medical_specialtySurgicalSpecialty -1.175 0.240179
## medical_specialtyUrology -0.777 0.436938
## num_lab_procedures -0.923 0.355775
## num_procedures -2.397 0.016540 *
## num_medications 0.633 0.526947
## number_outpatient 8.056 7.87e-16 ***
## number_emergency 7.953 1.82e-15 ***
## number_inpatient 30.123 < 2e-16 ***
## number_diagnoses 9.462 < 2e-16 ***
## max_glu_serum>300 0.351 0.725298
## max_glu_serumNone -1.166 0.243570
## max_glu_serumNorm -0.260 0.794884
## A1Cresult>8 0.464 0.642859
## A1CresultNone 2.216 0.026694 *
## A1CresultNorm -1.508 0.131650
## metforminNo 0.136 0.891727
## metforminSteady -0.646 0.518599
## metforminUp -0.108 0.913762
## repaglinideNo -0.725 0.468215
## repaglinideSteady -0.379 0.704980
## repaglinideUp -0.373 0.709127
## nateglinideNo -0.681 0.495671
## nateglinideSteady -0.556 0.578019
## nateglinideUp -1.909 0.056295 .
## chlorpropamideNo 0.010 0.992094
## chlorpropamideSteady 0.010 0.991943
## chlorpropamideUp 0.010 0.992314
## glimepirideNo 0.108 0.914297
## glimepirideSteady 0.130 0.896224
## glimepirideUp -0.495 0.620742
## glipizideNo -1.906 0.056592 .
## glipizideSteady -1.767 0.077277 .
## glipizideUp -1.564 0.117725
## glyburideNo -0.615 0.538537
## glyburideSteady -0.869 0.385004
## glyburideUp -1.110 0.267084
## tolbutamideSteady -0.679 0.497215
## pioglitazoneNo -1.130 0.258684
## pioglitazoneSteady -1.180 0.238190
## pioglitazoneUp 0.090 0.928608
## rosiglitazoneNo 2.949 0.003183 **
## rosiglitazoneSteady 3.116 0.001834 **
## rosiglitazoneUp 1.939 0.052441 .
## acarboseNo -0.014 0.988641
## acarboseSteady -0.014 0.989064
## acarboseUp 0.001 0.998871
## miglitolNo -0.009 0.992548
## miglitolSteady -0.009 0.992761
## miglitolUp -0.014 0.988700
## tolazamideSteady -1.784 0.074461 .
## insulinNo -1.236 0.216293
## insulinSteady -3.333 0.000860 ***
## insulinUp -1.144 0.252577
## glyburide.metforminNo 0.010 0.991641
## glyburide.metforminSteady 0.011 0.991553
## glyburide.metforminUp 0.000 0.999677
## changeNo -0.183 0.855130
## diabetesMedYes 7.453 9.11e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 48799 on 35506 degrees of freedom
## Residual deviance: 44334 on 35323 degrees of freedom
## AIC: 44702
##
## Number of Fisher Scoring iterations: 14
# Generalized variance inflation factors (car) for the full model `model`.
# Multi-level factors get one GVIF per term; the GVIF^(1/(2*Df)) column
# adjusts for the term's degrees of freedom.
car::vif(model)
## GVIF Df GVIF^(1/(2*Df))
## race 1.197494 4 1.022785
## gender 1.063025 2 1.015397
## age 4.013282 9 1.080259
## admission_type_id 44.414202 6 1.371808
## discharge_disposition_id 2.741557 25 1.020375
## admission_source_id 29.819556 12 1.151960
## time_in_hospital 1.637884 1 1.279798
## medical_specialty 21.377699 68 1.022773
## num_lab_procedures 1.605224 1 1.266974
## num_procedures 1.408844 1 1.186947
## num_medications 1.956667 1 1.398809
## number_outpatient 1.072492 1 1.035612
## number_emergency 1.093574 1 1.045741
## number_inpatient 1.099747 1 1.048688
## number_diagnoses 1.377824 1 1.173807
## max_glu_serum 2.455822 3 1.161536
## A1Cresult 1.252679 3 1.038261
## metformin 1.566838 3 1.077715
## repaglinide 1.078690 3 1.012705
## nateglinide 1.039902 3 1.006542
## chlorpropamide 1.015443 3 1.002557
## glimepiride 1.231944 3 1.035377
## glipizide 1.515734 3 1.071776
## glyburide 1.475243 3 1.066950
## tolbutamide 1.002485 1 1.001242
## pioglitazone 1.187738 3 1.029090
## rosiglitazone 1.188497 3 1.029200
## acarbose 1.009084 3 1.001508
## miglitol 1.005541 3 1.000921
## tolazamide 1.004064 1 1.002030
## insulin 4.766724 3 1.297289
## glyburide.metformin 1.031464 3 1.005177
## change 3.600169 1 1.897411
## diabetesMed 2.297392 1 1.515715
# Refit the binomial GLM on every predictor except admission_type_id and
# medical_specialty, both of which showed large raw GVIF values in the
# vif() output of the full model.
model1 <- glm(
  readmitted ~ . - admission_type_id - medical_specialty,
  family = binomial,
  data = train_set
)
summary(model1)
##
## Call:
## glm(formula = readmitted ~ . - admission_type_id - medical_specialty,
## family = binomial, data = train_set)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5187 -1.0239 -0.7621 1.1942 2.5944
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.710e+00 2.712e+03 -0.001 0.998908
## raceAsian -3.177e-01 1.292e-01 -2.459 0.013916 *
## raceCaucasian 1.232e-02 2.870e-02 0.429 0.667831
## raceHispanic 2.185e-02 7.777e-02 0.281 0.778739
## raceOther -2.252e-01 9.321e-02 -2.416 0.015711 *
## genderMale -5.734e-02 2.309e-02 -2.484 0.013003 *
## genderUnknown/Invalid -1.467e+01 1.455e+03 -0.010 0.991957
## age[10-20) 7.925e-01 3.088e-01 2.566 0.010275 *
## age[20-30) 4.901e-01 3.007e-01 1.630 0.103115
## age[30-40) 6.749e-01 2.923e-01 2.309 0.020954 *
## age[40-50) 8.119e-01 2.891e-01 2.808 0.004979 **
## age[50-60) 8.006e-01 2.884e-01 2.776 0.005497 **
## age[60-70) 8.461e-01 2.883e-01 2.934 0.003342 **
## age[70-80) 9.639e-01 2.883e-01 3.343 0.000830 ***
## age[80-90) 8.797e-01 2.891e-01 3.043 0.002342 **
## age[90-100) 6.822e-01 2.966e-01 2.300 0.021432 *
## discharge_disposition_id2 4.450e-02 7.774e-02 0.572 0.567043
## discharge_disposition_id3 -4.297e-02 3.805e-02 -1.129 0.258789
## discharge_disposition_id4 1.504e-01 1.271e-01 1.183 0.236643
## discharge_disposition_id5 1.909e-01 9.454e-02 2.019 0.043445 *
## discharge_disposition_id6 1.412e-01 3.878e-02 3.640 0.000272 ***
## discharge_disposition_id7 -1.290e-01 1.631e-01 -0.791 0.428984
## discharge_disposition_id8 1.744e-01 3.845e-01 0.453 0.650232
## discharge_disposition_id9 1.623e+01 1.455e+03 0.011 0.991101
## discharge_disposition_id10 5.575e-01 1.420e+00 0.393 0.694565
## discharge_disposition_id11 -1.601e+01 5.829e+01 -0.275 0.783544
## discharge_disposition_id12 6.497e-01 1.421e+00 0.457 0.647502
## discharge_disposition_id13 -2.729e+00 3.663e-01 -7.450 9.33e-14 ***
## discharge_disposition_id14 -2.528e+00 4.169e-01 -6.062 1.34e-09 ***
## discharge_disposition_id15 1.452e+00 6.514e-01 2.230 0.025763 *
## discharge_disposition_id16 2.086e-01 6.886e-01 0.303 0.761923
## discharge_disposition_id17 -6.116e-01 7.451e-01 -0.821 0.411772
## discharge_disposition_id18 1.748e-01 8.560e-02 2.042 0.041133 *
## discharge_disposition_id19 -1.492e+01 8.383e+02 -0.018 0.985799
## discharge_disposition_id20 -1.540e+01 1.455e+03 -0.011 0.991556
## discharge_disposition_id22 7.837e-02 7.720e-02 1.015 0.310055
## discharge_disposition_id23 -5.424e-01 1.777e-01 -3.053 0.002268 **
## discharge_disposition_id24 2.735e-02 8.686e-01 0.031 0.974879
## discharge_disposition_id25 1.685e-01 9.007e-02 1.871 0.061315 .
## discharge_disposition_id27 -1.537e+01 1.455e+03 -0.011 0.991574
## discharge_disposition_id28 3.370e-01 5.594e-01 0.602 0.546885
## admission_source_id2 -5.316e-02 1.831e-01 -0.290 0.771537
## admission_source_id3 -2.041e-01 4.145e-01 -0.493 0.622348
## admission_source_id4 -4.916e-01 7.379e-02 -6.662 2.69e-11 ***
## admission_source_id5 -2.710e-01 1.223e-01 -2.215 0.026742 *
## admission_source_id6 -6.071e-01 7.567e-02 -8.024 1.02e-15 ***
## admission_source_id7 1.010e-01 2.767e-02 3.648 0.000264 ***
## admission_source_id8 6.478e-01 8.244e-01 0.786 0.431965
## admission_source_id9 -2.536e-01 3.885e-01 -0.653 0.513998
## admission_source_id10 4.403e-01 1.416e+00 0.311 0.755789
## admission_source_id14 -1.477e+01 1.455e+03 -0.010 0.991905
## admission_source_id17 1.573e-01 5.295e-02 2.971 0.002967 **
## admission_source_id22 -1.585e+01 1.455e+03 -0.011 0.991313
## time_in_hospital 2.160e-02 4.593e-03 4.703 2.56e-06 ***
## num_lab_procedures 1.357e-03 6.866e-04 1.976 0.048123 *
## num_procedures -1.516e-02 7.453e-03 -2.034 0.041970 *
## num_medications -4.223e-03 1.796e-03 -2.351 0.018706 *
## number_outpatient 1.006e-01 1.351e-02 7.447 9.54e-14 ***
## number_emergency 1.633e-01 1.975e-02 8.267 < 2e-16 ***
## number_inpatient 3.847e-01 1.208e-02 31.847 < 2e-16 ***
## number_diagnoses 6.909e-02 6.462e-03 10.692 < 2e-16 ***
## max_glu_serum>300 9.402e-02 1.409e-01 0.667 0.504532
## max_glu_serumNone -1.118e-01 9.672e-02 -1.156 0.247693
## max_glu_serumNorm -7.308e-02 1.096e-01 -0.667 0.504784
## A1Cresult>8 4.466e-02 6.786e-02 0.658 0.510488
## A1CresultNone 1.277e-01 5.827e-02 2.192 0.028394 *
## A1CresultNorm -1.171e-01 7.880e-02 -1.486 0.137356
## metforminNo 8.649e-03 1.569e-01 0.055 0.956048
## metforminSteady -1.293e-01 1.571e-01 -0.823 0.410739
## metforminUp -5.736e-02 1.873e-01 -0.306 0.759439
## repaglinideNo -3.799e-01 5.053e-01 -0.752 0.452228
## repaglinideSteady -1.404e-01 5.112e-01 -0.275 0.783602
## repaglinideUp -1.975e-01 5.856e-01 -0.337 0.735922
## nateglinideNo -9.574e-01 1.258e+00 -0.761 0.446780
## nateglinideSteady -8.170e-01 1.265e+00 -0.646 0.518266
## nateglinideUp -3.123e+00 1.621e+00 -1.926 0.054088 .
## chlorpropamideNo 1.498e+01 1.455e+03 0.010 0.991788
## chlorpropamideSteady 1.522e+01 1.455e+03 0.010 0.991655
## chlorpropamideUp 1.467e+01 1.455e+03 0.010 0.991959
## glimepirideNo -3.276e-02 2.339e-01 -0.140 0.888601
## glimepirideSteady -2.956e-03 2.370e-01 -0.012 0.990051
## glimepirideUp -1.816e-01 2.871e-01 -0.632 0.527138
## glipizideNo -3.080e-01 1.641e-01 -1.877 0.060474 .
## glipizideSteady -2.860e-01 1.642e-01 -1.742 0.081578 .
## glipizideUp -3.188e-01 1.995e-01 -1.599 0.109910
## glyburideNo -6.855e-02 1.619e-01 -0.423 0.671976
## glyburideSteady -1.209e-01 1.631e-01 -0.741 0.458609
## glyburideUp -2.254e-01 2.082e-01 -1.083 0.278986
## tolbutamideSteady -1.003e+00 1.164e+00 -0.862 0.388621
## pioglitazoneNo -4.147e-01 3.438e-01 -1.206 0.227812
## pioglitazoneSteady -4.123e-01 3.458e-01 -1.192 0.233137
## pioglitazoneUp -8.993e-03 4.150e-01 -0.022 0.982712
## rosiglitazoneNo 1.649e+00 5.669e-01 2.909 0.003624 **
## rosiglitazoneSteady 1.755e+00 5.681e-01 3.090 0.002002 **
## rosiglitazoneUp 1.138e+00 6.162e-01 1.847 0.064742 .
## acarboseNo -1.442e+01 9.990e+02 -0.014 0.988480
## acarboseSteady -1.386e+01 9.990e+02 -0.014 0.988931
## acarboseUp 1.624e+00 1.408e+03 0.001 0.999080
## miglitolNo -1.355e+01 1.455e+03 -0.009 0.992570
## miglitolSteady -1.300e+01 1.455e+03 -0.009 0.992874
## miglitolUp -2.913e+01 2.058e+03 -0.014 0.988709
## tolazamideSteady -1.053e+00 6.484e-01 -1.624 0.104447
## insulinNo -9.093e-02 5.903e-02 -1.540 0.123501
## insulinSteady -1.836e-01 4.557e-02 -4.028 5.62e-05 ***
## insulinUp -3.572e-02 4.640e-02 -0.770 0.441423
## glyburide.metforminNo 1.518e+01 1.455e+03 0.010 0.991678
## glyburide.metforminSteady 1.535e+01 1.455e+03 0.011 0.991587
## glyburide.metforminUp -9.244e-01 2.058e+03 0.000 0.999642
## changeNo 2.109e-03 4.274e-02 0.049 0.960649
## diabetesMedYes 3.115e-01 4.160e-02 7.490 6.90e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 48799 on 35506 degrees of freedom
## Residual deviance: 44819 on 35397 degrees of freedom
## AIC: 45039
##
## Number of Fisher Scoring iterations: 14
# Re-check collinearity on the reduced model `model1` with car's GVIF table.
car::vif(model1)
## GVIF Df GVIF^(1/(2*Df))
## race 1.112673 4 1.013435
## gender 1.043237 2 1.010638
## age 1.532131 9 1.023986
## discharge_disposition_id 1.617130 25 1.009659
## admission_source_id 2.288851 12 1.035104
## time_in_hospital 1.491617 1 1.221318
## num_lab_procedures 1.420194 1 1.191719
## num_procedures 1.270811 1 1.127302
## num_medications 1.726409 1 1.313929
## number_outpatient 1.051710 1 1.025529
## number_emergency 1.082024 1 1.040204
## number_inpatient 1.086830 1 1.042511
## number_diagnoses 1.293270 1 1.137220
## max_glu_serum 1.657352 3 1.087850
## A1Cresult 1.229636 3 1.035053
## metformin 1.547351 3 1.075470
## repaglinide 1.063043 3 1.010241
## nateglinide 1.036839 3 1.006048
## chlorpropamide 1.011201 3 1.001858
## glimepiride 1.216255 3 1.033168
## glipizide 1.507541 3 1.070808
## glyburide 1.462808 3 1.065445
## tolbutamide 1.002080 1 1.001040
## pioglitazone 1.181287 3 1.028157
## rosiglitazone 1.176252 3 1.027425
## acarbose 1.006716 3 1.001116
## miglitol 1.004580 3 1.000762
## tolazamide 1.001970 1 1.000984
## insulin 4.559680 3 1.287723
## glyburide.metformin 1.024788 3 1.004089
## change 3.592061 1 1.895273
## diabetesMed 2.293810 1 1.514533
# Score the training rows with model1 and cross-tabulate the actual
# readmitted value (rows) against the predicted class (columns), using a
# 0.5 cutoff on the predicted probability.
trainpredict <- predict(model1, newdata = train_set, type = "response")
p_class <- ifelse(trainpredict > 0.5, "1", "0")
# Pin the predicted-class levels so the table always has both a "0" and a
# "1" column. Without this, a model that predicts a single class produces a
# non-square table and diag() no longer pairs actual with predicted class.
# The argument is named so the printed header still reads `p_class`.
matrix_table <- table(
  train_set$readmitted,
  p_class = factor(p_class, levels = c("0", "1"))
)
matrix_table
## p_class
## 0 1
## 0 15938 3753
## 1 9256 6560
# Training accuracy: diagonal of the confusion matrix (agreeing cells)
# divided by the total number of tabulated rows, shown to 3 decimals.
accuracy <- sum(diag(matrix_table)) / sum(matrix_table)
round(accuracy, digits = 3)
## [1] 0.634
# Per-level row counts of admission_source_id in the test set; used to spot
# levels that occur here but have no coefficient in model1.
summary(test_set[["admission_source_id"]])
## 1 2 3 4 5 6 7 8 9 10 11 13 14 17 20 22
## 5287 51 17 394 154 461 7526 6 16 0 0 1 0 1303 0 2
## 25
## 0
# Remove the test rows with admission_source_id 13. model1's summary lists
# no coefficient for that level; presumably it is absent from train_set and
# predict() would reject it -- confirm against the training data.
test_set1 <- dplyr::filter(test_set, admission_source_id != 13)
# medical_specialty is a character column, so summary() reports only its
# length, class and mode; table() would be needed to see per-value counts.
summary(test_set[["medical_specialty"]])
## Length Class Mode
## 15218 character character
# Drop test rows for three medical specialties before scoring.
# NOTE(review): presumably these values never occur in train_set -- confirm.
test_set2 <- dplyr::filter(
  test_set1,
  !(medical_specialty %in% c(
    "Neurophysiology",
    "Pediatrics-InfectiousDiseases",
    "Proctology"
  ))
)
# Score the filtered test rows with model1 and cross-tabulate the actual
# readmitted value (rows) against the predicted class (columns), using a
# 0.5 cutoff on the predicted probability.
testpredict <- predict(model1, newdata = test_set2, type = "response")
p_class <- ifelse(testpredict > 0.5, "1", "0")
# Pin the predicted-class levels so the table always has both a "0" and a
# "1" column. Without this, a model that predicts a single class produces a
# non-square table and diag() no longer pairs actual with predicted class.
# The argument is named so the printed header still reads `p_class`.
matrix_table <- table(
  test_set2$readmitted,
  p_class = factor(p_class, levels = c("0", "1"))
)
matrix_table
## p_class
## 0 1
## 0 6815 1621
## 1 3932 2846
# Test accuracy: diagonal of the confusion matrix (agreeing cells) divided
# by the total number of tabulated rows, shown to 3 decimals.
accuracy <- sum(diag(matrix_table)) / sum(matrix_table)
round(accuracy, digits = 3)
## [1] 0.635
# ROCR lift and gain curves built from model1's training-set probabilities
# (trainpredict) and the observed readmitted labels.
pred <- prediction(trainpredict, train_set$readmitted)

# Lift against the rate of positive predictions ("rpp").
# The rows are hospital patients, so the axis is labelled accordingly.
perf <- performance(pred, "lift", "rpp")
plot(perf, main = "lift curve", xlab = "Proportion of Patients (sorted prob)")

# Gain chart: true-positive rate against the rate of positive predictions.
gain <- performance(pred, "tpr", "rpp")
plot(gain, col = "orange", lwd = 2)
library(xgboost)
# Inspect column types of hospital3 to decide which predictors need
# dummy encoding before they can be placed in a numeric matrix.
utils::str(hospital3)
## 'data.frame': 50725 obs. of 35 variables:
## $ race : chr "Caucasian" "Caucasian" "Caucasian" "Caucasian" ...
## $ gender : chr "Female" "Female" "Female" "Male" ...
## $ age : chr "[0-10)" "[90-100)" "[40-50)" "[80-90)" ...
## $ admission_type_id : Factor w/ 8 levels "1","2","3","4",..: 6 3 1 1 1 1 1 1 1 1 ...
## $ discharge_disposition_id: Factor w/ 26 levels "1","2","3","4",..: 24 3 3 6 1 3 1 2 1 1 ...
## $ admission_source_id : Factor w/ 17 levels "1","2","3","4",..: 1 4 7 7 7 7 1 7 7 7 ...
## $ time_in_hospital : int 1 12 7 10 3 6 2 5 6 2 ...
## $ medical_specialty : chr "Pediatrics-Endocrinology" "InternalMedicine" "Family/GeneralPractice" "Family/GeneralPractice" ...
## $ num_lab_procedures : int 41 33 60 55 29 64 25 52 27 41 ...
## $ num_procedures : int 0 3 0 1 0 3 2 0 0 0 ...
## $ num_medications : int 1 18 15 31 11 18 11 14 16 11 ...
## $ number_outpatient : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_emergency : int 0 0 1 0 0 0 0 0 0 0 ...
## $ number_inpatient : int 0 0 0 0 0 0 0 0 0 0 ...
## $ number_diagnoses : int 1 8 8 8 3 7 3 8 8 6 ...
## $ max_glu_serum : chr "None" "None" "None" "None" ...
## $ A1Cresult : chr "None" "None" "None" "None" ...
## $ metformin : chr "No" "No" "Steady" "No" ...
## $ repaglinide : chr "No" "No" "Up" "No" ...
## $ nateglinide : chr "No" "No" "No" "No" ...
## $ chlorpropamide : chr "No" "No" "No" "No" ...
## $ glimepiride : chr "No" "No" "No" "No" ...
## $ glipizide : chr "No" "No" "No" "No" ...
## $ glyburide : chr "No" "No" "No" "No" ...
## $ tolbutamide : chr "No" "No" "No" "No" ...
## $ pioglitazone : chr "No" "No" "No" "No" ...
## $ rosiglitazone : chr "No" "Steady" "No" "No" ...
## $ acarbose : chr "No" "No" "No" "No" ...
## $ miglitol : chr "No" "No" "No" "No" ...
## $ tolazamide : chr "No" "No" "No" "No" ...
## $ insulin : chr "No" "Steady" "Down" "Steady" ...
## $ glyburide.metformin : chr "No" "No" "No" "No" ...
## $ change : chr "No" "Ch" "Ch" "No" ...
## $ diabetesMed : chr "No" "Yes" "Yes" "Yes" ...
## $ readmitted : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 2 2 2 2 ...
# Names of the categorical (character or factor) predictors in hospital3.
category <- c(
  "race", "gender", "age",
  "admission_type_id", "discharge_disposition_id", "admission_source_id",
  "medical_specialty", "max_glu_serum", "A1Cresult",
  "metformin", "repaglinide", "nateglinide", "chlorpropamide",
  "glimepiride", "glipizide", "glyburide", "tolbutamide",
  "pioglitazone", "rosiglitazone", "acarbose", "miglitol",
  "tolazamide", "insulin", "glyburide.metformin",
  "change", "diabetesMed"
)

# Keep only those columns, then expand each one into 0/1 indicator columns
# with caret::dummyVars (one indicator per level, no reference level dropped).
hospital_cat <- hospital3[category]
dmy <- dummyVars(" ~ .", data = hospital_cat)

# data.frame() (not as.data.frame()) so the indicator names are made
# syntactic, e.g. "age[0-10)" becomes "age.0.10.".
trsf <- predict(dmy, newdata = hospital_cat) %>%
  data.frame()
head(trsf, n = 6)
## raceAfricanAmerican raceAsian raceCaucasian raceHispanic raceOther
## 1 0 0 1 0 0
## 10 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 0 1 0 0
## 18 0 0 1 0 0
## 27 0 0 1 0 0
## genderFemale genderMale genderUnknown.Invalid age.0.10. age.10.20.
## 1 1 0 0 1 0
## 10 1 0 0 0 0
## 13 1 0 0 0 0
## 14 0 1 0 0 0
## 18 1 0 0 0 0
## 27 0 1 0 0 0
## age.20.30. age.30.40. age.40.50. age.50.60. age.60.70. age.70.80. age.80.90.
## 1 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## 13 0 0 1 0 0 0 0
## 14 0 0 0 0 0 0 1
## 18 0 0 0 1 0 0 0
## 27 0 0 0 0 0 0 1
## age.90.100. admission_type_id.1 admission_type_id.2 admission_type_id.3
## 1 0 0 0 0
## 10 1 0 0 1
## 13 0 1 0 0
## 14 0 1 0 0
## 18 0 1 0 0
## 27 0 1 0 0
## admission_type_id.4 admission_type_id.5 admission_type_id.6
## 1 0 0 1
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_type_id.7 admission_type_id.8 discharge_disposition_id.1
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 1
## 27 0 0 0
## discharge_disposition_id.2 discharge_disposition_id.3
## 1 0 0
## 10 0 1
## 13 0 1
## 14 0 0
## 18 0 0
## 27 0 1
## discharge_disposition_id.4 discharge_disposition_id.5
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.6 discharge_disposition_id.7
## 1 0 0
## 10 0 0
## 13 0 0
## 14 1 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.8 discharge_disposition_id.9
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.10 discharge_disposition_id.11
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.12 discharge_disposition_id.13
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.14 discharge_disposition_id.15
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.16 discharge_disposition_id.17
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.18 discharge_disposition_id.19
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.20 discharge_disposition_id.22
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.23 discharge_disposition_id.24
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.25 discharge_disposition_id.27
## 1 1 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.28 admission_source_id.1 admission_source_id.2
## 1 0 1 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.3 admission_source_id.4 admission_source_id.5
## 1 0 0 0
## 10 0 1 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.6 admission_source_id.7 admission_source_id.8
## 1 0 0 0
## 10 0 0 0
## 13 0 1 0
## 14 0 1 0
## 18 0 1 0
## 27 0 1 0
## admission_source_id.9 admission_source_id.10 admission_source_id.11
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.13 admission_source_id.14 admission_source_id.17
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.20 admission_source_id.22 admission_source_id.25
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## medical_specialtyAllergyandImmunology medical_specialtyAnesthesiology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyAnesthesiology.Pediatric medical_specialtyCardiology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 1
## 27 0 1
## medical_specialtyCardiology.Pediatric medical_specialtyDCPTEAM
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyDentistry medical_specialtyDermatology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyEmergency.Trauma medical_specialtyEndocrinology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyEndocrinology.Metabolism
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyFamily.GeneralPractice medical_specialtyGastroenterology
## 1 0 0
## 10 0 0
## 13 1 0
## 14 1 0
## 18 0 0
## 27 0 0
## medical_specialtyGynecology medical_specialtyHematology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyHematology.Oncology medical_specialtyHospitalist
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyInfectiousDiseases medical_specialtyInternalMedicine
## 1 0 0
## 10 0 1
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyNephrology medical_specialtyNeurology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyNeurophysiology
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyObsterics.Gynecology.GynecologicOnco
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyObstetrics medical_specialtyObstetricsandGynecology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOncology medical_specialtyOphthalmology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOrthopedics medical_specialtyOrthopedics.Reconstructive
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOsteopath medical_specialtyOtolaryngology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOutreachServices medical_specialtyPathology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPediatrics medical_specialtyPediatrics.AllergyandImmunology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPediatrics.CriticalCare
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.EmergencyMedicine
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Endocrinology
## 1 1
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Hematology.Oncology
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.InfectiousDiseases
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Neurology
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Pulmonology medical_specialtyPerinatology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPhysicalMedicineandRehabilitation
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPhysicianNotFound medical_specialtyPodiatry
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyProctology medical_specialtyPsychiatry
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPsychiatry.Addictive
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPsychiatry.Child.Adolescent medical_specialtyPsychology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPulmonology medical_specialtyRadiologist
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyRadiology medical_specialtyResident
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyRheumatology medical_specialtySpeech
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySportsMedicine medical_specialtySurgeon
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.Cardiovascular
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtySurgery.Cardiovascular.Thoracic
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtySurgery.Colon.Rectal medical_specialtySurgery.General
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.Maxillofacial medical_specialtySurgery.Neuro
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.Pediatric medical_specialtySurgery.Plastic
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.PlasticwithinHeadandNeck
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtySurgery.Thoracic medical_specialtySurgery.Vascular
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgicalSpecialty medical_specialtyUrology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## max_glu_serum.200 max_glu_serum.300 max_glu_serumNone max_glu_serumNorm
## 1 0 0 1 0
## 10 0 0 1 0
## 13 0 0 1 0
## 14 0 0 1 0
## 18 0 0 1 0
## 27 0 0 1 0
## A1Cresult.7 A1Cresult.8 A1CresultNone A1CresultNorm metforminDown
## 1 0 0 1 0 0
## 10 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 0 1 0 0
## 18 0 0 1 0 0
## 27 1 0 0 0 0
## metforminNo metforminSteady metforminUp repaglinideDown repaglinideNo
## 1 1 0 0 0 1
## 10 1 0 0 0 1
## 13 0 1 0 0 0
## 14 1 0 0 0 1
## 18 1 0 0 0 1
## 27 0 1 0 0 1
## repaglinideSteady repaglinideUp nateglinideDown nateglinideNo
## 1 0 0 0 1
## 10 0 0 0 1
## 13 0 1 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## nateglinideSteady nateglinideUp chlorpropamideDown chlorpropamideNo
## 1 0 0 0 1
## 10 0 0 0 1
## 13 0 0 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## chlorpropamideSteady chlorpropamideUp glimepirideDown glimepirideNo
## 1 0 0 0 1
## 10 0 0 0 1
## 13 0 0 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## glimepirideSteady glimepirideUp glipizideDown glipizideNo glipizideSteady
## 1 0 0 0 1 0
## 10 0 0 0 1 0
## 13 0 0 0 1 0
## 14 0 0 0 1 0
## 18 0 0 0 1 0
## 27 0 0 0 1 0
## glipizideUp glyburideDown glyburideNo glyburideSteady glyburideUp
## 1 0 0 1 0 0
## 10 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 0 1 0 0
## 18 0 0 0 1 0
## 27 0 0 0 1 0
## tolbutamideNo tolbutamideSteady pioglitazoneDown pioglitazoneNo
## 1 1 0 0 1
## 10 1 0 0 1
## 13 1 0 0 1
## 14 1 0 0 1
## 18 1 0 0 1
## 27 1 0 0 1
## pioglitazoneSteady pioglitazoneUp rosiglitazoneDown rosiglitazoneNo
## 1 0 0 0 1
## 10 0 0 0 0
## 13 0 0 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## rosiglitazoneSteady rosiglitazoneUp acarboseDown acarboseNo acarboseSteady
## 1 0 0 0 1 0
## 10 1 0 0 1 0
## 13 0 0 0 1 0
## 14 0 0 0 1 0
## 18 0 0 0 1 0
## 27 0 0 0 1 0
## acarboseUp miglitolDown miglitolNo miglitolSteady miglitolUp tolazamideNo
## 1 0 0 1 0 0 1
## 10 0 0 1 0 0 1
## 13 0 0 1 0 0 1
## 14 0 0 1 0 0 1
## 18 0 0 1 0 0 1
## 27 0 0 1 0 0 1
## tolazamideSteady insulinDown insulinNo insulinSteady insulinUp
## 1 0 0 1 0 0
## 10 0 0 0 1 0
## 13 0 1 0 0 0
## 14 0 0 0 1 0
## 18 0 0 1 0 0
## 27 0 0 1 0 0
## glyburide.metforminDown glyburide.metforminNo glyburide.metforminSteady
## 1 0 1 0
## 10 0 1 0
## 13 0 1 0
## 14 0 1 0
## 18 0 1 0
## 27 0 1 0
## glyburide.metforminUp changeCh changeNo diabetesMedNo diabetesMedYes
## 1 0 0 1 1 0
## 10 0 1 0 0 1
## 13 0 1 0 0 1
## 14 0 0 1 0 1
## 18 0 0 1 0 1
## 27 0 1 0 0 1
# Assemble the all-numeric modelling table: the numeric predictors, the
# dummy-encoded categorical predictors (trsf), and a 0/1 response column.

# select_if is called through its namespace; MASS is attached after dplyr
# in this script and masks the unqualified select().
hosp_numeric <- dplyr::select_if(hospital3, is.numeric)

# readmitted is a factor with levels "0"/"1". Convert through the level
# labels rather than the internal integer codes, so the result does not
# depend on how the levels happen to be ordered or numbered.
hosp_readmitted <- as.numeric(as.character(hospital3$readmitted))

# cbind names the response column after the variable: `hosp_readmitted`.
hospital4 <- cbind(hosp_numeric, trsf, hosp_readmitted)
head(hospital4)
## time_in_hospital num_lab_procedures num_procedures num_medications
## 1 1 41 0 1
## 10 12 33 3 18
## 13 7 60 0 15
## 14 10 55 1 31
## 18 3 29 0 11
## 27 6 64 3 18
## number_outpatient number_emergency number_inpatient number_diagnoses
## 1 0 0 0 1
## 10 0 0 0 8
## 13 0 1 0 8
## 14 0 0 0 8
## 18 0 0 0 3
## 27 0 0 0 7
## raceAfricanAmerican raceAsian raceCaucasian raceHispanic raceOther
## 1 0 0 1 0 0
## 10 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 0 1 0 0
## 18 0 0 1 0 0
## 27 0 0 1 0 0
## genderFemale genderMale genderUnknown.Invalid age.0.10. age.10.20.
## 1 1 0 0 1 0
## 10 1 0 0 0 0
## 13 1 0 0 0 0
## 14 0 1 0 0 0
## 18 1 0 0 0 0
## 27 0 1 0 0 0
## age.20.30. age.30.40. age.40.50. age.50.60. age.60.70. age.70.80. age.80.90.
## 1 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## 13 0 0 1 0 0 0 0
## 14 0 0 0 0 0 0 1
## 18 0 0 0 1 0 0 0
## 27 0 0 0 0 0 0 1
## age.90.100. admission_type_id.1 admission_type_id.2 admission_type_id.3
## 1 0 0 0 0
## 10 1 0 0 1
## 13 0 1 0 0
## 14 0 1 0 0
## 18 0 1 0 0
## 27 0 1 0 0
## admission_type_id.4 admission_type_id.5 admission_type_id.6
## 1 0 0 1
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_type_id.7 admission_type_id.8 discharge_disposition_id.1
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 1
## 27 0 0 0
## discharge_disposition_id.2 discharge_disposition_id.3
## 1 0 0
## 10 0 1
## 13 0 1
## 14 0 0
## 18 0 0
## 27 0 1
## discharge_disposition_id.4 discharge_disposition_id.5
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.6 discharge_disposition_id.7
## 1 0 0
## 10 0 0
## 13 0 0
## 14 1 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.8 discharge_disposition_id.9
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.10 discharge_disposition_id.11
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.12 discharge_disposition_id.13
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.14 discharge_disposition_id.15
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.16 discharge_disposition_id.17
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.18 discharge_disposition_id.19
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.20 discharge_disposition_id.22
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.23 discharge_disposition_id.24
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.25 discharge_disposition_id.27
## 1 1 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## discharge_disposition_id.28 admission_source_id.1 admission_source_id.2
## 1 0 1 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.3 admission_source_id.4 admission_source_id.5
## 1 0 0 0
## 10 0 1 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.6 admission_source_id.7 admission_source_id.8
## 1 0 0 0
## 10 0 0 0
## 13 0 1 0
## 14 0 1 0
## 18 0 1 0
## 27 0 1 0
## admission_source_id.9 admission_source_id.10 admission_source_id.11
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.13 admission_source_id.14 admission_source_id.17
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## admission_source_id.20 admission_source_id.22 admission_source_id.25
## 1 0 0 0
## 10 0 0 0
## 13 0 0 0
## 14 0 0 0
## 18 0 0 0
## 27 0 0 0
## medical_specialtyAllergyandImmunology medical_specialtyAnesthesiology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyAnesthesiology.Pediatric medical_specialtyCardiology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 1
## 27 0 1
## medical_specialtyCardiology.Pediatric medical_specialtyDCPTEAM
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyDentistry medical_specialtyDermatology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyEmergency.Trauma medical_specialtyEndocrinology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyEndocrinology.Metabolism
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyFamily.GeneralPractice medical_specialtyGastroenterology
## 1 0 0
## 10 0 0
## 13 1 0
## 14 1 0
## 18 0 0
## 27 0 0
## medical_specialtyGynecology medical_specialtyHematology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyHematology.Oncology medical_specialtyHospitalist
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyInfectiousDiseases medical_specialtyInternalMedicine
## 1 0 0
## 10 0 1
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyNephrology medical_specialtyNeurology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyNeurophysiology
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyObsterics.Gynecology.GynecologicOnco
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyObstetrics medical_specialtyObstetricsandGynecology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOncology medical_specialtyOphthalmology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOrthopedics medical_specialtyOrthopedics.Reconstructive
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOsteopath medical_specialtyOtolaryngology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyOutreachServices medical_specialtyPathology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPediatrics medical_specialtyPediatrics.AllergyandImmunology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPediatrics.CriticalCare
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.EmergencyMedicine
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Endocrinology
## 1 1
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Hematology.Oncology
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.InfectiousDiseases
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Neurology
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPediatrics.Pulmonology medical_specialtyPerinatology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPhysicalMedicineandRehabilitation
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPhysicianNotFound medical_specialtyPodiatry
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyProctology medical_specialtyPsychiatry
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPsychiatry.Addictive
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtyPsychiatry.Child.Adolescent medical_specialtyPsychology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyPulmonology medical_specialtyRadiologist
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyRadiology medical_specialtyResident
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtyRheumatology medical_specialtySpeech
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySportsMedicine medical_specialtySurgeon
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.Cardiovascular
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtySurgery.Cardiovascular.Thoracic
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtySurgery.Colon.Rectal medical_specialtySurgery.General
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.Maxillofacial medical_specialtySurgery.Neuro
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.Pediatric medical_specialtySurgery.Plastic
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgery.PlasticwithinHeadandNeck
## 1 0
## 10 0
## 13 0
## 14 0
## 18 0
## 27 0
## medical_specialtySurgery.Thoracic medical_specialtySurgery.Vascular
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## medical_specialtySurgicalSpecialty medical_specialtyUrology
## 1 0 0
## 10 0 0
## 13 0 0
## 14 0 0
## 18 0 0
## 27 0 0
## max_glu_serum.200 max_glu_serum.300 max_glu_serumNone max_glu_serumNorm
## 1 0 0 1 0
## 10 0 0 1 0
## 13 0 0 1 0
## 14 0 0 1 0
## 18 0 0 1 0
## 27 0 0 1 0
## A1Cresult.7 A1Cresult.8 A1CresultNone A1CresultNorm metforminDown
## 1 0 0 1 0 0
## 10 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 0 1 0 0
## 18 0 0 1 0 0
## 27 1 0 0 0 0
## metforminNo metforminSteady metforminUp repaglinideDown repaglinideNo
## 1 1 0 0 0 1
## 10 1 0 0 0 1
## 13 0 1 0 0 0
## 14 1 0 0 0 1
## 18 1 0 0 0 1
## 27 0 1 0 0 1
## repaglinideSteady repaglinideUp nateglinideDown nateglinideNo
## 1 0 0 0 1
## 10 0 0 0 1
## 13 0 1 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## nateglinideSteady nateglinideUp chlorpropamideDown chlorpropamideNo
## 1 0 0 0 1
## 10 0 0 0 1
## 13 0 0 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## chlorpropamideSteady chlorpropamideUp glimepirideDown glimepirideNo
## 1 0 0 0 1
## 10 0 0 0 1
## 13 0 0 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## glimepirideSteady glimepirideUp glipizideDown glipizideNo glipizideSteady
## 1 0 0 0 1 0
## 10 0 0 0 1 0
## 13 0 0 0 1 0
## 14 0 0 0 1 0
## 18 0 0 0 1 0
## 27 0 0 0 1 0
## glipizideUp glyburideDown glyburideNo glyburideSteady glyburideUp
## 1 0 0 1 0 0
## 10 0 0 1 0 0
## 13 0 0 1 0 0
## 14 0 0 1 0 0
## 18 0 0 0 1 0
## 27 0 0 0 1 0
## tolbutamideNo tolbutamideSteady pioglitazoneDown pioglitazoneNo
## 1 1 0 0 1
## 10 1 0 0 1
## 13 1 0 0 1
## 14 1 0 0 1
## 18 1 0 0 1
## 27 1 0 0 1
## pioglitazoneSteady pioglitazoneUp rosiglitazoneDown rosiglitazoneNo
## 1 0 0 0 1
## 10 0 0 0 0
## 13 0 0 0 1
## 14 0 0 0 1
## 18 0 0 0 1
## 27 0 0 0 1
## rosiglitazoneSteady rosiglitazoneUp acarboseDown acarboseNo acarboseSteady
## 1 0 0 0 1 0
## 10 1 0 0 1 0
## 13 0 0 0 1 0
## 14 0 0 0 1 0
## 18 0 0 0 1 0
## 27 0 0 0 1 0
## acarboseUp miglitolDown miglitolNo miglitolSteady miglitolUp tolazamideNo
## 1 0 0 1 0 0 1
## 10 0 0 1 0 0 1
## 13 0 0 1 0 0 1
## 14 0 0 1 0 0 1
## 18 0 0 1 0 0 1
## 27 0 0 1 0 0 1
## tolazamideSteady insulinDown insulinNo insulinSteady insulinUp
## 1 0 0 1 0 0
## 10 0 0 0 1 0
## 13 0 1 0 0 0
## 14 0 0 0 1 0
## 18 0 0 1 0 0
## 27 0 0 1 0 0
## glyburide.metforminDown glyburide.metforminNo glyburide.metforminSteady
## 1 0 1 0
## 10 0 1 0
## 13 0 1 0
## 14 0 1 0
## 18 0 1 0
## 27 0 1 0
## glyburide.metforminUp changeCh changeNo diabetesMedNo diabetesMedYes
## 1 0 0 1 1 0
## 10 0 1 0 0 1
## 13 0 1 0 0 1
## 14 0 0 1 0 1
## 18 0 0 1 0 1
## 27 0 1 0 0 1
## hosp_readmitted
## 1 0
## 10 0
## 13 1
## 14 0
## 18 0
## 27 0
# Train/test split and a small XGBoost baseline for hospital readmission.
#
# NOTE(review): the previously recorded run reported "test-error= 0" and a
# first-round train-logloss of 0.437525 after only two depth-3 boosting
# rounds. That is the signature of target leakage: the feature matrix still
# contained the `hosp_readmitted` outcome column. The outcome is now excluded
# from the predictors. Recorded outputs that depended on the leaked column
# (dim, training log, threshold preview, test error) were removed from this
# chunk; re-knit to regenerate them.
hospital_matrix <- data.matrix(hospital4)

# Labels are indexed by position, so they must line up row-for-row with the
# feature matrix; fail loudly instead of silently mis-pairing rows and labels.
stopifnot(nrow(hospital_matrix) == length(hosp_readmitted))

# Predictors are every column except the outcome itself. `hospital_matrix`
# is left as-is in case later code relies on the full matrix.
label_col <- which(colnames(hospital_matrix) == "hosp_readmitted")
feature_matrix <- if (length(label_col) > 0) {
  hospital_matrix[, -label_col, drop = FALSE]
} else {
  hospital_matrix
}

# First 70% of rows train, the remainder tests (no shuffling, as before).
numberOfTrainingSamples <- round(length(hosp_readmitted) * .7)
train_rows <- seq_len(numberOfTrainingSamples)
# Explicit complement: negative indexing with an empty vector selects
# nothing rather than everything.
test_rows <- setdiff(seq_len(nrow(feature_matrix)), train_rows)

train_data <- feature_matrix[train_rows, , drop = FALSE]
train_labels <- hosp_readmitted[train_rows]
dim(train_data)

test_data <- feature_matrix[test_rows, , drop = FALSE]
test_labels <- hosp_readmitted[test_rows]

dtrain <- xgb.DMatrix(data = train_data, label = train_labels)
dtest <- xgb.DMatrix(data = test_data, label = test_labels)

model <- xgboost(
  data = train_data,             # predictor matrix (outcome column removed)
  label = train_labels,          # 0/1 readmission labels
  max_depth = 3,                 # the maximum depth of each decision tree
  nrounds = 2,                   # max number of boosting iterations
  objective = "binary:logistic", # the objective function
  eval_metric = "logloss"        # explicit, so no default-metric warning
)

# Predicted probabilities of readmission for the held-out rows.
pred <- predict(model, dtest)

# Preview the thresholded predictions next to the true labels. The cut-off
# is 0.5; the earlier `pred>5` could never be TRUE for a probability.
head(pred > 0.5)
head(test_labels)
## [1] 1 0 0 0 1 0

# Misclassification rate at the 0.5 threshold.
err <- mean(as.numeric(pred > 0.5) != test_labels)
print(paste("test-error=", err))