library(e1071)
library(readr)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Read the tab-delimited appointment export into a data frame.
Records <- read.delim("C:/Users/aksha/Desktop/Shiny app/Forecasting - NO Show Project/Records.csv")
# View(Records)
# Keep only the rows whose second column (ZIP, per the str() listing) is not NA.
Records <- Records[!is.na(Records[[2]]), ]
# Show the column types of the loaded data.
str(Records)
## 'data.frame': 20212 obs. of 54 variables:
## $ MRN : int 5406458 5076135 5373626 2917014 3058711 5234503 2966088 2939130 2938995 5281107 ...
## $ ZIP : Factor w/ 636 levels "07009-1316","07013-1030",..: 611 585 263 628 582 633 603 524 623 546 ...
## $ DistanceToClinic : num 10.6 8.7 10.8 6.6 6 0.8 11.9 14.4 4.5 21.7 ...
## $ AGE : int 54 60 69 65 39 56 50 58 57 51 ...
## $ DT : Factor w/ 505 levels "1/10/2014","1/12/2015",..: 33 351 275 12 189 218 341 375 208 239 ...
## $ Time : Factor w/ 48 levels "01:00PM","01:10PM",..: 10 42 28 14 35 3 3 27 14 27 ...
## $ TimeFrame_Hour : int 2 12 8 3 10 1 1 8 3 8 ...
## $ Weekday : Factor w/ 5 levels "Friday","Monday",..: 2 2 1 3 2 2 1 2 2 4 ...
## $ Month : Factor w/ 12 levels "April","August",..: 5 7 1 5 4 8 7 7 8 8 ...
## $ Season : logi NA NA NA NA NA NA ...
## $ SCHED.PROV : Factor w/ 25 levels "AHMAD,HAROON RES",..: 5 24 2 25 7 19 5 7 11 14 ...
## $ SCHEDPROV_LastName : Factor w/ 25 levels "AHMAD","ALBRECHT",..: 5 23 2 25 6 18 5 6 10 13 ...
## $ VT : Factor w/ 17 levels "BOT","BTR","DOP",..: 5 7 7 5 7 7 7 7 7 7 ...
## $ VisitType : Factor w/ 2 levels "IPV","RPV": 1 1 1 1 1 1 1 1 1 1 ...
## $ DURATION : int 90 60 60 90 60 90 90 60 90 60 ...
## $ CANCEL.DT : Factor w/ 557 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CAN.BUMP.INITIAL : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CAN.BUMP.INITITALS : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CANCEL.REASON : Factor w/ 23 levels "","DEATH IN FAMILY",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PCC : Factor w/ 2356 levels "","ABBAS ,SHAHIDA",..: 1386 1183 2204 1745 1264 856 11 2072 791 2172 ...
## $ Lead.Time : int 0 0 1 3 3 3 4 4 4 4 ...
## $ DT.WHEN.SCHED : Factor w/ 616 levels "1/10/2014","1/12/2015",..: 42 452 334 167 207 251 487 481 295 220 ...
## $ DT.WHEN.RESCHED : Factor w/ 552 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COMMENTS : Factor w/ 3995 levels "#NAME?","?OF SZ DISORDER",..: 440 584 1959 2598 1730 1576 1823 2241 1237 1374 ...
## $ MARITAL : Factor w/ 5 levels "DIVORCED","MARRIED",..: 1 2 5 2 2 4 2 1 2 2 ...
## $ SEX : Factor w/ 2 levels "F","M": 1 1 1 2 1 2 1 2 2 2 ...
## $ EMPLOYER : Factor w/ 22 levels "","AON CONSULTING",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REG.FSC : Factor w/ 61 levels "AETNA HMO","AETNA MEDICARE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REG.FSC.1 : Factor w/ 5 levels "Commercial","Indigent",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SCH.PROV.CATEGORY : Factor w/ 4 levels "EPILEPSY","GEN NEUROLOGY/HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
## $ SCH.PROV.CATEGORY.1 : Factor w/ 4 levels "EPILEPSY","GENNEUROLOGY_HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
## $ INV..BILLED : int 12411684 11833282 12675891 11707151 11569067 11658495 11880306 11861189 11641511 12525819 ...
## $ INVBAL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ invoicebalance : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BILLING.PROVIDER : Factor w/ 26 levels "","ALBRECHT,CATHERINE",..: 26 26 25 24 8 26 12 8 26 15 ...
## $ SERVICING.PROVIDER : Factor w/ 39 levels "","AHMAD,HAROON RES",..: 10 38 4 39 13 32 10 13 17 22 ...
## $ HOS : Factor w/ 5 levels "","CANCER INSTITUTE OF NEW JERSEY",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ DX1 : Factor w/ 341 levels "","0","135","138",..: 268 133 134 155 289 292 301 303 117 88 ...
## $ DX1.DESCRIPTION : Factor w/ 343 levels "","ABDOMINAL PAIN OTHER SITE",..: 320 240 239 30 94 222 92 117 59 112 ...
## $ DX2 : Factor w/ 427 levels "","130.9","153.9",..: 1 230 1 1 1 1 1 1 1 1 ...
## $ DX2.DESCRIPTION : Factor w/ 427 levels "","ABDOM/PELVIC SWELLING UNSP SITE",..: 1 185 1 1 1 1 1 1 1 1 ...
## $ DX3 : Factor w/ 323 levels "","13.04","130.7",..: 1 119 1 1 1 1 1 1 1 1 ...
## $ DX3.DESCRIPTION : Factor w/ 323 levels "","ABNORMAL CNS FUNCT STUDY OT",..: 1 297 1 1 1 1 1 1 1 1 ...
## $ DX4 : Factor w/ 189 levels "","183","191.9",..: 1 78 1 1 1 1 1 1 1 1 ...
## $ DX4.DESCRIPTION : Factor w/ 189 levels "","ABNORM EXAM FINDINGS,OTHER",..: 1 176 1 1 1 1 1 1 1 1 ...
## $ DX5 : num NA NA NA NA NA NA NA NA NA NA ...
## $ DX5.DESCRIPTION : Factor w/ 14 levels "","ABNORMALITY OF GAIT",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REF.PROV : Factor w/ 2309 levels "","ABBAS ,SHAHIDA",..: 1 1 2157 1713 1216 830 11 2016 761 171 ...
## $ REF.PROV.ZIP : Factor w/ 366 levels "","07860-2769",..: 1 1 108 342 319 356 270 356 358 358 ...
## $ REF.PROV.SPEC : int NA NA NA NA NA NA NA NA NA NA ...
## $ STATUS : Factor w/ 3 levels "ARR","CAN","NOS": 1 1 1 1 1 1 1 1 1 1 ...
## $ CancellationTiming_Days: int NA NA NA NA NA NA NA NA NA NA ...
## $ CancellationCategory : Factor w/ 6 levels "","168to336hr",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Outcome : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary of the data as loaded, before any type conversion.
summary(Records)
## MRN ZIP DistanceToClinic AGE
## Min. :2902398 8831 : 976 Min. : 0.80 Min. : 6.00
## 1st Qu.:3011708 8901 : 852 1st Qu.: 6.90 1st Qu.:44.00
## Median :4715386 8873 : 817 Median : 14.10 Median :60.00
## Mean :4215219 8902 : 642 Mean : 19.94 Mean :57.23
## 3rd Qu.:5250109 8816 : 627 3rd Qu.: 30.40 3rd Qu.:71.00
## Max. :5451480 8854 : 593 Max. :120.00 Max. :97.00
## (Other):15705
## DT Time TimeFrame_Hour Weekday
## 3/16/2015: 86 12:30PM: 1790 Min. : 1.000 Friday :2627
## 11/3/2014: 82 10:30AM: 1708 1st Qu.: 3.000 Monday :5032
## 4/2/2015 : 78 09:30AM: 1506 Median : 9.000 Thursday :5021
## 3/30/2015: 77 10:00AM: 1379 Mean : 7.329 Tuesday :4711
## 4/16/2015: 76 11:00AM: 1368 3rd Qu.:11.000 Wednesday:2821
## 5/11/2015: 75 08:30AM: 1234 Max. :12.000
## (Other) :19738 (Other):11227
## Month Season SCHED.PROV
## March :1950 Mode:logical SAGE,JACOB :2605
## October :1858 NA's:20212 MARK,MARGERY :2591
## May :1797 SCHNEIDER,DANIEL :2368
## September:1756 GERHARDSTEIN,BRIAN:2242
## December :1744 GOLBE,LAWRENCE :2140
## January :1714 ALBRECHT,CATHERINE:1235
## (Other) :9393 (Other) :7031
## SCHEDPROV_LastName VT VisitType DURATION
## SAGE :2605 RPV :9147 IPV: 7147 Min. : 15.00
## MARK :2591 IPV :6338 RPV:13065 1st Qu.: 30.00
## SCHNEIDER :2368 BTR :1852 Median : 30.00
## GERHARDSTEIN:2242 P60 : 734 Mean : 45.46
## GOLBE :2140 RBH : 605 3rd Qu.: 60.00
## ALBRECHT :1235 IBH : 351 Max. :690.00
## (Other) :7031 (Other):1185
## CANCEL.DT CAN.BUMP.INITIAL CAN.BUMP.INITITALS
## :13895 :13895 :13895
## 1/26/2015: 66 RWJKXG : 1941 RWJKXG : 1941
## 3/5/2015 : 36 RWJLAC : 1313 RWJLAC : 1313
## 9/17/2013: 35 RWJYJMERCUR: 832 RWJYJMERCUR: 832
## 1/21/2014: 32 RWJVCW : 542 RWJVCW : 542
## 12/1/2014: 31 RWJAOG : 262 RWJAOG : 262
## (Other) : 6117 (Other) : 1427 (Other) : 1427
## CANCEL.REASON PCC Lead.Time
## :13904 : 832 Min. : 0.00
## OTHER : 2535 HASTINGS,SHIRIN: 222 1st Qu.: 13.00
## PER PT REQUEST : 2335 YU,FRAN : 192 Median : 36.00
## PER DOCTOR : 488 ROSENFELD,JANE : 172 Mean : 53.41
## TRANSPORTATION PROBLEMS: 175 OTHER,REFPHYS : 162 3rd Qu.: 88.00
## PER CLIENT TELL REPORT : 161 ARMAS,BARBARA J: 120 Max. :207.00
## (Other) : 614 (Other) :18512
## DT.WHEN.SCHED DT.WHEN.RESCHED COMMENTS
## 12/1/2014 : 104 :16411 RPV : 5440
## 2/2/2015 : 85 1/26/2015: 54 BTR : 1541
## 1/26/2015 : 79 5/19/2015: 26 RPV/FOLLOW UP: 1208
## 9/16/2013 : 78 1/21/2014: 24 RBH : 422
## 1/5/2015 : 76 3/5/2015 : 24 F/U : 389
## 12/15/2014: 76 11/5/2014: 22 IPV/HEADACHES: 287
## (Other) :19714 (Other) : 3651 (Other) :10925
## MARITAL SEX EMPLOYER
## DIVORCED : 1305 F:10611 :20102
## MARRIED :10909 M: 9601 SOMERSET COUNTY : 13
## SEPARATED: 257 RETIRED : 9
## SINGLE : 6384 RWJ : 9
## WIDOWED : 1357 MIDD CTY BD OF SOC SVCS: 8
## PATHMARK : 8
## (Other) : 63
## REG.FSC REG.FSC.1
## MEDICARE US :8511 Commercial:6358
## HORIZON PPO :2039 Indigent : 765
## HORIZON NJ HEALTH HORIZON MCAID:2020 Medicaid :3689
## UNITED HEALTHCARE MEDICAID :1355 Medicare :9363
## HORIZON POS : 957 Other : 37
## AETNA PPO : 621
## (Other) :4709
## SCH.PROV.CATEGORY SCH.PROV.CATEGORY.1
## EPILEPSY : 3494 EPILEPSY : 3494
## GEN NEUROLOGY/HEADACHE: 3347 GENNEUROLOGY_HEADACHE: 3347
## MOVEMENT DISORDERS :10903 MOVEMENT DISORDERS :10903
## RESIDENT : 2468 RESIDENT : 2468
##
##
##
## INV..BILLED INVBAL invoicebalance
## Min. :11004764 Min. :-310.22 Min. :-310.22
## 1st Qu.:11534858 1st Qu.: 0.00 1st Qu.: 0.00
## Median :11993128 Median : 0.00 Median : 0.00
## Mean :11973455 Mean : 11.58 Mean : 11.58
## 3rd Qu.:12426717 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :13092699 Max. :8456.00 Max. :8456.00
## NA's :7477 NA's :7477 NA's :7477
## BILLING.PROVIDER SERVICING.PROVIDER
## :7477 :7477
## SCHNEIDER,DANIEL :2202 SAGE,JACOB :1783
## MARK,MARGERY :1904 MARK,MARGERY :1755
## SAGE,JACOB :1783 GOLBE,LAWRENCE :1468
## GERHARDSTEIN,BRIAN:1613 SCHNEIDER,DANIEL :1464
## GOLBE,LAWRENCE :1469 GERHARDSTEIN,BRIAN:1338
## (Other) :3764 (Other) :4927
## HOS DX1
## : 7482 :7477
## CANCER INSTITUTE OF NEW JERSEY : 19 332 :4079
## CHILD HEALTH INSTITUE OF NEW JE: 9 784 :1041
## CLINICAL ACADEMIC BUILDING :12658 333.83 : 659
## ROBERT WOOD JOHNSON HOSPITAL : 44 345.41 : 529
## 345.9 : 469
## (Other):5958
## DX1.DESCRIPTION DX2
## :7477 :17319
## PARALYSIS AGITANS :4079 784 : 335
## HEADACHE :1041 723.1 : 151
## SPASMODIC TORTICOLLIS : 659 332 : 115
## PARTIAL EPILEPSY IMPAIRMENT INTRAC: 529 782 : 103
## UNS EPILEPSY WO INTRACT EPILEPSY : 469 780.93 : 97
## (Other) :5958 (Other): 2092
## DX2.DESCRIPTION DX3
## :17319 :19030
## HEADACHE : 335 784 : 63
## CERVICALGIA : 151 723.1 : 61
## PARALYSIS AGITANS : 115 782 : 59
## DISTURBANCE SKIN SENSATION: 103 356.9 : 42
## MEMORY LOSS : 97 339.2 : 32
## (Other) : 2092 (Other): 925
## DX3.DESCRIPTION DX4
## :19030 :19705
## HEADACHE : 63 356.9 : 19
## CERVICALGIA : 61 782 : 19
## DISTURBANCE SKIN SENSATION : 59 784 : 17
## UNS IDIOPATHIC PERIPH NEUROPATHY : 42 V26.33 : 15
## POST-TRAUMATIC HEADACHE, UNSPECIFIED: 32 729.5 : 13
## (Other) : 925 (Other): 424
## DX4.DESCRIPTION DX5
## :19705 Min. : 93.89
## DISTURBANCE SKIN SENSATION : 19 1st Qu.:305.70
## UNS IDIOPATHIC PERIPH NEUROPATHY: 19 Median :378.82
## HEADACHE : 17 Mean :484.39
## GENETIC COUNSELING : 15 3rd Qu.:780.93
## MEMORY LOSS : 13 Max. :787.20
## (Other) : 424 NA's :20197
## DX5.DESCRIPTION REF.PROV
## :20197 : 8209
## MEMORY LOSS : 3 ROSENFELD,JANE : 106
## ABNORMALITY OF GAIT : 1 ARMAS,BARBARA J: 98
## DISPLACE INTERVERT DISC SITE UNS: 1 YU,FRAN : 92
## DYSPHAGIA,UNSPECIFIED : 1 MARK,MARGERY H : 89
## MONONEURITIS UNS SITE : 1 DANISH,SHABBAR : 81
## (Other) : 8 (Other) :11537
## REF.PROV.ZIP REF.PROV.SPEC STATUS CancellationTiming_Days
## :9074 Min. :88 ARR:12270 Min. : 0
## 8901 :2061 1st Qu.:88 CAN: 6317 1st Qu.: 1
## 8816 : 504 Median :88 NOS: 1625 Median : 6
## 8903 : 333 Mean :88 Mean : 18
## 8831 : 309 3rd Qu.:88 3rd Qu.: 23
## 8857 : 308 Max. :88 Max. :203
## (Other):7623 NA's :20211 NA's :13895
## CancellationCategory Outcome
## :13895 ARR:12270
## 168to336hr: 633 NOS: 7942
## 24hr : 2022
## 24to48hr : 289
## 336hr : 2017
## 48to168hr : 1356
##
# Converting to nominal and numeric attributes
Records$MRN <- as.numeric(Records$MRN)
# ZIP is read as text/factor (it mixes "8831" and "07009-1316" style values).
# as.numeric() applied directly to a factor returns the internal level codes
# (1..nlevels), not the ZIP itself, so convert via as.character() and drop the
# "+4" suffix first. Values that are still non-numeric become NA with a warning.
Records$ZIP <- as.numeric(sub("-.*$", "", as.character(Records$ZIP)))
# NOTE: as.integer() truncates toward zero (e.g. 10.6 -> 10, 0.8 -> 0); kept
# unchanged so the model inputs stay the same.
Records$DistanceToClinic <- as.integer(Records$DistanceToClinic)
# Columns treated as nominal (categorical) attributes.
nominal_cols <- c(
  "TimeFrame_Hour", "Weekday", "Month", "SCHED.PROV", "VT", "VisitType",
  "MARITAL", "SEX", "REG.FSC.1", "SCH.PROV.CATEGORY.1", "Outcome", "STATUS"
)
Records[nominal_cols] <- lapply(Records[nominal_cols], as.factor)
# Overall summary of Records after the conversions
summary(Records)
## MRN ZIP DistanceToClinic AGE
## Min. :2902398 Min. : 1.0 Min. : 0.00 Min. : 6.00
## 1st Qu.:3011708 1st Qu.:384.0 1st Qu.: 6.00 1st Qu.:44.00
## Median :4715386 Median :569.0 Median : 14.00 Median :60.00
## Mean :4215219 Mean :491.6 Mean : 19.46 Mean :57.23
## 3rd Qu.:5250109 3rd Qu.:609.0 3rd Qu.: 30.00 3rd Qu.:71.00
## Max. :5451480 Max. :636.0 Max. :120.00 Max. :97.00
##
## DT Time TimeFrame_Hour Weekday
## 3/16/2015: 86 12:30PM: 1790 10 :3246 Friday :2627
## 11/3/2014: 82 10:30AM: 1708 9 :2964 Monday :5032
## 4/2/2015 : 78 09:30AM: 1506 1 :2918 Thursday :5021
## 3/30/2015: 77 10:00AM: 1379 12 :2701 Tuesday :4711
## 4/16/2015: 76 11:00AM: 1368 11 :2443 Wednesday:2821
## 5/11/2015: 75 08:30AM: 1234 2 :1855
## (Other) :19738 (Other):11227 (Other):4085
## Month Season SCHED.PROV
## March :1950 Mode:logical SAGE,JACOB :2605
## October :1858 NA's:20212 MARK,MARGERY :2591
## May :1797 SCHNEIDER,DANIEL :2368
## September:1756 GERHARDSTEIN,BRIAN:2242
## December :1744 GOLBE,LAWRENCE :2140
## January :1714 ALBRECHT,CATHERINE:1235
## (Other) :9393 (Other) :7031
## SCHEDPROV_LastName VT VisitType DURATION
## SAGE :2605 RPV :9147 IPV: 7147 Min. : 15.00
## MARK :2591 IPV :6338 RPV:13065 1st Qu.: 30.00
## SCHNEIDER :2368 BTR :1852 Median : 30.00
## GERHARDSTEIN:2242 P60 : 734 Mean : 45.46
## GOLBE :2140 RBH : 605 3rd Qu.: 60.00
## ALBRECHT :1235 IBH : 351 Max. :690.00
## (Other) :7031 (Other):1185
## CANCEL.DT CAN.BUMP.INITIAL CAN.BUMP.INITITALS
## :13895 :13895 :13895
## 1/26/2015: 66 RWJKXG : 1941 RWJKXG : 1941
## 3/5/2015 : 36 RWJLAC : 1313 RWJLAC : 1313
## 9/17/2013: 35 RWJYJMERCUR: 832 RWJYJMERCUR: 832
## 1/21/2014: 32 RWJVCW : 542 RWJVCW : 542
## 12/1/2014: 31 RWJAOG : 262 RWJAOG : 262
## (Other) : 6117 (Other) : 1427 (Other) : 1427
## CANCEL.REASON PCC Lead.Time
## :13904 : 832 Min. : 0.00
## OTHER : 2535 HASTINGS,SHIRIN: 222 1st Qu.: 13.00
## PER PT REQUEST : 2335 YU,FRAN : 192 Median : 36.00
## PER DOCTOR : 488 ROSENFELD,JANE : 172 Mean : 53.41
## TRANSPORTATION PROBLEMS: 175 OTHER,REFPHYS : 162 3rd Qu.: 88.00
## PER CLIENT TELL REPORT : 161 ARMAS,BARBARA J: 120 Max. :207.00
## (Other) : 614 (Other) :18512
## DT.WHEN.SCHED DT.WHEN.RESCHED COMMENTS
## 12/1/2014 : 104 :16411 RPV : 5440
## 2/2/2015 : 85 1/26/2015: 54 BTR : 1541
## 1/26/2015 : 79 5/19/2015: 26 RPV/FOLLOW UP: 1208
## 9/16/2013 : 78 1/21/2014: 24 RBH : 422
## 1/5/2015 : 76 3/5/2015 : 24 F/U : 389
## 12/15/2014: 76 11/5/2014: 22 IPV/HEADACHES: 287
## (Other) :19714 (Other) : 3651 (Other) :10925
## MARITAL SEX EMPLOYER
## DIVORCED : 1305 F:10611 :20102
## MARRIED :10909 M: 9601 SOMERSET COUNTY : 13
## SEPARATED: 257 RETIRED : 9
## SINGLE : 6384 RWJ : 9
## WIDOWED : 1357 MIDD CTY BD OF SOC SVCS: 8
## PATHMARK : 8
## (Other) : 63
## REG.FSC REG.FSC.1
## MEDICARE US :8511 Commercial:6358
## HORIZON PPO :2039 Indigent : 765
## HORIZON NJ HEALTH HORIZON MCAID:2020 Medicaid :3689
## UNITED HEALTHCARE MEDICAID :1355 Medicare :9363
## HORIZON POS : 957 Other : 37
## AETNA PPO : 621
## (Other) :4709
## SCH.PROV.CATEGORY SCH.PROV.CATEGORY.1
## EPILEPSY : 3494 EPILEPSY : 3494
## GEN NEUROLOGY/HEADACHE: 3347 GENNEUROLOGY_HEADACHE: 3347
## MOVEMENT DISORDERS :10903 MOVEMENT DISORDERS :10903
## RESIDENT : 2468 RESIDENT : 2468
##
##
##
## INV..BILLED INVBAL invoicebalance
## Min. :11004764 Min. :-310.22 Min. :-310.22
## 1st Qu.:11534858 1st Qu.: 0.00 1st Qu.: 0.00
## Median :11993128 Median : 0.00 Median : 0.00
## Mean :11973455 Mean : 11.58 Mean : 11.58
## 3rd Qu.:12426717 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :13092699 Max. :8456.00 Max. :8456.00
## NA's :7477 NA's :7477 NA's :7477
## BILLING.PROVIDER SERVICING.PROVIDER
## :7477 :7477
## SCHNEIDER,DANIEL :2202 SAGE,JACOB :1783
## MARK,MARGERY :1904 MARK,MARGERY :1755
## SAGE,JACOB :1783 GOLBE,LAWRENCE :1468
## GERHARDSTEIN,BRIAN:1613 SCHNEIDER,DANIEL :1464
## GOLBE,LAWRENCE :1469 GERHARDSTEIN,BRIAN:1338
## (Other) :3764 (Other) :4927
## HOS DX1
## : 7482 :7477
## CANCER INSTITUTE OF NEW JERSEY : 19 332 :4079
## CHILD HEALTH INSTITUE OF NEW JE: 9 784 :1041
## CLINICAL ACADEMIC BUILDING :12658 333.83 : 659
## ROBERT WOOD JOHNSON HOSPITAL : 44 345.41 : 529
## 345.9 : 469
## (Other):5958
## DX1.DESCRIPTION DX2
## :7477 :17319
## PARALYSIS AGITANS :4079 784 : 335
## HEADACHE :1041 723.1 : 151
## SPASMODIC TORTICOLLIS : 659 332 : 115
## PARTIAL EPILEPSY IMPAIRMENT INTRAC: 529 782 : 103
## UNS EPILEPSY WO INTRACT EPILEPSY : 469 780.93 : 97
## (Other) :5958 (Other): 2092
## DX2.DESCRIPTION DX3
## :17319 :19030
## HEADACHE : 335 784 : 63
## CERVICALGIA : 151 723.1 : 61
## PARALYSIS AGITANS : 115 782 : 59
## DISTURBANCE SKIN SENSATION: 103 356.9 : 42
## MEMORY LOSS : 97 339.2 : 32
## (Other) : 2092 (Other): 925
## DX3.DESCRIPTION DX4
## :19030 :19705
## HEADACHE : 63 356.9 : 19
## CERVICALGIA : 61 782 : 19
## DISTURBANCE SKIN SENSATION : 59 784 : 17
## UNS IDIOPATHIC PERIPH NEUROPATHY : 42 V26.33 : 15
## POST-TRAUMATIC HEADACHE, UNSPECIFIED: 32 729.5 : 13
## (Other) : 925 (Other): 424
## DX4.DESCRIPTION DX5
## :19705 Min. : 93.89
## DISTURBANCE SKIN SENSATION : 19 1st Qu.:305.70
## UNS IDIOPATHIC PERIPH NEUROPATHY: 19 Median :378.82
## HEADACHE : 17 Mean :484.39
## GENETIC COUNSELING : 15 3rd Qu.:780.93
## MEMORY LOSS : 13 Max. :787.20
## (Other) : 424 NA's :20197
## DX5.DESCRIPTION REF.PROV
## :20197 : 8209
## MEMORY LOSS : 3 ROSENFELD,JANE : 106
## ABNORMALITY OF GAIT : 1 ARMAS,BARBARA J: 98
## DISPLACE INTERVERT DISC SITE UNS: 1 YU,FRAN : 92
## DYSPHAGIA,UNSPECIFIED : 1 MARK,MARGERY H : 89
## MONONEURITIS UNS SITE : 1 DANISH,SHABBAR : 81
## (Other) : 8 (Other) :11537
## REF.PROV.ZIP REF.PROV.SPEC STATUS CancellationTiming_Days
## :9074 Min. :88 ARR:12270 Min. : 0
## 8901 :2061 1st Qu.:88 CAN: 6317 1st Qu.: 1
## 8816 : 504 Median :88 NOS: 1625 Median : 6
## 8903 : 333 Mean :88 Mean : 18
## 8831 : 309 3rd Qu.:88 3rd Qu.: 23
## 8857 : 308 Max. :88 Max. :203
## (Other):7623 NA's :20211 NA's :13895
## CancellationCategory Outcome
## :13895 ARR:12270
## 168to336hr: 633 NOS: 7942
## 24hr : 2022
## 24to48hr : 289
## 336hr : 2017
## 48to168hr : 1356
##
# Re-inspect the column types after the conversions above.
str(Records)
## 'data.frame': 20212 obs. of 54 variables:
## $ MRN : num 5406458 5076135 5373626 2917014 3058711 ...
## $ ZIP : num 611 585 263 628 582 633 603 524 623 546 ...
## $ DistanceToClinic : int 10 8 10 6 6 0 11 14 4 21 ...
## $ AGE : int 54 60 69 65 39 56 50 58 57 51 ...
## $ DT : Factor w/ 505 levels "1/10/2014","1/12/2015",..: 33 351 275 12 189 218 341 375 208 239 ...
## $ Time : Factor w/ 48 levels "01:00PM","01:10PM",..: 10 42 28 14 35 3 3 27 14 27 ...
## $ TimeFrame_Hour : Factor w/ 10 levels "1","2","3","4",..: 2 10 6 3 8 1 1 6 3 6 ...
## $ Weekday : Factor w/ 5 levels "Friday","Monday",..: 2 2 1 3 2 2 1 2 2 4 ...
## $ Month : Factor w/ 12 levels "April","August",..: 5 7 1 5 4 8 7 7 8 8 ...
## $ Season : logi NA NA NA NA NA NA ...
## $ SCHED.PROV : Factor w/ 25 levels "AHMAD,HAROON RES",..: 5 24 2 25 7 19 5 7 11 14 ...
## $ SCHEDPROV_LastName : Factor w/ 25 levels "AHMAD","ALBRECHT",..: 5 23 2 25 6 18 5 6 10 13 ...
## $ VT : Factor w/ 17 levels "BOT","BTR","DOP",..: 5 7 7 5 7 7 7 7 7 7 ...
## $ VisitType : Factor w/ 2 levels "IPV","RPV": 1 1 1 1 1 1 1 1 1 1 ...
## $ DURATION : int 90 60 60 90 60 90 90 60 90 60 ...
## $ CANCEL.DT : Factor w/ 557 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CAN.BUMP.INITIAL : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CAN.BUMP.INITITALS : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CANCEL.REASON : Factor w/ 23 levels "","DEATH IN FAMILY",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PCC : Factor w/ 2356 levels "","ABBAS ,SHAHIDA",..: 1386 1183 2204 1745 1264 856 11 2072 791 2172 ...
## $ Lead.Time : int 0 0 1 3 3 3 4 4 4 4 ...
## $ DT.WHEN.SCHED : Factor w/ 616 levels "1/10/2014","1/12/2015",..: 42 452 334 167 207 251 487 481 295 220 ...
## $ DT.WHEN.RESCHED : Factor w/ 552 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COMMENTS : Factor w/ 3995 levels "#NAME?","?OF SZ DISORDER",..: 440 584 1959 2598 1730 1576 1823 2241 1237 1374 ...
## $ MARITAL : Factor w/ 5 levels "DIVORCED","MARRIED",..: 1 2 5 2 2 4 2 1 2 2 ...
## $ SEX : Factor w/ 2 levels "F","M": 1 1 1 2 1 2 1 2 2 2 ...
## $ EMPLOYER : Factor w/ 22 levels "","AON CONSULTING",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REG.FSC : Factor w/ 61 levels "AETNA HMO","AETNA MEDICARE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REG.FSC.1 : Factor w/ 5 levels "Commercial","Indigent",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SCH.PROV.CATEGORY : Factor w/ 4 levels "EPILEPSY","GEN NEUROLOGY/HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
## $ SCH.PROV.CATEGORY.1 : Factor w/ 4 levels "EPILEPSY","GENNEUROLOGY_HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
## $ INV..BILLED : int 12411684 11833282 12675891 11707151 11569067 11658495 11880306 11861189 11641511 12525819 ...
## $ INVBAL : num 0 0 0 0 0 0 0 0 0 0 ...
## $ invoicebalance : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BILLING.PROVIDER : Factor w/ 26 levels "","ALBRECHT,CATHERINE",..: 26 26 25 24 8 26 12 8 26 15 ...
## $ SERVICING.PROVIDER : Factor w/ 39 levels "","AHMAD,HAROON RES",..: 10 38 4 39 13 32 10 13 17 22 ...
## $ HOS : Factor w/ 5 levels "","CANCER INSTITUTE OF NEW JERSEY",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ DX1 : Factor w/ 341 levels "","0","135","138",..: 268 133 134 155 289 292 301 303 117 88 ...
## $ DX1.DESCRIPTION : Factor w/ 343 levels "","ABDOMINAL PAIN OTHER SITE",..: 320 240 239 30 94 222 92 117 59 112 ...
## $ DX2 : Factor w/ 427 levels "","130.9","153.9",..: 1 230 1 1 1 1 1 1 1 1 ...
## $ DX2.DESCRIPTION : Factor w/ 427 levels "","ABDOM/PELVIC SWELLING UNSP SITE",..: 1 185 1 1 1 1 1 1 1 1 ...
## $ DX3 : Factor w/ 323 levels "","13.04","130.7",..: 1 119 1 1 1 1 1 1 1 1 ...
## $ DX3.DESCRIPTION : Factor w/ 323 levels "","ABNORMAL CNS FUNCT STUDY OT",..: 1 297 1 1 1 1 1 1 1 1 ...
## $ DX4 : Factor w/ 189 levels "","183","191.9",..: 1 78 1 1 1 1 1 1 1 1 ...
## $ DX4.DESCRIPTION : Factor w/ 189 levels "","ABNORM EXAM FINDINGS,OTHER",..: 1 176 1 1 1 1 1 1 1 1 ...
## $ DX5 : num NA NA NA NA NA NA NA NA NA NA ...
## $ DX5.DESCRIPTION : Factor w/ 14 levels "","ABNORMALITY OF GAIT",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REF.PROV : Factor w/ 2309 levels "","ABBAS ,SHAHIDA",..: 1 1 2157 1713 1216 830 11 2016 761 171 ...
## $ REF.PROV.ZIP : Factor w/ 366 levels "","07860-2769",..: 1 1 108 342 319 356 270 356 358 358 ...
## $ REF.PROV.SPEC : int NA NA NA NA NA NA NA NA NA NA ...
## $ STATUS : Factor w/ 3 levels "ARR","CAN","NOS": 1 1 1 1 1 1 1 1 1 1 ...
## $ CancellationTiming_Days: int NA NA NA NA NA NA NA NA NA NA ...
## $ CancellationCategory : Factor w/ 6 levels "","168to336hr",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Outcome : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
# Columns kept for modelling: the Outcome label followed by the predictors.
myvars <- c(
  "Outcome", "DistanceToClinic", "SEX", "MARITAL", "AGE", "TimeFrame_Hour",
  "Weekday", "Month", "VisitType", "DURATION", "REG.FSC.1",
  "SCH.PROV.CATEGORY.1"
)
# Restrict the data frame to those columns and check their distributions.
newdata <- Records[, myvars]
summary(newdata)
## Outcome DistanceToClinic SEX MARITAL AGE
## ARR:12270 Min. : 0.00 F:10611 DIVORCED : 1305 Min. : 6.00
## NOS: 7942 1st Qu.: 6.00 M: 9601 MARRIED :10909 1st Qu.:44.00
## Median : 14.00 SEPARATED: 257 Median :60.00
## Mean : 19.46 SINGLE : 6384 Mean :57.23
## 3rd Qu.: 30.00 WIDOWED : 1357 3rd Qu.:71.00
## Max. :120.00 Max. :97.00
##
## TimeFrame_Hour Weekday Month VisitType
## 10 :3246 Friday :2627 March :1950 IPV: 7147
## 9 :2964 Monday :5032 October :1858 RPV:13065
## 1 :2918 Thursday :5021 May :1797
## 12 :2701 Tuesday :4711 September:1756
## 11 :2443 Wednesday:2821 December :1744
## 2 :1855 January :1714
## (Other):4085 (Other) :9393
## DURATION REG.FSC.1 SCH.PROV.CATEGORY.1
## Min. : 15.00 Commercial:6358 EPILEPSY : 3494
## 1st Qu.: 30.00 Indigent : 765 GENNEUROLOGY_HEADACHE: 3347
## Median : 30.00 Medicaid :3689 MOVEMENT DISORDERS :10903
## Mean : 45.46 Medicare :9363 RESIDENT : 2468
## 3rd Qu.: 60.00 Other : 37
## Max. :690.00
##
# Random 75% / 25% train/test split.
# Fix the RNG seed so the split (and every metric computed from it) can be
# reproduced on a re-run.
set.seed(42)
n_rows <- nrow(newdata)
samples <- sample.int(n_rows, size = floor(n_rows * 0.75))
train.newdata <- newdata[samples, ]
# Select the hold-out rows by positive index: `newdata[-samples, ]` would
# return zero rows (instead of all rows) if `samples` were ever empty.
test.newdata <- newdata[setdiff(seq_len(n_rows), samples), ]
# 1) SVM classification
# Fit a support vector machine on the training rows with svm()'s default
# settings, using every predictor column of the modelling data.
model1 <- svm(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + VisitType + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata
)
# Print the fitted model's parameters and support-vector counts.
summary(model1)
##
## Call:
## svm(formula = Outcome ~ DistanceToClinic + SEX + MARITAL + AGE +
## TimeFrame_Hour + Weekday + Month + VisitType + DURATION +
## REG.FSC.1 + SCH.PROV.CATEGORY.1, data = train.newdata)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.02439024
##
## Number of Support Vectors: 12338
##
## ( 5983 6355 )
##
##
## Number of Classes: 2
##
## Levels:
## ARR NOS
# Predict the outcome class for the hold-out rows.
# predict() for svm objects has no `type` argument; the former
# type="response" was silently absorbed by `...`, so it is dropped here.
pred_model1 <- predict(model1, test.newdata)
# Cross-tabulate predictions (rows) against observed outcomes (columns).
mtab_model1 <- table(pred_model1, test.newdata$Outcome)
# NOTE(review): the recorded output reports "ARR" as the positive class, so
# sensitivity refers to arrivals; pass positive = "NOS" if no-shows are the
# event of interest.
confusionMatrix(mtab_model1)
## Confusion Matrix and Statistics
##
##
## pred_model1 ARR NOS
## ARR 3093 1956
## NOS 1 3
##
## Accuracy : 0.6127
## 95% CI : (0.5991, 0.6262)
## No Information Rate : 0.6123
## P-Value [Acc > NIR] : 0.4832
##
## Kappa : 0.0015
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.999677
## Specificity : 0.001531
## Pos Pred Value : 0.612597
## Neg Pred Value : 0.750000
## Prevalence : 0.612310
## Detection Rate : 0.612112
## Detection Prevalence : 0.999208
## Balanced Accuracy : 0.500604
##
## 'Positive' Class : ARR
##
# Hold-out accuracy of the SVM: correctly classified rows (the diagonal of the
# observed-vs-predicted table) divided by the number of test rows.
accuracy_75_model1 <- local({
  agreement <- table(test.newdata$Outcome, pred_model1)
  sum(diag(agreement)) / nrow(test.newdata)
})
accuracy_75_model1
## [1] 0.6127053
# 2) Neural net
library(nnet)
# nnet() starts from randomly drawn initial weights; fix the seed so the
# fitted network and its metrics can be reproduced.
set.seed(42)
# Single-hidden-layer network: 4 hidden units, weight decay 1e-4, at most 500
# optimisation iterations.
model2 <- nnet(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + VisitType + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata,
  size = 4,
  decay = 0.0001,
  maxit = 500
)
## # weights: 169
## initial value 10358.216566
## iter 10 value 10113.115848
## iter 20 value 10089.190258
## iter 30 value 10029.844761
## iter 40 value 9973.178266
## iter 50 value 9953.085802
## iter 60 value 9936.549075
## iter 70 value 9934.671358
## iter 80 value 9933.473612
## iter 90 value 9904.543769
## iter 100 value 9893.791045
## iter 110 value 9890.563102
## iter 120 value 9889.153431
## iter 130 value 9888.205942
## iter 140 value 9887.906276
## iter 150 value 9887.779135
## iter 160 value 9887.638217
## iter 170 value 9887.593243
## iter 180 value 9887.343541
## iter 190 value 9887.270460
## iter 200 value 9887.239130
## iter 210 value 9887.201103
## iter 220 value 9887.147069
## iter 230 value 9887.108152
## iter 240 value 9887.094426
## iter 250 value 9887.004582
## final value 9887.004297
## converged
# Predict the outcome class for the hold-out rows.
# predict(type = "class") on an nnet fit returns a character vector, so a class
# that is never predicted would vanish from the table and confusionMatrix()
# would be handed a non-square table. Re-encode the predictions as a factor
# with the same levels as the observed outcome so the table is always square.
pred_model2 <- factor(
  predict(model2, test.newdata, type = "class"),
  levels = levels(test.newdata$Outcome)
)
# Cross-tabulate predictions (rows) against observed outcomes (columns).
mtab_model2 <- table(pred_model2, test.newdata$Outcome)
confusionMatrix(mtab_model2)
## Confusion Matrix and Statistics
##
##
## pred_model2 ARR NOS
## ARR 2780 1754
## NOS 314 205
##
## Accuracy : 0.5907
## 95% CI : (0.577, 0.6043)
## No Information Rate : 0.6123
## P-Value [Acc > NIR] : 0.9992
##
## Kappa : 0.0037
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8985
## Specificity : 0.1046
## Pos Pred Value : 0.6131
## Neg Pred Value : 0.3950
## Prevalence : 0.6123
## Detection Rate : 0.5502
## Detection Prevalence : 0.8973
## Balanced Accuracy : 0.5016
##
## 'Positive' Class : ARR
##
# Hold-out accuracy of the neural net: correctly classified rows divided by the
# number of test rows.
# Both margins are forced onto the observed outcome's levels so the table is
# always square; otherwise, if the network never predicts one class, diag()
# of the non-square table would pick the wrong cells.
accuracy_75_model2 <- local({
  outcome_levels <- levels(test.newdata$Outcome)
  agreement <- table(
    factor(test.newdata$Outcome, levels = outcome_levels),
    factor(pred_model2, levels = outcome_levels)
  )
  sum(diag(agreement)) / nrow(test.newdata)
})
accuracy_75_model2
## [1] 0.5907382
# 3) Classification and Regression Trees (CART)
library(rpart)
# Grow a classification tree on the training rows with rpart()'s defaults.
model3 <- rpart(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + VisitType + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata
)
# Predict the class label for each hold-out row.
pred_model3 <- predict(model3, newdata = test.newdata, type = "class")
# Cross-tabulate predictions (rows) against observed outcomes (columns).
mtab_model3 <- table(pred_model3, test.newdata$Outcome)
confusionMatrix(mtab_model3)
## Confusion Matrix and Statistics
##
##
## pred_model3 ARR NOS
## ARR 3094 1959
## NOS 0 0
##
## Accuracy : 0.6123
## 95% CI : (0.5987, 0.6258)
## No Information Rate : 0.6123
## P-Value [Acc > NIR] : 0.5062
##
## Kappa : 0
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.6123
## Neg Pred Value : NaN
## Prevalence : 0.6123
## Detection Rate : 0.6123
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : ARR
##
# Hold-out accuracy of the CART model: correctly classified rows (the diagonal
# of the observed-vs-predicted table) divided by the number of test rows.
accuracy_75_model3 <- local({
  agreement <- table(test.newdata$Outcome, pred_model3)
  sum(diag(agreement)) / nrow(test.newdata)
})
accuracy_75_model3
## [1] 0.6123095