library(e1071)
library(readr)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Read the tab-separated appointment records export into a data frame.
Records <- read.csv(
  "C:/Users/aksha/Desktop/Shiny app/Forecasting - NO Show Project/Records.csv",
  sep = "\t"
)

# View(Records)
# Drop every row whose second column (ZIP) is missing.
Records <- Records[!is.na(Records[[2]]), ]
# Show the column types of the remaining rows.
str(Records)
## 'data.frame':    20212 obs. of  54 variables:
##  $ MRN                    : int  5406458 5076135 5373626 2917014 3058711 5234503 2966088 2939130 2938995 5281107 ...
##  $ ZIP                    : Factor w/ 636 levels "07009-1316","07013-1030",..: 611 585 263 628 582 633 603 524 623 546 ...
##  $ DistanceToClinic       : num  10.6 8.7 10.8 6.6 6 0.8 11.9 14.4 4.5 21.7 ...
##  $ AGE                    : int  54 60 69 65 39 56 50 58 57 51 ...
##  $ DT                     : Factor w/ 505 levels "1/10/2014","1/12/2015",..: 33 351 275 12 189 218 341 375 208 239 ...
##  $ Time                   : Factor w/ 48 levels "01:00PM","01:10PM",..: 10 42 28 14 35 3 3 27 14 27 ...
##  $ TimeFrame_Hour         : int  2 12 8 3 10 1 1 8 3 8 ...
##  $ Weekday                : Factor w/ 5 levels "Friday","Monday",..: 2 2 1 3 2 2 1 2 2 4 ...
##  $ Month                  : Factor w/ 12 levels "April","August",..: 5 7 1 5 4 8 7 7 8 8 ...
##  $ Season                 : logi  NA NA NA NA NA NA ...
##  $ SCHED.PROV             : Factor w/ 25 levels "AHMAD,HAROON RES",..: 5 24 2 25 7 19 5 7 11 14 ...
##  $ SCHEDPROV_LastName     : Factor w/ 25 levels "AHMAD","ALBRECHT",..: 5 23 2 25 6 18 5 6 10 13 ...
##  $ VT                     : Factor w/ 17 levels "BOT","BTR","DOP",..: 5 7 7 5 7 7 7 7 7 7 ...
##  $ VisitType              : Factor w/ 2 levels "IPV","RPV": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DURATION               : int  90 60 60 90 60 90 90 60 90 60 ...
##  $ CANCEL.DT              : Factor w/ 557 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CAN.BUMP.INITIAL       : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CAN.BUMP.INITITALS     : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CANCEL.REASON          : Factor w/ 23 levels "","DEATH IN FAMILY",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PCC                    : Factor w/ 2356 levels "","ABBAS ,SHAHIDA",..: 1386 1183 2204 1745 1264 856 11 2072 791 2172 ...
##  $ Lead.Time              : int  0 0 1 3 3 3 4 4 4 4 ...
##  $ DT.WHEN.SCHED          : Factor w/ 616 levels "1/10/2014","1/12/2015",..: 42 452 334 167 207 251 487 481 295 220 ...
##  $ DT.WHEN.RESCHED        : Factor w/ 552 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COMMENTS               : Factor w/ 3995 levels "#NAME?","?OF SZ DISORDER",..: 440 584 1959 2598 1730 1576 1823 2241 1237 1374 ...
##  $ MARITAL                : Factor w/ 5 levels "DIVORCED","MARRIED",..: 1 2 5 2 2 4 2 1 2 2 ...
##  $ SEX                    : Factor w/ 2 levels "F","M": 1 1 1 2 1 2 1 2 2 2 ...
##  $ EMPLOYER               : Factor w/ 22 levels "","AON CONSULTING",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REG.FSC                : Factor w/ 61 levels "AETNA HMO","AETNA MEDICARE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REG.FSC.1              : Factor w/ 5 levels "Commercial","Indigent",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCH.PROV.CATEGORY      : Factor w/ 4 levels "EPILEPSY","GEN NEUROLOGY/HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
##  $ SCH.PROV.CATEGORY.1    : Factor w/ 4 levels "EPILEPSY","GENNEUROLOGY_HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
##  $ INV..BILLED            : int  12411684 11833282 12675891 11707151 11569067 11658495 11880306 11861189 11641511 12525819 ...
##  $ INVBAL                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ invoicebalance         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BILLING.PROVIDER       : Factor w/ 26 levels "","ALBRECHT,CATHERINE",..: 26 26 25 24 8 26 12 8 26 15 ...
##  $ SERVICING.PROVIDER     : Factor w/ 39 levels "","AHMAD,HAROON RES",..: 10 38 4 39 13 32 10 13 17 22 ...
##  $ HOS                    : Factor w/ 5 levels "","CANCER INSTITUTE OF NEW JERSEY",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ DX1                    : Factor w/ 341 levels "","0","135","138",..: 268 133 134 155 289 292 301 303 117 88 ...
##  $ DX1.DESCRIPTION        : Factor w/ 343 levels "","ABDOMINAL PAIN OTHER SITE",..: 320 240 239 30 94 222 92 117 59 112 ...
##  $ DX2                    : Factor w/ 427 levels "","130.9","153.9",..: 1 230 1 1 1 1 1 1 1 1 ...
##  $ DX2.DESCRIPTION        : Factor w/ 427 levels "","ABDOM/PELVIC SWELLING UNSP SITE",..: 1 185 1 1 1 1 1 1 1 1 ...
##  $ DX3                    : Factor w/ 323 levels "","13.04","130.7",..: 1 119 1 1 1 1 1 1 1 1 ...
##  $ DX3.DESCRIPTION        : Factor w/ 323 levels "","ABNORMAL CNS FUNCT STUDY OT",..: 1 297 1 1 1 1 1 1 1 1 ...
##  $ DX4                    : Factor w/ 189 levels "","183","191.9",..: 1 78 1 1 1 1 1 1 1 1 ...
##  $ DX4.DESCRIPTION        : Factor w/ 189 levels "","ABNORM EXAM FINDINGS,OTHER",..: 1 176 1 1 1 1 1 1 1 1 ...
##  $ DX5                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ DX5.DESCRIPTION        : Factor w/ 14 levels "","ABNORMALITY OF GAIT",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REF.PROV               : Factor w/ 2309 levels "","ABBAS ,SHAHIDA",..: 1 1 2157 1713 1216 830 11 2016 761 171 ...
##  $ REF.PROV.ZIP           : Factor w/ 366 levels "","07860-2769",..: 1 1 108 342 319 356 270 356 358 358 ...
##  $ REF.PROV.SPEC          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ STATUS                 : Factor w/ 3 levels "ARR","CAN","NOS": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CancellationTiming_Days: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ CancellationCategory   : Factor w/ 6 levels "","168to336hr",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Outcome                : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary of Records before the type conversions further down
summary(Records)
##       MRN               ZIP        DistanceToClinic      AGE       
##  Min.   :2902398   8831   :  976   Min.   :  0.80   Min.   : 6.00  
##  1st Qu.:3011708   8901   :  852   1st Qu.:  6.90   1st Qu.:44.00  
##  Median :4715386   8873   :  817   Median : 14.10   Median :60.00  
##  Mean   :4215219   8902   :  642   Mean   : 19.94   Mean   :57.23  
##  3rd Qu.:5250109   8816   :  627   3rd Qu.: 30.40   3rd Qu.:71.00  
##  Max.   :5451480   8854   :  593   Max.   :120.00   Max.   :97.00  
##                    (Other):15705                                   
##          DT             Time       TimeFrame_Hour        Weekday    
##  3/16/2015:   86   12:30PM: 1790   Min.   : 1.000   Friday   :2627  
##  11/3/2014:   82   10:30AM: 1708   1st Qu.: 3.000   Monday   :5032  
##  4/2/2015 :   78   09:30AM: 1506   Median : 9.000   Thursday :5021  
##  3/30/2015:   77   10:00AM: 1379   Mean   : 7.329   Tuesday  :4711  
##  4/16/2015:   76   11:00AM: 1368   3rd Qu.:11.000   Wednesday:2821  
##  5/11/2015:   75   08:30AM: 1234   Max.   :12.000                   
##  (Other)  :19738   (Other):11227                                    
##        Month       Season                     SCHED.PROV  
##  March    :1950   Mode:logical   SAGE,JACOB        :2605  
##  October  :1858   NA's:20212     MARK,MARGERY      :2591  
##  May      :1797                  SCHNEIDER,DANIEL  :2368  
##  September:1756                  GERHARDSTEIN,BRIAN:2242  
##  December :1744                  GOLBE,LAWRENCE    :2140  
##  January  :1714                  ALBRECHT,CATHERINE:1235  
##  (Other)  :9393                  (Other)           :7031  
##     SCHEDPROV_LastName       VT       VisitType      DURATION     
##  SAGE        :2605     RPV    :9147   IPV: 7147   Min.   : 15.00  
##  MARK        :2591     IPV    :6338   RPV:13065   1st Qu.: 30.00  
##  SCHNEIDER   :2368     BTR    :1852               Median : 30.00  
##  GERHARDSTEIN:2242     P60    : 734               Mean   : 45.46  
##  GOLBE       :2140     RBH    : 605               3rd Qu.: 60.00  
##  ALBRECHT    :1235     IBH    : 351               Max.   :690.00  
##  (Other)     :7031     (Other):1185                               
##      CANCEL.DT        CAN.BUMP.INITIAL   CAN.BUMP.INITITALS
##           :13895              :13895              :13895   
##  1/26/2015:   66   RWJKXG     : 1941   RWJKXG     : 1941   
##  3/5/2015 :   36   RWJLAC     : 1313   RWJLAC     : 1313   
##  9/17/2013:   35   RWJYJMERCUR:  832   RWJYJMERCUR:  832   
##  1/21/2014:   32   RWJVCW     :  542   RWJVCW     :  542   
##  12/1/2014:   31   RWJAOG     :  262   RWJAOG     :  262   
##  (Other)  : 6117   (Other)    : 1427   (Other)    : 1427   
##                  CANCEL.REASON                PCC          Lead.Time     
##                         :13904                  :  832   Min.   :  0.00  
##  OTHER                  : 2535   HASTINGS,SHIRIN:  222   1st Qu.: 13.00  
##  PER PT REQUEST         : 2335   YU,FRAN        :  192   Median : 36.00  
##  PER DOCTOR             :  488   ROSENFELD,JANE :  172   Mean   : 53.41  
##  TRANSPORTATION PROBLEMS:  175   OTHER,REFPHYS  :  162   3rd Qu.: 88.00  
##  PER CLIENT TELL REPORT :  161   ARMAS,BARBARA J:  120   Max.   :207.00  
##  (Other)                :  614   (Other)        :18512                   
##     DT.WHEN.SCHED    DT.WHEN.RESCHED           COMMENTS    
##  12/1/2014 :  104            :16411   RPV          : 5440  
##  2/2/2015  :   85   1/26/2015:   54   BTR          : 1541  
##  1/26/2015 :   79   5/19/2015:   26   RPV/FOLLOW UP: 1208  
##  9/16/2013 :   78   1/21/2014:   24   RBH          :  422  
##  1/5/2015  :   76   3/5/2015 :   24   F/U          :  389  
##  12/15/2014:   76   11/5/2014:   22   IPV/HEADACHES:  287  
##  (Other)   :19714   (Other)  : 3651   (Other)      :10925  
##       MARITAL      SEX                          EMPLOYER    
##  DIVORCED : 1305   F:10611                          :20102  
##  MARRIED  :10909   M: 9601   SOMERSET COUNTY        :   13  
##  SEPARATED:  257             RETIRED                :    9  
##  SINGLE   : 6384             RWJ                    :    9  
##  WIDOWED  : 1357             MIDD CTY BD OF SOC SVCS:    8  
##                              PATHMARK               :    8  
##                              (Other)                :   63  
##                             REG.FSC          REG.FSC.1   
##  MEDICARE US                    :8511   Commercial:6358  
##  HORIZON PPO                    :2039   Indigent  : 765  
##  HORIZON NJ HEALTH HORIZON MCAID:2020   Medicaid  :3689  
##  UNITED HEALTHCARE MEDICAID     :1355   Medicare  :9363  
##  HORIZON POS                    : 957   Other     :  37  
##  AETNA PPO                      : 621                    
##  (Other)                        :4709                    
##               SCH.PROV.CATEGORY            SCH.PROV.CATEGORY.1
##  EPILEPSY              : 3494   EPILEPSY             : 3494   
##  GEN NEUROLOGY/HEADACHE: 3347   GENNEUROLOGY_HEADACHE: 3347   
##  MOVEMENT DISORDERS    :10903   MOVEMENT DISORDERS   :10903   
##  RESIDENT              : 2468   RESIDENT             : 2468   
##                                                               
##                                                               
##                                                               
##   INV..BILLED           INVBAL        invoicebalance   
##  Min.   :11004764   Min.   :-310.22   Min.   :-310.22  
##  1st Qu.:11534858   1st Qu.:   0.00   1st Qu.:   0.00  
##  Median :11993128   Median :   0.00   Median :   0.00  
##  Mean   :11973455   Mean   :  11.58   Mean   :  11.58  
##  3rd Qu.:12426717   3rd Qu.:   0.00   3rd Qu.:   0.00  
##  Max.   :13092699   Max.   :8456.00   Max.   :8456.00  
##  NA's   :7477       NA's   :7477      NA's   :7477     
##            BILLING.PROVIDER          SERVICING.PROVIDER
##                    :7477                      :7477    
##  SCHNEIDER,DANIEL  :2202    SAGE,JACOB        :1783    
##  MARK,MARGERY      :1904    MARK,MARGERY      :1755    
##  SAGE,JACOB        :1783    GOLBE,LAWRENCE    :1468    
##  GERHARDSTEIN,BRIAN:1613    SCHNEIDER,DANIEL  :1464    
##  GOLBE,LAWRENCE    :1469    GERHARDSTEIN,BRIAN:1338    
##  (Other)           :3764    (Other)           :4927    
##                               HOS             DX1      
##                                 : 7482          :7477  
##  CANCER INSTITUTE OF NEW JERSEY :   19   332    :4079  
##  CHILD HEALTH INSTITUE OF NEW JE:    9   784    :1041  
##  CLINICAL ACADEMIC BUILDING     :12658   333.83 : 659  
##  ROBERT WOOD JOHNSON HOSPITAL   :   44   345.41 : 529  
##                                          345.9  : 469  
##                                          (Other):5958  
##                            DX1.DESCRIPTION      DX2       
##                                    :7477          :17319  
##  PARALYSIS AGITANS                 :4079   784    :  335  
##  HEADACHE                          :1041   723.1  :  151  
##  SPASMODIC TORTICOLLIS             : 659   332    :  115  
##  PARTIAL EPILEPSY IMPAIRMENT INTRAC: 529   782    :  103  
##  UNS EPILEPSY WO INTRACT EPILEPSY  : 469   780.93 :   97  
##  (Other)                           :5958   (Other): 2092  
##                    DX2.DESCRIPTION       DX3       
##                            :17319          :19030  
##  HEADACHE                  :  335   784    :   63  
##  CERVICALGIA               :  151   723.1  :   61  
##  PARALYSIS AGITANS         :  115   782    :   59  
##  DISTURBANCE SKIN SENSATION:  103   356.9  :   42  
##  MEMORY LOSS               :   97   339.2  :   32  
##  (Other)                   : 2092   (Other):  925  
##                              DX3.DESCRIPTION       DX4       
##                                      :19030          :19705  
##  HEADACHE                            :   63   356.9  :   19  
##  CERVICALGIA                         :   61   782    :   19  
##  DISTURBANCE SKIN SENSATION          :   59   784    :   17  
##  UNS IDIOPATHIC PERIPH NEUROPATHY    :   42   V26.33 :   15  
##  POST-TRAUMATIC HEADACHE, UNSPECIFIED:   32   729.5  :   13  
##  (Other)                             :  925   (Other):  424  
##                          DX4.DESCRIPTION       DX5        
##                                  :19705   Min.   : 93.89  
##  DISTURBANCE SKIN SENSATION      :   19   1st Qu.:305.70  
##  UNS IDIOPATHIC PERIPH NEUROPATHY:   19   Median :378.82  
##  HEADACHE                        :   17   Mean   :484.39  
##  GENETIC COUNSELING              :   15   3rd Qu.:780.93  
##  MEMORY LOSS                     :   13   Max.   :787.20  
##  (Other)                         :  424   NA's   :20197   
##                          DX5.DESCRIPTION             REF.PROV    
##                                  :20197                  : 8209  
##  MEMORY LOSS                     :    3   ROSENFELD,JANE :  106  
##  ABNORMALITY OF GAIT             :    1   ARMAS,BARBARA J:   98  
##  DISPLACE INTERVERT DISC SITE UNS:    1   YU,FRAN        :   92  
##  DYSPHAGIA,UNSPECIFIED           :    1   MARK,MARGERY H :   89  
##  MONONEURITIS UNS SITE           :    1   DANISH,SHABBAR :   81  
##  (Other)                         :    8   (Other)        :11537  
##   REF.PROV.ZIP  REF.PROV.SPEC   STATUS      CancellationTiming_Days
##         :9074   Min.   :88      ARR:12270   Min.   :  0            
##  8901   :2061   1st Qu.:88      CAN: 6317   1st Qu.:  1            
##  8816   : 504   Median :88      NOS: 1625   Median :  6            
##  8903   : 333   Mean   :88                  Mean   : 18            
##  8831   : 309   3rd Qu.:88                  3rd Qu.: 23            
##  8857   : 308   Max.   :88                  Max.   :203            
##  (Other):7623   NA's   :20211               NA's   :13895          
##  CancellationCategory Outcome    
##            :13895     ARR:12270  
##  168to336hr:  633     NOS: 7942  
##  24hr      : 2022                
##  24to48hr  :  289                
##  336hr     : 2017                
##  48to168hr : 1356                
## 
# Converting to nominal and numeric attributes
Records$MRN <- as.numeric(Records$MRN)
# ZIP is read as text/factor (values such as "8831" and "07009-1316").
# as.numeric() on a factor returns the internal level codes, not the labels,
# so convert via character and keep only the part before the ZIP+4 hyphen.
# Labels that are not purely numeric become NA (with a warning).
Records$ZIP <- as.numeric(sub("-.*$", "", as.character(Records$ZIP)))
# NOTE(review): as.integer() truncates toward zero (e.g. 0.8 -> 0, 10.6 -> 10);
# kept as-is so the values fed to the model later in the script do not change.
Records$DistanceToClinic <- as.integer(Records$DistanceToClinic)

# Categorical predictors and the outcome/status columns are stored as factors.
factor_cols <- c(
  "TimeFrame_Hour", "Weekday", "Month", "SCHED.PROV", "VT", "VisitType",
  "MARITAL", "SEX", "REG.FSC.1", "SCH.PROV.CATEGORY.1", "Outcome", "STATUS"
)
Records[factor_cols] <- lapply(Records[factor_cols], as.factor)

# Overall summary of Records
summary(Records)
##       MRN               ZIP        DistanceToClinic      AGE       
##  Min.   :2902398   Min.   :  1.0   Min.   :  0.00   Min.   : 6.00  
##  1st Qu.:3011708   1st Qu.:384.0   1st Qu.:  6.00   1st Qu.:44.00  
##  Median :4715386   Median :569.0   Median : 14.00   Median :60.00  
##  Mean   :4215219   Mean   :491.6   Mean   : 19.46   Mean   :57.23  
##  3rd Qu.:5250109   3rd Qu.:609.0   3rd Qu.: 30.00   3rd Qu.:71.00  
##  Max.   :5451480   Max.   :636.0   Max.   :120.00   Max.   :97.00  
##                                                                    
##          DT             Time       TimeFrame_Hour      Weekday    
##  3/16/2015:   86   12:30PM: 1790   10     :3246   Friday   :2627  
##  11/3/2014:   82   10:30AM: 1708   9      :2964   Monday   :5032  
##  4/2/2015 :   78   09:30AM: 1506   1      :2918   Thursday :5021  
##  3/30/2015:   77   10:00AM: 1379   12     :2701   Tuesday  :4711  
##  4/16/2015:   76   11:00AM: 1368   11     :2443   Wednesday:2821  
##  5/11/2015:   75   08:30AM: 1234   2      :1855                   
##  (Other)  :19738   (Other):11227   (Other):4085                   
##        Month       Season                     SCHED.PROV  
##  March    :1950   Mode:logical   SAGE,JACOB        :2605  
##  October  :1858   NA's:20212     MARK,MARGERY      :2591  
##  May      :1797                  SCHNEIDER,DANIEL  :2368  
##  September:1756                  GERHARDSTEIN,BRIAN:2242  
##  December :1744                  GOLBE,LAWRENCE    :2140  
##  January  :1714                  ALBRECHT,CATHERINE:1235  
##  (Other)  :9393                  (Other)           :7031  
##     SCHEDPROV_LastName       VT       VisitType      DURATION     
##  SAGE        :2605     RPV    :9147   IPV: 7147   Min.   : 15.00  
##  MARK        :2591     IPV    :6338   RPV:13065   1st Qu.: 30.00  
##  SCHNEIDER   :2368     BTR    :1852               Median : 30.00  
##  GERHARDSTEIN:2242     P60    : 734               Mean   : 45.46  
##  GOLBE       :2140     RBH    : 605               3rd Qu.: 60.00  
##  ALBRECHT    :1235     IBH    : 351               Max.   :690.00  
##  (Other)     :7031     (Other):1185                               
##      CANCEL.DT        CAN.BUMP.INITIAL   CAN.BUMP.INITITALS
##           :13895              :13895              :13895   
##  1/26/2015:   66   RWJKXG     : 1941   RWJKXG     : 1941   
##  3/5/2015 :   36   RWJLAC     : 1313   RWJLAC     : 1313   
##  9/17/2013:   35   RWJYJMERCUR:  832   RWJYJMERCUR:  832   
##  1/21/2014:   32   RWJVCW     :  542   RWJVCW     :  542   
##  12/1/2014:   31   RWJAOG     :  262   RWJAOG     :  262   
##  (Other)  : 6117   (Other)    : 1427   (Other)    : 1427   
##                  CANCEL.REASON                PCC          Lead.Time     
##                         :13904                  :  832   Min.   :  0.00  
##  OTHER                  : 2535   HASTINGS,SHIRIN:  222   1st Qu.: 13.00  
##  PER PT REQUEST         : 2335   YU,FRAN        :  192   Median : 36.00  
##  PER DOCTOR             :  488   ROSENFELD,JANE :  172   Mean   : 53.41  
##  TRANSPORTATION PROBLEMS:  175   OTHER,REFPHYS  :  162   3rd Qu.: 88.00  
##  PER CLIENT TELL REPORT :  161   ARMAS,BARBARA J:  120   Max.   :207.00  
##  (Other)                :  614   (Other)        :18512                   
##     DT.WHEN.SCHED    DT.WHEN.RESCHED           COMMENTS    
##  12/1/2014 :  104            :16411   RPV          : 5440  
##  2/2/2015  :   85   1/26/2015:   54   BTR          : 1541  
##  1/26/2015 :   79   5/19/2015:   26   RPV/FOLLOW UP: 1208  
##  9/16/2013 :   78   1/21/2014:   24   RBH          :  422  
##  1/5/2015  :   76   3/5/2015 :   24   F/U          :  389  
##  12/15/2014:   76   11/5/2014:   22   IPV/HEADACHES:  287  
##  (Other)   :19714   (Other)  : 3651   (Other)      :10925  
##       MARITAL      SEX                          EMPLOYER    
##  DIVORCED : 1305   F:10611                          :20102  
##  MARRIED  :10909   M: 9601   SOMERSET COUNTY        :   13  
##  SEPARATED:  257             RETIRED                :    9  
##  SINGLE   : 6384             RWJ                    :    9  
##  WIDOWED  : 1357             MIDD CTY BD OF SOC SVCS:    8  
##                              PATHMARK               :    8  
##                              (Other)                :   63  
##                             REG.FSC          REG.FSC.1   
##  MEDICARE US                    :8511   Commercial:6358  
##  HORIZON PPO                    :2039   Indigent  : 765  
##  HORIZON NJ HEALTH HORIZON MCAID:2020   Medicaid  :3689  
##  UNITED HEALTHCARE MEDICAID     :1355   Medicare  :9363  
##  HORIZON POS                    : 957   Other     :  37  
##  AETNA PPO                      : 621                    
##  (Other)                        :4709                    
##               SCH.PROV.CATEGORY            SCH.PROV.CATEGORY.1
##  EPILEPSY              : 3494   EPILEPSY             : 3494   
##  GEN NEUROLOGY/HEADACHE: 3347   GENNEUROLOGY_HEADACHE: 3347   
##  MOVEMENT DISORDERS    :10903   MOVEMENT DISORDERS   :10903   
##  RESIDENT              : 2468   RESIDENT             : 2468   
##                                                               
##                                                               
##                                                               
##   INV..BILLED           INVBAL        invoicebalance   
##  Min.   :11004764   Min.   :-310.22   Min.   :-310.22  
##  1st Qu.:11534858   1st Qu.:   0.00   1st Qu.:   0.00  
##  Median :11993128   Median :   0.00   Median :   0.00  
##  Mean   :11973455   Mean   :  11.58   Mean   :  11.58  
##  3rd Qu.:12426717   3rd Qu.:   0.00   3rd Qu.:   0.00  
##  Max.   :13092699   Max.   :8456.00   Max.   :8456.00  
##  NA's   :7477       NA's   :7477      NA's   :7477     
##            BILLING.PROVIDER          SERVICING.PROVIDER
##                    :7477                      :7477    
##  SCHNEIDER,DANIEL  :2202    SAGE,JACOB        :1783    
##  MARK,MARGERY      :1904    MARK,MARGERY      :1755    
##  SAGE,JACOB        :1783    GOLBE,LAWRENCE    :1468    
##  GERHARDSTEIN,BRIAN:1613    SCHNEIDER,DANIEL  :1464    
##  GOLBE,LAWRENCE    :1469    GERHARDSTEIN,BRIAN:1338    
##  (Other)           :3764    (Other)           :4927    
##                               HOS             DX1      
##                                 : 7482          :7477  
##  CANCER INSTITUTE OF NEW JERSEY :   19   332    :4079  
##  CHILD HEALTH INSTITUE OF NEW JE:    9   784    :1041  
##  CLINICAL ACADEMIC BUILDING     :12658   333.83 : 659  
##  ROBERT WOOD JOHNSON HOSPITAL   :   44   345.41 : 529  
##                                          345.9  : 469  
##                                          (Other):5958  
##                            DX1.DESCRIPTION      DX2       
##                                    :7477          :17319  
##  PARALYSIS AGITANS                 :4079   784    :  335  
##  HEADACHE                          :1041   723.1  :  151  
##  SPASMODIC TORTICOLLIS             : 659   332    :  115  
##  PARTIAL EPILEPSY IMPAIRMENT INTRAC: 529   782    :  103  
##  UNS EPILEPSY WO INTRACT EPILEPSY  : 469   780.93 :   97  
##  (Other)                           :5958   (Other): 2092  
##                    DX2.DESCRIPTION       DX3       
##                            :17319          :19030  
##  HEADACHE                  :  335   784    :   63  
##  CERVICALGIA               :  151   723.1  :   61  
##  PARALYSIS AGITANS         :  115   782    :   59  
##  DISTURBANCE SKIN SENSATION:  103   356.9  :   42  
##  MEMORY LOSS               :   97   339.2  :   32  
##  (Other)                   : 2092   (Other):  925  
##                              DX3.DESCRIPTION       DX4       
##                                      :19030          :19705  
##  HEADACHE                            :   63   356.9  :   19  
##  CERVICALGIA                         :   61   782    :   19  
##  DISTURBANCE SKIN SENSATION          :   59   784    :   17  
##  UNS IDIOPATHIC PERIPH NEUROPATHY    :   42   V26.33 :   15  
##  POST-TRAUMATIC HEADACHE, UNSPECIFIED:   32   729.5  :   13  
##  (Other)                             :  925   (Other):  424  
##                          DX4.DESCRIPTION       DX5        
##                                  :19705   Min.   : 93.89  
##  DISTURBANCE SKIN SENSATION      :   19   1st Qu.:305.70  
##  UNS IDIOPATHIC PERIPH NEUROPATHY:   19   Median :378.82  
##  HEADACHE                        :   17   Mean   :484.39  
##  GENETIC COUNSELING              :   15   3rd Qu.:780.93  
##  MEMORY LOSS                     :   13   Max.   :787.20  
##  (Other)                         :  424   NA's   :20197   
##                          DX5.DESCRIPTION             REF.PROV    
##                                  :20197                  : 8209  
##  MEMORY LOSS                     :    3   ROSENFELD,JANE :  106  
##  ABNORMALITY OF GAIT             :    1   ARMAS,BARBARA J:   98  
##  DISPLACE INTERVERT DISC SITE UNS:    1   YU,FRAN        :   92  
##  DYSPHAGIA,UNSPECIFIED           :    1   MARK,MARGERY H :   89  
##  MONONEURITIS UNS SITE           :    1   DANISH,SHABBAR :   81  
##  (Other)                         :    8   (Other)        :11537  
##   REF.PROV.ZIP  REF.PROV.SPEC   STATUS      CancellationTiming_Days
##         :9074   Min.   :88      ARR:12270   Min.   :  0            
##  8901   :2061   1st Qu.:88      CAN: 6317   1st Qu.:  1            
##  8816   : 504   Median :88      NOS: 1625   Median :  6            
##  8903   : 333   Mean   :88                  Mean   : 18            
##  8831   : 309   3rd Qu.:88                  3rd Qu.: 23            
##  8857   : 308   Max.   :88                  Max.   :203            
##  (Other):7623   NA's   :20211               NA's   :13895          
##  CancellationCategory Outcome    
##            :13895     ARR:12270  
##  168to336hr:  633     NOS: 7942  
##  24hr      : 2022                
##  24to48hr  :  289                
##  336hr     : 2017                
##  48to168hr : 1356                
## 
# Re-inspect the column types after the conversions above
str(Records)
## 'data.frame':    20212 obs. of  54 variables:
##  $ MRN                    : num  5406458 5076135 5373626 2917014 3058711 ...
##  $ ZIP                    : num  611 585 263 628 582 633 603 524 623 546 ...
##  $ DistanceToClinic       : int  10 8 10 6 6 0 11 14 4 21 ...
##  $ AGE                    : int  54 60 69 65 39 56 50 58 57 51 ...
##  $ DT                     : Factor w/ 505 levels "1/10/2014","1/12/2015",..: 33 351 275 12 189 218 341 375 208 239 ...
##  $ Time                   : Factor w/ 48 levels "01:00PM","01:10PM",..: 10 42 28 14 35 3 3 27 14 27 ...
##  $ TimeFrame_Hour         : Factor w/ 10 levels "1","2","3","4",..: 2 10 6 3 8 1 1 6 3 6 ...
##  $ Weekday                : Factor w/ 5 levels "Friday","Monday",..: 2 2 1 3 2 2 1 2 2 4 ...
##  $ Month                  : Factor w/ 12 levels "April","August",..: 5 7 1 5 4 8 7 7 8 8 ...
##  $ Season                 : logi  NA NA NA NA NA NA ...
##  $ SCHED.PROV             : Factor w/ 25 levels "AHMAD,HAROON RES",..: 5 24 2 25 7 19 5 7 11 14 ...
##  $ SCHEDPROV_LastName     : Factor w/ 25 levels "AHMAD","ALBRECHT",..: 5 23 2 25 6 18 5 6 10 13 ...
##  $ VT                     : Factor w/ 17 levels "BOT","BTR","DOP",..: 5 7 7 5 7 7 7 7 7 7 ...
##  $ VisitType              : Factor w/ 2 levels "IPV","RPV": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DURATION               : int  90 60 60 90 60 90 90 60 90 60 ...
##  $ CANCEL.DT              : Factor w/ 557 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CAN.BUMP.INITIAL       : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CAN.BUMP.INITITALS     : Factor w/ 43 levels "","RWJAACEVEDO",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CANCEL.REASON          : Factor w/ 23 levels "","DEATH IN FAMILY",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PCC                    : Factor w/ 2356 levels "","ABBAS ,SHAHIDA",..: 1386 1183 2204 1745 1264 856 11 2072 791 2172 ...
##  $ Lead.Time              : int  0 0 1 3 3 3 4 4 4 4 ...
##  $ DT.WHEN.SCHED          : Factor w/ 616 levels "1/10/2014","1/12/2015",..: 42 452 334 167 207 251 487 481 295 220 ...
##  $ DT.WHEN.RESCHED        : Factor w/ 552 levels "","1/10/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COMMENTS               : Factor w/ 3995 levels "#NAME?","?OF SZ DISORDER",..: 440 584 1959 2598 1730 1576 1823 2241 1237 1374 ...
##  $ MARITAL                : Factor w/ 5 levels "DIVORCED","MARRIED",..: 1 2 5 2 2 4 2 1 2 2 ...
##  $ SEX                    : Factor w/ 2 levels "F","M": 1 1 1 2 1 2 1 2 2 2 ...
##  $ EMPLOYER               : Factor w/ 22 levels "","AON CONSULTING",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REG.FSC                : Factor w/ 61 levels "AETNA HMO","AETNA MEDICARE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REG.FSC.1              : Factor w/ 5 levels "Commercial","Indigent",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCH.PROV.CATEGORY      : Factor w/ 4 levels "EPILEPSY","GEN NEUROLOGY/HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
##  $ SCH.PROV.CATEGORY.1    : Factor w/ 4 levels "EPILEPSY","GENNEUROLOGY_HEADACHE",..: 4 1 1 4 2 4 4 2 4 3 ...
##  $ INV..BILLED            : int  12411684 11833282 12675891 11707151 11569067 11658495 11880306 11861189 11641511 12525819 ...
##  $ INVBAL                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ invoicebalance         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BILLING.PROVIDER       : Factor w/ 26 levels "","ALBRECHT,CATHERINE",..: 26 26 25 24 8 26 12 8 26 15 ...
##  $ SERVICING.PROVIDER     : Factor w/ 39 levels "","AHMAD,HAROON RES",..: 10 38 4 39 13 32 10 13 17 22 ...
##  $ HOS                    : Factor w/ 5 levels "","CANCER INSTITUTE OF NEW JERSEY",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ DX1                    : Factor w/ 341 levels "","0","135","138",..: 268 133 134 155 289 292 301 303 117 88 ...
##  $ DX1.DESCRIPTION        : Factor w/ 343 levels "","ABDOMINAL PAIN OTHER SITE",..: 320 240 239 30 94 222 92 117 59 112 ...
##  $ DX2                    : Factor w/ 427 levels "","130.9","153.9",..: 1 230 1 1 1 1 1 1 1 1 ...
##  $ DX2.DESCRIPTION        : Factor w/ 427 levels "","ABDOM/PELVIC SWELLING UNSP SITE",..: 1 185 1 1 1 1 1 1 1 1 ...
##  $ DX3                    : Factor w/ 323 levels "","13.04","130.7",..: 1 119 1 1 1 1 1 1 1 1 ...
##  $ DX3.DESCRIPTION        : Factor w/ 323 levels "","ABNORMAL CNS FUNCT STUDY OT",..: 1 297 1 1 1 1 1 1 1 1 ...
##  $ DX4                    : Factor w/ 189 levels "","183","191.9",..: 1 78 1 1 1 1 1 1 1 1 ...
##  $ DX4.DESCRIPTION        : Factor w/ 189 levels "","ABNORM EXAM FINDINGS,OTHER",..: 1 176 1 1 1 1 1 1 1 1 ...
##  $ DX5                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ DX5.DESCRIPTION        : Factor w/ 14 levels "","ABNORMALITY OF GAIT",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REF.PROV               : Factor w/ 2309 levels "","ABBAS ,SHAHIDA",..: 1 1 2157 1713 1216 830 11 2016 761 171 ...
##  $ REF.PROV.ZIP           : Factor w/ 366 levels "","07860-2769",..: 1 1 108 342 319 356 270 356 358 358 ...
##  $ REF.PROV.SPEC          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ STATUS                 : Factor w/ 3 levels "ARR","CAN","NOS": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CancellationTiming_Days: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ CancellationCategory   : Factor w/ 6 levels "","168to336hr",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Outcome                : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
# Outcome plus the predictor columns used for modelling.
myvars <- c(
  "Outcome",
  "DistanceToClinic", "SEX", "MARITAL", "AGE",
  "TimeFrame_Hour", "Weekday", "Month",
  "VisitType", "DURATION",
  "REG.FSC.1", "SCH.PROV.CATEGORY.1"
)
# Keep only those columns, in the order listed above.
newdata <- Records[, myvars, drop = FALSE]

# Summary of the modelling data set
summary(newdata)
##  Outcome     DistanceToClinic SEX            MARITAL           AGE       
##  ARR:12270   Min.   :  0.00   F:10611   DIVORCED : 1305   Min.   : 6.00  
##  NOS: 7942   1st Qu.:  6.00   M: 9601   MARRIED  :10909   1st Qu.:44.00  
##              Median : 14.00             SEPARATED:  257   Median :60.00  
##              Mean   : 19.46             SINGLE   : 6384   Mean   :57.23  
##              3rd Qu.: 30.00             WIDOWED  : 1357   3rd Qu.:71.00  
##              Max.   :120.00                               Max.   :97.00  
##                                                                          
##  TimeFrame_Hour      Weekday           Month      VisitType  
##  10     :3246   Friday   :2627   March    :1950   IPV: 7147  
##  9      :2964   Monday   :5032   October  :1858   RPV:13065  
##  1      :2918   Thursday :5021   May      :1797              
##  12     :2701   Tuesday  :4711   September:1756              
##  11     :2443   Wednesday:2821   December :1744              
##  2      :1855                    January  :1714              
##  (Other):4085                    (Other)  :9393              
##     DURATION           REG.FSC.1               SCH.PROV.CATEGORY.1
##  Min.   : 15.00   Commercial:6358   EPILEPSY             : 3494   
##  1st Qu.: 30.00   Indigent  : 765   GENNEUROLOGY_HEADACHE: 3347   
##  Median : 30.00   Medicaid  :3689   MOVEMENT DISORDERS   :10903   
##  Mean   : 45.46   Medicare  :9363   RESIDENT             : 2468   
##  3rd Qu.: 60.00   Other     :  37                                 
##  Max.   :690.00                                                   
## 
samples  <- sample(nrow(newdata),as.integer(nrow(newdata)*0.75))
train.newdata = newdata[samples,]
test.newdata  = newdata[-samples,]

# 1) SVM classification
#
# Fit a support vector machine on the training rows. Only the formula and the
# data are supplied, so every tuning parameter keeps its e1071 default (the
# recorded summary output reports a radial kernel with cost 1).
model1 <- svm(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + VisitType + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata
)

# Print the fitted model's parameters and support-vector counts.
summary(model1)
## 
## Call:
## svm(formula = Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + 
##     TimeFrame_Hour + Weekday + Month + VisitType + DURATION + 
##     REG.FSC.1 + SCH.PROV.CATEGORY.1, data = train.newdata)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.02439024 
## 
## Number of Support Vectors:  12338
## 
##  ( 5983 6355 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  ARR NOS
# Score the held-out rows with the SVM and compare against the recorded
# outcome. predict() on an svm fit returns class labels by default; the former
# `type = "response"` argument is not one that method defines and was silently
# swallowed by `...`, so it is dropped.
pred_model1 <- predict(model1, test.newdata)

# Rows are the predictions, columns are the observed outcomes.
mtab_model1 <- table(pred_model1, test.newdata$Outcome)

# caret takes the first level ("ARR") as the positive class here, so the
# reported Sensitivity refers to "ARR" and Specificity to "NOS".
confusionMatrix(mtab_model1)
## Confusion Matrix and Statistics
## 
##            
## pred_model1  ARR  NOS
##         ARR 3093 1956
##         NOS    1    3
##                                           
##                Accuracy : 0.6127          
##                  95% CI : (0.5991, 0.6262)
##     No Information Rate : 0.6123          
##     P-Value [Acc > NIR] : 0.4832          
##                                           
##                   Kappa : 0.0015          
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.999677        
##             Specificity : 0.001531        
##          Pos Pred Value : 0.612597        
##          Neg Pred Value : 0.750000        
##              Prevalence : 0.612310        
##          Detection Rate : 0.612112        
##    Detection Prevalence : 0.999208        
##       Balanced Accuracy : 0.500604        
##                                           
##        'Positive' Class : ARR             
## 
# Overall accuracy of the SVM on the held-out rows: the diagonal of the
# observed-vs-predicted cross-tab holds the correctly classified counts.
accuracy_75_model1 <- local({
  hits <- diag(table(test.newdata$Outcome, pred_model1))
  sum(hits) / nrow(test.newdata)
})
accuracy_75_model1
## [1] 0.6127053
# 2) Neural net
library(nnet)

# Single-hidden-layer network with 4 hidden units, weight decay of 0.0001 and
# at most 500 optimiser iterations, fitted on the training rows.
model2 <- nnet(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + VisitType + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata,
  size = 4,
  decay = 0.0001,
  maxit = 500
)
## # weights:  169
## initial  value 10358.216566 
## iter  10 value 10113.115848
## iter  20 value 10089.190258
## iter  30 value 10029.844761
## iter  40 value 9973.178266
## iter  50 value 9953.085802
## iter  60 value 9936.549075
## iter  70 value 9934.671358
## iter  80 value 9933.473612
## iter  90 value 9904.543769
## iter 100 value 9893.791045
## iter 110 value 9890.563102
## iter 120 value 9889.153431
## iter 130 value 9888.205942
## iter 140 value 9887.906276
## iter 150 value 9887.779135
## iter 160 value 9887.638217
## iter 170 value 9887.593243
## iter 180 value 9887.343541
## iter 190 value 9887.270460
## iter 200 value 9887.239130
## iter 210 value 9887.201103
## iter 220 value 9887.147069
## iter 230 value 9887.108152
## iter 240 value 9887.094426
## iter 250 value 9887.004582
## final  value 9887.004297 
## converged
# Class predictions for the held-out rows. With type = "class" the nnet
# predict method returns a character vector, so a class that is never
# predicted would vanish from the cross-tab and leave it non-square, which
# confusionMatrix() does not accept. Re-encoding with the outcome's levels
# keeps both classes present in the table.
pred_model2 <- factor(
  predict(model2, test.newdata, type = "class"),
  levels = levels(test.newdata$Outcome)
)

# Rows are the predictions, columns are the observed outcomes.
mtab_model2 <- table(pred_model2, test.newdata$Outcome)
confusionMatrix(mtab_model2)
## Confusion Matrix and Statistics
## 
##            
## pred_model2  ARR  NOS
##         ARR 2780 1754
##         NOS  314  205
##                                          
##                Accuracy : 0.5907         
##                  95% CI : (0.577, 0.6043)
##     No Information Rate : 0.6123         
##     P-Value [Acc > NIR] : 0.9992         
##                                          
##                   Kappa : 0.0037         
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.8985         
##             Specificity : 0.1046         
##          Pos Pred Value : 0.6131         
##          Neg Pred Value : 0.3950         
##              Prevalence : 0.6123         
##          Detection Rate : 0.5502         
##    Detection Prevalence : 0.8973         
##       Balanced Accuracy : 0.5016         
##                                          
##        'Positive' Class : ARR            
## 
# Share of held-out rows whose predicted label equals the observed one.
# Labels are compared element-wise rather than via diag(table(...)):
# pred_model2 can be a character vector, and if only one class were predicted
# the cross-tab would be 2 x 1, so its "diagonal" would no longer be the
# count of correctly classified rows.
# NA comparisons are dropped from the numerator while the denominator stays
# nrow(test.newdata), matching how the tabulated version treated them.
accuracy_75_model2 <- sum(
  as.character(pred_model2) == as.character(test.newdata$Outcome),
  na.rm = TRUE
) / nrow(test.newdata)
accuracy_75_model2
## [1] 0.5907382
# 3) Classification and Regression Trees (CART)
library(rpart)

# Grow a tree for the factor response Outcome on the training rows, leaving
# all rpart control settings at their defaults.
model3 <- rpart(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + VisitType + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata
)

# Predicted class labels for the held-out rows, cross-tabulated against the
# observed outcome (rows = predicted, columns = observed).
pred_model3 <- predict(model3, test.newdata, type = "class")
mtab_model3 <- table(pred_model3, test.newdata$Outcome)
confusionMatrix(mtab_model3)
## Confusion Matrix and Statistics
## 
##            
## pred_model3  ARR  NOS
##         ARR 3094 1959
##         NOS    0    0
##                                           
##                Accuracy : 0.6123          
##                  95% CI : (0.5987, 0.6258)
##     No Information Rate : 0.6123          
##     P-Value [Acc > NIR] : 0.5062          
##                                           
##                   Kappa : 0               
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.6123          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.6123          
##          Detection Rate : 0.6123          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : ARR             
## 
# Overall accuracy of the tree on the held-out rows: the diagonal of the
# observed-vs-predicted cross-tab holds the correctly classified counts.
accuracy_75_model3 <- local({
  hits <- diag(table(test.newdata$Outcome, pred_model3))
  sum(hits) / nrow(test.newdata)
})
accuracy_75_model3
## [1] 0.6123095