library(e1071)
library(readr)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(MASS)
# Load the arrival / no-show appointment export.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# local copy of the CSV before running.
records_path <- "C:/Users/aksha/Desktop/Shiny app/RPV/Arr - noshow RPV.csv"
# "UTF-8-BOM" strips the byte-order mark at the start of the file. Without it
# the first column name is read as "ï..MRN" instead of "MRN".
Records <- read.csv(records_path, fileEncoding = "UTF-8-BOM")

#View(Records)
# Remove rows whose ZIP is NA. The column is selected by name so the filter
# keeps working if the column order of the export changes.
Records <- Records[!is.na(Records$ZIP), ]
str(Records)
## 'data.frame':    9049 obs. of  56 variables:
##  $ ï..MRN                 : int  5169984 5169984 5169984 5169984 5169984 5169984 5169984 2908595 2929531 5169984 ...
##  $ ZIP                    : Factor w/ 446 levels "07001","07002",..: 133 133 133 133 133 133 133 391 359 133 ...
##  $ DistanceToClinic       : num  34.3 34.3 34.3 34.3 34.3 34.3 34.3 0.8 10.6 34.3 ...
##  $ AGE                    : int  60 60 60 60 60 60 60 63 61 60 ...
##  $ LOCATION               : Factor w/ 1 level "CAB 6TH FLOOR": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DT                     : Factor w/ 494 levels "1/10/2014","1/12/2015",..: 107 355 122 219 371 383 452 36 109 196 ...
##  $ Time                   : Factor w/ 45 levels "01:00PM","01:10PM",..: 29 27 29 29 31 29 27 44 33 33 ...
##  $ TimeFrame_Hour         : int  9 8 9 9 9 9 8 12 10 10 ...
##  $ Weekday                : Factor w/ 5 levels "Friday","Monday",..: 2 3 2 3 2 5 3 3 4 5 ...
##  $ Month                  : Factor w/ 12 levels "April","August",..: 10 7 3 8 6 6 2 5 10 4 ...
##  $ Season                 : logi  NA NA NA NA NA NA ...
##  $ SCHED.PROV             : Factor w/ 25 levels "AHMAD,HAROON RES",..: 3 3 3 21 3 3 3 12 3 3 ...
##  $ SCHEDPROV_LastName     : Factor w/ 25 levels "AHMAD","ALBRECHT",..: 3 3 3 20 3 3 3 11 3 3 ...
##  $ VT                     : Factor w/ 11 levels "BTR","DOP","MDR",..: 5 5 5 5 8 8 8 11 4 5 ...
##  $ VisitType              : Factor w/ 1 level "RPV": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DURATION               : int  60 60 60 60 60 60 60 45 30 60 ...
##  $ CANCEL.DT              : logi  NA NA NA NA NA NA ...
##  $ CAN.BUMP.INITIAL       : logi  NA NA NA NA NA NA ...
##  $ CAN.BUMP.INITITALS     : logi  NA NA NA NA NA NA ...
##  $ CANCEL.REASON          : logi  NA NA NA NA NA NA ...
##  $ PCC                    : Factor w/ 1515 levels "","ABBAS,SHAHIDA M",..: 1434 1434 1434 1434 1434 1434 1434 1200 1393 1434 ...
##  $ Lead.Time              : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ DT.WHEN.SCHED          : Factor w/ 600 levels "1/10/2014","1/12/2015",..: 118 452 133 253 473 485 557 47 118 216 ...
##  $ DT.WHEN.RESCHED        : logi  NA NA NA NA NA NA ...
##  $ COMMENTS               : Factor w/ 1028 levels "1 MON F/U","1 YEAR F/U (DR WONG)",..: 246 246 246 246 302 320 312 121 204 241 ...
##  $ MARITAL                : Factor w/ 5 levels "DIVORCED","MARRIED",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ SEX                    : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ EMPLOYER               : Factor w/ 15 levels "","AT&T","HAIMM,NEIL",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ EMPLOYER.GROUP         : logi  NA NA NA NA NA NA ...
##  $ REG.FSC                : Factor w/ 52 levels "AETNA HMO","AETNA MEDICARE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REG.FSC.1              : Factor w/ 5 levels "Commercial","Indigent",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCH.PROV.CATEGORY      : Factor w/ 4 levels "EPILEPSY","GEN NEUROLOGY/HEADACHE",..: 3 3 3 3 3 3 3 4 3 3 ...
##  $ SCH.PROV.CATEGORY.1    : Factor w/ 4 levels "EPILEPSY","GENNEUROLOGY_HEADACHE",..: 3 3 3 3 3 3 3 4 3 3 ...
##  $ INV..BILLED            : int  11426195 12223483 12434701 11807074 11426178 11426182 11426187 12403675 11525145 12868203 ...
##  $ INVBAL                 : num  0 0 0 0 0 0 0 0 0 5 ...
##  $ invoicebalance         : num  0 0 0 0 0 0 0 0 0 5 ...
##  $ BILLING.PROVIDER       : Factor w/ 20 levels "","ALBRECHT,CATHERINE",..: 18 18 4 18 4 4 18 18 18 4 ...
##  $ SERVICING.PROVIDER     : Factor w/ 33 levels "","AHMAD,HAROON RES",..: 5 5 5 29 5 5 5 17 5 5 ...
##  $ HOS                    : Factor w/ 5 levels "","CANCER INSTITUTE OF NEW JERSEY",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ DX1                    : Factor w/ 241 levels "","000","000.0",..: 64 64 64 64 64 64 64 124 64 64 ...
##  $ DX1.DESCRIPTION        : Factor w/ 241 levels "","ABNORMAL FINDINGS SEMEN",..: 161 161 161 161 161 161 161 223 161 161 ...
##  $ DX2                    : Factor w/ 325 levels "","053.19","078.5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DX2.DESCRIPTION        : Factor w/ 325 levels "","ABDOM/PELVIC SWELLING UNSP SITE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DX3                    : Factor w/ 244 levels "","013.04","042",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DX3.DESCRIPTION        : Factor w/ 244 levels "","ABNORMAL CNS FUNCT STUDY OT",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DX4                    : Factor w/ 136 levels "","183.0","209.29",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DX4.DESCRIPTION        : Factor w/ 136 levels "","ABNORM EXAM FINDINGS,OTHER",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DX5                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ DX5.DESCRIPTION        : Factor w/ 12 levels "","DISPLACE INTERVERT DISC SITE UNS",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REF.PROV               : Factor w/ 1610 levels "","ABBAS,SHAHIDA M",..: 1524 1524 1524 1524 1524 1524 1524 1271 1482 1524 ...
##  $ REF.PROV.ZIP           : Factor w/ 300 levels "","01812","06512",..: 204 204 204 204 204 204 204 255 218 204 ...
##  $ REF.PROV.SPEC          : logi  NA NA NA NA NA NA ...
##  $ STATUS                 : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CancellationTiming_Days: Factor w/ 2 levels "No Can","No Show": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CancellationCategory   : Factor w/ 1 level "0hr": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Outcome                : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary of the loaded data (quartiles for numeric columns, level
# counts for factors, NA counts) to spot empty or constant columns.
summary(Records)
##      ï..MRN             ZIP       DistanceToClinic      AGE       
##  Min.   :2902470   08831  : 500   Min.   :  0.80   Min.   :17.00  
##  1st Qu.:2992111   08873  : 359   1st Qu.:  8.10   1st Qu.:50.00  
##  Median :3623995   08901  : 326   Median : 14.50   Median :64.00  
##  Mean   :4013417   08816  : 268   Mean   : 20.96   Mean   :60.76  
##  3rd Qu.:5145983   08854  : 245   3rd Qu.: 32.60   3rd Qu.:73.00  
##  Max.   :5436187   08902  : 238   Max.   :120.00   Max.   :96.00  
##                    (Other):7113                                   
##           LOCATION             DT            Time      TimeFrame_Hour  
##  CAB 6TH FLOOR:9049   3/16/2015 :  42   12:30PM: 977   Min.   : 1.000  
##                       12/15/2014:  39   11:00AM: 830   1st Qu.: 3.000  
##                       3/30/2015 :  38   10:00AM: 782   Median :10.000  
##                       8/18/2014 :  38   10:30AM: 680   Mean   : 8.013  
##                       9/9/2013  :  37   09:00AM: 653   3rd Qu.:11.000  
##                       1/12/2015 :  34   09:30AM: 562   Max.   :12.000  
##                       (Other)   :8821   (Other):4565                   
##       Weekday           Month       Season       
##  Friday   : 964   October  : 868   Mode:logical  
##  Monday   :2208   May      : 856   NA's:9049     
##  Thursday :2287   March    : 838                 
##  Tuesday  :2096   April    : 799                 
##  Wednesday:1494   September: 791                 
##                   December : 737                 
##                   (Other)  :4160                 
##               SCHED.PROV   SCHEDPROV_LastName       VT       VisitType 
##  MARK,MARGERY      :1527   MARK     :1527     RPV    :6286   RPV:9049  
##  SAGE,JACOB        :1370   SAGE     :1370     BTR    :1292             
##  GOLBE,LAWRENCE    :1214   GOLBE    :1214     P60    : 550             
##  SCHNEIDER,DANIEL  :1113   SCHNEIDER:1113     RBH    : 406             
##  CAPUTO,DEBORAH    : 836   CAPUTO   : 836     PSP    : 198             
##  ALBRECHT,CATHERINE: 638   ALBRECHT : 638     P30    : 142             
##  (Other)           :2351   (Other)  :2351     (Other): 175             
##     DURATION      CANCEL.DT      CAN.BUMP.INITIAL CAN.BUMP.INITITALS
##  Min.   : 15.00   Mode:logical   Mode:logical     Mode:logical      
##  1st Qu.: 30.00   NA's:9049      NA's:9049        NA's:9049         
##  Median : 30.00                                                     
##  Mean   : 32.34                                                     
##  3rd Qu.: 30.00                                                     
##  Max.   :180.00                                                     
##                                                                     
##  CANCEL.REASON               PCC         Lead.Time        DT.WHEN.SCHED 
##  Mode:logical                  : 159   Min.   :  0.0   12/1/2014 :  50  
##  NA's:9049      HASTINGS,SHIRIN:  91   1st Qu.: 14.0   9/17/2013 :  45  
##                 YU,FRAN        :  86   Median : 45.0   2/2/2015  :  44  
##                 OTHER,REFPHYS  :  74   Mean   : 63.2   1/26/2015 :  38  
##                 ROSENFELD,JANE :  64   3rd Qu.: 98.0   12/15/2014:  38  
##                 ARMAS,BARBARA J:  60   Max.   :205.0   10/29/2013:  37  
##                 (Other)        :8515                   (Other)   :8797  
##  DT.WHEN.RESCHED          COMMENTS         MARITAL     SEX     
##  Mode:logical    RPV          :3655   DIVORCED : 555   F:4435  
##  NA's:9049       BTR          :1081   MARRIED  :5339   M:4614  
##                  RPV/FOLLOW UP: 833   SEPARATED: 100           
##                  RBH          : 290   SINGLE   :2359           
##                  F/U          : 258   WIDOWED  : 696           
##                  P60          : 203                            
##                  (Other)      :2729                            
##                     EMPLOYER    EMPLOYER.GROUP
##                         :8994   Mode:logical  
##  MIDD CTY BD OF SOC SVCS:   7   NA's:9049     
##  RETIRED                :   6                 
##  HAIMM,NEIL             :   5                 
##  phil                   :   5                 
##  RWJ                    :   5                 
##  (Other)                :  27                 
##                             REG.FSC          REG.FSC.1   
##  MEDICARE US                    :4578   Commercial:2616  
##  HORIZON PPO                    : 806   Indigent  : 224  
##  HORIZON NJ HEALTH HORIZON MCAID: 588   Medicaid  :1145  
##  HORIZON POS                    : 484   Medicare  :5057  
##  UNITED HEALTHCARE MEDICAID     : 461   Other     :   7  
##  AETNA PPO                      : 250                    
##  (Other)                        :1882                    
##               SCH.PROV.CATEGORY            SCH.PROV.CATEGORY.1
##  EPILEPSY              :1465    EPILEPSY             :1465    
##  GEN NEUROLOGY/HEADACHE: 912    GENNEUROLOGY_HEADACHE: 912    
##  MOVEMENT DISORDERS    :6060    MOVEMENT DISORDERS   :6060    
##  RESIDENT              : 612    RESIDENT             : 612    
##                                                               
##                                                               
##                                                               
##   INV..BILLED           INVBAL        invoicebalance   
##  Min.   :11004964   Min.   :-310.22   Min.   :-310.22  
##  1st Qu.:11523379   1st Qu.:   0.00   1st Qu.:   0.00  
##  Median :11980990   Median :   0.00   Median :   0.00  
##  Mean   :11968257   Mean   :  13.38   Mean   :  13.38  
##  3rd Qu.:12428877   3rd Qu.:   0.00   3rd Qu.:   0.00  
##  Max.   :13092460   Max.   :8456.00   Max.   :8456.00  
##  NA's   :843        NA's   :843       NA's   :843      
##          BILLING.PROVIDER        SERVICING.PROVIDER
##  MARK,MARGERY    :1589    MARK,MARGERY    :1452    
##  SCHNEIDER,DANIEL:1499    SAGE,JACOB      :1300    
##  SAGE,JACOB      :1300    GOLBE,LAWRENCE  :1141    
##  GOLBE,LAWRENCE  :1142    SCHNEIDER,DANIEL:1015    
##                  : 843                    : 843    
##  MANI,RAM        : 487    CAPUTO,DEBORAH  : 792    
##  (Other)         :2189    (Other)         :2506    
##                               HOS            DX1      
##                                 : 845   332.0  :3440  
##  CANCER INSTITUTE OF NEW JERSEY :  14          : 843  
##  CHILD HEALTH INSTITUE OF NEW JE:   5   333.83 : 607  
##  CLINICAL ACADEMIC BUILDING     :8174   345.41 : 373  
##  ROBERT WOOD JOHNSON HOSPITAL   :  11   784.0  : 313  
##                                         351.8  : 302  
##                                         (Other):3171  
##                            DX1.DESCRIPTION      DX2      
##  PARALYSIS AGITANS                 :3440          :7559  
##                                    : 843   784.0  : 109  
##  SPASMODIC TORTICOLLIS             : 607   332.0  :  97  
##  PARTIAL EPILEPSY IMPAIRMENT INTRAC: 373   333.83 :  60  
##  HEADACHE                          : 313   780.93 :  48  
##  OTH FACIAL NERVE DISORDERS        : 302   723.1  :  43  
##  (Other)                           :3171   (Other):1133  
##               DX2.DESCRIPTION      DX3      
##                       :7559          :8398  
##  HEADACHE             : 109   356.9  :  32  
##  PARALYSIS AGITANS    :  97   784.0  :  30  
##  SPASMODIC TORTICOLLIS:  60   780.93 :  21  
##  MEMORY LOSS          :  48   782.0  :  18  
##  CERVICALGIA          :  43   332.0  :  15  
##  (Other)              :1133   (Other): 535  
##                          DX3.DESCRIPTION      DX4      
##                                  :8398          :8726  
##  UNS IDIOPATHIC PERIPH NEUROPATHY:  32   V26.33 :  15  
##  HEADACHE                        :  30   356.9  :  13  
##  MEMORY LOSS                     :  21   724.5  :   9  
##  DISTURBANCE SKIN SENSATION      :  18   781.2  :   8  
##  PARALYSIS AGITANS               :  15   784.0  :   8  
##  (Other)                         : 535   (Other): 270  
##                          DX4.DESCRIPTION      DX5        
##                                  :8726   Min.   : 93.89  
##  GENETIC COUNSELING              :  15   1st Qu.:266.00  
##  UNS IDIOPATHIC PERIPH NEUROPATHY:  13   Median :357.86  
##  BACKACHE UNSPECIFIED            :   9   Mean   :445.65  
##  ABNORMALITY OF GAIT             :   8   3rd Qu.:736.88  
##  HEADACHE                        :   8   Max.   :787.20  
##  (Other)                         : 270   NA's   :9037    
##                            DX5.DESCRIPTION            REF.PROV   
##                                    :9037                  :1266  
##  MEMORY LOSS                       :   2   MARK,MARGERY H :  78  
##  DISPLACE INTERVERT DISC SITE UNS  :   1   ARMAS,BARBARA J:  70  
##  DYSPHAGIA,UNSPECIFIED             :   1   ROSENFELD,JANE :  62  
##  OTH BENIGN NEO CONNEC SOFT TISS UN:   1   KIM,SARANG     :  57  
##  OTHER CARDIOVASCULAR SYPHILIS     :   1   YU,FRAN        :  48  
##  (Other)                           :   6   (Other)        :7468  
##   REF.PROV.ZIP  REF.PROV.SPEC  STATUS     CancellationTiming_Days
##         :1898   Mode:logical   ARR:8227   No Can :8227           
##  08901  :1062   NA's:9049      NOS: 822   No Show: 822           
##  08816  : 342                                                    
##  08903  : 223                                                    
##  08831  : 222                                                    
##  08820  : 198                                                    
##  (Other):5104                                                    
##  CancellationCategory Outcome   
##  0hr:9049             ARR:8227  
##                       NOS: 822  
##                                 
##                                 
##                                 
##                                 
## 
# Converting to nominal (factor) and numeric attributes

# ZIP may have been read as a factor. as.numeric() on a factor returns the
# internal level codes (1, 2, 3, ...), not the ZIP values, so convert via
# as.character() first. ZIP is not part of the modelling columns below.
Records$ZIP <- as.numeric(as.character(Records$ZIP))
# NOTE(review): as.integer() truncates the fractional miles (0.8 becomes 0).
# Kept as-is so the fitted models stay comparable; confirm this is intended.
Records$DistanceToClinic <- as.integer(Records$DistanceToClinic)

# Columns that must be treated as categorical.
factor_cols <- c(
  "TimeFrame_Hour", "Weekday", "Month", "SCHED.PROV", "VT", "VisitType",
  "MARITAL", "SEX", "REG.FSC.1", "SCH.PROV.CATEGORY.1", "Outcome", "STATUS"
)
Records[factor_cols] <- lapply(Records[factor_cols], as.factor)

# Response (Outcome) followed by the predictors used by every model below.
myvars <- c(
  "Outcome", "DistanceToClinic", "SEX", "MARITAL", "AGE", "TimeFrame_Hour",
  "Weekday", "Month", "DURATION", "REG.FSC.1", "SCH.PROV.CATEGORY.1"
)

newdata <- Records[myvars]
newdata$AGE <- as.numeric(newdata$AGE)
newdata$DURATION <- as.numeric(newdata$DURATION)

summary(newdata)
##  Outcome    DistanceToClinic SEX           MARITAL          AGE       
##  ARR:8227   Min.   :  0.00   F:4435   DIVORCED : 555   Min.   :17.00  
##  NOS: 822   1st Qu.:  8.00   M:4614   MARRIED  :5339   1st Qu.:50.00  
##             Median : 14.00            SEPARATED: 100   Median :64.00  
##             Mean   : 20.48            SINGLE   :2359   Mean   :60.76  
##             3rd Qu.: 32.00            WIDOWED  : 696   3rd Qu.:73.00  
##             Max.   :120.00                             Max.   :96.00  
##                                                                       
##  TimeFrame_Hour      Weekday           Month         DURATION     
##  12     :1559   Friday   : 964   October  : 868   Min.   : 15.00  
##  10     :1543   Monday   :2208   May      : 856   1st Qu.: 30.00  
##  11     :1476   Thursday :2287   March    : 838   Median : 30.00  
##  9      :1320   Tuesday  :2096   April    : 799   Mean   : 32.34  
##  1      :1115   Wednesday:1494   September: 791   3rd Qu.: 30.00  
##  2      : 721                    December : 737   Max.   :180.00  
##  (Other):1315                    (Other)  :4160                   
##       REG.FSC.1               SCH.PROV.CATEGORY.1
##  Commercial:2616   EPILEPSY             :1465    
##  Indigent  : 224   GENNEUROLOGY_HEADACHE: 912    
##  Medicaid  :1145   MOVEMENT DISORDERS   :6060    
##  Medicare  :5057   RESIDENT             : 612    
##  Other     :   7                                 
##                                                  
## 
# Check the column types of the modelling frame after the conversions.
str(newdata)
## 'data.frame':    9049 obs. of  11 variables:
##  $ Outcome            : Factor w/ 2 levels "ARR","NOS": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DistanceToClinic   : int  34 34 34 34 34 34 34 0 10 34 ...
##  $ SEX                : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ MARITAL            : Factor w/ 5 levels "DIVORCED","MARRIED",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ AGE                : num  60 60 60 60 60 60 60 63 61 60 ...
##  $ TimeFrame_Hour     : Factor w/ 10 levels "1","2","3","4",..: 7 6 7 7 7 7 6 10 8 8 ...
##  $ Weekday            : Factor w/ 5 levels "Friday","Monday",..: 2 3 2 3 2 5 3 3 4 5 ...
##  $ Month              : Factor w/ 12 levels "April","August",..: 10 7 3 8 6 6 2 5 10 4 ...
##  $ DURATION           : num  60 60 60 60 60 60 60 45 30 60 ...
##  $ REG.FSC.1          : Factor w/ 5 levels "Commercial","Indigent",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCH.PROV.CATEGORY.1: Factor w/ 4 levels "EPILEPSY","GENNEUROLOGY_HEADACHE",..: 3 3 3 3 3 3 3 4 3 3 ...
# Fix the RNG seed so the train/test split is reproducible.
set.seed(5000)

# Draw 75% of the row indices (rounded down) for training; the remaining
# rows form the test set.
n_obs <- nrow(newdata)
samples <- sample(n_obs, as.integer(n_obs * 0.75))
train.newdata <- newdata[samples, ]
test.newdata <- newdata[-samples, ]


#1) SVM Classification

library(e1071)
# Fit an SVM classifier on the training rows. No kernel, cost or gamma is
# passed, so the e1071 defaults apply.
model1 <- svm(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata
)

# Summarize the model (kernel, parameters, number of support vectors)
summary(model1)
## 
## Call:
## svm(formula = Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + 
##     TimeFrame_Hour + Weekday + Month + DURATION + REG.FSC.1 + 
##     SCH.PROV.CATEGORY.1, data = train.newdata)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.025 
## 
## Number of Support Vectors:  2176
## 
##  ( 1575 601 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  ARR NOS
#Predict using the model
# predict.svm() has no `type` argument; the former type="response" was
# silently swallowed by `...`, so it is dropped. The result is a factor of
# predicted class labels.
pred_model1 <- predict(model1, test.newdata)
# Rows are predictions, columns are the observed outcomes.
mtab_model1 <- table(pred_model1, test.newdata$Outcome)

# No `positive` class is passed, so caret uses the first level ("ARR").
# Sensitivity therefore describes arrivals and specificity describes no-shows.
confusionMatrix(mtab_model1)
## Confusion Matrix and Statistics
## 
##            
## pred_model1  ARR  NOS
##         ARR 2042  221
##         NOS    0    0
##                                           
##                Accuracy : 0.9023          
##                  95% CI : (0.8894, 0.9143)
##     No Information Rate : 0.9023          
##     P-Value [Acc > NIR] : 0.5179          
##                                           
##                   Kappa : 0               
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.9023          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.9023          
##          Detection Rate : 0.9023          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : ARR             
## 
# Test-set accuracy of the SVM: correctly classified rows (diagonal of the
# observed-by-predicted table) divided by the number of test rows.
svm_confusion <- table(test.newdata$Outcome, pred_model1)
accuracy_75_model1_no_show_Arrival_RPV <-
  sum(diag(svm_confusion)) / nrow(test.newdata)
accuracy_75_model1_no_show_Arrival_RPV
## [1] 0.902342
#2) LOGISTIC Regression

# Binomial GLM on the training rows. With a factor response, glm() treats the
# first level ("ARR") as failure, so fitted probabilities are P(Outcome = "NOS").
model2 <- glm(
  Outcome ~ DistanceToClinic + SEX + MARITAL + AGE + TimeFrame_Hour +
    Weekday + Month + DURATION + REG.FSC.1 + SCH.PROV.CATEGORY.1,
  data = train.newdata,
  family = "binomial"
)
# Predicted no-show probabilities, thresholded at 0.5 (0 = ARR, 1 = NOS).
pred1 <- predict(model2, test.newdata, type = "response")
pred1 <- ifelse(pred1 < 0.5, 0, 1)
# Map the 0/1 predictions onto the Outcome levels before tabulating. If only
# one value is predicted, table(Outcome, pred1) is 2x1 and diag() returns just
# cell [1,1], which is the wrong count when every prediction is 1. Giving the
# predictions both levels keeps the table 2x2 in every case.
pred1_class <- factor(
  pred1,
  levels = c(0, 1),
  labels = levels(test.newdata$Outcome)
)
accuracy_75_log_arrival_noshow_rpv <-
  sum(diag(table(test.newdata$Outcome, pred1_class))) / nrow(test.newdata)
accuracy_75_log_arrival_noshow_rpv
## [1] 0.902342
#3) STEP REGRESSION

# Forward selection by AIC: start from the intercept-only model and add terms
# one at a time, up to the model containing every column of train.newdata.
model1_null <- glm(Outcome ~ 1, data = train.newdata, family = "binomial")
model1_all <- glm(Outcome ~ ., data = train.newdata, family = "binomial")
forward_model <- stepAIC(
  model1_null,
  direction = "forward",
  scope = list(lower = model1_null, upper = model1_all)
)
## Start:  AIC=4062.8
## Outcome ~ 1
## 
##                       Df Deviance    AIC
## + SCH.PROV.CATEGORY.1  3   3859.2 3867.2
## + AGE                  1   3915.6 3919.6
## + REG.FSC.1            4   3951.5 3961.5
## + MARITAL              4   3957.2 3967.2
## + DistanceToClinic     1   4029.8 4033.8
## + TimeFrame_Hour       9   4035.5 4055.5
## + Weekday              4   4047.2 4057.2
## + SEX                  1   4053.4 4057.4
## <none>                     4060.8 4062.8
## + DURATION             1   4060.8 4064.8
## + Month               11   4046.1 4070.1
## 
## Step:  AIC=3867.19
## Outcome ~ SCH.PROV.CATEGORY.1
## 
##                    Df Deviance    AIC
## + AGE               1   3835.9 3845.9
## + MARITAL           4   3839.4 3855.4
## + REG.FSC.1         4   3841.7 3857.7
## + DURATION          1   3851.2 3861.2
## + DistanceToClinic  1   3856.7 3866.7
## <none>                  3859.2 3867.2
## + Weekday           4   3851.6 3867.6
## + SEX               1   3858.0 3868.0
## + Month            11   3846.6 3876.6
## + TimeFrame_Hour    9   3854.0 3880.0
## 
## Step:  AIC=3845.88
## Outcome ~ SCH.PROV.CATEGORY.1 + AGE
## 
##                    Df Deviance    AIC
## + DURATION          1   3826.5 3838.5
## + MARITAL           4   3824.1 3842.1
## + REG.FSC.1         4   3825.4 3843.4
## + DistanceToClinic  1   3832.5 3844.5
## <none>                  3835.9 3845.9
## + SEX               1   3834.5 3846.5
## + Weekday           4   3829.2 3847.2
## + Month            11   3823.1 3855.1
## + TimeFrame_Hour    9   3829.8 3857.8
## 
## Step:  AIC=3838.46
## Outcome ~ SCH.PROV.CATEGORY.1 + AGE + DURATION
## 
##                    Df Deviance    AIC
## + MARITAL           4   3814.4 3834.4
## + REG.FSC.1         4   3816.5 3836.5
## + DistanceToClinic  1   3823.6 3837.6
## <none>                  3826.5 3838.5
## + SEX               1   3825.4 3839.4
## + Weekday           4   3821.6 3841.6
## + Month            11   3814.7 3848.7
## + TimeFrame_Hour    9   3819.0 3849.0
## 
## Step:  AIC=3834.45
## Outcome ~ SCH.PROV.CATEGORY.1 + AGE + DURATION + MARITAL
## 
##                    Df Deviance    AIC
## + DistanceToClinic  1   3812.0 3834.0
## <none>                  3814.4 3834.4
## + REG.FSC.1         4   3807.6 3835.6
## + SEX               1   3814.1 3836.1
## + Weekday           4   3809.1 3837.1
## + Month            11   3802.4 3844.4
## + TimeFrame_Hour    9   3807.0 3845.0
## 
## Step:  AIC=3834
## Outcome ~ SCH.PROV.CATEGORY.1 + AGE + DURATION + MARITAL + DistanceToClinic
## 
##                  Df Deviance    AIC
## <none>                3812.0 3834.0
## + REG.FSC.1       4   3804.6 3834.6
## + SEX             1   3811.6 3835.6
## + Weekday         4   3807.1 3837.1
## + Month          11   3800.1 3844.1
## + TimeFrame_Hour  9   3804.4 3844.4
# Predicted no-show probabilities from the forward-selected model, thresholded
# at 0.5 (0 = ARR, 1 = NOS).
pred1 <- predict(forward_model, test.newdata, type = "response")
pred1 <- ifelse(pred1 < 0.5, 0, 1)
# Map the 0/1 predictions onto the Outcome levels before tabulating. If only
# one value is predicted, table(Outcome, pred1) is 2x1 and diag() returns just
# cell [1,1], which is the wrong count when every prediction is 1. Giving the
# predictions both levels keeps the table 2x2 in every case.
pred1_class <- factor(
  pred1,
  levels = c(0, 1),
  labels = levels(test.newdata$Outcome)
)
accuracy_75_step_arrival_noshow_rpv <-
  sum(diag(table(test.newdata$Outcome, pred1_class))) / nrow(test.newdata)
accuracy_75_step_arrival_noshow_rpv
## [1] 0.902342