## Keep only the observations whose target variable
## (default.payment.next.month) is recorded; rows with a missing target
## are dropped.
has_target <- !is.na(x$default.payment.next.month)
final <- x[has_target, ]
library(caTools)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Fix the RNG seed so the partition is reproducible, then let
## sample.split() mark rows for the training side (SplitRatio = 0.75),
## using the target column as its input.
set.seed(88)
split <- sample.split(
  final$default.payment.next.month,
  SplitRatio = 0.75
)

## Rows flagged TRUE form the training set; all remaining rows are held out
## as the test set.
dresstrain <- final[split, , drop = FALSE]
dresstest <- final[!split, , drop = FALSE]
library(randomForest)  
## Warning: package 'randomForest' was built under R version 3.3.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(e1071)  

## Recode the target as a factor in both partitions.
## The level set is computed once from the full data and applied to both
## splits. Converting each split on its own would give the two factors
## different levels whenever a class is absent from one partition, and the
## predicted-vs-actual comparison done later requires matching levels.
target_levels <- sort(unique(final$default.payment.next.month))
dresstrain$default.payment.next.month <- factor(
  dresstrain$default.payment.next.month,
  levels = target_levels
)
dresstest$default.payment.next.month <- factor(
  dresstest$default.payment.next.month,
  levels = target_levels
)

## Fit a 100-tree random forest for default.payment.next.month on the
## training partition.
## ID is excluded from the predictors: it appears to be a row identifier
## (NOTE(review): confirm), yet with `~ .` it was fed to the model and came
## out as the second most important variable, which suggests the forest was
## picking up on row order rather than on real attributes.
## NOTE: the importance table and confusion matrix recorded further down in
## this file were produced with ID still in the model, so they will differ
## after a re-run.
rf <- randomForest(
  default.payment.next.month ~ . - ID,
  data = dresstrain,
  ntree = 100
)

## Error rates of the forest as trees are added.
plot(rf)

## Per-variable importance scores for the fitted forest.
varImp(rf)
##             Overall
## ID        511.20584
## LIMIT_BAL 372.63264
## SEX        74.76610
## EDUCATION 132.16961
## MARRIAGE   91.27863
## AGE       401.43206
## PAY_0     717.82297
## PAY_2     355.23851
## PAY_3     224.47560
## PAY_4     163.23384
## PAY_5     143.28872
## PAY_6     164.07542
## BILL_AMT1 427.82303
## BILL_AMT2 387.65018
## BILL_AMT3 367.98343
## BILL_AMT4 361.55032
## BILL_AMT5 349.59847
## BILL_AMT6 353.40670
## PAY_AMT1  370.41753
## PAY_AMT2  343.84676
## PAY_AMT3  329.83085
## PAY_AMT4  302.63583
## PAY_AMT5  310.36254
## PAY_AMT6  329.43405
## Dot chart of the predictors the model considers most important, sorted
## by importance.
## n.var is capped at the number of predictors actually in the model:
## varImpPlot() takes the first n.var ranked variables, so asking for more
## than exist pads the chart with NA entries.
varImpPlot(
  rf,
  sort = TRUE,
  n.var = min(25, nrow(rf$importance)),
  main = "Variable Importance"
)

## Score the held-out partition with the fitted forest.
predicted.response <- predict(rf, newdata = dresstest)

## Cross-tabulate predicted against actual labels and print caret's summary
## statistics.
## NOTE(review): the recorded output reports class 0 as the 'Positive'
## class, so Sensitivity/Specificity describe class 0 rather than class 1;
## confirm this is the intended reading.
confusionMatrix(
  predicted.response,
  dresstest$default.payment.next.month
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 5497 1025
##          1  344  634
##                                           
##                Accuracy : 0.8175          
##                  95% CI : (0.8085, 0.8262)
##     No Information Rate : 0.7788          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.379           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9411          
##             Specificity : 0.3822          
##          Pos Pred Value : 0.8428          
##          Neg Pred Value : 0.6483          
##              Prevalence : 0.7788          
##          Detection Rate : 0.7329          
##    Detection Prevalence : 0.8696          
##       Balanced Accuracy : 0.6616          
##                                           
##        'Positive' Class : 0               
##