## CreditLimit Male Education MaritalStatus Age BillOutstanding LastPayment
## 1 20000 0 2 1 24 3913 0
## 2 120000 0 2 2 26 2682 0
## 3 90000 0 2 2 34 29239 1518
## 4 50000 0 2 1 37 46990 2000
## 5 50000 1 2 1 57 8617 2000
## 6 50000 1 1 2 37 64400 2500
## Default
## 1 1
## 2 1
## 3 0
## 4 0
## 5 0
## 6 0
## 'data.frame': 29601 obs. of 8 variables:
## $ CreditLimit : int 20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
## $ Male : Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 1 2 ...
## $ Education : Factor w/ 4 levels "1","2","3","4": 2 2 2 2 2 1 1 2 3 3 ...
## $ MaritalStatus : Factor w/ 3 levels "1","2","3": 1 2 2 1 1 2 2 2 1 2 ...
## $ Age : int 24 26 34 37 57 37 29 23 28 35 ...
## $ BillOutstanding: int 3913 2682 29239 46990 8617 64400 367965 11876 11285 0 ...
## $ LastPayment : int 0 0 1518 2000 2000 2500 55000 380 3329 0 ...
## $ Default : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
# Fix the RNG seed so the random split below is reproducible.
set.seed(123)
# Draw the row indices for an 80% training partition based on the outcome
# column; list = FALSE requests a matrix of indices rather than a list.
training.samples <- createDataPartition(
  Default[["Default"]],
  p = 0.8,
  list = FALSE
)
# Rows selected above form the training set; all remaining rows are held out.
train.data <- Default[training.samples, ]
test.data <- Default[-training.samples, ]
# Report the size (rows, columns) of the training set.
dim(train.data)
## [1] 23681 8
## [1] 5920 8
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Call:
## lda(Default ~ ., data = train.transformed)
##
## Prior probabilities of groups:
## 0 1
## 0.7768675 0.2231325
##
## Group means:
## CreditLimit Male1 Education2 Education3 Education4 MaritalStatus2
## 0 0.08341306 0.3838126 0.4612165 0.1600804 0.0048921020 0.5432951
## 1 -0.29041446 0.4358441 0.5054883 0.1877366 0.0007570023 0.5081378
## MaritalStatus3 Age BillOutstanding LastPayment
## 0 0.01049084 -0.008648351 0.01247380 0.04023246
## 1 0.01249054 0.030110469 -0.04342933 -0.14007505
##
## Coefficients of linear discriminants:
## LD1
## CreditLimit -0.88790102
## Male1 0.45614403
## Education2 0.14316883
## Education3 0.07489287
## Education4 -1.95414753
## MaritalStatus2 -0.39376662
## MaritalStatus3 -0.35799274
## Age 0.10485197
## BillOutstanding 0.13783843
## LastPayment -0.27548470
## [1] "class" "posterior" "x"
## [1] 0 0 0 0 0 0
## Levels: 0 1
## 0 1
## 10 0.6947669 0.3052331
## 15 0.8215088 0.1784912
## 23 0.7618830 0.2381170
## 32 0.7067022 0.2932978
## 33 0.7412506 0.2587494
## 44 0.7572820 0.2427180
## LD1
## 10 1.1048056
## 15 -0.5276914
## 23 0.3152693
## [1] 0.7768581
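The model correctly classified 77.7% of the test observations. This looks good at first glance, but it is no better than the no-information rate (also 77.7%): as the confusion matrix below shows, the model assigns every test observation to class 0, so it identifies none of the actual defaults (sensitivity = 0, Kappa = 0).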
# Observed outcome labels for the test set and the classes the model predicted.
Actual <- test.transformed[["Default"]]
Pred <- predictions[["class"]]
# Cross-tabulate predicted (rows) against actual (columns) classes and
# summarise the result, treating level "1" as the positive class.
confusionMatrix(table(Pred, Actual), positive = "1")
## Confusion Matrix and Statistics
##
## Actual
## Pred 0 1
## 0 4599 1321
## 1 0 0
##
## Accuracy : 0.7769
## 95% CI : (0.766, 0.7874)
## No Information Rate : 0.7769
## P-Value [Acc > NIR] : 0.5074
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.7769
## Prevalence : 0.2231
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 1
##