## default student balance income
## 1 No No 729.5265 44361.625
## 2 No Yes 817.1804 12106.135
## 3 No No 1073.5492 31767.139
## 4 No No 529.2506 35704.494
## 5 No No 785.6559 38463.496
## 6 No Yes 919.5885 7491.559
# Fix the RNG seed so that the random split below is reproducible.
set.seed(123)
# Sample 80% of the row indices, using the outcome column as the basis for
# the partition; list = FALSE asks for the indices as a matrix, not a list.
training.samples <- createDataPartition(Default$default, p = 0.8, list = FALSE)
# Selected rows become the training set; all remaining rows are held out.
train.data <- Default[training.samples, ]
test.data <- Default[-training.samples, ]
dim(train.data)
## [1] 8001 4
## [1] 1999 4
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Call:
## lda(default ~ ., data = train.transformed)
##
## Prior probabilities of groups:
## No Yes
## 0.96662917 0.03337083
##
## Group means:
## studentYes balance income
## No 0.2940264 -0.06531569 0.00265296
## Yes 0.3782772 1.89195341 -0.07684642
##
## Coefficients of linear discriminants:
## LD1
## studentYes -0.12703805
## balance 1.08587918
## income 0.08175247
## [1] "class" "posterior" "x"
## [1] No No No No No No
## Levels: No Yes
## No Yes
## 6 0.9962229 0.003777134
## 19 0.9987222 0.001277790
## 23 0.9836605 0.016339509
## 24 0.9982105 0.001789493
## 34 0.9920594 0.007940607
## 37 0.9971680 0.002831959
## LD1
## 6 -0.06404822
## 19 -0.57935144
## 23 0.63671164
## [1] 0.969985
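It can be seen that our model correctly classified 97.0% of observations, which looks excellent at first sight. Note, however, that defaults are rare in this data set (about 3% of cases), so a model that always predicts "No" would reach almost the same accuracy; the confusion matrix below gives a more complete picture.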
# Observed labels of the held-out rows and the classes predicted for them.
# The names Pred and Actual are kept because table() uses them as the
# dimension labels of the cross-tabulation.
Actual <- test.transformed[["default"]]
Pred <- predictions[["class"]]
# Cross-tabulate predictions against the truth and summarise the result,
# treating "Yes" as the positive class.
confusionMatrix(table(Pred, Actual), positive = "Yes")
## Confusion Matrix and Statistics
##
## Actual
## Pred No Yes
## No 1926 53
## Yes 7 13
##
## Accuracy : 0.97
## 95% CI : (0.9615, 0.977)
## No Information Rate : 0.967
## P-Value [Acc > NIR] : 0.2488
##
## Kappa : 0.2914
##
## Mcnemar's Test P-Value : 6.267e-09
##
## Sensitivity : 0.196970
## Specificity : 0.996379
## Pos Pred Value : 0.650000
## Neg Pred Value : 0.973219
## Prevalence : 0.033017
## Detection Rate : 0.006503
## Detection Prevalence : 0.010005
## Balanced Accuracy : 0.596674
##
## 'Positive' Class : Yes
##