library(readr)
# Read the credit-default CSV into `cd`; readr guesses the column types.
cd <- read_csv(
  file = "C:/Users/Lynx/Documents/MSDA/MSDA 622 - Big Data/Exam 3/credit_default.csv"
)
## Rows: 12000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (24): LIMIT_BAL, SEX, EDUCATION, MARRIAGE, AGE, PAY_0, PAY_2, PAY_3, PAY...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print per-column summary statistics (min, quartiles, mean, max) for `cd`.
summary(object = cd)
## LIMIT_BAL SEX EDUCATION MARRIAGE
## Min. : 10000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 50000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median : 140000 Median :2.000 Median :2.000 Median :2.000
## Mean : 167501 Mean :1.603 Mean :1.844 Mean :1.552
## 3rd Qu.: 240000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :1000000 Max. :2.000 Max. :4.000 Max. :3.000
## AGE PAY_0 PAY_2 PAY_3
## Min. :21.0 Min. :-2.00000 Min. :-2.000 Min. :-2.0000
## 1st Qu.:28.0 1st Qu.:-1.00000 1st Qu.:-1.000 1st Qu.:-1.0000
## Median :34.0 Median : 0.00000 Median : 0.000 Median : 0.0000
## Mean :35.5 Mean :-0.01575 Mean :-0.128 Mean :-0.1667
## 3rd Qu.:41.0 3rd Qu.: 0.00000 3rd Qu.: 0.000 3rd Qu.: 0.0000
## Max. :79.0 Max. : 8.00000 Max. : 7.000 Max. : 7.0000
## PAY_4 PAY_5 PAY_6 BILL_AMT1
## Min. :-2.0000 Min. :-2.0000 Min. :-2.0000 Min. :-15308
## 1st Qu.:-1.0000 1st Qu.:-1.0000 1st Qu.:-1.0000 1st Qu.: 3690
## Median : 0.0000 Median : 0.0000 Median : 0.0000 Median : 22658
## Mean :-0.2256 Mean :-0.2678 Mean :-0.2931 Mean : 51392
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 67207
## Max. : 7.0000 Max. : 7.0000 Max. : 8.0000 Max. :964511
## BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5
## Min. :-33350 Min. :-157264 Min. :-81334 Min. :-81334
## 1st Qu.: 3156 1st Qu.: 2980 1st Qu.: 2411 1st Qu.: 1863
## Median : 21652 Median : 20330 Median : 19078 Median : 18244
## Mean : 49246 Mean : 47080 Mean : 43102 Mean : 40314
## 3rd Qu.: 63787 3rd Qu.: 59662 3rd Qu.: 53117 3rd Qu.: 49927
## Max. :983931 Max. : 855086 Max. :891586 Max. :927171
## BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3
## Min. :-339603 Min. : 0 Min. : 0.0 Min. : 0
## 1st Qu.: 1309 1st Qu.: 1000 1st Qu.: 944.8 1st Qu.: 400
## Median : 17130 Median : 2128 Median : 2013.0 Median : 1827
## Mean : 38821 Mean : 5766 Mean : 6250.1 Mean : 5195
## 3rd Qu.: 48938 3rd Qu.: 5006 3rd Qu.: 5000.0 3rd Qu.: 4505
## Max. : 961664 Max. :493358 Max. :1227082.0 Max. :896040
## PAY_AMT4 PAY_AMT5 PAY_AMT6
## Min. : 0 Min. : 0 Min. : 0.0
## 1st Qu.: 300 1st Qu.: 300 1st Qu.: 142.8
## Median : 1500 Median : 1518 Median : 1500.0
## Mean : 4878 Mean : 4868 Mean : 5432.7
## 3rd Qu.: 4078 3rd Qu.: 4121 3rd Qu.: 4061.8
## Max. :432130 Max. :426529 Max. :528666.0
## default.payment.next.month
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2193
## 3rd Qu.:0.0000
## Max. :1.0000
# Reproducible 50/50 split: draw half of the row numbers at random for the
# training set and keep every remaining row, in original order, for testing.
set.seed(123)
n_rows <- nrow(cd)
index <- sample.int(n_rows, size = n_rows / 2)
cd_train <- cd[index, , drop = FALSE]
cd_test <- cd[setdiff(seq_len(n_rows), index), , drop = FALSE]
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 4.2.3
# Fit a feed-forward network with one hidden layer of two neurons, using every
# other column of cd_train as a predictor of default.payment.next.month.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): the predictors appear to be passed on their raw scales and the
# output neuron is linear; for a 0/1 target, rescaled inputs and
# linear.output = FALSE are the usual choices -- confirm before changing, as
# the fitted model and the results reported below would differ.
model <- neuralnet(
  default.payment.next.month ~ .,
  data = cd_train,
  hidden = 2,
  linear.output = TRUE
)
# Show the activation function stored on the fitted model (exact-name lookup).
model[["act.fct"]]
## function (x)
## {
## 1/(1 + exp(-x))
## }
## <bytecode: 0x0000022e24a376c8>
## <environment: 0x0000022e24a36dd0>
## attr(,"type")
## [1] "logistic"
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Draw the fitted network. With the default rep = NULL, plot() on a neuralnet
# object opens a new graphics device for each repetition, which knitr does not
# capture; naming a repetition draws on the current device so the figure
# appears in the knitted document. "best" selects the lowest-error repetition.
plot(model, rep = "best")
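The plot from a plain `plot(model)` call does not show up after knitting: plotting a `neuralnet` object without a `rep` argument opens a new graphics device, which knitr does not capture, whereas `plot(model, rep = "best")` draws on the current device. The plot is attached as an image here.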
# Score the test set with the fitted network. The target column is excluded by
# name rather than by position, so the call does not depend on it being
# column 24.
# NOTE(review): recent neuralnet releases appear to document compute() as
# deprecated in favour of predict(); confirm before migrating, since the two
# return differently shaped objects.
target_col <- "default.payment.next.month"
predict <- compute(model, cd_test[, setdiff(names(cd_test), target_col)])

# Turn the network output into a 0/1 class label using a 0.5 cut-off.
predicted <- ifelse(predict$net.result > 0.5, 1, 0)

# Confusion matrix. Both factors carry explicit 0/1 levels so the table always
# has both rows and both columns, even when one class is never predicted.
table(
  factor(cd_test[[target_col]], levels = c(0, 1)),
  factor(predicted, levels = c(0, 1)),
  dnn = c("Observed", "Predicted")
)
## Predicted
## Observed 0
## 0 4730
## 1 1270
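According to the confusion matrix, the model predicted class 0 for every row of the testing set, so there were 1270 misclassified rows (all of the rows whose observed value is 1), while the 4730 rows with an observed value of 0 were classified correctly. The resulting accuracy is 4730/6000 ≈ 78.8%, which is simply the share of class 0 in the testing set.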