cd <- read.csv("C:/Users/justt/Desktop/School/622/Exams/Exam 3/credit_default.csv")
class(cd)
## [1] "data.frame"
maxs <- apply(cd, 2, max) # maximum value of each column
mins <- apply(cd, 2, min) #minimum value of each column
scaled <- as.data.frame(scale(cd, center = mins, scale = maxs - mins))
summary(scaled)
## LIMIT_BAL SEX EDUCATION MARRIAGE
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0404 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.1313 Median :1.0000 Median :0.3333 Median :0.5000
## Mean :0.1591 Mean :0.6029 Mean :0.2814 Mean :0.2757
## 3rd Qu.:0.2323 3rd Qu.:1.0000 3rd Qu.:0.3333 3rd Qu.:0.5000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## AGE PAY_0 PAY_2 PAY_3
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1207 1st Qu.:0.1000 1st Qu.:0.1111 1st Qu.:0.1111
## Median :0.2241 Median :0.2000 Median :0.2222 Median :0.2222
## Mean :0.2500 Mean :0.1984 Mean :0.2080 Mean :0.2037
## 3rd Qu.:0.3448 3rd Qu.:0.2000 3rd Qu.:0.2222 3rd Qu.:0.2222
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## PAY_4 PAY_5 PAY_6 BILL_AMT1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.1111 1st Qu.:0.1111 1st Qu.:0.1000 1st Qu.:0.01939
## Median :0.2222 Median :0.2222 Median :0.2000 Median :0.03875
## Mean :0.1972 Mean :0.1925 Mean :0.1707 Mean :0.06807
## 3rd Qu.:0.2222 3rd Qu.:0.2222 3rd Qu.:0.2000 3rd Qu.:0.08421
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.03589 1st Qu.:0.1583 1st Qu.:0.08608 1st Qu.:0.08250
## Median :0.05407 Median :0.1754 Median :0.10321 Median :0.09874
## Mean :0.08119 Mean :0.2019 Mean :0.12790 Mean :0.12062
## 3rd Qu.:0.09549 3rd Qu.:0.2143 3rd Qu.:0.13819 3rd Qu.:0.13015
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3
## Min. :0.0000 Min. :0.000000 Min. :0.0000000 Min. :0.0000000
## 1st Qu.:0.2620 1st Qu.:0.002027 1st Qu.:0.0007699 1st Qu.:0.0004464
## Median :0.2741 Median :0.004313 Median :0.0016405 Median :0.0020390
## Mean :0.2908 Mean :0.011687 Mean :0.0050934 Mean :0.0057974
## 3rd Qu.:0.2986 3rd Qu.:0.010147 3rd Qu.:0.0040747 3rd Qu.:0.0050277
## Max. :1.0000 Max. :1.000000 Max. :1.0000000 Max. :1.0000000
## PAY_AMT4 PAY_AMT5 PAY_AMT6
## Min. :0.0000000 Min. :0.0000000 Min. :0.000000
## 1st Qu.:0.0006942 1st Qu.:0.0007034 1st Qu.:0.000270
## Median :0.0034712 Median :0.0035590 Median :0.002837
## Mean :0.0112892 Mean :0.0114142 Mean :0.010276
## 3rd Qu.:0.0094381 3rd Qu.:0.0096611 3rd Qu.:0.007683
## Max. :1.0000000 Max. :1.0000000 Max. :1.000000
## default.payment.next.month
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2193
## 3rd Qu.:0.0000
## Max. :1.0000
set.seed(1234)
index <- sample(nrow(cd), nrow(cd)*0.50)
train_cd <- scaled[index,]
test_cd <- scaled[-index,]
cdnn <- neuralnet(default.payment.next.month ~ ., data=train_cd, hidden=c(2), algorithm = 'rprop+', linear.output = F)
cdnn$act.fct
## function (x)
## {
## 1/(1 + exp(-x))
## }
## <bytecode: 0x0000020212f0eb90>
## <environment: 0x0000020212f0f488>
## attr(,"type")
## [1] "logistic"
plot(cdnn)
pcut_cdnn <- 0.5
prob_cdnn_in <- predict(cdnn, train_cd, type = "response")
pred_cdnn_in <- (prob_cdnn_in >= pcut_cdnn)*1
prob_cdnn_out <- predict(cdnn, train_cd, type = "response")
pred_cdnn_out <- (prob_cdnn_out >= pcut_cdnn)*1
table(test_cd$default.payment.next.month, pred_cdnn_out, dnn = c("Observed", "Predicted"))
## Predicted
## Observed 0 1
## 0 4104 555
## 1 1179 162
Using this model, there were 1,734 misclassified rows predicted.