Ашиглах Package-ууд
a. Мэдээллийг R программ руу дуудах
## Classes 'tbl_df', 'tbl' and 'data.frame': 4455 obs. of 14 variables:
## $ Status : num 0 0 1 0 0 0 0 0 0 1 ...
## $ Seniority: num 9 17 10 0 0 1 29 9 0 0 ...
## $ Home : num 1 1 2 1 1 2 2 5 2 5 ...
## $ Time : num 60 60 36 60 36 60 60 12 60 48 ...
## $ Age : num 30 58 46 24 26 36 44 27 32 41 ...
## $ Marital : num 2 3 2 1 1 2 2 1 2 2 ...
## $ Records : num 1 1 2 1 1 1 1 1 1 1 ...
## $ Job : num 3 1 3 1 1 1 1 1 3 2 ...
## $ Expenses : num 73 48 90 63 46 75 75 35 90 90 ...
## $ Income : num 129 131 200 182 107 214 125 80 107 80 ...
## $ Assets : num 0 0 3000 2500 0 3500 10000 0 15000 0 ...
## $ Debt : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Amount : num 800 1000 2000 900 310 650 1600 200 1200 1200 ...
## $ Price : num 846 1658 2985 1325 910 ...
## $Status
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2815 1.0000 1.0000
##
## $Seniority
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 5.000 7.987 12.000 48.000
##
## $Home
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 2.000 2.657 4.000 6.000
##
## $Time
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.00 36.00 48.00 46.44 60.00 72.00
##
## $Age
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 28.00 36.00 37.08 45.00 68.00
##
## $Marital
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 2.000 1.879 2.000 5.000
##
## $Records
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.174 1.000 2.000
##
## $Job
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 1.000 1.676 3.000 4.000
##
## $Expenses
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35.00 35.00 51.00 55.57 72.00 180.00
##
## $Income
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 80 120 763317 166 99999999
##
## $Assets
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 3500 1060341 6000 99999999
##
## $Debt
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 404382 0 99999999
##
## $Amount
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 100 700 1000 1039 1300 5000
##
## $Price
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 105 1118 1400 1463 1692 11140
## V1 V2 V3 V4
## Min. :351 Min. :10027 Min. :3211 Min. :59004
## 1st Qu.:351 1st Qu.:10027 1st Qu.:3211 1st Qu.:59004
## Median :351 Median :10027 Median :3211 Median :59004
## Mean :351 Mean :10027 Mean :3211 Mean :59004
## 3rd Qu.:351 3rd Qu.:10027 3rd Qu.:3211 3rd Qu.:59004
## Max. :351 Max. :10027 Max. :3211 Max. :59004
## NA's :4454 NA's :4454 NA's :4454 NA's :4454
## V5 V6 V7 V8
## Min. :46668 Min. :2376 Min. :1456 Min. :2052
## 1st Qu.:46668 1st Qu.:2376 1st Qu.:1456 1st Qu.:2052
## Median :46668 Median :2376 Median :1456 Median :2052
## Mean :46668 Mean :2376 Mean :1456 Mean :2052
## 3rd Qu.:46668 3rd Qu.:2376 3rd Qu.:1456 3rd Qu.:2052
## Max. :46668 Max. :2376 Max. :1456 Max. :2052
## NA's :4454 NA's :4454 NA's :4454 NA's :4454
## V9 V10 V11 V12
## Min. :70036 Min. :1e+09 Min. :1.406e+09 Min. :400383590
## 1st Qu.:70036 1st Qu.:1e+09 1st Qu.:1.406e+09 1st Qu.:400383590
## Median :70036 Median :1e+09 Median :1.406e+09 Median :400383590
## Mean :70036 Mean :1e+09 Mean :1.406e+09 Mean :400383590
## 3rd Qu.:70036 3rd Qu.:1e+09 3rd Qu.:1.406e+09 3rd Qu.:400383590
## Max. :70036 Max. :1e+09 Max. :1.406e+09 Max. :400383590
## NA's :4454 NA's :4454 NA's :4454 NA's :4454
## V13 V14
## Min. :1311019 Min. :1853091
## 1st Qu.:1311019 1st Qu.:1853091
## Median :1311019 Median :1853091
## Mean :1311019 Mean :1853091
## 3rd Qu.:1311019 3rd Qu.:1853091
## Max. :1311019 Max. :1853091
## NA's :4454 NA's :4454
## V1 V2 V3 V4
## Min. :0 Min. :0 Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0
## Median :0 Median :0 Median :0 Median :0
## Mean :0 Mean :0 Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0 Max. :0 Max. :0
## NA's :4454 NA's :4454 NA's :4454 NA's :4454
## V5 V6 V7 V8
## Min. :0 Min. :0 Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0
## Median :0 Median :0 Median :0 Median :0
## Mean :0 Mean :0 Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0 Max. :0 Max. :0
## NA's :4454 NA's :4454 NA's :4454 NA's :4454
## V9 V10 V11 V12
## Min. :0 Min. :0 Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0
## Median :0 Median :0 Median :0 Median :0
## Mean :0 Mean :0 Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0 Max. :0 Max. :0
## NA's :4454 NA's :4454 NA's :4454 NA's :4454
## V13 V14
## Min. :0 Min. :0
## 1st Qu.:0 1st Qu.:0
## Median :0 Median :0
## Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0
## NA's :4454 NA's :4454
c.Мэдээллийг бүлэглэх
d.Бүлэглэсэн хувьсагчуудаа ҮОЖ болгон хөрвүүлэх
## [INFO] creating woe binning ...
## [INFO] converting into woe values ...
## [INFO] converting into woe values ...
breaks_list-ийг өөрчлөн график байгуулж харах
library(data.table)
# Manual cut points for Age; every variable not named here is binned
# automatically by woebin(). The final "Inf%,%missing" entry merges the
# open-ended top bin with the missing-value bin.
breaks_list <- list(
  Age = c(20, 30, 40, 50, 60, 70, "Inf%,%missing")
)

# Re-run the WoE binning on the training split with the custom Age breaks.
bins2 <- woebin(dt_list$train, y = "Status", breaks_list = breaks_list)
## [INFO] creating woe binning ...
## Warning in check_breaks_list(breaks_list, xs): There are 12 x variables
## that donot specified in breaks_list are using optimal binning.
Логистик регрессийн загвар байгуулж үр дүнг харах
##
## Call:
## glm(formula = Status ~ ., family = binomial(), data = dt_woe_list$train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2630 -0.6745 -0.3947 0.6077 2.6453
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.91919 0.04833 -19.017 < 2e-16 ***
## Seniority_woe 0.73204 0.07495 9.768 < 2e-16 ***
## Home_woe 0.47104 0.13355 3.527 0.00042 ***
## Time_woe 0.87611 0.18646 4.699 2.62e-06 ***
## Age_woe -0.16299 0.19097 -0.854 0.39338
## Marital_woe -0.05261 0.25372 -0.207 0.83574
## Records_woe 1.14733 0.07922 14.483 < 2e-16 ***
## Job_woe 0.66009 0.08772 7.525 5.27e-14 ***
## Expenses_woe 0.57458 0.18256 3.147 0.00165 **
## Income_woe 0.89011 0.08773 10.146 < 2e-16 ***
## Assets_woe 0.55771 0.13855 4.025 5.69e-05 ***
## Debt_woe NA NA NA NA
## Amount_woe 1.20169 0.14277 8.417 < 2e-16 ***
## Price_woe 0.86005 0.20636 4.168 3.08e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3743.1 on 3124 degrees of freedom
## Residual deviance: 2758.7 on 3112 degrees of freedom
## AIC: 2784.7
##
## Number of Fisher Scoring iterations: 5
## e. Байгуулсан логистик регрессийн загвар дээр test датаг үнэлэх
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## 1 2 3 4 5 6
## 0.12100097 0.70967417 0.06634135 0.29484974 0.04068922 0.09211605
## 7 8 9 10 11 12
## 0.02974417 0.26843354 0.14923075 0.06642478 0.04729878 0.36357073
## 13 14 15 16 17 18
## 0.16448064 0.01584072 0.51123124 0.03642454 0.18479417 0.47180879
## 19 20
## 0.07657203 0.15936333
# Turn the predicted scores in `pred` into class labels with a 0.5 cut-off.
y_pred_num <- ifelse(pred > 0.5, 1, 0)
# Fix both levels so the table keeps a row for a class that is never predicted.
y_pred <- factor(y_pred_num, levels = c(0, 1))
# Observed outcome on the test split.
y_act <- dt_woe_list$test$Status

# Confusion matrix. The first argument (rows) holds the predictions and the
# second (columns) the observed values, so the dimension names must be given
# in that order; the earlier labels c("True", "Predicted") were swapped.
table(y_pred, y_act, dnn = c("Predicted", "Actual"))
##          Actual
## Predicted   0   1
##         0 902 178
##         1  69 181
Логистик регрессийн загварыг машин сургалтын keras package ашиглан байгуулах
library(keras)
library(magrittr)
library(tidyverse)
library(ggthemes)
# Feature matrices: every column except the Status target, as plain matrices.
# Labels come from `label_list`, which is created outside this section.
x_train <- dt_list$train %>%
  select(-Status) %>%
  as.matrix()
y_train <- label_list$train

x_test <- dt_list$test %>%
  select(-Status) %>%
  as.matrix()
# These two assignments were fused on one line, which stored the model in
# `y_test` and left `model_keras` undefined.
y_test <- label_list$test

model_keras <- keras_model_sequential()

# Two hidden ReLU layers of 100 units, each followed by 10% dropout, and a
# single sigmoid output unit. compile() closes the same pipe chain.
model_keras %>%
  # First hidden layer; input width is the number of feature columns
  layer_dense(
    units = 100,
    kernel_initializer = "uniform",
    activation = "relu",
    input_shape = ncol(x_train)
  ) %>%
  # Dropout to prevent overfitting
  layer_dropout(rate = 0.1) %>%
  # Second hidden layer
  layer_dense(
    units = 100,
    kernel_initializer = "uniform",
    activation = "relu"
  ) %>%
  # Dropout to prevent overfitting
  layer_dropout(rate = 0.1) %>%
  # Output layer
  layer_dense(
    units = 1,
    kernel_initializer = "uniform",
    activation = "sigmoid"
  ) %>%
  # Compile ANN
  compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = 'accuracy'
  )

# Print the layer summary.
model_keras
## Model
## ___________________________________________________________________________
## Layer (type) Output Shape Param #
## ===========================================================================
## dense (Dense) (None, 100) 1400
## ___________________________________________________________________________
## dropout (Dropout) (None, 100) 0
## ___________________________________________________________________________
## dense_1 (Dense) (None, 100) 10100
## ___________________________________________________________________________
## dropout_1 (Dropout) (None, 100) 0
## ___________________________________________________________________________
## dense_2 (Dense) (None, 1) 101
## ===========================================================================
## Total params: 11,601
## Trainable params: 11,601
## Non-trainable params: 0
## ___________________________________________________________________________
# Train the compiled network for 50 epochs. batch_size equals the number of
# training rows, so each epoch is one full-batch update; validation_split = 0
# means no rows are held out for validation.
history <- model_keras %>%
  fit(
    x = as.matrix(x_train),
    y = y_train,
    batch_size = nrow(x_train),
    epochs = 50,
    validation_split = 0
  )

# Plot the training history with the Economist theme from ggthemes.
plot(history) + theme_economist()