library(readxl)
fondo <- read_excel("C:/Users/User/Downloads/BD-1.xlsx")
View(fondo)
library(readxl)
library(MASS)
library(stats)
library( psych )
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(caret)
## Loading required package: lattice
summary(fondo)
##        ID             Year         Default           WCTA         
##  Min.   :  1.0   Min.   :1995   Min.   :0.000   Min.   :-2.24027  
##  1st Qu.:192.0   1st Qu.:1998   1st Qu.:0.000   1st Qu.: 0.03117  
##  Median :358.0   Median :2000   Median :0.000   Median : 0.11727  
##  Mean   :356.3   Mean   :2000   Mean   :0.018   Mean   : 0.14251  
##  3rd Qu.:521.0   3rd Qu.:2002   3rd Qu.:0.000   3rd Qu.: 0.24178  
##  Max.   :830.0   Max.   :2004   Max.   :1.000   Max.   : 0.76604  
##       RETA              EBITTA              METL               STA         
##  Min.   :-3.31242   Min.   :-0.59182   Min.   : 0.02372   Min.   :0.03586  
##  1st Qu.: 0.09474   1st Qu.: 0.03899   1st Qu.: 0.62349   1st Qu.:0.17084  
##  Median : 0.21893   Median : 0.05156   Median : 1.13646   Median :0.26104  
##  Mean   : 0.21047   Mean   : 0.05180   Mean   : 1.95396   Mean   :0.30365  
##  3rd Qu.: 0.37342   3rd Qu.: 0.06509   3rd Qu.: 2.24429   3rd Qu.:0.36694  
##  Max.   : 1.63965   Max.   : 0.19804   Max.   :60.60715   Max.   :5.00777
str(fondo)
## tibble [4,000 x 8] (S3: tbl_df/tbl/data.frame)
##  $ ID     : num [1:4000] 1 1 1 1 1 1 2 2 2 2 ...
##  $ Year   : num [1:4000] 1999 2000 2001 2002 2003 ...
##  $ Default: num [1:4000] 0 0 0 0 0 0 0 0 0 0 ...
##  $ WCTA   : num [1:4000] 0.501 0.548 0.451 0.307 0.447 ...
##  $ RETA   : num [1:4000] 0.307 0.322 0.225 0.192 0.217 ...
##  $ EBITTA : num [1:4000] 0.0434 0.0518 0.0268 0.0301 0.0325 ...
##  $ METL   : num [1:4000] 0.956 1.065 0.804 0.387 0.792 ...
##  $ STA    : num [1:4000] 0.335 0.335 0.246 0.253 0.276 ...
set.seed(1234) 
train<-createDataPartition(y=fondo$Default, p=0.8, list=FALSE,times=1)
dt_train<-fondo[train,]
dt_test<-fondo[-train,]
set.seed(1234)
glm.null.P=glm(Default ~ 1 ,data=dt_train,family=binomial(link="probit"))
model.aic.forward.P<-step(glm.null.P,direction="forward",trace=1,scope=~WCTA+RETA+EBITTA+METL+STA,family=binomial(link="probit"))
## Start:  AIC=574.15
## Default ~ 1
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## + METL    1   482.56 486.56
## + RETA    1   495.18 499.18
## + EBITTA  1   526.09 530.09
## + WCTA    1   554.13 558.13
## <none>        572.15 574.15
## + STA     1   570.51 574.51
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=486.56
## Default ~ METL
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## + RETA    1   446.07 452.07
## + EBITTA  1   465.89 471.89
## + WCTA    1   478.54 484.54
## <none>        482.56 486.56
## + STA     1   481.37 487.37
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=452.07
## Default ~ METL + RETA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## + EBITTA  1   437.33 445.33
## <none>        446.07 452.07
## + STA     1   444.70 452.70
## + WCTA    1   446.03 454.03
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=445.33
## Default ~ METL + RETA + EBITTA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## + STA   1   434.54 444.54
## <none>      437.33 445.33
## + WCTA  1   437.18 447.18
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=444.54
## Default ~ METL + RETA + EBITTA + STA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## <none>      434.54 444.54
## + WCTA  1   434.50 446.50
summary(glm.null.P)
## 
## Call:
## glm(formula = Default ~ 1, family = binomial(link = "probit"), 
##     data = dt_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1896  -0.1896  -0.1896  -0.1896   2.8383  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.10118    0.05329  -39.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 572.15  on 3199  degrees of freedom
## Residual deviance: 572.15  on 3199  degrees of freedom
## AIC: 574.15
## 
## Number of Fisher Scoring iterations: 6
set.seed(1234)
glm.null.L=glm(Default~ 1,data=dt_train,family=binomial(link="logit"))
model.aic.forward.L<-step(glm.null.L,direction="forward",trace=1,scope=~WCTA+RETA+EBITTA+METL+STA,family=binomial(link="logit"))
## Start:  AIC=574.15
## Default ~ 1
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## + METL    1   464.35 468.35
## + RETA    1   506.58 510.58
## + EBITTA  1   532.14 536.14
## + WCTA    1   554.12 558.12
## <none>        572.15 574.15
## + STA     1   570.71 574.71
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=468.35
## Default ~ METL
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## + RETA    1   440.42 446.42
## + EBITTA  1   451.23 457.23
## + WCTA    1   460.50 466.50
## <none>        464.35 468.35
## + STA     1   463.64 469.64
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=446.42
## Default ~ METL + RETA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## + EBITTA  1   432.89 440.89
## <none>        440.42 446.42
## + STA     1   439.64 447.64
## + WCTA    1   440.32 448.32
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=440.89
## Default ~ METL + RETA + EBITTA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## <none>      432.89 440.89
## + STA   1   431.16 441.16
## + WCTA  1   432.74 442.74
summary(glm.null.L) 
## 
## Call:
## glm(formula = Default ~ 1, family = binomial(link = "logit"), 
##     data = dt_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1896  -0.1896  -0.1896  -0.1896   2.8383  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.0099     0.1336  -30.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 572.15  on 3199  degrees of freedom
## Residual deviance: 572.15  on 3199  degrees of freedom
## AIC: 574.15
## 
## Number of Fisher Scoring iterations: 6
mod_HIP<-train(Default~ METL+RETA+EBITTA,data = dt_train,method="glm",family = binomial(link = "probit"))
## Warning in train.default(x, y, weights = w, ...): You are trying to do
## regression and your outcome only has two possible values Are you trying to do
## classification? If so, use a 2 level factor as your outcome column.
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mod_HIP$finalModel
## 
## Call:  NULL
## 
## Coefficients:
## (Intercept)         METL         RETA       EBITTA  
##     -1.3586      -0.5756      -0.7428      -5.2781  
## 
## Degrees of Freedom: 3199 Total (i.e. Null);  3196 Residual
## Null Deviance:       572.2 
## Residual Deviance: 437.3     AIC: 445.3
exp(-5.2781)
## [1] 0.005102116
Predicciones1<-predict(mod_HIP,dt_test)

predicciones <-ifelse(Predicciones1>0.5,yes = 1,no=0)
matriz_confusion<-table(dt_test$Default,predicciones,dnn=c("observaciones","predicciones"))
matriz_confusion
##              predicciones
## observaciones   0   1
##             0 784   1
##             1  15   0
(784+0)/800
## [1] 0.98