Import used libraries

library(dplyr)
library(GGally)
library(reshape2)
library(caret)
library(FactoMineR)
library(factoextra)
library(class)
library(Ardian) # My personal Package

Read the data

# Load the raw company records from the CSV file into a data frame.
companies <- read.csv(file = "datasets/companies.csv")

Inspect the data

Top 6 rows

# Preview the first six records (head() returns six rows by default).
head(companies)

Bottom 6 rows

# Preview the last six records (tail() returns six rows by default).
tail(companies)

Data Pre-processing

Check duplicated rows

# TRUE if at least one row is an exact duplicate of an earlier row.
any(duplicated(companies))
## [1] FALSE

Alhamdulillah, there are no duplicated rows

Check missing values

# TRUE if any cell of the data frame is NA.
anyNA(companies)
## [1] FALSE

Alhamdulillah, there are no missing values

Inspect data structure

# Compact overview: one line per column with its type and first values.
glimpse(companies)
## Rows: 6,819
## Columns: 96
## $ Bankrupt.                                               <int> 1, 1, 1, 1, 1,…
## $ ROA.C..before.interest.and.depreciation.before.interest <dbl> 0.3705943, 0.4…
## $ ROA.A..before.interest.and...after.tax                  <dbl> 0.4243894, 0.5…
## $ ROA.B..before.interest.and.depreciation.after.tax       <dbl> 0.4057498, 0.5…
## $ Operating.Gross.Margin                                  <dbl> 0.6014572, 0.6…
## $ Realized.Sales.Gross.Margin                             <dbl> 0.6014572, 0.6…
## $ Operating.Profit.Rate                                   <dbl> 0.9989692, 0.9…
## $ Pre.tax.net.Interest.Rate                               <dbl> 0.7968871, 0.7…
## $ After.tax.net.Interest.Rate                             <dbl> 0.8088094, 0.8…
## $ Non.industry.income.and.expenditure.revenue             <dbl> 0.3026464, 0.3…
## $ Continuous.interest.rate..after.tax.                    <dbl> 0.7809849, 0.7…
## $ Operating.Expense.Rate                                  <dbl> 1.256969e-04, …
## $ Research.and.development.expense.rate                   <dbl> 0.00e+00, 0.00…
## $ Cash.flow.rate                                          <dbl> 0.4581431, 0.4…
## $ Interest.bearing.debt.interest.rate                     <dbl> 0.0007250725, …
## $ Tax.rate..A.                                            <dbl> 0.000000000, 0…
## $ Net.Value.Per.Share..B.                                 <dbl> 0.1479499, 0.1…
## $ Net.Value.Per.Share..A.                                 <dbl> 0.1479499, 0.1…
## $ Net.Value.Per.Share..C.                                 <dbl> 0.1479499, 0.1…
## $ Persistent.EPS.in.the.Last.Four.Seasons                 <dbl> 0.1691406, 0.2…
## $ Cash.Flow.Per.Share                                     <dbl> 0.3116644, 0.3…
## $ Revenue.Per.Share..Yuan...                              <dbl> 0.017559780, 0…
## $ Operating.Profit.Per.Share..Yuan...                     <dbl> 0.09592053, 0.…
## $ Per.Share.Net.profit.before.tax..Yuan...                <dbl> 0.1387362, 0.1…
## $ Realized.Sales.Gross.Profit.Growth.Rate                 <dbl> 0.02210228, 0.…
## $ Operating.Profit.Growth.Rate                            <dbl> 0.8481950, 0.8…
## $ After.tax.Net.Profit.Growth.Rate                        <dbl> 0.6889795, 0.6…
## $ Regular.Net.Profit.Growth.Rate                          <dbl> 0.6889795, 0.6…
## $ Continuous.Net.Profit.Growth.Rate                       <dbl> 0.2175354, 0.2…
## $ Total.Asset.Growth.Rate                                 <dbl> 4.98e+09, 6.11…
## $ Net.Value.Growth.Rate                                   <dbl> 0.0003269773, …
## $ Total.Asset.Return.Growth.Rate.Ratio                    <dbl> 0.2631000, 0.2…
## $ Cash.Reinvestment..                                     <dbl> 0.3637253, 0.3…
## $ Current.Ratio                                           <dbl> 0.002258963, 0…
## $ Quick.Ratio                                             <dbl> 0.0012077551, …
## $ Interest.Expense.Ratio                                  <dbl> 0.6299513, 0.6…
## $ Total.debt.Total.net.worth                              <dbl> 0.021265924, 0…
## $ Debt.ratio..                                            <dbl> 0.20757626, 0.…
## $ Net.worth.Assets                                        <dbl> 0.7924237, 0.8…
## $ Long.term.fund.suitability.ratio..A.                    <dbl> 0.005024455, 0…
## $ Borrowing.dependency                                    <dbl> 0.3902844, 0.3…
## $ Contingent.liabilities.Net.worth                        <dbl> 0.006478502, 0…
## $ Operating.profit.Paid.in.capital                        <dbl> 0.09588483, 0.…
## $ Net.profit.before.tax.Paid.in.capital                   <dbl> 0.1377573, 0.1…
## $ Inventory.and.accounts.receivable.Net.value             <dbl> 0.3980357, 0.3…
## $ Total.Asset.Turnover                                    <dbl> 0.08695652, 0.…
## $ Accounts.Receivable.Turnover                            <dbl> 0.0018138841, …
## $ Average.Collection.Days                                 <dbl> 0.003487364, 0…
## $ Inventory.Turnover.Rate..times.                         <dbl> 1.820926e-04, …
## $ Fixed.Assets.Turnover.Frequency                         <dbl> 1.165007e-04, …
## $ Net.Worth.Turnover.Rate..times.                         <dbl> 0.03290323, 0.…
## $ Revenue.per.person                                      <dbl> 0.034164182, 0…
## $ Operating.profit.per.person                             <dbl> 0.3929129, 0.3…
## $ Allocation.rate.per.person                              <dbl> 0.037135302, 0…
## $ Working.Capital.to.Total.Assets                         <dbl> 0.6727753, 0.7…
## $ Quick.Assets.Total.Assets                               <dbl> 0.16667296, 0.…
## $ Current.Assets.Total.Assets                             <dbl> 0.1906430, 0.1…
## $ Cash.Total.Assets                                       <dbl> 0.0040944060, …
## $ Quick.Assets.Current.Liability                          <dbl> 0.001996771, 0…
## $ Cash.Current.Liability                                  <dbl> 1.473360e-04, …
## $ Current.Liability.to.Assets                             <dbl> 0.14730845, 0.…
## $ Operating.Funds.to.Liability                            <dbl> 0.3340152, 0.3…
## $ Inventory.Working.Capital                               <dbl> 0.2769202, 0.2…
## $ Inventory.Current.Liability                             <dbl> 0.001035990, 0…
## $ Current.Liabilities.Liability                           <dbl> 0.6762692, 0.3…
## $ Working.Capital.Equity                                  <dbl> 0.7212746, 0.7…
## $ Current.Liabilities.Equity                              <dbl> 0.3390770, 0.3…
## $ Long.term.Liability.to.Current.Assets                   <dbl> 0.025592368, 0…
## $ Retained.Earnings.to.Total.Assets                       <dbl> 0.9032248, 0.9…
## $ Total.income.Total.expense                              <dbl> 0.002021613, 0…
## $ Total.expense.Assets                                    <dbl> 0.064855708, 0…
## $ Current.Asset.Turnover.Rate                             <dbl> 7.010000e+08, …
## $ Quick.Asset.Turnover.Rate                               <dbl> 6.550000e+09, …
## $ Working.capitcal.Turnover.Rate                          <dbl> 0.5938305, 0.5…
## $ Cash.Turnover.Rate                                      <dbl> 4.580000e+08, …
## $ Cash.Flow.to.Sales                                      <dbl> 0.6715677, 0.6…
## $ Fixed.Assets.to.Assets                                  <dbl> 0.4242058, 0.4…
## $ Current.Liability.to.Liability                          <dbl> 0.6762692, 0.3…
## $ Current.Liability.to.Equity                             <dbl> 0.3390770, 0.3…
## $ Equity.to.Long.term.Liability                           <dbl> 0.1265495, 0.1…
## $ Cash.Flow.to.Total.Assets                               <dbl> 0.6375554, 0.6…
## $ Cash.Flow.to.Liability                                  <dbl> 0.4586091, 0.4…
## $ CFO.to.Assets                                           <dbl> 0.5203819, 0.5…
## $ Cash.Flow.to.Equity                                     <dbl> 0.3129049, 0.3…
## $ Current.Liability.to.Current.Assets                     <dbl> 0.11825048, 0.…
## $ Liability.Assets.Flag                                   <int> 0, 0, 0, 0, 0,…
## $ Net.Income.to.Total.Assets                              <dbl> 0.7168453, 0.7…
## $ Total.assets.to.GNP.price                               <dbl> 0.0092194400, …
## $ No.credit.Interval                                      <dbl> 0.6228790, 0.6…
## $ Gross.Profit.to.Sales                                   <dbl> 0.6014533, 0.6…
## $ Net.Income.to.Stockholder.s.Equity                      <dbl> 0.8278902, 0.8…
## $ Liability.to.Equity                                     <dbl> 0.2902019, 0.2…
## $ Degree.of.Financial.Leverage..DFL.                      <dbl> 0.02660063, 0.…
## $ Interest.Coverage.Ratio..Interest.expense.to.EBIT.      <dbl> 0.5640501, 0.5…
## $ Net.Income.Flag                                         <int> 1, 1, 1, 1, 1,…
## $ Equity.to.Liability                                     <dbl> 0.01646874, 0.…

Wow, so many columns

Parse categorical columns

# Convert the target and the two indicator flags from integer codes to factors
# so that later steps treat them as categories rather than numbers.
# `mutate(across())` replaces the superseded `mutate_at()`.
companies <- companies %>%
  mutate(across(c(Bankrupt., Liability.Assets.Flag, Net.Income.Flag),
                as.factor))

Check categorical columns summary

# Level counts for every factor column.
# `select(where())` replaces the superseded `select_if()`.
companies %>%
  select(where(is.factor)) %>%
  summary()
##  Bankrupt. Liability.Assets.Flag Net.Income.Flag
##  0:6599    0:6811                1:6819         
##  1: 220    1:   8

It turns out that `Liability.Assets.Flag` and `Net.Income.Flag` have near-zero (or zero) variance, so I’ll just remove them.

As for the proportion of the target classes, I’ll address it right after removing those columns.

Remove near zero variance columns

# Drop the two flag columns; the target `Bankrupt.` is kept.
companies <- select(companies, -c(Net.Income.Flag, Liability.Assets.Flag))

Check target variable proportion

# Bar chart of the number of rows in each class of the target.
barplot(table(companies$Bankrupt.))

Too imbalanced, I’ll upsample the data

Upsample the data

# upSample() draws minority-class rows at random (with replacement) until both
# classes have the same size, so fix the RNG seed to make the upsampled data
# reproducible between runs.
# NOTE(review): the resampling happens before the train/test split done later
# in this file, so copies of the same bankrupt company can presumably land in
# both partitions and inflate the test metrics -- consider upsampling the
# training partition only.
set.seed(1)

up_companies <- upSample(x = companies %>% select(-Bankrupt.),
                         y = companies$Bankrupt.,
                         yname = "Bankrupt")

# Class counts after upsampling.
up_companies$Bankrupt %>% table() %>% barplot()

Nice. It’s balanced now!

Feature Engineering (with Unsupervised Learning)

Check features correlations

# Correlation heat map of the predictors. Only the numeric columns are passed:
# a correlation is undefined for the `Bankrupt` factor, and ggcorr() would
# otherwise drop that column with a warning.
up_companies %>%
  select(where(is.numeric)) %>%
  ggcorr()

Ah hell nah. There are a bunch of correlated features.

Because there are too many columns, I’ll implement Principal Component Analysis (PCA)!

Principal Component Analysis (PCA)

# PCA on the upsampled data.
# - scale.unit = TRUE standardises every variable to unit variance first.
# - graph = FALSE suppresses the automatic plots.
# - quali.sup marks `Bankrupt` as a supplementary categorical variable, so it
#   does not take part in building the components.
# - ncp keeps one component per remaining (non-target) column.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
pca_companies <- PCA(X = up_companies,
                     scale.unit = TRUE,
                     graph = FALSE,
                     quali.sup = which(colnames(up_companies) == "Bankrupt"),
                     ncp = ncol(up_companies) - 1)

Feature Selection

Check cumulative percentage

# Eigenvalue table: eigenvalue, share of variance and cumulative share of
# variance for each component.
pca_companies[["eig"]]
##           eigenvalue percentage of variance cumulative percentage of variance
## comp 1  1.458003e+01           1.567745e+01                          15.67745
## comp 2  7.119303e+00           7.655164e+00                          23.33262
## comp 3  4.686360e+00           5.039097e+00                          28.37171
## comp 4  3.961041e+00           4.259184e+00                          32.63090
## comp 5  3.620691e+00           3.893217e+00                          36.52411
## comp 6  3.026526e+00           3.254329e+00                          39.77844
## comp 7  2.869226e+00           3.085189e+00                          42.86363
## comp 8  2.675507e+00           2.876889e+00                          45.74052
## comp 9  2.622784e+00           2.820198e+00                          48.56072
## comp 10 2.491720e+00           2.679269e+00                          51.23999
## comp 11 2.138641e+00           2.299614e+00                          53.53960
## comp 12 1.846405e+00           1.985382e+00                          55.52498
## comp 13 1.722693e+00           1.852358e+00                          57.37734
## comp 14 1.485455e+00           1.597264e+00                          58.97460
## comp 15 1.451848e+00           1.561127e+00                          60.53573
## comp 16 1.379655e+00           1.483500e+00                          62.01923
## comp 17 1.309361e+00           1.407915e+00                          63.42715
## comp 18 1.273898e+00           1.369783e+00                          64.79693
## comp 19 1.212811e+00           1.304098e+00                          66.10103
## comp 20 1.176381e+00           1.264926e+00                          67.36595
## comp 21 1.141858e+00           1.227804e+00                          68.59376
## comp 22 1.133436e+00           1.218749e+00                          69.81251
## comp 23 1.104190e+00           1.187301e+00                          70.99981
## comp 24 1.065804e+00           1.146026e+00                          72.14583
## comp 25 1.061716e+00           1.141630e+00                          73.28746
## comp 26 1.041052e+00           1.119411e+00                          74.40687
## comp 27 1.025830e+00           1.103043e+00                          75.50992
## comp 28 1.006724e+00           1.082499e+00                          76.59242
## comp 29 1.002591e+00           1.078054e+00                          77.67047
## comp 30 9.978076e-01           1.072911e+00                          78.74338
## comp 31 9.782320e-01           1.051862e+00                          79.79524
## comp 32 9.634694e-01           1.035989e+00                          80.83123
## comp 33 9.516170e-01           1.023244e+00                          81.85448
## comp 34 9.409444e-01           1.011768e+00                          82.86624
## comp 35 9.063638e-01           9.745847e-01                          83.84083
## comp 36 8.939967e-01           9.612868e-01                          84.80212
## comp 37 8.766077e-01           9.425889e-01                          85.74471
## comp 38 8.266622e-01           8.888841e-01                          86.63359
## comp 39 8.046771e-01           8.652442e-01                          87.49883
## comp 40 7.842140e-01           8.432408e-01                          88.34207
## comp 41 7.641683e-01           8.216863e-01                          89.16376
## comp 42 7.482722e-01           8.045938e-01                          89.96835
## comp 43 7.085771e-01           7.619108e-01                          90.73027
## comp 44 6.908354e-01           7.428337e-01                          91.47310
## comp 45 6.476289e-01           6.963751e-01                          92.16947
## comp 46 6.102706e-01           6.562050e-01                          92.82568
## comp 47 6.007414e-01           6.459585e-01                          93.47164
## comp 48 5.630936e-01           6.054770e-01                          94.07711
## comp 49 5.458399e-01           5.869247e-01                          94.66404
## comp 50 5.281706e-01           5.679254e-01                          95.23196
## comp 51 4.907362e-01           5.276733e-01                          95.75964
## comp 52 4.481332e-01           4.818636e-01                          96.24150
## comp 53 4.292430e-01           4.615517e-01                          96.70305
## comp 54 4.084045e-01           4.391446e-01                          97.14220
## comp 55 3.617653e-01           3.889949e-01                          97.53119
## comp 56 2.506969e-01           2.695666e-01                          97.80076
## comp 57 2.227310e-01           2.394957e-01                          98.04026
## comp 58 2.148097e-01           2.309782e-01                          98.27123
## comp 59 2.038736e-01           2.192189e-01                          98.49045
## comp 60 1.821797e-01           1.958922e-01                          98.68634
## comp 61 1.746238e-01           1.877676e-01                          98.87411
## comp 62 1.647918e-01           1.771955e-01                          99.05131
## comp 63 1.384499e-01           1.488708e-01                          99.20018
## comp 64 1.157654e-01           1.244789e-01                          99.32466
## comp 65 1.092266e-01           1.174479e-01                          99.44211
## comp 66 9.154976e-02           9.844060e-02                          99.54055
## comp 67 8.656378e-02           9.307933e-02                          99.63362
## comp 68 7.403729e-02           7.960998e-02                          99.71323
## comp 69 6.478772e-02           6.966422e-02                          99.78290
## comp 70 5.461050e-02           5.872096e-02                          99.84162
## comp 71 3.370029e-02           3.623687e-02                          99.87786
## comp 72 2.657250e-02           2.857258e-02                          99.90643
## comp 73 2.400051e-02           2.580700e-02                          99.93224
## comp 74 1.640146e-02           1.763598e-02                          99.94987
## comp 75 1.492970e-02           1.605345e-02                          99.96593
## comp 76 9.362633e-03           1.006735e-02                          99.97599
## comp 77 5.921611e-03           6.367324e-03                          99.98236
## comp 78 4.279930e-03           4.602075e-03                          99.98696
## comp 79 3.293646e-03           3.541555e-03                          99.99050
## comp 80 2.921894e-03           3.141822e-03                          99.99365
## comp 81 2.375983e-03           2.554820e-03                          99.99620
## comp 82 1.205621e-03           1.296366e-03                          99.99750
## comp 83 6.783903e-04           7.294519e-04                          99.99823
## comp 84 5.551237e-04           5.969072e-04                          99.99882
## comp 85 4.103825e-04           4.412715e-04                          99.99926
## comp 86 3.553406e-04           3.820867e-04                          99.99965
## comp 87 3.282632e-04           3.529712e-04                         100.00000
## comp 88 7.082070e-09           7.615129e-09                         100.00000
## comp 89 1.778111e-11           1.911947e-11                         100.00000
## comp 90 1.405168e-18           1.510934e-18                         100.00000
## comp 91 1.772578e-24           1.905998e-24                         100.00000
## comp 92 9.240342e-30           9.935851e-30                         100.00000
## comp 93 4.060072e-31           4.365669e-31                         100.00000

I will keep enough components to retain about 95% of the total variance, which means using the first 50 PCs.

# Number of leading components needed to reach the variance target, read from
# the eigenvalue table instead of being hard-coded. The last cumulative value
# is 100, so at least one component always qualifies.
variance_target <- 95
cum_variance <- pca_companies$eig[, "cumulative percentage of variance"]
n_components <- which(cum_variance >= variance_target)[1]

# Coordinates of every row on the retained components, plus the target.
# drop = FALSE keeps a matrix even if a single component is enough.
pc_companies <- as.data.frame(
  pca_companies$ind$coord[, seq_len(n_components), drop = FALSE]
) %>%
  mutate(Bankrupt = up_companies$Bankrupt)

# Preview the first six rows of the component scores.
head(pc_companies)

Check features correlations again

# Correlation heat map of the retained components. Only the numeric columns are
# passed: a correlation is undefined for the `Bankrupt` factor, and ggcorr()
# would otherwise drop that column with a warning.
pc_companies %>%
  select(where(is.numeric)) %>%
  ggcorr()

Magnificent. I now don’t have correlated features. This is the beauty of PCA!

Exploratory Data Analysis

PC1 vs PC2

Variable Factor Map

# Variable factor map of the PCA result; plot() dispatches to plot.PCA() for
# objects of class "PCA".
plot(pca_companies, choix = "var")

Individual Factor Map

# Individual factor map, with points coloured by the `Bankrupt` class.
# NOTE(review): `select = "contrib 0"` presumably selects no individuals for
# labelling and `invisible = "quali"` hides the category points -- confirm
# against the plot.PCA documentation.
plot(pca_companies,
     choix = "ind",
     habillage = "Bankrupt",
     select = "contrib 0",
     invisible = "quali")

Top 5 Contributed Variables

# Bar chart of the five variables that contribute most to the component(s)
# fviz_contrib() uses by default.
fviz_contrib(X = pca_companies, choice = "var", top = 5)

Cross Validation (hold-out train/test split)

Set training indices

# Fixed seed so the random partition below is the same on every run.
set.seed(1)

# Row numbers of the training partition: 80% of the rows, drawn with respect to
# the `Bankrupt` classes. `list = FALSE` returns a matrix instead of a list.
indices <- createDataPartition(pc_companies$Bankrupt, p = 0.8, list = FALSE)

Train test split

# Rows listed in `indices` form the training set; all other rows the test set.
train_data <- pc_companies[indices, ]
test_data <- pc_companies[-indices, ]

# Predictors: every column except the target.
X_train <- select(train_data, -Bankrupt)
X_test <- select(test_data, -Bankrupt)

# Target labels for each partition.
y_train <- train_data[["Bankrupt"]]
y_test <- test_data[["Bankrupt"]]

Model Fitting

Logistic Regression Algorithm

# Fit a logistic regression (binomial GLM) of `Bankrupt` on every other column
# of the training set, i.e. on the retained principal components.
model_lgr <- glm(formula = Bankrupt ~ .,
                 family = "binomial",
                 data = train_data)

# Coefficient table and deviance summary of the fit.
# NOTE(review): in the recorded output the estimates are on the order of 1e14,
# the residual deviance exceeds the null deviance, and Fisher scoring stopped
# at 25 iterations -- this looks like a fit that did not converge; confirm
# before relying on the coefficients or p-values.
model_lgr %>% summary()
## 
## Call:
## glm(formula = Bankrupt ~ ., family = "binomial", data = train_data)
## 
## Coefficients:
##               Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)  3.139e+14  6.562e+05  4.783e+08   <2e-16 ***
## Dim.1       -2.883e+14  1.743e+05 -1.654e+09   <2e-16 ***
## Dim.2       -4.688e+13  2.383e+05 -1.967e+08   <2e-16 ***
## Dim.3       -1.225e+14  3.035e+05 -4.038e+08   <2e-16 ***
## Dim.4        4.098e+13  3.079e+05  1.331e+08   <2e-16 ***
## Dim.5        5.255e+13  3.421e+05  1.536e+08   <2e-16 ***
## Dim.6       -7.041e+13  3.766e+05 -1.870e+08   <2e-16 ***
## Dim.7        8.677e+13  4.425e+05  1.961e+08   <2e-16 ***
## Dim.8        1.070e+14  4.027e+05  2.656e+08   <2e-16 ***
## Dim.9       -2.530e+13  4.785e+05 -5.287e+07   <2e-16 ***
## Dim.10       1.066e+13  4.177e+05  2.551e+07   <2e-16 ***
## Dim.11       6.728e+12  5.669e+05  1.187e+07   <2e-16 ***
## Dim.12      -6.961e+13  5.264e+05 -1.322e+08   <2e-16 ***
## Dim.13       7.400e+13  5.429e+05  1.363e+08   <2e-16 ***
## Dim.14      -1.794e+14  3.840e+06 -4.672e+07   <2e-16 ***
## Dim.15      -1.143e+14  3.079e+06 -3.712e+07   <2e-16 ***
## Dim.16      -2.294e+14  7.476e+05 -3.069e+08   <2e-16 ***
## Dim.17      -1.506e+14  5.972e+05 -2.522e+08   <2e-16 ***
## Dim.18      -5.329e+13  5.742e+05 -9.281e+07   <2e-16 ***
## Dim.19      -2.213e+14  6.708e+05 -3.300e+08   <2e-16 ***
## Dim.20       4.860e+13  6.416e+05  7.574e+07   <2e-16 ***
## Dim.21      -7.760e+13  6.403e+05 -1.212e+08   <2e-16 ***
## Dim.22       1.019e+14  6.355e+05  1.604e+08   <2e-16 ***
## Dim.23      -3.813e+13  7.173e+05 -5.316e+07   <2e-16 ***
## Dim.24       1.816e+14  6.477e+05  2.804e+08   <2e-16 ***
## Dim.25      -9.701e+13  6.626e+05 -1.464e+08   <2e-16 ***
## Dim.26       6.235e+13  6.402e+05  9.740e+07   <2e-16 ***
## Dim.27       1.023e+13  6.309e+05  1.622e+07   <2e-16 ***
## Dim.28      -8.672e+13  6.564e+05 -1.321e+08   <2e-16 ***
## Dim.29      -7.871e+11  6.366e+05 -1.236e+06   <2e-16 ***
## Dim.30      -3.246e+13  6.366e+05 -5.099e+07   <2e-16 ***
## Dim.31       4.428e+13  6.957e+05  6.365e+07   <2e-16 ***
## Dim.32       2.618e+13  6.925e+05  3.780e+07   <2e-16 ***
## Dim.33      -1.822e+13  7.374e+05 -2.471e+07   <2e-16 ***
## Dim.34      -5.052e+13  6.874e+05 -7.349e+07   <2e-16 ***
## Dim.35      -1.326e+14  6.841e+05 -1.938e+08   <2e-16 ***
## Dim.36      -5.256e+13  7.310e+05 -7.190e+07   <2e-16 ***
## Dim.37      -6.583e+13  7.104e+05 -9.267e+07   <2e-16 ***
## Dim.38       4.456e+13  7.165e+05  6.219e+07   <2e-16 ***
## Dim.39       2.135e+14  7.473e+05  2.857e+08   <2e-16 ***
## Dim.40      -2.782e+13  7.657e+05 -3.633e+07   <2e-16 ***
## Dim.41      -2.795e+13  7.360e+05 -3.797e+07   <2e-16 ***
## Dim.42       7.046e+13  8.598e+05  8.195e+07   <2e-16 ***
## Dim.43       8.952e+13  7.765e+05  1.153e+08   <2e-16 ***
## Dim.44      -4.303e+13  7.941e+05 -5.419e+07   <2e-16 ***
## Dim.45      -7.293e+13  8.437e+05 -8.644e+07   <2e-16 ***
## Dim.46       1.428e+14  9.191e+05  1.554e+08   <2e-16 ***
## Dim.47       1.119e+13  8.399e+05  1.332e+07   <2e-16 ***
## Dim.48      -8.794e+12  1.419e+06 -6.198e+06   <2e-16 ***
## Dim.49      -2.625e+13  3.727e+06 -7.044e+06   <2e-16 ***
## Dim.50       6.910e+13  3.254e+06  2.124e+07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  14639  on 10559  degrees of freedom
## Residual deviance: 144823  on 10509  degrees of freedom
## AIC: 144925
## 
## Number of Fisher Scoring iterations: 25

Logistic Regression Model Evaluation

# Predicted probabilities for the test rows (`type = "response"` returns values
# on the probability scale rather than the linear-predictor scale).
pred_lgr_raw <- predict(object = model_lgr,
                        newdata = X_test,
                        type = "response")

# Hard labels: 1 when the probability is above the 0.5 cut-off, 0 otherwise.
pred_lgr <- ifelse(test = pred_lgr_raw > 0.5, yes = 1, no = 0)

Confusion Matrix

# Give the predictions the same factor levels as the reference so the table
# always contains both classes, even if the model predicts only one of them.
# The positive class is "0" (see the 'Positive' Class line of the output), so
# Sensitivity refers to class 0 and Specificity to class 1.
confusionMatrix(data = factor(pred_lgr, levels = levels(y_test)),
                reference = y_test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0  882   76
##          1  437 1243
##                                           
##                Accuracy : 0.8055          
##                  95% CI : (0.7899, 0.8205)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6111          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.6687          
##             Specificity : 0.9424          
##          Pos Pred Value : 0.9207          
##          Neg Pred Value : 0.7399          
##              Prevalence : 0.5000          
##          Detection Rate : 0.3343          
##    Detection Prevalence : 0.3632          
##       Balanced Accuracy : 0.8055          
##                                           
##        'Positive' Class : 0               
## 

AUC of ROC

# Plot the ROC curve from the predicted probabilities and the true test labels.
# NOTE(review): plotROC() is not defined in this file; presumably it comes from
# the author's personal `Ardian` package -- confirm its argument order.
plotROC(pred_lgr_raw, y_test)

Not bad. But let’s try K-Nearest Neighbour algorithm!

K-Nearest Neighbour Algorithm

# k-nearest-neighbour classification of the test rows, using the training rows
# and their labels as the reference set. k is the rounded square root of the
# number of training rows.
pred_knn <- knn(
  train = X_train,
  test = X_test,
  cl = y_train,
  k = X_train %>% nrow() %>% sqrt() %>% round()
)

KNN Evaluation

# Confusion matrix and derived statistics for the KNN predictions against the
# true test labels.
confusionMatrix(data = pred_knn, reference = y_test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 1077   91
##          1  242 1228
##                                           
##                Accuracy : 0.8738          
##                  95% CI : (0.8605, 0.8862)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7475          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.8165          
##             Specificity : 0.9310          
##          Pos Pred Value : 0.9221          
##          Neg Pred Value : 0.8354          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4083          
##    Detection Prevalence : 0.4428          
##       Balanced Accuracy : 0.8738          
##                                           
##        'Positive' Class : 0               
## 

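It’s way better! Predicting whether a company is going to go bankrupt at that level of Accuracy is amazing! Also, don’t forget to pay attention to the Sensitivity and Specificity scores. Note that the 'Positive' class here is 0 (not bankrupt), so Sensitivity (0.8165) is the share of non-bankrupt companies that were classified correctly, while Specificity (0.9310) is the share of bankrupt companies that were classified correctly. The Specificity is higher than the Sensitivity, which means this model is better at identifying a company that is going to go bankrupt than one that is not.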