Packages used:
- library(ggcorrplot) : Correlation matrix
- library(tidyverse) : Data cleaning
- library(caret) : Data cleaning
- library(knitr) : Create table comparison
- library(caTools) : Fast calculation of AUC, LogitBoost classifier
- library(DMwR) : Data mining
- library(xgboost) : Extreme Gradient Boosting algorithm
- library(ROCR) : ROC curve and AUC

1. Import Libraries

library(ggcorrplot)
## Loading required package: ggplot2
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(knitr)
library(caTools)
library(DMwR)
## Loading required package: grid
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(e1071)
library(xgboost)
## 
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
## 
##     slice
library(tinytex)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ROCR)

2. Data Extraction

# Point the session at the project folder that holds the CSV.
# NOTE(review): this is an absolute, machine-specific path -- confirm or
# adjust it before running on another machine.
setwd("C:/Users/lenovo/Documents/R/Practice/RHB")

# Load the raw data; the file name is resolved against the working
# directory set above.
BankData <- read.csv("Data Analysis Skill.csv")

# Preview the first six rows.
head(BankData, n = 6L)
##    CIF_KEY No Age  Gender Marital.Status Premier.Customer..1..Yes.
## 1  2092996  1  71 UNKNOWN         Single                         1
## 2 11934718  2  68 UNKNOWN        Married                         0
## 3  2327694  3  65 UNKNOWN        Married                         1
## 4 10556315  4  60 UNKNOWN        Married                         0
## 5  6136151  5  71 UNKNOWN         Single                         0
## 6  2239881  6  46 UNKNOWN        Married                         0
##              State    Race    Occupation   Salary All.Deposit.Acc.Balance
## 1 01. KLANG VALLEY CHINESE  White Collar  4000.00              406,936.3 
## 2 01. KLANG VALLEY CHINESE        Others     0.00              346,415.2 
## 3 01. KLANG VALLEY CHINESE  White Collar  6200.00              325,803.6 
## 4 01. KLANG VALLEY CHINESE       Unknown     0.00              159,000.0 
## 5 01. KLANG VALLEY  INDIAN  White Collar     0.00              137,410.2 
## 6 08. PULAU PINANG CHINESE Self Employed 17171.12              119,802.4 
##   Count.of.Deposit.Acc Count.of.Product.with.Bank Credit.Card.Balance
## 1                   10                          2                  NA
## 2                    9                          2                  NA
## 3                    2                          4            10043.92
## 4                    8                          2                  NA
## 5                    4                          3                  NA
## 6                    6                          4             1969.01
##   Count.of.Credit.Card Fixed.Deposit.Balance Housing.Loan.Balance
## 1                   NA             383800.00                   NA
## 2                   NA             340000.00                   NA
## 3                    2                    NA                   NA
## 4                   NA             159000.00                   NA
## 5                   NA             107000.00                   NA
## 6                    1              86398.08                   NA
##   Savings.Acc.Balance Internet.Bank.User..1.Yes. Latest.Product.Brough.Date
## 1            23136.25                          0                 14/06/2016
## 2             6415.18                          0                 05/05/2016
## 3                  NA                          0                 18/04/2016
## 4                  NA                          0                 09/01/2017
## 5            26788.51                          0                 04/02/2016
## 6            33404.33                          1                 12/10/2016
##   Relation.with.Bank..in.month. New.Acct.open.last.6.mths..Count.
## 1                           314                                NA
## 2                           684                                NA
## 3                           209                                NA
## 4                            55                                 4
## 5                           212                                NA
## 6                           357                                 1
##   Internet.Bank.Tranx.last.6.mth..count. Total.Asset.Under.Management..AUM.
## 1                                     NA                           406,936 
## 2                                     NA                           346,415 
## 3                                     NA                           325,808 
## 4                                     NA                           159,000 
## 5                                     NA                           137,410 
## 6                                    171                           119,802 
##   Total.Loan AUM.Movement.Slope.last.6.mths
## 1         NA                     -1478.0774
## 2         NA                      -806.9297
## 3         NA                     -2818.1753
## 4         NA                     -2142.8571
## 5         NA                       272.3869
## 6         NA                     -5846.2591
# Confirm that read.csv() returned a data.frame (class() only reports the
# class; nothing is converted here)
class(BankData)
## [1] "data.frame"
# Inspect the type and first values of every column. Note that
# All.Deposit.Acc.Balance and Total.Asset.Under.Management..AUM. come in as
# character (values such as " 406,936.3 " carry thousands separators and
# padding), so they are not numeric at this point.
str(BankData)
## 'data.frame':    100 obs. of  26 variables:
##  $ CIF_KEY                               : int  2092996 11934718 2327694 10556315 6136151 2239881 1310026 2176385 450425 9369079 ...
##  $ No                                    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Age                                   : int  71 68 65 60 71 46 69 72 77 28 ...
##  $ Gender                                : chr  "UNKNOWN" "UNKNOWN" "UNKNOWN" "UNKNOWN" ...
##  $ Marital.Status                        : chr  "Single" "Married" "Married" "Married" ...
##  $ Premier.Customer..1..Yes.             : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ State                                 : chr  "01. KLANG VALLEY" "01. KLANG VALLEY" "01. KLANG VALLEY" "01. KLANG VALLEY" ...
##  $ Race                                  : chr  "CHINESE" "CHINESE" "CHINESE" "CHINESE" ...
##  $ Occupation                            : chr  "White Collar" "Others" "White Collar" "Unknown" ...
##  $ Salary                                : num  4000 0 6200 0 0 ...
##  $ All.Deposit.Acc.Balance               : chr  " 406,936.3 " " 346,415.2 " " 325,803.6 " " 159,000.0 " ...
##  $ Count.of.Deposit.Acc                  : int  10 9 2 8 4 6 5 4 3 4 ...
##  $ Count.of.Product.with.Bank            : int  2 2 4 2 3 4 2 7 1 3 ...
##  $ Credit.Card.Balance                   : num  NA NA 10044 NA NA ...
##  $ Count.of.Credit.Card                  : int  NA NA 2 NA NA 1 NA 1 NA NA ...
##  $ Fixed.Deposit.Balance                 : num  383800 340000 NA 159000 107000 ...
##  $ Housing.Loan.Balance                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Savings.Acc.Balance                   : num  23136 6415 NA NA 26789 ...
##  $ Internet.Bank.User..1.Yes.            : int  0 0 0 0 0 1 0 1 0 1 ...
##  $ Latest.Product.Brough.Date            : chr  "14/06/2016" "05/05/2016" "18/04/2016" "09/01/2017" ...
##  $ Relation.with.Bank..in.month.         : int  314 684 209 55 212 357 218 261 222 84 ...
##  $ New.Acct.open.last.6.mths..Count.     : int  NA NA NA 4 NA 1 NA NA NA NA ...
##  $ Internet.Bank.Tranx.last.6.mth..count.: int  NA NA NA NA NA 171 NA NA NA 11 ...
##  $ Total.Asset.Under.Management..AUM.    : chr  " 406,936 " " 346,415 " " 325,808 " " 159,000 " ...
##  $ Total.Loan                            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ AUM.Movement.Slope.last.6.mths        : num  -1478 -807 -2818 -2143 272 ...
# Per-column summary: quartiles and NA counts for numeric columns,
# length/class/mode for character columns
summary(BankData)
##     CIF_KEY               No              Age           Gender         
##  Min.   :  450425   Min.   :  1.00   Min.   : 5.00   Length:100        
##  1st Qu.: 3276540   1st Qu.: 25.75   1st Qu.:33.00   Class :character  
##  Median : 7835438   Median : 50.50   Median :47.00   Mode  :character  
##  Mean   : 7376156   Mean   : 50.50   Mean   :47.38                     
##  3rd Qu.:11178092   3rd Qu.: 75.25   3rd Qu.:61.00                     
##  Max.   :13239950   Max.   :100.00   Max.   :87.00                     
##                                                                        
##  Marital.Status     Premier.Customer..1..Yes.    State          
##  Length:100         Min.   :0.00              Length:100        
##  Class :character   1st Qu.:0.00              Class :character  
##  Mode  :character   Median :0.00              Mode  :character  
##                     Mean   :0.04                                
##                     3rd Qu.:0.00                                
##                     Max.   :1.00                                
##                                                                 
##      Race            Occupation            Salary      All.Deposit.Acc.Balance
##  Length:100         Length:100         Min.   :    0   Length:100             
##  Class :character   Class :character   1st Qu.:    0   Class :character       
##  Mode  :character   Mode  :character   Median :    0   Mode  :character       
##                                        Mean   : 2147                          
##                                        3rd Qu.: 3855                          
##                                        Max.   :17171                          
##                                        NA's   :1                              
##  Count.of.Deposit.Acc Count.of.Product.with.Bank Credit.Card.Balance
##  Min.   : 1           Min.   :1.00               Min.   :   -0.1    
##  1st Qu.: 1           1st Qu.:2.00               1st Qu.:  281.9    
##  Median : 1           Median :2.00               Median : 1969.0    
##  Mean   : 2           Mean   :2.36               Mean   : 2929.6    
##  3rd Qu.: 2           3rd Qu.:3.00               3rd Qu.: 4079.6    
##  Max.   :10           Max.   :7.00               Max.   :10043.9    
##                                                  NA's   :91         
##  Count.of.Credit.Card Fixed.Deposit.Balance Housing.Loan.Balance
##  Min.   :1.000        Min.   :  1046        Min.   :   8175     
##  1st Qu.:1.000        1st Qu.: 17201        1st Qu.: 116734     
##  Median :1.000        Median : 38820        Median : 277737     
##  Mean   :1.182        Mean   : 71819        Mean   : 391881     
##  3rd Qu.:1.000        3rd Qu.: 89430        3rd Qu.: 316471     
##  Max.   :2.000        Max.   :383800        Max.   :1757071     
##  NA's   :89           NA's   :70            NA's   :91          
##  Savings.Acc.Balance Internet.Bank.User..1.Yes. Latest.Product.Brough.Date
##  Min.   :    5.47    Min.   :0.00               Length:100                
##  1st Qu.: 2188.96    1st Qu.:0.00               Class :character          
##  Median : 4285.59    Median :0.00               Mode  :character          
##  Mean   : 8612.27    Mean   :0.49                                         
##  3rd Qu.: 9736.36    3rd Qu.:1.00                                         
##  Max.   :67951.19    Max.   :1.00                                         
##  NA's   :14                                                               
##  Relation.with.Bank..in.month. New.Acct.open.last.6.mths..Count.
##  Min.   :  2.00                Min.   :1.000                    
##  1st Qu.: 44.75                1st Qu.:1.000                    
##  Median :130.00                Median :1.000                    
##  Mean   :171.68                Mean   :1.667                    
##  3rd Qu.:261.50                3rd Qu.:2.000                    
##  Max.   :684.00                Max.   :4.000                    
##                                NA's   :61                       
##  Internet.Bank.Tranx.last.6.mth..count. Total.Asset.Under.Management..AUM.
##  Min.   :  0.00                         Length:100                        
##  1st Qu.:  0.00                         Class :character                  
##  Median :  2.00                         Mode  :character                  
##  Mean   : 14.96                                                           
##  3rd Qu.: 14.00                                                           
##  Max.   :171.00                                                           
##  NA's   :75                                                               
##    Total.Loan      AUM.Movement.Slope.last.6.mths
##  Min.   :   5849   Min.   :-5846.26              
##  1st Qu.:  34017   1st Qu.: -758.67              
##  Median : 140071   Median : -261.78              
##  Mean   : 289771   Mean   :  162.99              
##  3rd Qu.: 316471   3rd Qu.:    2.86              
##  Max.   :1757071   Max.   :44420.44              
##  NA's   :87        NA's   :2
# Number of rows and columns in the imported data
dim(BankData)
## [1] 100  26

3. Data Wrangling

# Count the missing values in each column.
# vapply() is used instead of sapply() so the result is always a named
# integer vector (one count per column), whatever the shape of the input.
vapply(BankData, function(x) sum(is.na(x)), integer(1))
##                                CIF_KEY                                     No 
##                                      0                                      0 
##                                    Age                                 Gender 
##                                      0                                      0 
##                         Marital.Status              Premier.Customer..1..Yes. 
##                                      0                                      0 
##                                  State                                   Race 
##                                      0                                      0 
##                             Occupation                                 Salary 
##                                      0                                      1 
##                All.Deposit.Acc.Balance                   Count.of.Deposit.Acc 
##                                      0                                      0 
##             Count.of.Product.with.Bank                    Credit.Card.Balance 
##                                      0                                     91 
##                   Count.of.Credit.Card                  Fixed.Deposit.Balance 
##                                     89                                     70 
##                   Housing.Loan.Balance                    Savings.Acc.Balance 
##                                     91                                     14 
##             Internet.Bank.User..1.Yes.             Latest.Product.Brough.Date 
##                                      0                                      0 
##          Relation.with.Bank..in.month.      New.Acct.open.last.6.mths..Count. 
##                                      0                                     61 
## Internet.Bank.Tranx.last.6.mth..count.     Total.Asset.Under.Management..AUM. 
##                                     75                                      0 
##                             Total.Loan         AUM.Movement.Slope.last.6.mths 
##                                     87                                      2
# Check whether the deposit balance column is stored as an integer vector
is.integer(BankData[["All.Deposit.Acc.Balance"]])
## [1] FALSE
# Drop the columns that are not carried forward, then store the categorical
# fields (including the two 0/1 flags) as factors
BankData <- BankData %>%
  dplyr::select(
    -c(
      CIF_KEY,
      Gender,
      Housing.Loan.Balance,
      Credit.Card.Balance,
      Total.Loan,
      Count.of.Credit.Card,
      New.Acct.open.last.6.mths..Count.,
      Latest.Product.Brough.Date,
      Internet.Bank.Tranx.last.6.mth..count.,
      AUM.Movement.Slope.last.6.mths,
      Savings.Acc.Balance
    )
  ) %>%
  mutate(
    across(
      c(
        Marital.Status,
        State,
        Race,
        Occupation,
        Premier.Customer..1..Yes.,
        Internet.Bank.User..1.Yes.
      ),
      as.factor
    )
  )

# Print the cleaned data frame
BankData
##      No Age Marital.Status Premier.Customer..1..Yes.            State      Race
## 1     1  71         Single                         1 01. KLANG VALLEY   CHINESE
## 2     2  68        Married                         0 01. KLANG VALLEY   CHINESE
## 3     3  65        Married                         1 01. KLANG VALLEY   CHINESE
## 4     4  60        Married                         0 01. KLANG VALLEY   CHINESE
## 5     5  71         Single                         0 01. KLANG VALLEY    INDIAN
## 6     6  46        Married                         0 08. PULAU PINANG   CHINESE
## 7     7  69         Single                         0       04. PAHANG   CHINESE
## 8     8  72        Married                         0 01. KLANG VALLEY    OTHERS
## 9     9  77        Married                         0        02. JOHOR   CHINESE
## 10   10  28         Single                         0        02. JOHOR   CHINESE
## 11   11  85        Married                         0        05. PERAK   CHINESE
## 12   12  70        Married                         0      12. SARAWAK   CHINESE
## 13   13  59        Married                         0       03. MELAKA   CHINESE
## 14   14   9         Others                         0        05. PERAK   CHINESE
## 15   15  45         Single                         0        09. KEDAH   CHINESE
## 16   16  74        Married                         0 08. PULAU PINANG   CHINESE
## 17   17  47         Single                         0      12. SARAWAK   CHINESE
## 18   18  87         Single                         0        02. JOHOR   CHINESE
## 19   19  64         Single                         0        02. JOHOR   CHINESE
## 20   20  57        Married                         0        05. PERAK   CHINESE
## 21   21  58        Married                         0 01. KLANG VALLEY    INDIAN
## 22   22  33         Single                         0        13. SABAH   CHINESE
## 23   23  47         Single                         0  06. N. SEMBILAN   CHINESE
## 24   24  49        Married                         0              N/A    OTHERS
## 25   25  63        Married                         0      12. SARAWAK   CHINESE
## 26   26  46        Married                         0        02. JOHOR   CHINESE
## 27   27  23         Single                         0        09. KEDAH    INDIAN
## 28   28  45        Married                         0       04. PAHANG   CHINESE
## 29   29  50        Married                         0 01. KLANG VALLEY   CHINESE
## 30   30  56         Single                         0        02. JOHOR   CHINESE
## 31   31  33        Married                         0 01. KLANG VALLEY   CHINESE
## 32   32  61         Single                         0  06. N. SEMBILAN    INDIAN
## 33   33  49        Married                         1        05. PERAK   CHINESE
## 34   34  53         Single                         0       03. MELAKA   CHINESE
## 35   35  47        Married                         0      12. SARAWAK    OTHERS
## 36   36  31         Single                         0 01. KLANG VALLEY   CHINESE
## 37   37  25         Single                         0 08. PULAU PINANG BUMIPUTRA
## 38   38  35        Married                         0 08. PULAU PINANG   CHINESE
## 39   39  43         Single                         0        05. PERAK   CHINESE
## 40   40  56       Divorced                         0 01. KLANG VALLEY BUMIPUTRA
## 41   41  66         Single                         0 01. KLANG VALLEY   CHINESE
## 42   42  55        Married                         0 08. PULAU PINANG   CHINESE
## 43   43  79        Married                         0 01. KLANG VALLEY   CHINESE
## 44   44  48         Single                         0 01. KLANG VALLEY   CHINESE
## 45   45  49        Married                         0 01. KLANG VALLEY BUMIPUTRA
## 46   46  64        Married                         0        05. PERAK   CHINESE
## 47   47  65        Married                         0 01. KLANG VALLEY BUMIPUTRA
## 48   48  60         Single                         0        05. PERAK   CHINESE
## 49   49  42         Single                         0 01. KLANG VALLEY   CHINESE
## 50   50  73         Single                         0  06. N. SEMBILAN   CHINESE
## 51   51  60        Married                         0 01. KLANG VALLEY   CHINESE
## 52   52  64        Married                         0 08. PULAU PINANG   CHINESE
## 53   53  70        Married                         0        02. JOHOR   CHINESE
## 54   54  45         Single                         0 01. KLANG VALLEY   CHINESE
## 55   55  25         Single                         0        02. JOHOR BUMIPUTRA
## 56   56  32         Single                         0        02. JOHOR BUMIPUTRA
## 57   57  62         Single                         0      12. SARAWAK BUMIPUTRA
## 58   58  56        Married                         0  06. N. SEMBILAN   CHINESE
## 59   59  47         Single                         0        05. PERAK   CHINESE
## 60   60  36         Single                         0 01. KLANG VALLEY    INDIAN
## 61   61  73        Married                         0       04. PAHANG   CHINESE
## 62   62  60        Married                         0 01. KLANG VALLEY BUMIPUTRA
## 63   63  58         Single                         0      12. SARAWAK   CHINESE
## 64   64  36         Single                         0        02. JOHOR   CHINESE
## 65   65  50         Single                         0 08. PULAU PINANG BUMIPUTRA
## 66   66  22         Single                         0 01. KLANG VALLEY   CHINESE
## 67   67  30         Single                         0        02. JOHOR   CHINESE
## 68   68  23         Single                         0 08. PULAU PINANG   CHINESE
## 69   69  39        Married                         0 01. KLANG VALLEY BUMIPUTRA
## 70   70  38         Single                         0        13. SABAH    INDIAN
## 71   71  70        Married                         0        05. PERAK BUMIPUTRA
## 72   72  35         Single                         1  06. N. SEMBILAN   CHINESE
## 73   73  54        Married                         0 01. KLANG VALLEY   CHINESE
## 74   74  31         Single                         0        02. JOHOR BUMIPUTRA
## 75   75  30         Single                         0              N/A    OTHERS
## 76   76  45        Widowed                         0 01. KLANG VALLEY    OTHERS
## 77   77  55        Married                         0 01. KLANG VALLEY   CHINESE
## 78   78  33         Single                         0       04. PAHANG BUMIPUTRA
## 79   79  66         Single                         0      12. SARAWAK BUMIPUTRA
## 80   80  24         Single                         0 01. KLANG VALLEY BUMIPUTRA
## 81   81  40        Married                         0 01. KLANG VALLEY   CHINESE
## 82   82  16         Single                         0 01. KLANG VALLEY   CHINESE
## 83   83  41         Single                         0        02. JOHOR   CHINESE
## 84   84  48        Married                         0        02. JOHOR BUMIPUTRA
## 85   85  36        Married                         0   07. TERENGGANU BUMIPUTRA
## 86   86  24         Single                         0        13. SABAH   CHINESE
## 87   87  31         Single                         0 01. KLANG VALLEY    OTHERS
## 88   88  36        Married                         0 01. KLANG VALLEY   CHINESE
## 89   89   5         Single                         0 01. KLANG VALLEY   CHINESE
## 90   90  10         Single                         0 08. PULAU PINANG   CHINESE
## 91   91  27         Single                         0        13. SABAH BUMIPUTRA
## 92   92  21         Single                         0      12. SARAWAK   CHINESE
## 93   93  25         Single                         0        13. SABAH BUMIPUTRA
## 94   94  23         Single                         0      12. SARAWAK BUMIPUTRA
## 95   95  61        Married                         0        05. PERAK   CHINESE
## 96   96  21         Single                         0   07. TERENGGANU BUMIPUTRA
## 97   97  45        Married                         0 01. KLANG VALLEY BUMIPUTRA
## 98   98  37        Married                         0   07. TERENGGANU BUMIPUTRA
## 99   99  60        Married                         0      12. SARAWAK BUMIPUTRA
## 100 100  34         Single                         0        05. PERAK   CHINESE
##        Occupation    Salary All.Deposit.Acc.Balance Count.of.Deposit.Acc
## 1    White Collar  4000.000              406,936.3                    10
## 2          Others     0.000              346,415.2                     9
## 3    White Collar  6200.000              325,803.6                     2
## 4         Unknown     0.000              159,000.0                     8
## 5    White Collar     0.000              137,410.2                     4
## 6   Self Employed 17171.120              119,802.4                     6
## 7   Self Employed     0.000              115,002.8                     5
## 8    White Collar  8333.000              104,018.8                     4
## 9         Unknown     0.000               96,179.3                     3
## 10    Blue Collar     0.000               90,474.4                     4
## 11         Others     0.000               90,440.6                     8
## 12        Retired     0.000               85,059.2                     4
## 13   White Collar     0.000               74,137.8                     3
## 14        Unknown        NA               72,017.6                     1
## 15   White Collar     0.000               67,951.2                     1
## 16         Others     0.000               58,651.4                     1
## 17   White Collar  4543.917               58,216.1                     2
## 18        Unknown     0.000               54,477.8                     3
## 19        Unknown     0.000               44,833.5                     1
## 20         Others     0.000               41,038.8                     4
## 21        Unknown     0.000               39,557.1                     2
## 22   White Collar     0.000               34,955.2                     2
## 23   White Collar     0.000               34,360.0                     2
## 24  Self Employed  6812.260               32,011.0                     1
## 25         Others     0.000               30,984.7                     8
## 26  Self Employed     0.000               30,261.7                     2
## 27        Student     0.000               30,005.5                     2
## 28   White Collar  7241.090               27,713.6                     1
## 29        Unknown     0.000               24,309.8                     3
## 30         Others     0.000               24,308.6                     2
## 31    Blue Collar     0.000               23,719.6                     1
## 32  Self Employed     0.000               21,528.8                     2
## 33  Self Employed 14004.000               21,032.9                     1
## 34   White Collar  7500.000               20,259.7                     1
## 35         Others     0.000               19,481.0                     1
## 36  Self Employed     0.000               19,337.6                     1
## 37    Blue Collar     0.000               19,249.2                     1
## 38   White Collar     0.000               18,190.3                     3
## 39    Blue Collar     0.000               17,798.9                     3
## 40    Blue Collar     0.000               13,826.2                     1
## 41        Unknown     0.000               13,638.4                     1
## 42   White Collar     0.000               13,498.7                     2
## 43        Unknown     0.000               12,894.1                     1
## 44        Unknown     0.000               12,220.4                     2
## 45   White Collar     0.000               11,198.3                     1
## 46        Unknown     0.000               10,550.0                     2
## 47        Unknown     0.000               10,424.5                     1
## 48   White Collar     0.000               10,078.5                     2
## 49    Blue Collar     0.000                9,971.0                     1
## 50        Retired     0.000                9,788.2                     3
## 51   White Collar  8333.333                8,377.8                     2
## 52        Retired     0.000                8,346.5                     2
## 53        Unknown     0.000                7,912.9                     1
## 54   White Collar  1500.000                7,768.0                     4
## 55   White Collar  8225.360                7,366.3                     1
## 56    Blue Collar  5962.420                7,218.9                     2
## 57        Retired     0.000                7,083.1                     1
## 58   White Collar  4166.667                6,959.7                     1
## 59        Unknown     0.000                6,601.3                     1
## 60   White Collar  6500.000                6,287.8                     1
## 61        Unknown     0.000                6,165.5                     1
## 62        Unknown  4583.020                5,973.9                     1
## 63    Blue Collar  3700.000                5,291.4                     1
## 64        Unknown     0.000                5,049.9                     1
## 65        Unknown     0.000                4,857.5                     1
## 66        Student     0.000                4,613.2                     1
## 67    Blue Collar 10863.660                4,480.3                     1
## 68    Blue Collar  2935.800                4,262.6                     1
## 69   White Collar     0.000                4,256.6                     1
## 70   White Collar  4837.490                4,168.1                     3
## 71        Unknown     0.000                4,119.5                     1
## 72        Unknown  3949.800                4,036.6                     1
## 73        Unknown     0.000                3,886.4                     1
## 74    Blue Collar  4114.050                3,831.3                     1
## 75   White Collar  5737.620                3,732.8                     1
## 76         Others     0.000                3,449.1                     1
## 77   White Collar 11125.850                3,124.1                     1
## 78   White Collar  3096.900                3,103.7                     1
## 79         Others     0.000                3,059.9                     1
## 80        Student     0.000                3,016.7                     1
## 81    Blue Collar     0.000                2,852.5                     1
## 82        Student     0.000                2,653.5                     1
## 83   White Collar  6800.580                2,564.1                     1
## 84  Self Employed  8742.010                2,534.7                     2
## 85    Blue Collar  1884.560                2,452.0                     1
## 86   White Collar  4149.930                2,412.1                     1
## 87    Blue Collar     0.000                2,286.6                     1
## 88   White Collar  3760.260                2,230.3                     1
## 89        Student     0.000                2,175.2                     1
## 90        Unknown     0.000                2,167.5                     1
## 91   White Collar  1681.500                2,162.3                     1
## 92        Student     0.000                2,065.7                     1
## 93        Student  1641.050                2,059.1                     1
## 94    Blue Collar   771.370                2,028.1                     1
## 95  Self Employed 10700.000                2,019.9                     2
## 96        Student     0.000                2,013.4                     1
## 97   White Collar  3300.000                1,997.4                     1
## 98    Blue Collar  2327.250                1,980.9                     1
## 99   White Collar  1366.667                1,780.4                     2
## 100   Blue Collar     0.000                1,750.0                     1
##     Count.of.Product.with.Bank Fixed.Deposit.Balance Internet.Bank.User..1.Yes.
## 1                            2             383800.00                          0
## 2                            2             340000.00                          0
## 3                            4                    NA                          0
## 4                            2             159000.00                          0
## 5                            3             107000.00                          0
## 6                            4              86398.08                          1
## 7                            2             113000.00                          0
## 8                            7             100000.00                          1
## 9                            1              96179.25                          0
## 10                           3              85000.00                          1
## 11                           1              90440.64                          0
## 12                           3              80548.04                          1
## 13                           2              71000.00                          0
## 14                           1                    NA                          0
## 15                           1                    NA                          0
## 16                           1              58651.35                          0
## 17                           4              58000.00                          1
## 18                           2              40000.00                          0
## 19                           1                    NA                          0
## 20                           3              37040.39                          0
## 21                           2              37640.72                          0
## 22                           2                    NA                          1
## 23                           2              16267.83                          0
## 24                           3                    NA                          1
## 25                           2              30913.58                          0
## 26                           3              30000.00                          1
## 27                           3              30000.00                          1
## 28                           2                    NA                          1
## 29                           2              23450.84                          0
## 30                           3              20000.00                          1
## 31                           2                    NA                          1
## 32                           3                    NA                          1
## 33                           3                    NA                          0
## 34                           2                    NA                          0
## 35                           1                    NA                          0
## 36                           2                    NA                          1
## 37                           2                    NA                          1
## 38                           3              10025.48                          1
## 39                           2              12903.78                          0
## 40                           1                    NA                          0
## 41                           1                    NA                          0
## 42                           2                    NA                          0
## 43                           1                    NA                          0
## 44                           3              12207.14                          0
## 45                           2                    NA                          1
## 46                           1                    NA                          0
## 47                           1                    NA                          0
## 48                           3               1046.05                          1
## 49                           1                    NA                          0
## 50                           2               9700.00                          0
## 51                           4                    NA                          1
## 52                           3               8322.08                          0
## 53                           1                    NA                          0
## 54                           3               6045.63                          1
## 55                           2                    NA                          1
## 56                           5                    NA                          0
## 57                           1                    NA                          0
## 58                           2                    NA                          1
## 59                           2                    NA                          1
## 60                           4                    NA                          1
## 61                           1                    NA                          0
## 62                           4                    NA                          1
## 63                           3                    NA                          0
## 64                           1                    NA                          0
## 65                           3                    NA                          1
## 66                           1                    NA                          0
## 67                           1                    NA                          0
## 68                           2                    NA                          1
## 69                           2                    NA                          1
## 70                           7                    NA                          1
## 71                           2                    NA                          1
## 72                           2                    NA                          1
## 73                           2                    NA                          1
## 74                           4                    NA                          1
## 75                           4                    NA                          1
## 76                           2                    NA                          1
## 77                           2                    NA                          1
## 78                           2                    NA                          1
## 79                           1                    NA                          0
## 80                           2                    NA                          1
## 81                           2                    NA                          1
## 82                           1                    NA                          0
## 83                           3                    NA                          1
## 84                           4                    NA                          1
## 85                           2                    NA                          1
## 86                           2                    NA                          1
## 87                           2                    NA                          1
## 88                           2                    NA                          0
## 89                           1                    NA                          0
## 90                           1                    NA                          0
## 91                           4                    NA                          1
## 92                           1                    NA                          0
## 93                           2                    NA                          1
## 94                           2                    NA                          1
## 95                           5                    NA                          0
## 96                           2                    NA                          1
## 97                           4                    NA                          0
## 98                           4                    NA                          0
## 99                           4                    NA                          1
## 100                          2                    NA                          0
##     Relation.with.Bank..in.month. Total.Asset.Under.Management..AUM.
## 1                             314                           406,936 
## 2                             684                           346,415 
## 3                             209                           325,808 
## 4                              55                           159,000 
## 5                             212                           137,410 
## 6                             357                           119,802 
## 7                             218                           115,003 
## 8                             261                           104,019 
## 9                             222                            96,179 
## 10                             84                            90,474 
## 11                            256                            90,441 
## 12                            130                            85,059 
## 13                             20                            74,138 
## 14                            103                            72,018 
## 15                            232                            67,951 
## 16                             43                            58,651 
## 17                            268                            58,216 
## 18                            233                            54,478 
## 19                            208                            44,833 
## 20                            361                            41,039 
## 21                            293                            39,557 
## 22                            123                            34,955 
## 23                            309                            34,360 
## 24                             45                            32,011 
## 25                            130                            30,985 
## 26                             40                            30,262 
## 27                             56                            30,005 
## 28                             95                            27,714 
## 29                            169                            24,310 
## 30                            362                            24,309 
## 31                             24                            23,720 
## 32                            381                            21,529 
## 33                            234                            21,033 
## 34                            362                            20,260 
## 35                            161                            19,481 
## 36                              5                            19,338 
## 37                             82                            19,249 
## 38                             10                            18,190 
## 39                            263                            17,799 
## 40                             33                            13,826 
## 41                            445                            13,638 
## 42                            244                            13,499 
## 43                            451                            12,894 
## 44                            287                            22,220 
## 45                             55                            11,198 
## 46                            411                            10,550 
## 47                            684                            10,424 
## 48                            124                            10,079 
## 49                            127                             9,971 
## 50                            395                             9,788 
## 51                            320                             8,378 
## 52                            289                             8,347 
## 53                            502                             7,913 
## 54                            189                             7,768 
## 55                             15                             7,366 
## 56                            108                             7,219 
## 57                            172                             7,083 
## 58                            153                             6,960 
## 59                            375                             6,601 
## 60                            117                             6,288 
## 61                            291                             6,165 
## 62                            378                             5,974 
## 63                            286                             5,291 
## 64                            197                             5,050 
## 65                            233                             4,857 
## 66                            118                             4,613 
## 67                             33                             4,480 
## 68                              4                             4,263 
## 69                             34                             4,257 
## 70                             61                             4,168 
## 71                            231                             4,120 
## 72                             24                             4,037 
## 73                            174                             3,886 
## 74                             49                             3,831 
## 75                             29                             3,733 
## 76                              3                             3,449 
## 77                             29                             3,124 
## 78                             56                             3,104 
## 79                            166                             3,060 
## 80                             10                             3,017 
## 81                              7                             2,852 
## 82                             51                             2,654 
## 83                             34                             2,564 
## 84                            277                             2,535 
## 85                             43                             2,452 
## 86                             40                             2,412 
## 87                             44                             2,287 
## 88                             53                             2,230 
## 89                             34                             2,175 
## 90                            107                             2,168 
## 91                             32                             2,162 
## 92                            130                             2,066 
## 93                             79                             2,059 
## 94                             46                             2,028 
## 95                            220                             2,020 
## 96                             31                             2,013 
## 97                            175                             1,997 
## 98                              2                             1,981 
## 99                            199                             1,780 
## 100                            48                             1,750

4. Exploratory Data Analysis (EDA)

EDA is done to see the patterns of the data and to extract some insights from it.

  1. To compare the difference between the number of premier customers and non-premier customers.
# Bar chart of the premier-customer flag. The same column drives both the
# x position and the fill, so the legend would be redundant and is hidden.
ggplot(data = BankData) +
  geom_bar(
    mapping = aes(
      x = Premier.Customer..1..Yes.,
      fill = Premier.Customer..1..Yes.
    )
  ) +
  theme(legend.position = 'none')

# Frequency of each value of the premier-customer flag.
table(BankData[["Premier.Customer..1..Yes."]])
## 
##  0  1 
## 96  4

Non-premier customers far outnumber premier customers: there are 96 non-premier customers and only 4 premier customers.

  1. To compare the difference between the number of customers who are using the internet banking and those customers who do not use the internet service.
# Bar chart of the internet-banking flag. The same column drives both the
# x position and the fill, so the legend would be redundant and is hidden.
ggplot(data = BankData) +
  geom_bar(
    mapping = aes(
      x = Internet.Bank.User..1.Yes.,
      fill = Internet.Bank.User..1.Yes.
    )
  ) +
  theme(legend.position = 'none')

# Frequency of each value of the internet-banking flag.
table(BankData[["Internet.Bank.User..1.Yes."]])
## 
##  0  1 
## 51 49

The number of customers who use internet banking (49) is almost the same as the number who do not (51).

  1. Continuous Variable Distribution
# Histograms of every numeric column except the two 0/1 flag columns.
# The data are stacked into key/value pairs so each variable gets its own
# facet; scales are free because the variables are plotted on their own axes.
BankData %>%
  dplyr::select(-c(Internet.Bank.User..1.Yes., Premier.Customer..1..Yes.)) %>%
  purrr::keep(is.numeric) %>%
  tidyr::gather() %>%
  ggplot(mapping = aes(x = value, fill = key)) +
  geom_histogram(color = "black") +
  facet_wrap(~ key, scales = "free") +
  # theme_minimal() must come first: it would otherwise reset the legend tweak.
  theme_minimal() +
  theme(legend.position = 'none')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 71 rows containing non-finite values (stat_bin).

From the result we can observe that:

  1. Categorical Variable Distribution
# Horizontal bar charts of the level counts of every factor column.
# The factor columns are stacked into key/value pairs, counted per
# (column, level) pair, and drawn one facet per column.
BankData %>%
  purrr::keep(is.factor) %>%
  tidyr::gather() %>%
  group_by(key, value) %>%
  summarize(n = n()) %>%
  ggplot(mapping = aes(x = value, y = n, fill = key)) +
  # Counts are already computed, so the bar heights are taken from `n` as-is.
  geom_col(color = "black") +
  coord_flip() +
  facet_wrap(~ key, scales = "free") +
  # theme_minimal() must come first: it would otherwise reset the legend tweak.
  theme_minimal() +
  theme(legend.position = 'none')
## Warning: attributes are not identical across measure variables;
## they will be dropped
## `summarise()` regrouping output by 'key' (override with `.groups` argument)

From the graph shown above, we can observe that:

  1. Correlation Matrix
# Names of all numeric columns in BankData. vapply() always returns a logical
# vector (sapply()'s return type depends on its input), so which() is safe.
numericVarName <- names(which(vapply(BankData, is.numeric, logical(1))))

# Pairwise correlations between the numeric columns. Each pair uses the rows
# where both values are present, so columns with NAs are not discarded.
# drop = FALSE keeps a data frame even if only one numeric column exists.
corr <- cor(
  BankData[, numericVarName, drop = FALSE],
  use = 'pairwise.complete.obs'
)

# Correlation heat map with the coefficient printed in each cell.
ggcorrplot(corr, lab = TRUE)

4. ANOVA Test

To determine whether there is any significant difference in the Period of Customer relation with Bank across Occupation groups, using a one-way ANOVA table.

# Attach ggpubr (install it once with install.packages("ggpubr") if missing).
library(ggpubr)

# One-way ANOVA: relationship length in months as the response, Occupation
# as the single grouping factor.
res.aov <- aov(
  formula = Relation.with.Bank..in.month. ~ Occupation,
  data = BankData
)

# Print the ANOVA table (Df, sums of squares, F value and p-value).
summary(res.aov)
##             Df  Sum Sq Mean Sq F value   Pr(>F)    
## Occupation   6  590585   98431   5.921 2.88e-05 ***
## Residuals   93 1546157   16625                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Base plot: relationship length on the x axis, one row per Occupation level
# on the y axis.
box_plot <- ggplot(
  data = BankData,
  mapping = aes(x = Relation.with.Bank..in.month., y = Occupation)
)

# Draw one box per Occupation level.
box_plot + geom_boxplot()

Hypothesis Test

H0 = There is no significant difference in the mean Period of Customer relation with Bank across the Occupation groups.

H1 = There is a significant difference in the mean Period of Customer relation with Bank for at least one Occupation group.

p-value 2.88e-05 < 0.05, reject H0.

There is enough evidence to reject H0. Thus, there is a significant difference in the Period of Customer relation with the Bank across Occupation groups.