Project Part 1 (Summary and Visualization)

Adarsh Adwait

---

MLM Project

Part 1 (Summary and Visualization)

Reading the Dataset

library(ISLR)
library(ggplot2)
library(knitr)
library(dplyr)
# Load the dataset from AutoFinanaceData.csv. stringsAsFactors = TRUE is
# spelled out because R >= 4.0.0 no longer converts strings to factors by
# default, and the str()/describe() output in this report shows the text
# columns as factors.
FinData <- read.csv("AutoFinanaceData.csv", stringsAsFactors = TRUE)
# Later chunks refer to columns such as AGE and DefaulterFlag by bare name,
# so the data frame is kept on the search path.
attach(FinData)

Dimensions of the Dataset

# Number of rows, then number of columns
c(nrow(FinData), ncol(FinData))
[1] 28906    21

Structure of the Dataset

# Compact overview of every column: its type and first few values
utils::str(object = FinData)
'data.frame':   28906 obs. of  21 variables:
 $ Agmt.No       : Factor w/ 28906 levels "AP18100009","AP18100010",..: 36 79 106 115 116 135 136 142 145 160 ...
 $ ContractStatus: Factor w/ 4 levels "Closed","Foreclosed",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ StartDate     : Factor w/ 1814 levels "","01-01-00",..: 1065 566 292 177 129 1394 292 1394 752 1394 ...
 $ AGE           : int  26 28 32 31 36 33 41 47 43 27 ...
 $ NOOFDEPE      : int  2 2 2 0 2 2 2 0 0 0 ...
 $ MTHINCTH      : num  4.5 5.59 8.8 5 12 ...
 $ SALDATFR      : num  1 1 1 1 1 1 1 1 0.97 1 ...
 $ TENORYR       : num  1.5 2 1 1 1 2 1 2 1.5 2 ...
 $ DWNPMFR       : num  0.27 0.25 0.51 0.66 0.17 0.18 0.37 0.42 0.27 0.47 ...
 $ PROFBUS       : int  0 0 0 0 0 0 0 0 0 0 ...
 $ QUALHSC       : int  0 0 0 0 0 0 1 0 0 0 ...
 $ QUAL_PG       : int  0 0 0 0 0 0 0 0 0 0 ...
 $ SEXCODE       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ FULLPDC       : int  1 1 1 1 1 0 0 1 1 1 ...
 $ FRICODE       : int  0 1 1 1 1 0 0 0 0 0 ...
 $ WASHCODE      : int  0 0 1 1 0 0 0 0 0 0 ...
 $ Region        : Factor w/ 8 levels "AP1","AP2","Chennai",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ Branch        : Factor w/ 14 levels "Bangalore","Chennai",..: 14 14 14 14 14 14 14 14 14 14 ...
 $ DefaulterFlag : int  0 0 0 0 0 0 0 0 0 0 ...
 $ DefaulterType : int  0 0 0 0 0 0 0 0 0 0 ...
 $ DATASET       : Factor w/ 3 levels " ","BUILD","VALIDATE": 1 2 2 2 2 2 2 2 2 2 ...

Descriptive Statistics of the Dataset

# descriptive statistics of the dataset (n, mean, sd, median, skew, ...)
# Rows flagged with `*` in the output are the columns that str() reports as
# factors; their statistics are not meaningful as numeric summaries.
library(psych)
describe(FinData)
                vars     n     mean      sd   median  trimmed      mad
Agmt.No*           1 28906 14453.50 8344.59 14453.50 14453.50 10714.01
ContractStatus*    2 28906     1.33    0.77     1.00     1.12     0.00
StartDate*         3 28906   827.85  552.47   812.00   821.02   705.72
AGE                4 28906    36.44    9.82    35.00    35.86    10.38
NOOFDEPE           5 28906     2.85    1.61     3.00     2.83     1.48
MTHINCTH           6 28906     8.94    4.81     8.00     8.30     4.08
SALDATFR           7 28906     0.44    0.46     0.17     0.42     0.21
TENORYR            8 28906     1.28    0.52     1.00     1.22     0.25
DWNPMFR            9 28906     0.38    0.16     0.38     0.38     0.15
PROFBUS           10 28906     0.15    0.36     0.00     0.06     0.00
QUALHSC           11 28906     0.23    0.42     0.00     0.16     0.00
QUAL_PG           12 28906     0.04    0.20     0.00     0.00     0.00
SEXCODE           13 28906     0.92    0.27     1.00     1.00     0.00
FULLPDC           14 28906     0.39    0.49     0.00     0.36     0.00
FRICODE           15 28906     0.42    0.49     0.00     0.40     0.00
WASHCODE          16 28906     0.19    0.39     0.00     0.11     0.00
Region*           17 28906     5.33    1.51     6.00     5.43     0.00
Branch*           18 28906     5.93    3.47     6.00     5.78     4.45
DefaulterFlag     19 28906     0.71    0.45     1.00     0.76     0.00
DefaulterType     20 28906     0.85    0.63     1.00     0.81     0.00
DATASET*          21 28906     2.52    0.50     3.00     2.53     0.00
                  min      max    range  skew kurtosis    se
Agmt.No*         1.00 28906.00 28905.00  0.00    -1.20 49.08
ContractStatus*  1.00     4.00     3.00  2.25     3.91  0.00
StartDate*       1.00  1814.00  1813.00  0.04    -1.17  3.25
AGE             18.00    70.00    52.00  0.50    -0.40  0.06
NOOFDEPE         0.00    10.00    10.00  0.43     0.89  0.01
MTHINCTH         0.10    39.50    39.40  1.62     3.85  0.03
SALDATFR         0.03     1.03     1.00  0.38    -1.82  0.00
TENORYR          0.17     4.00     3.83  1.32     1.43  0.00
DWNPMFR          0.02     0.88     0.86 -0.13    -0.13  0.00
PROFBUS          0.00     1.00     1.00  1.98     1.91  0.00
QUALHSC          0.00     1.00     1.00  1.27    -0.38  0.00
QUAL_PG          0.00     1.00     1.00  4.67    19.81  0.00
SEXCODE          0.00     1.00     1.00 -3.18     8.13  0.00
FULLPDC          0.00     1.00     1.00  0.45    -1.80  0.00
FRICODE          0.00     1.00     1.00  0.32    -1.90  0.00
WASHCODE         0.00     1.00     1.00  1.58     0.50  0.00
Region*          1.00     8.00     7.00 -0.76     0.08  0.01
Branch*          1.00    14.00    13.00  0.27    -0.90  0.02
DefaulterFlag    0.00     1.00     1.00 -0.94    -1.13  0.00
DefaulterType    0.00     2.00     2.00  0.14    -0.58  0.00
DATASET*         1.00     3.00     2.00 -0.09    -1.99  0.00

DATA DISTRIBUTION - DISCRETE

(ONE-WAY, TWO-WAY AND THREE-WAY CONTINGENCY TABLES)

Percentage of the Defaulters (Defaulted / Not Defaulted)

# Share of each DefaulterFlag value, as a percentage to two decimals
round(100 * prop.table(table(DefaulterFlag = FinData$DefaulterFlag)), digits = 2)
DefaulterFlag
    0     1 
28.82 71.18 

Percentage of the Defaulters by Profession

# Rows: DefaulterFlag; columns: PROFBUS
mytable <- xtabs(~ DefaulterFlag + PROFBUS, data = FinData)
# Within-row percentages, with a Sum column showing each row totals 100
addmargins(
  round(100 * prop.table(mytable, margin = 1), digits = 2),
  margin = 2
)
             PROFBUS
DefaulterFlag      0      1    Sum
            0  84.59  15.41 100.00
            1  85.39  14.61 100.00

Count of the Defaulters by Defaulter Type

# Cross-tabulate the binary default flag against the defaulter type
mytable <- xtabs(~ DefaulterFlag + DefaulterType, data = FinData)
# Raw counts with row and column totals
addmargins(A = mytable)
             DefaulterType
DefaulterFlag     0     1     2   Sum
          0    8331     0     0  8331
          1       1 16663  3911 20575
          Sum  8332 16663  3911 28906

Percentage of the Defaulters by Region

# Rows: DefaulterFlag; columns: Region
mytable <- xtabs(~ DefaulterFlag + Region, data = FinData)
# Within-row percentages. The margin is added over columns (margin = 2), as in
# the other percentage tables, so the Sum column confirms each row totals 100.
# Summing over rows instead would add the two row-percentage vectors together,
# which has no interpretation.
addmargins(round(prop.table(mytable, 1) * 100, 2), 2)
             Region
DefaulterFlag    AP1    AP2 Chennai    KA1    KE2    TN1    TN2 Vellore    Sum
            0   1.36   1.74   18.74  10.85  10.60  46.20   7.81    2.70 100.00
            1   1.81   2.50   10.88   9.65   7.70  52.10   9.85    5.51 100.00

Percentage of the Defaulters by Gender

# Rows: DefaulterFlag; columns: SEXCODE
mytable <- xtabs(~ DefaulterFlag + SEXCODE, data = FinData)
# Within-row percentages, with a Sum column showing each row totals 100
addmargins(
  round(100 * prop.table(mytable, margin = 1), digits = 2),
  margin = 2
)
             SEXCODE
DefaulterFlag      0      1    Sum
            0   9.17  90.83 100.00
            1   7.06  92.94 100.00

Percentage of the Defaulters by PostDatedCheck

# Rows: DefaulterFlag; columns: FULLPDC
mytable <- xtabs(~ DefaulterFlag + FULLPDC, data = FinData)
# Within-row percentages, with a Sum column showing each row totals 100
addmargins(
  round(100 * prop.table(mytable, margin = 1), digits = 2),
  margin = 2
)
             FULLPDC
DefaulterFlag      0      1    Sum
            0  38.40  61.60 100.00
            1  70.03  29.97 100.00

Percentage of the Defaulters by If Person Owns Refrigerator or not

# Rows: DefaulterFlag; columns: FRICODE
mytable <- xtabs(~ DefaulterFlag + FRICODE, data = FinData)
# Within-row percentages, with a Sum column showing each row totals 100
addmargins(
  round(100 * prop.table(mytable, margin = 1), digits = 2),
  margin = 2
)
             FRICODE
DefaulterFlag      0      1    Sum
            0  49.51  50.49 100.00
            1  61.31  38.69 100.00

Percentage of the Defaulters by If Person Owns Washing Machine or not

# Rows: DefaulterFlag; columns: WASHCODE
mytable <- xtabs(~ DefaulterFlag + WASHCODE, data = FinData)
# Within-row percentages, with a Sum column showing each row totals 100
addmargins(
  round(100 * prop.table(mytable, margin = 1), digits = 2),
  margin = 2
)
             WASHCODE
DefaulterFlag      0      1    Sum
            0  74.78  25.22 100.00
            1  83.52  16.48 100.00

DATA DISTRIBUTION - CONTINUOUS

(SUMMARY TABLES)

Average Age of the Defaulters (Defaulted / Not Defaulted)

# Mean AGE within each DefaulterFlag group, rounded to two decimals.
# Columns are taken from FinData explicitly, so this chunk does not depend
# on the data frame being on the search path.
result <- aggregate(
  FinData$AGE,
  by = list(Status = FinData$DefaulterFlag),
  FUN = mean
)
names(result)[2] <- "AverageAgeofDefaulters"
result$AverageAgeofDefaulters <- round(result$AverageAgeofDefaulters, 2)
result
  Status AverageAgeofDefaulters
1      0                  37.22
2      1                  36.12

Visualization of the Dataset

# Pie chart of DefaulterType. Each slice label shows the type code, its
# sample size and its percentage share, matching the chart title.
mytable <- table(DefaulterType = FinData$DefaulterType)
slice_pct <- round(100 * as.vector(prop.table(mytable)), 1)
slice_labels <- paste0(
  names(mytable), "\n", as.vector(mytable), " (", slice_pct, "%)"
)
pie(mytable,
    labels = slice_labels,
    main = "Pie Chart of Percentage of each Defaulter Type")

plot of chunk unnamed-chunk-14

Visualization of Defaulter Type vs Gender

             SEXCODE
DefaulterType     0     1
            0   764  7568
            1  1190 15473
            2   262  3649

plot of chunk unnamed-chunk-15

Visualization of Defaulter Type vs Owns Fridge or not

             FRICODE
DefaulterType    0    1
            0 4126 4206
            1 9931 6732
            2 2683 1228

plot of chunk unnamed-chunk-16

Visualization of Defaulter Type vs Owns Washing Machine or not

             WASHCODE
DefaulterType     0     1
            0  6231  2101
            1 13864  2799
            2  3320   591

plot of chunk unnamed-chunk-17

Visualization of Defaulter Type vs paid PDC in full or not

             FULLPDC
DefaulterType     0     1
            0  3200  5132
            1 11161  5502
            2  3246   665

plot of chunk unnamed-chunk-18

Visualization of Defaulter Type vs No. of Dependents

             NOOFDEPE
DefaulterType    0    1    2    3    4    5    6    7    8    9   10
            0  693  582 2528 2095 1542  590  183   63   34   12   10
            1 1433  876 4286 4097 3410 1604  580  202   98   36   41
            2  651  269 1326  719  550  246   94   34   13    5    4

plot of chunk unnamed-chunk-19

Boxplot of Age by Defaulter Type

plot of chunk unnamed-chunk-20

Scatter Plot of Monthly salary and Age by Defaulter Type

plot of chunk unnamed-chunk-21

Scatter Plot of Monthly salary and Salary date by Defaulter Type

plot of chunk unnamed-chunk-22

Scatter Plot of Age and Tenor by Defaulter Type

plot of chunk unnamed-chunk-23