Adarsh Adwait
---
library(ISLR)
library(ggplot2)
library(knitr)
library(dplyr)
# Read AutoFinanaceData.csv (relative to the working directory) into FinData.
FinData <- read.csv(file = "AutoFinanaceData.csv")
# Place FinData's columns on the search path; later statements refer to some
# of them by bare name (e.g. AGE, DefaulterFlag, DefaulterType).
attach(what = FinData)
# Report the number of rows and columns in FinData.
c(nrow(FinData), ncol(FinData))
[1] 28906 21
# Compact structure of FinData: one line per column with its type and the
# first few values.
str(object = FinData)
'data.frame': 28906 obs. of 21 variables:
$ Agmt.No : Factor w/ 28906 levels "AP18100009","AP18100010",..: 36 79 106 115 116 135 136 142 145 160 ...
$ ContractStatus: Factor w/ 4 levels "Closed","Foreclosed",..: 1 1 1 1 1 1 1 1 1 1 ...
$ StartDate : Factor w/ 1814 levels "","01-01-00",..: 1065 566 292 177 129 1394 292 1394 752 1394 ...
$ AGE : int 26 28 32 31 36 33 41 47 43 27 ...
$ NOOFDEPE : int 2 2 2 0 2 2 2 0 0 0 ...
$ MTHINCTH : num 4.5 5.59 8.8 5 12 ...
$ SALDATFR : num 1 1 1 1 1 1 1 1 0.97 1 ...
$ TENORYR : num 1.5 2 1 1 1 2 1 2 1.5 2 ...
$ DWNPMFR : num 0.27 0.25 0.51 0.66 0.17 0.18 0.37 0.42 0.27 0.47 ...
$ PROFBUS : int 0 0 0 0 0 0 0 0 0 0 ...
$ QUALHSC : int 0 0 0 0 0 0 1 0 0 0 ...
$ QUAL_PG : int 0 0 0 0 0 0 0 0 0 0 ...
$ SEXCODE : int 1 1 1 1 1 1 1 1 1 1 ...
$ FULLPDC : int 1 1 1 1 1 0 0 1 1 1 ...
$ FRICODE : int 0 1 1 1 1 0 0 0 0 0 ...
$ WASHCODE : int 0 0 1 1 0 0 0 0 0 0 ...
$ Region : Factor w/ 8 levels "AP1","AP2","Chennai",..: 2 2 2 2 2 2 2 2 2 2 ...
$ Branch : Factor w/ 14 levels "Bangalore","Chennai",..: 14 14 14 14 14 14 14 14 14 14 ...
$ DefaulterFlag : int 0 0 0 0 0 0 0 0 0 0 ...
$ DefaulterType : int 0 0 0 0 0 0 0 0 0 0 ...
$ DATASET : Factor w/ 3 levels " ","BUILD","VALIDATE": 1 2 2 2 2 2 2 2 2 2 ...
# Descriptive statistics (n, mean, sd, median, skew, ...) for every column of
# FinData. Rows flagged with "*" are non-numeric columns summarised through
# their integer codes, so their means and sds are not meaningful.
library("psych")
describe(x = FinData)
vars n mean sd median trimmed mad
Agmt.No* 1 28906 14453.50 8344.59 14453.50 14453.50 10714.01
ContractStatus* 2 28906 1.33 0.77 1.00 1.12 0.00
StartDate* 3 28906 827.85 552.47 812.00 821.02 705.72
AGE 4 28906 36.44 9.82 35.00 35.86 10.38
NOOFDEPE 5 28906 2.85 1.61 3.00 2.83 1.48
MTHINCTH 6 28906 8.94 4.81 8.00 8.30 4.08
SALDATFR 7 28906 0.44 0.46 0.17 0.42 0.21
TENORYR 8 28906 1.28 0.52 1.00 1.22 0.25
DWNPMFR 9 28906 0.38 0.16 0.38 0.38 0.15
PROFBUS 10 28906 0.15 0.36 0.00 0.06 0.00
QUALHSC 11 28906 0.23 0.42 0.00 0.16 0.00
QUAL_PG 12 28906 0.04 0.20 0.00 0.00 0.00
SEXCODE 13 28906 0.92 0.27 1.00 1.00 0.00
FULLPDC 14 28906 0.39 0.49 0.00 0.36 0.00
FRICODE 15 28906 0.42 0.49 0.00 0.40 0.00
WASHCODE 16 28906 0.19 0.39 0.00 0.11 0.00
Region* 17 28906 5.33 1.51 6.00 5.43 0.00
Branch* 18 28906 5.93 3.47 6.00 5.78 4.45
DefaulterFlag 19 28906 0.71 0.45 1.00 0.76 0.00
DefaulterType 20 28906 0.85 0.63 1.00 0.81 0.00
DATASET* 21 28906 2.52 0.50 3.00 2.53 0.00
min max range skew kurtosis se
Agmt.No* 1.00 28906.00 28905.00 0.00 -1.20 49.08
ContractStatus* 1.00 4.00 3.00 2.25 3.91 0.00
StartDate* 1.00 1814.00 1813.00 0.04 -1.17 3.25
AGE 18.00 70.00 52.00 0.50 -0.40 0.06
NOOFDEPE 0.00 10.00 10.00 0.43 0.89 0.01
MTHINCTH 0.10 39.50 39.40 1.62 3.85 0.03
SALDATFR 0.03 1.03 1.00 0.38 -1.82 0.00
TENORYR 0.17 4.00 3.83 1.32 1.43 0.00
DWNPMFR 0.02 0.88 0.86 -0.13 -0.13 0.00
PROFBUS 0.00 1.00 1.00 1.98 1.91 0.00
QUALHSC 0.00 1.00 1.00 1.27 -0.38 0.00
QUAL_PG 0.00 1.00 1.00 4.67 19.81 0.00
SEXCODE 0.00 1.00 1.00 -3.18 8.13 0.00
FULLPDC 0.00 1.00 1.00 0.45 -1.80 0.00
FRICODE 0.00 1.00 1.00 0.32 -1.90 0.00
WASHCODE 0.00 1.00 1.00 1.58 0.50 0.00
Region* 1.00 8.00 7.00 -0.76 0.08 0.01
Branch* 1.00 14.00 13.00 0.27 -0.90 0.02
DefaulterFlag 0.00 1.00 1.00 -0.94 -1.13 0.00
DefaulterType 0.00 2.00 2.00 0.14 -0.58 0.00
DATASET* 1.00 3.00 2.00 -0.09 -1.99 0.00
(ONE-WAY, TWO-WAY AND THREE-WAY CONTINGENCY TABLES)
# One-way table: percentage of records at each DefaulterFlag value.
flag_counts <- table(DefaulterFlag = FinData$DefaulterFlag)
flag_share <- prop.table(flag_counts) * 100
round(flag_share, digits = 2)
DefaulterFlag
0 1
28.82 71.18
# Row percentages of PROFBUS within each DefaulterFlag level, with a Sum
# column appended across the columns.
mytable <- xtabs(~ DefaulterFlag + PROFBUS, data = FinData)
row_pct <- prop.table(mytable, margin = 1) * 100
addmargins(round(row_pct, digits = 2), margin = 2)
PROFBUS
DefaulterFlag 0 1 Sum
0 84.59 15.41 100.00
1 85.39 14.61 100.00
# Raw counts of DefaulterFlag against DefaulterType, with row and column
# totals appended on both margins.
mytable <- xtabs(
  ~ DefaulterFlag + DefaulterType,
  data = FinData
)
addmargins(A = mytable)
DefaulterType
DefaulterFlag 0 1 2 Sum
0 8331 0 0 8331
1 1 16663 3911 20575
Sum 8332 16663 3911 28906
# Row percentages of Region within each DefaulterFlag level. The Sum margin is
# added across columns (margin = 2), as in the other row-percentage tables:
# summing down the rows would add percentages taken from different
# DefaulterFlag groups, which has no meaningful interpretation.
mytable <- xtabs(~ DefaulterFlag + Region, data = FinData)
addmargins(round(prop.table(mytable, 1) * 100, 2), 2)
Region
DefaulterFlag AP1 AP2 Chennai KA1 KE2 TN1 TN2 Vellore Sum
0 1.36 1.74 18.74 10.85 10.60 46.20 7.81 2.70 100.00
1 1.81 2.50 10.88 9.65 7.70 52.10 9.85 5.51 100.00
# Row percentages of SEXCODE within each DefaulterFlag level, with a Sum
# column appended across the columns.
mytable <- xtabs(~ DefaulterFlag + SEXCODE, data = FinData)
row_pct <- prop.table(mytable, margin = 1) * 100
addmargins(round(row_pct, digits = 2), margin = 2)
SEXCODE
DefaulterFlag 0 1 Sum
0 9.17 90.83 100.00
1 7.06 92.94 100.00
# Row percentages of FULLPDC within each DefaulterFlag level, with a Sum
# column appended across the columns.
mytable <- xtabs(~ DefaulterFlag + FULLPDC, data = FinData)
row_pct <- prop.table(mytable, margin = 1) * 100
addmargins(round(row_pct, digits = 2), margin = 2)
FULLPDC
DefaulterFlag 0 1 Sum
0 38.40 61.60 100.00
1 70.03 29.97 100.00
# Row percentages of FRICODE within each DefaulterFlag level, with a Sum
# column appended across the columns.
mytable <- xtabs(~ DefaulterFlag + FRICODE, data = FinData)
row_pct <- prop.table(mytable, margin = 1) * 100
addmargins(round(row_pct, digits = 2), margin = 2)
FRICODE
DefaulterFlag 0 1 Sum
0 49.51 50.49 100.00
1 61.31 38.69 100.00
# Row percentages of WASHCODE within each DefaulterFlag level, with a Sum
# column appended across the columns.
mytable <- xtabs(~ DefaulterFlag + WASHCODE, data = FinData)
row_pct <- prop.table(mytable, margin = 1) * 100
addmargins(round(row_pct, digits = 2), margin = 2)
WASHCODE
DefaulterFlag 0 1 Sum
0 74.78 25.22 100.00
1 83.52 16.48 100.00
# Mean AGE for each DefaulterFlag value, rounded to two decimals.
# The columns are read from FinData explicitly instead of through the copies
# placed on the search path by attach(), so the result always reflects the
# current contents of FinData and cannot be shadowed by a same-named global.
result <- aggregate(
  FinData$AGE,
  by = list(Status = FinData$DefaulterFlag),
  FUN = mean
)
# aggregate() names the value column "x"; give it the report's column title.
names(result)[2] <- "AverageAgeofDefaulters"
result$AverageAgeofDefaulters <- round(result$AverageAgeofDefaulters, 2)
result
Status AverageAgeofDefaulters
1 0 37.22
2 1 36.12
# Pie chart of DefaulterType with the sample size and percentage appended to
# each slice label, so the chart shows what its title promises.
# DefaulterType is read from FinData explicitly rather than via attach().
mytable <- table(DefaulterType = FinData$DefaulterType)
slice_pct <- round(100 * prop.table(mytable), 1)
# Label format: "<type>\n<count> (<percent>%)".
slice_labels <- paste0(names(mytable), "\n", mytable, " (", slice_pct, "%)")
pie(mytable,
    labels = slice_labels,
    main = "Pie Chart of Percentage of each Defaulter Type")
SEXCODE
DefaulterType 0 1
0 764 7568
1 1190 15473
2 262 3649
FRICODE
DefaulterType 0 1
0 4126 4206
1 9931 6732
2 2683 1228
WASHCODE
DefaulterType 0 1
0 6231 2101
1 13864 2799
2 3320 591
FULLPDC
DefaulterType 0 1
0 3200 5132
1 11161 5502
2 3246 665
NOOFDEPE
DefaulterType 0 1 2 3 4 5 6 7 8 9 10
0 693 582 2528 2095 1542 590 183 63 34 12 10
1 1433 876 4286 4097 3410 1604 580 202 98 36 41
2 651 269 1326 719 550 246 94 34 13 5 4