# Read AutoFinanaceData.csv into a data frame.
df <- read.csv(file = "AutoFinanaceData.csv")
# Put the columns of df on the search path: the tables and charts further
# down refer to columns such as DefaulterFlag by bare name, without df$.
attach(df)
# Size of the data set as (rows, columns).
dim(df)
## [1] 28906 21
## [1] "Agmt.No" "ContractStatus" "StartDate" "AGE"
## [5] "NOOFDEPE" "MTHINCTH" "SALDATFR" "TENORYR"
## [9] "DWNPMFR" "PROFBUS" "QUALHSC" "QUAL_PG"
## [13] "SEXCODE" "FULLPDC" "FRICODE" "WASHCODE"
## [17] "Region" "Branch" "DefaulterFlag" "DefaulterType"
## [21] "DATASET"
Defaulter Flag
1: Customer has delayed paying at least once
0: Otherwise
Gender
SEXCODE = 1 (Male)
SEXCODE = 0 (Female)
Age
Education
QUALHSC
QUAL_PG
Income
Monthly Income in Thousands (MTHINCTH)
Owns a Fridge (FRICODE)
Owns a Washing Machine (WASHCODE)
Profession
No. of Dependents
Region
## 'data.frame': 28906 obs. of 21 variables:
## $ Agmt.No : chr "AP18100057" "AP18100140" "AP18100198" "AP18100217" ...
## $ ContractStatus: chr "Closed" "Closed" "Closed" "Closed" ...
## $ StartDate : chr "19-01-01" "10-05-01" "05-08-01" "03-09-01" ...
## $ AGE : int 26 28 32 31 36 33 41 47 43 27 ...
## $ NOOFDEPE : int 2 2 2 0 2 2 2 0 0 0 ...
## $ MTHINCTH : num 4.5 5.59 8.8 5 12 ...
## $ SALDATFR : num 1 1 1 1 1 1 1 1 0.97 1 ...
## $ TENORYR : num 1.5 2 1 1 1 2 1 2 1.5 2 ...
## $ DWNPMFR : num 0.27 0.25 0.51 0.66 0.17 0.18 0.37 0.42 0.27 0.47 ...
## $ PROFBUS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ QUALHSC : int 0 0 0 0 0 0 1 0 0 0 ...
## $ QUAL_PG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SEXCODE : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FULLPDC : int 1 1 1 1 1 0 0 1 1 1 ...
## $ FRICODE : int 0 1 1 1 1 0 0 0 0 0 ...
## $ WASHCODE : int 0 0 1 1 0 0 0 0 0 0 ...
## $ Region : chr "AP2" "AP2" "AP2" "AP2" ...
## $ Branch : chr "Vizag" "Vizag" "Vizag" "Vizag" ...
## $ DefaulterFlag : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DefaulterType : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DATASET : chr " " "BUILD" "BUILD" "BUILD" ...
# Structure of df with the categorical columns stored as factor:
## 'data.frame': 28906 obs. of 21 variables:
## $ Agmt.No : chr "AP18100057" "AP18100140" "AP18100198" "AP18100217" ...
## $ ContractStatus: chr "Closed" "Closed" "Closed" "Closed" ...
## $ StartDate : chr "19-01-01" "10-05-01" "05-08-01" "03-09-01" ...
## $ AGE : int 26 28 32 31 36 33 41 47 43 27 ...
## $ NOOFDEPE : int 2 2 2 0 2 2 2 0 0 0 ...
## $ MTHINCTH : num 4.5 5.59 8.8 5 12 ...
## $ SALDATFR : num 1 1 1 1 1 1 1 1 0.97 1 ...
## $ TENORYR : num 1.5 2 1 1 1 2 1 2 1.5 2 ...
## $ DWNPMFR : num 0.27 0.25 0.51 0.66 0.17 0.18 0.37 0.42 0.27 0.47 ...
## $ PROFBUS : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ QUALHSC : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ QUAL_PG : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ SEXCODE : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ FULLPDC : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 1 2 2 2 ...
## $ FRICODE : Factor w/ 2 levels "0","1": 1 2 2 2 2 1 1 1 1 1 ...
## $ WASHCODE : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 1 ...
## $ Region : Factor w/ 8 levels "AP1","AP2","Chennai",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Branch : Factor w/ 14 levels "Bangalore","Chennai",..: 14 14 14 14 14 14 14 14 14 14 ...
## $ DefaulterFlag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ DefaulterType : Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ DATASET : chr " " "BUILD" "BUILD" "BUILD" ...
## n mean sd median min max
## Agmt.No* 28906 14453.50 8344.59 14453.50 1.00 28906.00
## ContractStatus* 28906 1.33 0.77 1.00 1.00 4.00
## StartDate* 28906 827.85 552.47 812.00 1.00 1814.00
## AGE 28906 36.44 9.82 35.00 18.00 70.00
## NOOFDEPE 28906 2.85 1.61 3.00 0.00 10.00
## MTHINCTH 28906 8.94 4.81 8.00 0.10 39.50
## SALDATFR 28906 0.44 0.46 0.17 0.03 1.03
## TENORYR 28906 1.28 0.52 1.00 0.17 4.00
## DWNPMFR 28906 0.38 0.16 0.38 0.02 0.88
## PROFBUS* 28906 1.15 0.36 1.00 1.00 2.00
## QUALHSC* 28906 1.23 0.42 1.00 1.00 2.00
## QUAL_PG* 28906 1.04 0.20 1.00 1.00 2.00
## SEXCODE* 28906 1.92 0.27 2.00 1.00 2.00
## FULLPDC* 28906 1.39 0.49 1.00 1.00 2.00
## FRICODE* 28906 1.42 0.49 1.00 1.00 2.00
## WASHCODE* 28906 1.19 0.39 1.00 1.00 2.00
## Region* 28906 5.33 1.51 6.00 1.00 8.00
## Branch* 28906 5.93 3.47 6.00 1.00 14.00
## DefaulterFlag* 28906 1.71 0.45 2.00 1.00 2.00
## DefaulterType* 28906 1.85 0.63 2.00 1.00 3.00
## DATASET* 28906 2.52 0.50 3.00 1.00 3.00
## DefaulterFlag
## 0 1
## 28.82 71.18
# Share of each DefaulterFlag value, as a percentage rounded to 2 decimals.
tab1 <- round(100 * prop.table(table(DefaulterFlag)), digits = 2)
# The percentages printed on the slices are the same figures as tab1.
pct <- tab1
# Slice labels of the form "<flag value> <percentage> %".
lbls <- paste(names(tab1), pct, "%")
# Pie chart with one slice per DefaulterFlag value.
pie(
  tab1,
  labels = lbls,
  col = c("grey", "black"),
  main = "Defaulters based on DefaulterFlag"
)
# Defaulters based on Gender
# Within each SEXCODE column, the percentage of rows per DefaulterFlag value
# (2 decimals), with a "Sum" row appended underneath.
addmargins(
  round(
    100 * prop.table(table(DefaulterFlag, SEXCODE), margin = 2),
    digits = 2
  ),
  margin = 1
)
## SEXCODE
## DefaulterFlag 0 1
## 0 34.48 28.36
## 1 65.52 71.64
## Sum 100.00 100.00
SEXCODE = 1 (Male), SEXCODE = 0 (Female)
# For each SEXCODE value, the percentage split of DefaulterFlag (2 decimals).
tab2 <- round(
  100 * prop.table(table(DefaulterFlag, SEXCODE), margin = 2),
  digits = 2
)
# Grouped bars: one pair of bars (DefaulterFlag 0 / 1) per SEXCODE value.
# barplot() returns the bar midpoints, kept in bp to position the labels.
bp <- barplot(
  tab2,
  beside = TRUE,
  col = c("lightblue", "mistyrose"),
  main = "Bar Chart For Defaulters By Gender",
  xlab = "Male",
  ylab = "Percent (%)",
  ylim = c(0, 90),
  legend.text = c("No", "Yes"),
  args.legend = list(title = "Default", x = "topright", cex = .7)
)
# Print each percentage (1 decimal) just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab2, 1), pos = 3, cex = 1)
## PROFBUS
## DefaulterFlag 0 1
## 0 28.63 29.93
## 1 71.37 70.07
## Sum 100.00 100.00
PROFBUS = 1 (BUSINESS), PROFBUS = 0 (PROFESSIONAL)
# For each PROFBUS value, the percentage split of DefaulterFlag (2 decimals).
tab2 <- round(
  100 * prop.table(table(DefaulterFlag, PROFBUS), margin = 2),
  digits = 2
)
# Grouped bars: one pair of bars (DefaulterFlag 0 / 1) per PROFBUS value.
# barplot() returns the bar midpoints, kept in bp to position the labels.
bp <- barplot(
  tab2,
  beside = TRUE,
  col = c("lightblue", "mistyrose"),
  main = "Bar Chart For Defaulters By Profession",
  xlab = "PROFBUS",
  ylab = "Percent (%)",
  ylim = c(0, 90),
  legend.text = c("No", "Yes"),
  args.legend = list(title = "Default", x = "topright", cex = .7)
)
# Print each percentage (1 decimal) just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab2, 1), pos = 3, cex = 1)
# Percentage of the DefaulterFlag by Qualification
# Within each QUALHSC column, the percentage of rows per DefaulterFlag value
# (2 decimals), with a "Sum" row appended underneath.
addmargins(
  round(
    100 * prop.table(table(DefaulterFlag, QUALHSC), margin = 2),
    digits = 2
  ),
  margin = 1
)
## QUALHSC
## DefaulterFlag 0 1
## 0 29.66 26.05
## 1 70.34 73.95
## Sum 100.00 100.00
# Percentage of defaulters by education (QUALHSC)
tab2 <- round(
  100 * prop.table(table(DefaulterFlag, QUALHSC), margin = 2),
  digits = 2
)
# Grouped bars: one pair of bars (DefaulterFlag 0 / 1) per QUALHSC value.
# barplot() returns the bar midpoints, kept in bp to position the labels.
bp <- barplot(
  tab2,
  beside = TRUE,
  col = c("lightblue", "mistyrose"),
  main = "Bar Chart For Defaulters By Education",
  xlab = "QUALHSC",
  ylab = "Percent (%)",
  ylim = c(0, 90),
  legend.text = c("No", "Yes"),
  args.legend = list(title = "Default", x = "topright", cex = .7)
)
# Print each percentage (1 decimal) just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab2, 1), pos = 3, cex = 1)
# Percentage of the DefaulterFlag by Qualification
# Within each QUAL_PG column, the percentage of rows per DefaulterFlag value
# (2 decimals), with a "Sum" row appended underneath.
addmargins(
  round(
    100 * prop.table(table(DefaulterFlag, QUAL_PG), margin = 2),
    digits = 2
  ),
  margin = 1
)
## QUAL_PG
## DefaulterFlag 0 1
## 0 28.38 39.42
## 1 71.62 60.58
## Sum 100.00 100.00
# Percentage of defaulters by education (QUAL_PG)
tab2 <- round(
  100 * prop.table(table(DefaulterFlag, QUAL_PG), margin = 2),
  digits = 2
)
# Grouped bars: one pair of bars (DefaulterFlag 0 / 1) per QUAL_PG value.
# barplot() returns the bar midpoints, kept in bp to position the labels.
bp <- barplot(
  tab2,
  beside = TRUE,
  main = "Bar Chart For Defaulters By Education",
  col = c("lightblue", "mistyrose"),
  xlab = "QUAL_PG",
  ylab = "Percent (%)",
  legend.text = c("No", "Yes"),
  args.legend = list(title = "Default", x = "topright", cex = .7),
  ylim = c(0, 90)
)
# Print each percentage (1 decimal) just above the baseline of its bar.
text(bp, 0, round(tab2, 1), cex = 1, pos = 3)
# Must be its own statement: R needs a newline (or ';') between two calls.
library(data.table)
# data.table copy of df, used for the grouped summary below.
dt <- data.table(df)
# Mean of MTHINCTH (monthly income in thousands), rounded to 2 decimals,
# computed separately for each DefaulterFlag value.
dt[,
  .(Mean_MTHINCTH = round(mean(MTHINCTH), digits = 2)),
  by = .(DefaulterFlag)
]
## DefaulterFlag Mean_MTHINCTH
## 1: 0 9.5
## 2: 1 8.7
library(gplots)
# Plot the mean of MTHINCTH for each DefaulterFlag value. The mean values are
# printed on the plot (mean.labels = TRUE); group sizes are not (n.label).
plotmeans(
  MTHINCTH ~ DefaulterFlag,
  data = df,
  ylab = "Monthly Income",
  mean.labels = TRUE,
  n.label = FALSE,
  digits = 2,
  col = "red",
  lwd = 1.5
)
# Must be its own statement: R needs a newline (or ';') between two calls.
library(data.table)
# data.table copy of df, used for the grouped summary below.
dt <- data.table(df)
# Mean of TENORYR, rounded to 2 decimals, computed separately for each
# DefaulterFlag value.
dt[,
  .(Mean_TENORYR = round(mean(TENORYR), digits = 2)),
  by = .(DefaulterFlag)
]
## DefaulterFlag Mean_TENORYR
## 1: 0 1.15
## 2: 1 1.34
library(gplots)
# Plot the mean of TENORYR for each DefaulterFlag value. The mean values are
# printed on the plot (mean.labels = TRUE); group sizes are not (n.label).
plotmeans(
  TENORYR ~ DefaulterFlag,
  data = df,
  col = "red",
  lwd = 1.5,
  ylab = "TENORYR",
  digits = 2,
  mean.labels = TRUE,
  n.label = FALSE
)