# Load the auto-finance loan records from the CSV file into a data frame.
df <- read.csv(file = "AutoFinanaceData.csv")
# Place the data frame on the search path so that its columns can be
# referenced by bare name.
# NOTE(review): attach() exposes a copy of df as it is at this point; later
# changes to df are not reflected in the attached columns -- confirm that
# downstream code does not depend on them being in sync.
attach(what = df)
# Show the size of the data set (rows, columns).
dim(x = df)
## [1] 28906 21
## [1] "Agmt.No" "ContractStatus" "StartDate" "AGE"
## [5] "NOOFDEPE" "MTHINCTH" "SALDATFR" "TENORYR"
## [9] "DWNPMFR" "PROFBUS" "QUALHSC" "QUAL_PG"
## [13] "SEXCODE" "FULLPDC" "FRICODE" "WASHCODE"
## [17] "Region" "Branch" "DefaulterFlag" "DefaulterType"
## [21] "DATASET"
Defaulter Flag
1: Customer has delayed paying at least once
0: Otherwise
Defaulter Type
0: Never Delayed Paying (Good Customer)
1: At least one delay, but always paid before 90 days (OK Customer)
2: At least one delay and did not pay even after 90 days (Bad Customer)
Gender (SEXCODE)
Age (AGE)
Education
  QUALHSC
  QUAL_PG
Income
  Monthly Income in Thousands (MTHINCTH)
  Owns a Fridge (FRICODE)
  Owns a Washing Machine (WASHCODE)
Profession (PROFBUS)
No. of Dependents (NOOFDEPE)
Region (Region)
## 'data.frame': 28906 obs. of 21 variables:
## $ Agmt.No : chr "AP18100057" "AP18100140" "AP18100198" "AP18100217" ...
## $ ContractStatus: chr "Closed" "Closed" "Closed" "Closed" ...
## $ StartDate : chr "19-01-01" "10-05-01" "05-08-01" "03-09-01" ...
## $ AGE : int 26 28 32 31 36 33 41 47 43 27 ...
## $ NOOFDEPE : int 2 2 2 0 2 2 2 0 0 0 ...
## $ MTHINCTH : num 4.5 5.59 8.8 5 12 ...
## $ SALDATFR : num 1 1 1 1 1 1 1 1 0.97 1 ...
## $ TENORYR : num 1.5 2 1 1 1 2 1 2 1.5 2 ...
## $ DWNPMFR : num 0.27 0.25 0.51 0.66 0.17 0.18 0.37 0.42 0.27 0.47 ...
## $ PROFBUS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ QUALHSC : int 0 0 0 0 0 0 1 0 0 0 ...
## $ QUAL_PG : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SEXCODE : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FULLPDC : int 1 1 1 1 1 0 0 1 1 1 ...
## $ FRICODE : int 0 1 1 1 1 0 0 0 0 0 ...
## $ WASHCODE : int 0 0 1 1 0 0 0 0 0 0 ...
## $ Region : chr "AP2" "AP2" "AP2" "AP2" ...
## $ Branch : chr "Vizag" "Vizag" "Vizag" "Vizag" ...
## $ DefaulterFlag : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DefaulterType : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DATASET : chr " " "BUILD" "BUILD" "BUILD" ...
After converting the categorical columns to factor:
## 'data.frame': 28906 obs. of 21 variables:
## $ Agmt.No : chr "AP18100057" "AP18100140" "AP18100198" "AP18100217" ...
## $ ContractStatus: chr "Closed" "Closed" "Closed" "Closed" ...
## $ StartDate : chr "19-01-01" "10-05-01" "05-08-01" "03-09-01" ...
## $ AGE : int 26 28 32 31 36 33 41 47 43 27 ...
## $ NOOFDEPE : int 2 2 2 0 2 2 2 0 0 0 ...
## $ MTHINCTH : num 4.5 5.59 8.8 5 12 ...
## $ SALDATFR : num 1 1 1 1 1 1 1 1 0.97 1 ...
## $ TENORYR : num 1.5 2 1 1 1 2 1 2 1.5 2 ...
## $ DWNPMFR : num 0.27 0.25 0.51 0.66 0.17 0.18 0.37 0.42 0.27 0.47 ...
## $ PROFBUS : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ QUALHSC : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ QUAL_PG : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ SEXCODE : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ FULLPDC : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 1 2 2 2 ...
## $ FRICODE : Factor w/ 2 levels "0","1": 1 2 2 2 2 1 1 1 1 1 ...
## $ WASHCODE : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 1 ...
## $ Region : Factor w/ 8 levels "AP1","AP2","Chennai",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Branch : Factor w/ 14 levels "Bangalore","Chennai",..: 14 14 14 14 14 14 14 14 14 14 ...
## $ DefaulterFlag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ DefaulterType : Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ DATASET : chr " " "BUILD" "BUILD" "BUILD" ...
We have now completed the data setup.
## [1] "Percentage of defaulters in the data"
## DefaulterFlag
## 0 1
## 28.82 71.18
## [1] "Percentage of defaulters in the data"
## DefaulterType
## 0 1 2
## 28.82 57.65 13.53
## SEXCODE
## DefaulterFlag 0 1 Sum
## 0 9.17 90.83 100.00
## 1 7.06 92.94 100.00
## SEXCODE
## DefaulterFlag 0 1
## 0 34.48 28.36
## 1 65.52 71.64
## Sum 100.00 100.00
SEXCODE = 1 (Male), SEXCODE = 0 (Female)
## PROFBUS
## DefaulterFlag 0 1 Sum
## 0 84.59 15.41 100.00
## 1 85.39 14.61 100.00
## PROFBUS
## DefaulterFlag 0 1
## 0 28.63 29.93
## 1 71.37 70.07
## Sum 100.00 100.00
PROFBUS = 1 (BUSINESS), PROFBUS = 0 (PROFESSIONAL)
## QUALHSC
## DefaulterFlag 0 1 Sum
## 0 79.06 20.94 100.00
## 1 75.92 24.08 100.00
## QUALHSC
## DefaulterFlag 0 1
## 0 29.66 26.05
## 1 70.34 73.95
## Sum 100.00 100.00
## QUAL_PG
## DefaulterFlag 0 1 Sum
## 0 94.48 5.52 100.00
## 1 96.56 3.44 100.00
## QUAL_PG
## DefaulterFlag 0 1
## 0 28.38 39.42
## 1 71.62 60.58
## Sum 100.00 100.00
## DefaulterFlag AvgIncome
## 1: 0 9.5
## 2: 1 8.7
## DefaulterFlag AvgAge
## 1: 0 37.22
## 2: 1 36.12
## Warning: package 'caTools' was built under R version 4.0.4
## [1] 28906 21
## [1] 21679 21
## [1] 7227 21
The output of glm() is as follows:
##
## Call:
## glm(formula = DefaulterFlag ~ AGE + NOOFDEPE + MTHINCTH + NOOFDEPE +
## SALDATFR + TENORYR + DWNPMFR + PROFBUS + QUALHSC + QUAL_PG +
## SEXCODE + FULLPDC + FRICODE + WASHCODE + Region, family = binomial(),
## data = trainingSet)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7470 -1.0215 0.5716 0.7801 2.0874
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.2291732 0.1890284 11.793 < 2e-16 ***
## AGE -0.0144968 0.0016680 -8.691 < 2e-16 ***
## NOOFDEPE 0.0566860 0.0107923 5.252 1.50e-07 ***
## MTHINCTH -0.0004025 0.0035613 -0.113 0.910023
## SALDATFR -0.3833870 0.0420223 -9.123 < 2e-16 ***
## TENORYR 0.7727065 0.0456475 16.928 < 2e-16 ***
## DWNPMFR -1.3074501 0.1274734 -10.257 < 2e-16 ***
## PROFBUS1 0.1966576 0.0487903 4.031 5.56e-05 ***
## QUALHSC1 0.1853120 0.0401652 4.614 3.95e-06 ***
## QUAL_PG1 -0.2990904 0.0787907 -3.796 0.000147 ***
## SEXCODE1 0.2339445 0.0600322 3.897 9.74e-05 ***
## FULLPDC1 -1.2365885 0.0368674 -33.541 < 2e-16 ***
## FRICODE1 -0.1761473 0.0377247 -4.669 3.02e-06 ***
## WASHCODE1 -0.2644245 0.0476814 -5.546 2.93e-08 ***
## RegionAP2 -0.5788864 0.1796029 -3.223 0.001268 **
## RegionChennai -1.4136987 0.1408192 -10.039 < 2e-16 ***
## RegionKA1 -0.6529787 0.1411987 -4.625 3.75e-06 ***
## RegionKE2 -0.5753874 0.1450753 -3.966 7.30e-05 ***
## RegionTN1 -0.8084619 0.1362745 -5.933 2.98e-09 ***
## RegionTN2 -0.6142186 0.1458691 -4.211 2.55e-05 ***
## RegionVellore -0.6570233 0.1595604 -4.118 3.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 26040 on 21678 degrees of freedom
## Residual deviance: 23025 on 21658 degrees of freedom
## AIC: 23067
##
## Number of Fisher Scoring iterations: 4
Age = mean(AGE),
Male,
Education = UG,
MTHINCTH = mean(MTHINCTH),
NoOfDepe = mean(NOOFDEPE),
Owns a Fridge,
Owns a Washing Machine,
Working Professional,
SALDATFR = mean(SALDATFR),
Lives in TN1,
Tenure = mean(TENORYR),
Down Payment = mean(DWNPMFR) %,
Did not submit FULLPDC