1. Load the required packages and dataset
library(openxlsx)
library(dplyr)
library(randomForest)
library(tree)
library(ggplot2)
library(ggpubr)
dta <- read.xlsx("DataSet.xlsx")
  1. Investigation of the dataset
dim(dta)
[1] 21662    34
summary(dta)
       ID        Requested_Amount      Emi_Amount           Age       
 Min.   :    1   Min.   :7.200e+01   Min.   :      0   Min.   :18.10  
 1st Qu.: 6466   1st Qu.:2.750e+05   1st Qu.:   8664   1st Qu.:31.20  
 Median :13046   Median :4.050e+05   Median :  12696   Median :37.58  
 Mean   :13145   Mean   :7.324e+07   Mean   :  17882   Mean   :39.09  
 3rd Qu.:19760   3rd Qu.:6.500e+05   3rd Qu.:  19800   3rd Qu.:45.67  
 Max.   :26508   Max.   :4.630e+11   Max.   :1379000   Max.   :83.80  
                                     NA's   :602                      
 Applicant_Postal_Code Duration_Of_Current_Emp Product_Name1     
 Min.   :110001        Min.   : 0.000          Length:21662      
 1st Qu.:208006        1st Qu.: 2.000          Class :character  
 Median :400610        Median : 4.000          Mode  :character  
 Mean   :394240        Mean   : 6.858                            
 3rd Qu.:500090        3rd Qu.: 8.000                            
 Max.   :835103        Max.   :50.000                            
 NA's   :3             NA's   :16648                             
 Product_Line         Loan_Term      Variant_Code       Manufacturer_Desc 
 Length:21662       Min.   :  3.00   Length:21662       Length:21662      
 Class :character   1st Qu.: 36.00   Class :character   Class :character  
 Mode  :character   Median : 36.00   Mode  :character   Mode  :character  
                    Mean   : 43.98                                        
                    3rd Qu.: 60.00                                        
                    Max.   :960.00                                        
                    NA's   :2                                             
 Gender_Desc        Marital_Status_Desc Applicant_State_Desc
 Length:21662       Length:21662        Length:21662        
 Class :character   Class :character    Class :character    
 Mode  :character   Mode  :character    Mode  :character    
                                                            
                                                            
                                                            
                                                            
 Applicant_City_Desc Resid_Owned_By_Desc Employment_Type_Desc
 Length:21662        Length:21662        Length:21662        
 Class :character    Class :character    Class :character    
 Mode  :character    Mode  :character    Mode  :character    
                                                             
                                                             
                                                             
                                                             
 Total_Work_Experience application_creation_date Ex_Showroom_Price 
 Min.   :   0.000      Min.   :41277             Min.   :       0  
 1st Qu.:   2.700      1st Qu.:41753             1st Qu.:  335440  
 Median :   5.000      Median :41988             Median :  488544  
 Mean   :   8.573      Mean   :41904             Mean   :  686893  
 3rd Qu.:  10.000      3rd Qu.:42105             3rd Qu.:  744948  
 Max.   :1212.000      Max.   :42186             Max.   :81882000  
 NA's   :16708                                   NA's   :2779      
   Segment          Current.Valuation   Years_In_Current_Business
 Length:21662       Min.   :1.000e+00   Min.   : 0.000           
 Class :character   1st Qu.:3.349e+05   1st Qu.: 4.000           
 Mode  :character   Median :4.879e+05   Median : 7.000           
                    Mean   :2.302e+07   Mean   : 8.888           
                    3rd Qu.:7.444e+05   3rd Qu.:10.000           
                    Max.   :3.470e+11   Max.   :54.000           
                    NA's   :2787        NA's   :7455             
 No_Of_Years_At_Residence No_Of_Years_In_City No_Of_Years_At_Business
 Min.   : 0.00            Min.   :0.000e+00   Min.   : NA            
 1st Qu.: 5.00            1st Qu.:5.000e+00   1st Qu.: NA            
 Median :10.00            Median :1.000e+01   Median : NA            
 Mean   :13.93            Mean   :4.361e+05   Mean   :NaN            
 3rd Qu.:20.00            3rd Qu.:2.000e+01   3rd Qu.: NA            
 Max.   :69.00            Max.   :9.291e+09   Max.   : NA            
 NA's   :2723             NA's   :354         NA's   :21662          
 Segment_Desc       Cost_Of_Vehicle     Average_Bank_Balance
 Length:21662       Min.   :    48000   Min.   :-1.798e+07  
 Class :character   1st Qu.:   350000   1st Qu.: 6.307e+03  
 Mode  :character   Median :   506730   Median : 1.704e+04  
                    Mean   :   771360   Mean   : 2.226e+06  
                    3rd Qu.:   800000   3rd Qu.: 5.069e+04  
                    Max.   :504486334   Max.   : 1.864e+10  
                    NA's   :2           NA's   :12689       
  cibil_score     Disbursed              IRR         MAX_OD_F_12M   
 Min.   : -1.0   Length:21662       Min.   :10.69   Min.   : 0.000  
 1st Qu.:597.0   Class :character   1st Qu.:15.71   1st Qu.: 0.000  
 Median :757.0   Mode  :character   Median :17.51   Median : 0.000  
 Mean   :606.2                      Mean   :18.23   Mean   : 0.349  
 3rd Qu.:796.0                      3rd Qu.:20.60   3rd Qu.: 0.000  
 Max.   :897.0                      Max.   :26.50   Max.   :12.000  
 NA's   :8                                          NA's   :13356   
 TOTAL_NTECH_BNC_F12M
 Min.   : 0.000      
 1st Qu.: 0.000      
 Median : 0.000      
 Mean   : 1.259      
 3rd Qu.: 1.000      
 Max.   :12.000      
 NA's   :13356       
str(dta)
'data.frame':   21662 obs. of  34 variables:
 $ ID                       : num  1 2 3 4 5 6 7 8 9 10 ...
 $ Requested_Amount         : num  200000 1900000 1600000 300000 250000 150000 430000 405000 276000 234000 ...
 $ Emi_Amount               : num  5750 NA NA 9050 8750 ...
 $ Age                      : num  40.5 41.2 39.5 35 34 ...
 $ Applicant_Postal_Code    : num  400091 641002 641002 390019 390023 ...
 $ Duration_Of_Current_Emp  : num  5 NA NA 16 NA NA NA NA NA NA ...
 $ Product_Name1            : chr  "CHEVROLET SPARK" "FORTUNER 4 WD" NA "NISSAN MICRA" ...
 $ Product_Line             : chr  "CHEVROLET SPARK LT 1.0 BS-IV OBDII" "TOYOTA - FORTUNER 4 WD" "TOYOTA - FORTUNER 3.0 L" "NISSAN MICRA" ...
 $ Loan_Term                : num  48 48 48 48 36 36 36 36 36 36 ...
 $ Variant_Code             : chr  "LT 1.0 BS-IV OBDII" "FORTUNER 4 WD" "FORTUNER 3.0 L" "NISSAN  MICRA  XL" ...
 $ Manufacturer_Desc        : chr  "CHEVROLET INDIA  LTD" "TOYOTA" "TOYOTA" "NISSAN" ...
 $ Gender_Desc              : chr  "Male" "Male" "Male" "Male" ...
 $ Marital_Status_Desc      : chr  NA "Married" "Married" "Married" ...
 $ Applicant_State_Desc     : chr  "MAHARASHTRA" "TAMILNADU" "TAMILNADU" "GUJRAT" ...
 $ Applicant_City_Desc      : chr  "MUMBAI" "COIMBATORE" "COIMBATORE" "BARODA" ...
 $ Resid_Owned_By_Desc      : chr  "SELF" "SELF" "SELF" "SELF" ...
 $ Employment_Type_Desc     : chr  "SALARIED" "RETIRED" "RETIRED" "SALARIED" ...
 $ Total_Work_Experience    : num  5 NA NA 16 NA NA NA NA NA NA ...
 $ application_creation_date: num  42112 41753 41755 41277 41287 ...
 $ Ex_Showroom_Price        : num  NA NA NA 365750 548750 ...
 $ Segment                  : chr  "A2" "SUV" "SUV" "A2" ...
 $ Current.Valuation        : num  NA NA NA 365750 548750 ...
 $ Years_In_Current_Business: num  NA NA NA NA 3 6 15 10 5 5 ...
 $ No_Of_Years_At_Residence : num  NA NA NA 7 2 25 40 9 10 25 ...
 $ No_Of_Years_In_City      : num  35 1 1 7 2 25 40 9 10 25 ...
 $ No_Of_Years_At_Business  : num  NA NA NA NA NA NA NA NA NA NA ...
 $ Segment_Desc             : chr  "Compact" "SUV" "SUV" "Compact" ...
 $ Cost_Of_Vehicle          : num  237342 2400000 2200000 365750 548750 ...
 $ Average_Bank_Balance     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ cibil_score              : num  594 754 779 818 853 777 817 789 828 837 ...
 $ Disbursed                : chr  "No" "Yes" "Yes" "Yes" ...
 $ IRR                      : num  24.7 24.7 16.5 18.1 17.7 ...
 $ MAX_OD_F_12M             : num  NA 0 0 0 0 0 0 0 0 0 ...
 $ TOTAL_NTECH_BNC_F12M     : num  NA 0 0 0 1 0 0 0 0 0 ...
dta$IRRcut <- cut(dta$IRR,breaks = c(0,10,15,20,25,30))

Every sample has more than 2 missing, therefore we can’t merely use complete data. Therefore we use those variables with missing rate <0.25

na.omit(dta)
 [1] ID                        Requested_Amount         
 [3] Emi_Amount                Age                      
 [5] Applicant_Postal_Code     Duration_Of_Current_Emp  
 [7] Product_Name1             Product_Line             
 [9] Loan_Term                 Variant_Code             
[11] Manufacturer_Desc         Gender_Desc              
[13] Marital_Status_Desc       Applicant_State_Desc     
[15] Applicant_City_Desc       Resid_Owned_By_Desc      
[17] Employment_Type_Desc      Total_Work_Experience    
[19] application_creation_date Ex_Showroom_Price        
[21] Segment                   Current.Valuation        
[23] Years_In_Current_Business No_Of_Years_At_Residence 
[25] No_Of_Years_In_City       No_Of_Years_At_Business  
[27] Segment_Desc              Cost_Of_Vehicle          
[29] Average_Bank_Balance      cibil_score              
[31] Disbursed                 IRR                      
[33] MAX_OD_F_12M              TOTAL_NTECH_BNC_F12M     
[35] IRRcut                   
<0 rows> (or 0-length row.names)
is.na(dta) %>% apply(.,1,sum) %>% `<`(.,2) %>% sum
[1] 0
less_missing_variables <- is.na(dta) %>% apply(.,2,sum) %>% `<`(.,ncol(dta)*0.25)
less_missing_dta <- dta[,less_missing_variables]
dta <- less_missing_dta
str(dta)
'data.frame':   21662 obs. of  17 variables:
 $ ID                       : num  1 2 3 4 5 6 7 8 9 10 ...
 $ Requested_Amount         : num  200000 1900000 1600000 300000 250000 150000 430000 405000 276000 234000 ...
 $ Age                      : num  40.5 41.2 39.5 35 34 ...
 $ Applicant_Postal_Code    : num  400091 641002 641002 390019 390023 ...
 $ Product_Line             : chr  "CHEVROLET SPARK LT 1.0 BS-IV OBDII" "TOYOTA - FORTUNER 4 WD" "TOYOTA - FORTUNER 3.0 L" "NISSAN MICRA" ...
 $ Loan_Term                : num  48 48 48 48 36 36 36 36 36 36 ...
 $ Variant_Code             : chr  "LT 1.0 BS-IV OBDII" "FORTUNER 4 WD" "FORTUNER 3.0 L" "NISSAN  MICRA  XL" ...
 $ Manufacturer_Desc        : chr  "CHEVROLET INDIA  LTD" "TOYOTA" "TOYOTA" "NISSAN" ...
 $ Gender_Desc              : chr  "Male" "Male" "Male" "Male" ...
 $ Applicant_State_Desc     : chr  "MAHARASHTRA" "TAMILNADU" "TAMILNADU" "GUJRAT" ...
 $ Employment_Type_Desc     : chr  "SALARIED" "RETIRED" "RETIRED" "SALARIED" ...
 $ application_creation_date: num  42112 41753 41755 41277 41287 ...
 $ Cost_Of_Vehicle          : num  237342 2400000 2200000 365750 548750 ...
 $ cibil_score              : num  594 754 779 818 853 777 817 789 828 837 ...
 $ Disbursed                : chr  "No" "Yes" "Yes" "Yes" ...
 $ IRR                      : num  24.7 24.7 16.5 18.1 17.7 ...
 $ IRRcut                   : Factor w/ 5 levels "(0,10]","(10,15]",..: 4 4 3 3 3 2 2 2 2 2 ...
rabefore <-ggplot(dta, aes(x=Requested_Amount,y = IRR))+ layer(
  geom = "point",
  stat = "identity",
  position = "identity") +theme_classic()

(dta$Requested_Amount > 10^8) %>% which
[1]   369  1397  1751 11802 17098
dta <- dta[-c((dta$Requested_Amount > 10^8) %>% which),]

rabafter <- ggplot(dta, aes(x=Requested_Amount/10000,y=IRR,color=Disbursed))+ 
  geom_point(stat="identity")+
  geom_smooth(method='lm')+
  theme_classic()

ggarrange(rabefore, rabafter,
          labels = c("Requested Amount(with outliers)", "Requested Amount(outliers removed)"),
          ncol = 1, nrow = 2)

ggplot(dta, aes(y = IRR,x = Age,color=Disbursed))+
  geom_smooth(method="lm",na.rm = TRUE)+
  theme_classic()

unique(dta$Product_Line) %>% length
[1] 1443
#to get the short product line (first word) of the car
position <- gregexpr(" ",dta$Product_Line)
endposition <- sapply(position, function(x) x[[1]][1])
dta$Product_Line_group <- substring(dta$Product_Line,first = 1,last = endposition-1)
unique(dta$Product_Line_group) %>% length
[1] 67
plgfill <- ggplot(data=dta, aes(x=Product_Line_group, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="fill")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

plgstack <-ggplot(data=dta, aes(x=Product_Line_group, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="stack")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

ggarrange(plgfill, plgstack,
          labels = c("P. Line(Partiton)", "P. Line(count)"),
          ncol = 1, nrow = 2)

lt <- ggplot(data=dta,aes(x=Loan_Term,y=IRR)) +
  geom_point()+
  theme_classic()

(dta$Loan_Term > 250) %>% which
[1] 3325
dta <- dta[-c((dta$Loan_Term > 250) %>% which),]

lt2 <-ggplot(data=dta,aes(x=Loan_Term,y=IRR,color=Disbursed)) +
  geom_point()+
  geom_smooth(method='lm')+
  theme_classic()


ggarrange(lt, lt2,
          labels = c("Loan Term(with outliers)", "Loan Term(outliers removed)"),
          ncol = 1, nrow = 2)

unique(dta$Variant_Code) %>% length
[1] 1259
#to get the brand name (first word) of the car
position <- gregexpr(" ",dta$Variant_Code)
endposition <- sapply(position, function(x) x[[1]][1])
dta$Variant_Code_Brand <- substring(dta$Variant_Code,first = 1,last = endposition-1)
unique(dta$Variant_Code_Brand) %>% length
[1] 291
Variant_Code_Brand1 <- ggplot(data=dta, aes(x=Variant_Code_Brand, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="fill")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

Variant_Code_Brand2 <- ggplot(data=dta, aes(x=Variant_Code_Brand, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="stack")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

ggarrange(Variant_Code_Brand1, Variant_Code_Brand2,
          labels = c("Variant Code (Partition)", "Variant Code (counts)"),
          ncol = 1, nrow = 2)

unique(dta$Manufacturer_Desc) %>% length
[1] 33
manufacturer1 <- ggplot(data=dta, aes(x=Manufacturer_Desc, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="fill")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

manufacturer2 <- ggplot(data=dta, aes(x=Manufacturer_Desc, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="stack")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

ggarrange(manufacturer1, manufacturer2,
          labels = c("Manufacturer (Counts)", "Manufacturer (Partition)"),
          ncol = 1, nrow = 2)

ggplot(data=dta, aes(x=Gender_Desc, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="stack")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

asd1 <- ggplot(data=dta, aes(x=Applicant_State_Desc, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="stack")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

asd2 <- ggplot(data=dta, aes(x=Applicant_State_Desc, y=IRR, fill=IRRcut)) +
  geom_bar(stat="identity", position="fill")+
  theme_classic()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

ggarrange(asd2, asd1,
          labels = c("Applicant State(count)", "Applicant State(fill)"),
          ncol = 1, nrow = 2)

cost1 <- ggplot(data=dta, aes(x=Cost_Of_Vehicle , y=IRR)) +
  geom_point(stat="identity")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

dta <- dta[-c((dta$Cost_Of_Vehicle> 10^8) %>% which),]

cost2 <- ggplot(data=dta, aes(x=Cost_Of_Vehicle , y=IRR)) +
  geom_point(stat="identity")+
  geom_smooth(method='lm')+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

dta2 <- dta[-c((dta$Cost_Of_Vehicle> 10^7) %>% which),]

cost3 <-ggplot(data=dta2, aes(x=Cost_Of_Vehicle , y=IRR,color=Disbursed)) +
  geom_point(stat="identity")+
  geom_smooth(method='lm')+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggarrange(cost1, cost2, cost3,
          labels = c("Cost of Vihicle", "Cost of Vihicle","Cost of Vihicle"),ncol = 1, nrow = 3)

ggplot(data=dta, aes(x=Employment_Type_Desc, y=IRR, color=IRRcut)) +
  geom_bar(stat="identity", position="stack")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggplot(data=dta, aes(x=cibil_score, y=IRR, color=Disbursed)) +
  geom_point(stat="identity")+
  geom_smooth(method = "lm")+
  theme_classic()

3. Generate IRR model using variables: Requested Amount/Age/Loan_Term/Gender_Desc/Employment_Type_Desc/Cost_Of_Vehicle/cibil_score

## set a regression model for IRR

IRRdta <- dta[,c("Requested_Amount",
                 "Age",
                 "Loan_Term",
                 "Gender_Desc",
                 "Cost_Of_Vehicle",
                 "cibil_score",
                 "IRR")]

IRRdta$cibil_score[(IRRdta$cibil_score == -1)] <- NA
IRRdta$Requested_Amount <- IRRdta$Requested_Amount/100000
IRRdta$Cost_Of_Vehicle <- IRRdta$Cost_Of_Vehicle/100000


IRRdta <- na.omit(IRRdta)


IRRfit <- lm(IRR~.,IRRdta)
summary(IRRfit)

Call:
lm(formula = IRR ~ ., data = IRRdta)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.8707 -2.5235 -0.6947  2.3566  8.7315 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      19.013434   0.227452  83.593  < 2e-16 ***
Requested_Amount -0.009175   0.005069  -1.810   0.0703 .  
Age              -0.005278   0.002812  -1.877   0.0605 .  
Loan_Term         0.006240   0.002479   2.517   0.0118 *  
Gender_DescMale  -0.035151   0.090426  -0.389   0.6975    
Cost_Of_Vehicle   0.002626   0.003076   0.854   0.3933    
cibil_score      -0.001135   0.000192  -5.913 3.42e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.699 on 17993 degrees of freedom
Multiple R-squared:  0.002955,  Adjusted R-squared:  0.002622 
F-statistic: 8.887 on 6 and 17993 DF,  p-value: 1.045e-09
IRRfit_simple <- lm(IRR ~ Requested_Amount +
                      Age+
                      Loan_Term+
                      cibil_score, data= IRRdta)

summary(IRRfit_simple)

Call:
lm(formula = IRR ~ Requested_Amount + Age + Loan_Term + cibil_score, 
    data = IRRdta)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.8733 -2.5242 -0.6962  2.3650  8.8337 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)      18.9801656  0.2070477  91.671  < 2e-16 ***
Requested_Amount -0.0065412  0.0040293  -1.623   0.1045    
Age              -0.0052105  0.0028081  -1.856   0.0635 .  
Loan_Term         0.0063040  0.0024775   2.545   0.0110 *  
cibil_score      -0.0011348  0.0001919  -5.914 3.39e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.699 on 17995 degrees of freedom
Multiple R-squared:  0.002905,  Adjusted R-squared:  0.002683 
F-statistic: 13.11 on 4 and 17995 DF,  p-value: 1.163e-10
Disburseddta <- dta[,c("Requested_Amount",
                       "Age",
                       "Loan_Term",
                       "Gender_Desc",
                       "Cost_Of_Vehicle",
                       "cibil_score",
                       "IRR",
                       "Disbursed")]
Disburseddta$cibil_score[(Disburseddta$cibil_score == -1)] <- NA
Disburseddta$Requested_Amount <- Disburseddta$Requested_Amount/100000
Disburseddta$Cost_Of_Vehicle <- Disburseddta$Cost_Of_Vehicle/100000
Disburseddta$Disbursed <- as.factor(Disburseddta$Disbursed)

Disburseddta <- na.omit(Disburseddta)

disbursefit <- glm(Disbursed~.,
                   family = binomial,
                   data = Disburseddta)

summary(disbursefit)

Call:
glm(formula = Disbursed ~ ., family = binomial, data = Disburseddta)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.7749  -0.9962  -0.6285   1.2185   2.6554  

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)       1.0996547  0.1732910   6.346 2.21e-10 ***
Requested_Amount  0.0027963  0.0033042   0.846   0.3974    
Age               0.0082572  0.0016302   5.065 4.08e-07 ***
Loan_Term        -0.0120967  0.0014543  -8.318  < 2e-16 ***
Gender_DescMale   0.0327846  0.0527666   0.621   0.5344    
Cost_Of_Vehicle  -0.0049477  0.0024017  -2.060   0.0394 *  
cibil_score       0.0022760  0.0001465  15.532  < 2e-16 ***
IRR              -0.1689325  0.0048179 -35.063  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 24109  on 17999  degrees of freedom
Residual deviance: 22212  on 17992  degrees of freedom
AIC: 22228

Number of Fisher Scoring iterations: 4