1 Connecting to DB:

library(readr)
letter1<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_1.csv")
## Rows: 208909 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): case_id
## dbl (11): woe_NumberOfOutgoingCalls, woe_NumberOfDaysFromCaseRegDate, woe_Am...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter2<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_2.csv")
## Rows: 266019 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): case_id
## dbl (10): woe_NumberOfOutgoingCalls, woe_AmountOfCustomerOpenCases, woe_Amou...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter3<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_3.csv")
## Rows: 198936 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): case_id
## dbl (11): woe_NumberOfOutgoingCalls, woe_AmountOfCasePayments, woe_NumberOfO...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter4<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_4.csv")
## Rows: 239873 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): case_id
## dbl (11): woe_NumberOfOutgoingCalls, woe_AmountOfCustomerOpenCases, woe_Amou...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter5<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_5.csv")
## Rows: 190744 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): case_id
## dbl (10): woe_NumberOfDaysFromCaseRegDate, woe_AmountOfCasePayments, woe_Num...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# colnames(letter1)
# 
# 
# head(letter1)
library(readxl)
DESCRIBE <- read_excel("C:/Users/navaral/Downloads/coefficients_letters.xlsx")
View(DESCRIBE)
datatable(DESCRIBE)
# ATRIBUTES_FILTERED <- DESCRIBE_L1 %>%
#   filter(var_class %in% c("variable")) %>%
#   filter(Missings <0.6) %>%
#   filter(!(`check_ 575`=="TRUE")) 
# table(ATRIBUTES_FILTERED$TYPE)
# 
# ATRIBUTES_FILTERED_1<- ATRIBUTES_FILTERED %>%
#     filter(TYPE %in% c("CAPITAL","CASES","DATES","GEO_ID","LETTERS","TELEPHONES" )) 

# ATRIBUTES_FILTERED <- DESCRIBE_L1 %>%
#    filter(INCLUDE ==1) 


preds<- DESCRIBE %>%
  select(varcode) %>%
  unlist %>% unname
#topVars_1<-as_tibble(topVars) %>% filter(!(topVars %in% c("c9057", "b0015"))) %>% unlist %>% unname

topVars_1<-DESCRIBE %>%select(varcode) %>%filter(varcode %in% c(
"woe_AddressStatus",
"woe_AmountOfCasePayments",
"woe_AmountOfCustomerPayments",
"woe_CustomerSegmentation",
"woe_NumberOfDaysFromCaseRegDate",
"woe_NumberOfOutgoingCalls",
"woe_NumberOfOutgoingSMS",
"woe_OriginalCapitalOfCaseInvoices",
"woe_PortfolioId")) %>% unlist %>% unname

topVars_2<-DESCRIBE %>%filter(letter2 >=0)%>%select(varcode)  %>% unlist %>% unname
 
topVars_3<-DESCRIBE %>%filter(letter3 >=0)%>%select(varcode)  %>% unlist %>% unname
 
topVars_4<-DESCRIBE %>%filter(letter4 >=0)%>%select(varcode)  %>% unlist %>% unname
 
topVars_5<-DESCRIBE %>%filter(letter5 >=0)%>%select(varcode)  %>% unlist %>% unname
fmla1<-paste0("target90Days~",paste(c(topVars_1),collapse = "+"))
fmla2<-paste0("target90Days~",paste(c(topVars_2),collapse = "+"))
fmla3<-paste0("target90Days~",paste(c(topVars_3),collapse = "+"))
fmla4<-paste0("target90Days~",paste(c(topVars_4),collapse = "+"))
fmla5<-paste0("target90Days~",paste(c(topVars_5),collapse = "+"))

2 Letter 1

mod1<-glm(fmla1, gaussian(link = "identity"), letter1)

summary(mod1)
## 
## Call:
## glm(formula = fmla1, family = gaussian(link = "identity"), data = letter1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1376  -0.0121  -0.0074  -0.0022   1.0100  
## 
## Coefficients:
##                                    Estimate Std. Error t value
## (Intercept)                        0.015147   0.000228   66.57
## woe_AddressStatus                  0.011175   0.001142    9.79
## woe_AmountOfCasePayments           0.025018   0.001252   19.99
## woe_AmountOfCustomerPayments       0.015869   0.001087   14.60
## woe_CustomerSegmentation           0.005891   0.001076    5.47
## woe_NumberOfDaysFromCaseRegDate    0.002032   0.000429    4.74
## woe_NumberOfOutgoingCalls         -0.002141   0.001254   -1.71
## woe_NumberOfOutgoingSMS           -0.002615   0.003157   -0.83
## woe_OriginalCapitalOfCaseInvoices  0.003775   0.001084    3.48
## woe_PortfolioId                    0.007086   0.000288   24.57
##                                               Pr(>|t|)    
## (Intercept)                       < 0.0000000000000002 ***
## woe_AddressStatus                 < 0.0000000000000002 ***
## woe_AmountOfCasePayments          < 0.0000000000000002 ***
## woe_AmountOfCustomerPayments      < 0.0000000000000002 ***
## woe_CustomerSegmentation                   0.000000044 ***
## woe_NumberOfDaysFromCaseRegDate            0.000002132 ***
## woe_NumberOfOutgoingCalls                       0.0877 .  
## woe_NumberOfOutgoingSMS                         0.4076    
## woe_OriginalCapitalOfCaseInvoices               0.0005 ***
## woe_PortfolioId                   < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0082913)
## 
##     Null deviance: 1763.9  on 208908  degrees of freedom
## Residual deviance: 1732.0  on 208899  degrees of freedom
## AIC: -408337
## 
## Number of Fisher Scoring iterations: 2

3 Letter 2

mod2<-glm(fmla2, gaussian(link = "identity"), letter2)

summary(mod2)
## 
## Call:
## glm(formula = fmla2, family = gaussian(link = "identity"), data = letter2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1165  -0.0128  -0.0093  -0.0035   1.0141  
## 
## Coefficients: (1 not defined because of singularities)
##                                  Estimate Std. Error t value
## (Intercept)                      0.018893   0.000221   85.55
## woe_AmountOfCaseCapital          0.003724   0.000597    6.24
## woe_AmountOfCustomerOpenCases   -0.007799   0.002254   -3.46
## woe_CustomerSegmentation         0.026683   0.000367   72.65
## woe_NumberOfDaysFromCaseRegDate  0.067880   0.007490    9.06
## woe_NumberOfOpenCases            0.006647   0.001624    4.09
## woe_NumberOfOutgoingCalls       -0.004568   0.000465   -9.82
## woe_NumberOfOutgoingSMS          0.016628   0.003891    4.27
## woe_NumberOfRegisteredCases            NA         NA      NA
## woe_PortfolioId                  0.004481   0.000329   13.62
##                                             Pr(>|t|)    
## (Intercept)                     < 0.0000000000000002 ***
## woe_AmountOfCaseCapital                0.00000000045 ***
## woe_AmountOfCustomerOpenCases                0.00054 ***
## woe_CustomerSegmentation        < 0.0000000000000002 ***
## woe_NumberOfDaysFromCaseRegDate < 0.0000000000000002 ***
## woe_NumberOfOpenCases                  0.00004248399 ***
## woe_NumberOfOutgoingCalls       < 0.0000000000000002 ***
## woe_NumberOfOutgoingSMS                0.00001925991 ***
## woe_NumberOfRegisteredCases                       NA    
## woe_PortfolioId                 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0096497)
## 
##     Null deviance: 2630.5  on 266018  degrees of freedom
## Residual deviance: 2566.9  on 266010  degrees of freedom
## AIC: -479608
## 
## Number of Fisher Scoring iterations: 2

4 Letter 3

mod3<-glm(fmla3, gaussian(link = "identity"), letter3)

summary(mod3)
## 
## Call:
## glm(formula = fmla3, family = gaussian(link = "identity"), data = letter3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.0908  -0.0099  -0.0069  -0.0040   1.0057  
## 
## Coefficients: (1 not defined because of singularities)
##                                              Estimate Std. Error t value
## (Intercept)                                  0.010242   0.000208   49.18
## woe_AddressStatus                            0.004260   0.002327    1.83
## woe_AmountOfCasePayments                     0.021165   0.000953   22.20
## woe_AmountOfCustomerCosts                    0.005012   0.000995    5.04
## woe_NumberOfDaysFromCaseRegDate              0.006705   0.000818    8.20
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD  0.005086   0.000657    7.74
## woe_NumberOfOutgoingCalls                   -0.003301   0.000926   -3.56
## woe_NumberOfOutgoingSMS                            NA         NA      NA
## woe_NumberOfRegisteredCases                  0.000826   0.001018    0.81
## woe_OriginalCapitalOfCaseInvoices            0.013255   0.001060   12.50
## woe_PortfolioId                              0.005109   0.000475   10.75
##                                                         Pr(>|t|)    
## (Intercept)                                 < 0.0000000000000002 ***
## woe_AddressStatus                                        0.06721 .  
## woe_AmountOfCasePayments                    < 0.0000000000000002 ***
## woe_AmountOfCustomerCosts                    0.00000047599748337 ***
## woe_NumberOfDaysFromCaseRegDate              0.00000000000000024 ***
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD  0.00000000000000964 ***
## woe_NumberOfOutgoingCalls                                0.00037 ***
## woe_NumberOfOutgoingSMS                                       NA    
## woe_NumberOfRegisteredCases                              0.41714    
## woe_OriginalCapitalOfCaseInvoices           < 0.0000000000000002 ***
## woe_PortfolioId                             < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.0075637)
## 
##     Null deviance: 1514.3  on 198935  degrees of freedom
## Residual deviance: 1504.6  on 198926  degrees of freedom
## AIC: -407115
## 
## Number of Fisher Scoring iterations: 2

5 Letter 4

mod4<-glm(fmla4, gaussian(link = "identity"), letter4)

summary(mod4)
## 
## Call:
## glm(formula = fmla4, family = gaussian(link = "identity"), data = letter4)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1203  -0.0163  -0.0107  -0.0064   1.0076  
## 
## Coefficients: (2 not defined because of singularities)
##                                             Estimate Std. Error t value
## (Intercept)                                 0.015337   0.000235   65.19
## woe_AddressStatus                           0.023476   0.001731   13.56
## woe_AmountOfCasePayments                    0.030426   0.001265   24.05
## woe_AmountOfCustomerOpenCases               0.008945   0.000819   10.92
## woe_NumberOfDaysFromCaseRegDate             0.035763   0.003454   10.36
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD 0.005958   0.000790    7.54
## woe_NumberOfOutgoingCalls                   0.006107   0.001364    4.48
## woe_NumberOfOutgoingSMS                           NA         NA      NA
## woe_NumberOfRegisteredCases                       NA         NA      NA
## woe_OriginalCapitalOfCaseInvoices           0.012013   0.001485    8.09
## woe_PortfolioId                             0.008070   0.000459   17.57
##                                                         Pr(>|t|)    
## (Intercept)                                 < 0.0000000000000002 ***
## woe_AddressStatus                           < 0.0000000000000002 ***
## woe_AmountOfCasePayments                    < 0.0000000000000002 ***
## woe_AmountOfCustomerOpenCases               < 0.0000000000000002 ***
## woe_NumberOfDaysFromCaseRegDate             < 0.0000000000000002 ***
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD  0.00000000000004556 ***
## woe_NumberOfOutgoingCalls                    0.00000756363081084 ***
## woe_NumberOfOutgoingSMS                                       NA    
## woe_NumberOfRegisteredCases                                   NA    
## woe_OriginalCapitalOfCaseInvoices            0.00000000000000061 ***
## woe_PortfolioId                             < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.011845)
## 
##     Null deviance: 2863.0  on 239872  degrees of freedom
## Residual deviance: 2841.2  on 239864  degrees of freedom
## AIC: -383296
## 
## Number of Fisher Scoring iterations: 2

6 Letter 5

mod5<-glm(fmla5, gaussian(link = "identity"), letter5)

summary(mod5)
## 
## Call:
## glm(formula = fmla5, family = gaussian(link = "identity"), data = letter5)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.1257  -0.0164  -0.0103  -0.0038   1.0114  
## 
## Coefficients:
##                                   Estimate Std. Error t value
## (Intercept)                       0.031206   0.000330   94.54
## woe_AmountOfCasePayments          0.037866   0.000470   80.52
## woe_AmountOfCustomerCosts         0.009076   0.002607    3.48
## woe_NumberOfDaysFromCaseRegDate   0.022748   0.001089   20.90
## woe_NumberOfOutgoingCalls         0.006610   0.000992    6.66
## woe_NumberOfOutgoingSMS           0.003006   0.002270    1.32
## woe_NumberOfRegisteredCases       0.008688   0.003207    2.71
## woe_OriginalCapitalOfCaseInvoices 0.007255   0.000727    9.98
## woe_PortfolioId                   0.004841   0.000537    9.01
##                                               Pr(>|t|)    
## (Intercept)                       < 0.0000000000000002 ***
## woe_AmountOfCasePayments          < 0.0000000000000002 ***
## woe_AmountOfCustomerCosts                       0.0005 ***
## woe_NumberOfDaysFromCaseRegDate   < 0.0000000000000002 ***
## woe_NumberOfOutgoingCalls               0.000000000027 ***
## woe_NumberOfOutgoingSMS                         0.1854    
## woe_NumberOfRegisteredCases                     0.0068 ** 
## woe_OriginalCapitalOfCaseInvoices < 0.0000000000000002 ***
## woe_PortfolioId                   < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.015064)
## 
##     Null deviance: 2992.5  on 190743  degrees of freedom
## Residual deviance: 2873.1  on 190735  degrees of freedom
## AIC: -258944
## 
## Number of Fisher Scoring iterations: 2