Connecting to DB:
library(readr)
letter1<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_1.csv")
## Rows: 208909 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): case_id
## dbl (11): woe_NumberOfOutgoingCalls, woe_NumberOfDaysFromCaseRegDate, woe_Am...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter2<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_2.csv")
## Rows: 266019 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): case_id
## dbl (10): woe_NumberOfOutgoingCalls, woe_AmountOfCustomerOpenCases, woe_Amou...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter3<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_3.csv")
## Rows: 198936 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): case_id
## dbl (11): woe_NumberOfOutgoingCalls, woe_AmountOfCasePayments, woe_NumberOfO...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter4<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_4.csv")
## Rows: 239873 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): case_id
## dbl (11): woe_NumberOfOutgoingCalls, woe_AmountOfCustomerOpenCases, woe_Amou...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
letter5<- read_csv("C:/Users/navaral/Intrum/Advanced Analytics COE_SelfManaged - 04. Projects - 04. Projects/01. GALILEO/02. LETTER STRATEGY/10. Development/3. Intrum Version 2.0/Codes_alex/data_train_letter_5.csv")
## Rows: 190744 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): case_id
## dbl (10): woe_NumberOfDaysFromCaseRegDate, woe_AmountOfCasePayments, woe_Num...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# colnames(letter1)
#
#
# head(letter1)
library(readxl)
DESCRIBE <- read_excel("C:/Users/navaral/Downloads/coefficients_letters.xlsx")
View(DESCRIBE)
datatable(DESCRIBE)
# ATRIBUTES_FILTERED <- DESCRIBE_L1 %>%
# filter(var_class %in% c("variable")) %>%
# filter(Missings <0.6) %>%
# filter(!(`check_ 575`=="TRUE"))
# table(ATRIBUTES_FILTERED$TYPE)
#
# ATRIBUTES_FILTERED_1<- ATRIBUTES_FILTERED %>%
# filter(TYPE %in% c("CAPITAL","CASES","DATES","GEO_ID","LETTERS","TELEPHONES" ))
# ATRIBUTES_FILTERED <- DESCRIBE_L1 %>%
# filter(INCLUDE ==1)
preds<- DESCRIBE %>%
select(varcode) %>%
unlist %>% unname
#topVars_1<-as_tibble(topVars) %>% filter(!(topVars %in% c("c9057", "b0015"))) %>% unlist %>% unname
topVars_1<-DESCRIBE %>%select(varcode) %>%filter(varcode %in% c(
"woe_AddressStatus",
"woe_AmountOfCasePayments",
"woe_AmountOfCustomerPayments",
"woe_CustomerSegmentation",
"woe_NumberOfDaysFromCaseRegDate",
"woe_NumberOfOutgoingCalls",
"woe_NumberOfOutgoingSMS",
"woe_OriginalCapitalOfCaseInvoices",
"woe_PortfolioId")) %>% unlist %>% unname
topVars_2<-DESCRIBE %>%filter(letter2 >=0)%>%select(varcode) %>% unlist %>% unname
topVars_3<-DESCRIBE %>%filter(letter3 >=0)%>%select(varcode) %>% unlist %>% unname
topVars_4<-DESCRIBE %>%filter(letter4 >=0)%>%select(varcode) %>% unlist %>% unname
topVars_5<-DESCRIBE %>%filter(letter5 >=0)%>%select(varcode) %>% unlist %>% unname
fmla1<-paste0("target90Days~",paste(c(topVars_1),collapse = "+"))
fmla2<-paste0("target90Days~",paste(c(topVars_2),collapse = "+"))
fmla3<-paste0("target90Days~",paste(c(topVars_3),collapse = "+"))
fmla4<-paste0("target90Days~",paste(c(topVars_4),collapse = "+"))
fmla5<-paste0("target90Days~",paste(c(topVars_5),collapse = "+"))
Letter 1
mod1<-glm(fmla1, gaussian(link = "identity"), letter1)
summary(mod1)
##
## Call:
## glm(formula = fmla1, family = gaussian(link = "identity"), data = letter1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.1376 -0.0121 -0.0074 -0.0022 1.0100
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.015147 0.000228 66.57
## woe_AddressStatus 0.011175 0.001142 9.79
## woe_AmountOfCasePayments 0.025018 0.001252 19.99
## woe_AmountOfCustomerPayments 0.015869 0.001087 14.60
## woe_CustomerSegmentation 0.005891 0.001076 5.47
## woe_NumberOfDaysFromCaseRegDate 0.002032 0.000429 4.74
## woe_NumberOfOutgoingCalls -0.002141 0.001254 -1.71
## woe_NumberOfOutgoingSMS -0.002615 0.003157 -0.83
## woe_OriginalCapitalOfCaseInvoices 0.003775 0.001084 3.48
## woe_PortfolioId 0.007086 0.000288 24.57
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## woe_AddressStatus < 0.0000000000000002 ***
## woe_AmountOfCasePayments < 0.0000000000000002 ***
## woe_AmountOfCustomerPayments < 0.0000000000000002 ***
## woe_CustomerSegmentation 0.000000044 ***
## woe_NumberOfDaysFromCaseRegDate 0.000002132 ***
## woe_NumberOfOutgoingCalls 0.0877 .
## woe_NumberOfOutgoingSMS 0.4076
## woe_OriginalCapitalOfCaseInvoices 0.0005 ***
## woe_PortfolioId < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0082913)
##
## Null deviance: 1763.9 on 208908 degrees of freedom
## Residual deviance: 1732.0 on 208899 degrees of freedom
## AIC: -408337
##
## Number of Fisher Scoring iterations: 2
Letter 2
mod2<-glm(fmla2, gaussian(link = "identity"), letter2)
summary(mod2)
##
## Call:
## glm(formula = fmla2, family = gaussian(link = "identity"), data = letter2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.1165 -0.0128 -0.0093 -0.0035 1.0141
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 0.018893 0.000221 85.55
## woe_AmountOfCaseCapital 0.003724 0.000597 6.24
## woe_AmountOfCustomerOpenCases -0.007799 0.002254 -3.46
## woe_CustomerSegmentation 0.026683 0.000367 72.65
## woe_NumberOfDaysFromCaseRegDate 0.067880 0.007490 9.06
## woe_NumberOfOpenCases 0.006647 0.001624 4.09
## woe_NumberOfOutgoingCalls -0.004568 0.000465 -9.82
## woe_NumberOfOutgoingSMS 0.016628 0.003891 4.27
## woe_NumberOfRegisteredCases NA NA NA
## woe_PortfolioId 0.004481 0.000329 13.62
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## woe_AmountOfCaseCapital 0.00000000045 ***
## woe_AmountOfCustomerOpenCases 0.00054 ***
## woe_CustomerSegmentation < 0.0000000000000002 ***
## woe_NumberOfDaysFromCaseRegDate < 0.0000000000000002 ***
## woe_NumberOfOpenCases 0.00004248399 ***
## woe_NumberOfOutgoingCalls < 0.0000000000000002 ***
## woe_NumberOfOutgoingSMS 0.00001925991 ***
## woe_NumberOfRegisteredCases NA
## woe_PortfolioId < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0096497)
##
## Null deviance: 2630.5 on 266018 degrees of freedom
## Residual deviance: 2566.9 on 266010 degrees of freedom
## AIC: -479608
##
## Number of Fisher Scoring iterations: 2
Letter 3
mod3<-glm(fmla3, gaussian(link = "identity"), letter3)
summary(mod3)
##
## Call:
## glm(formula = fmla3, family = gaussian(link = "identity"), data = letter3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.0908 -0.0099 -0.0069 -0.0040 1.0057
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 0.010242 0.000208 49.18
## woe_AddressStatus 0.004260 0.002327 1.83
## woe_AmountOfCasePayments 0.021165 0.000953 22.20
## woe_AmountOfCustomerCosts 0.005012 0.000995 5.04
## woe_NumberOfDaysFromCaseRegDate 0.006705 0.000818 8.20
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD 0.005086 0.000657 7.74
## woe_NumberOfOutgoingCalls -0.003301 0.000926 -3.56
## woe_NumberOfOutgoingSMS NA NA NA
## woe_NumberOfRegisteredCases 0.000826 0.001018 0.81
## woe_OriginalCapitalOfCaseInvoices 0.013255 0.001060 12.50
## woe_PortfolioId 0.005109 0.000475 10.75
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## woe_AddressStatus 0.06721 .
## woe_AmountOfCasePayments < 0.0000000000000002 ***
## woe_AmountOfCustomerCosts 0.00000047599748337 ***
## woe_NumberOfDaysFromCaseRegDate 0.00000000000000024 ***
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD 0.00000000000000964 ***
## woe_NumberOfOutgoingCalls 0.00037 ***
## woe_NumberOfOutgoingSMS NA
## woe_NumberOfRegisteredCases 0.41714
## woe_OriginalCapitalOfCaseInvoices < 0.0000000000000002 ***
## woe_PortfolioId < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0075637)
##
## Null deviance: 1514.3 on 198935 degrees of freedom
## Residual deviance: 1504.6 on 198926 degrees of freedom
## AIC: -407115
##
## Number of Fisher Scoring iterations: 2
Letter 4
mod4<-glm(fmla4, gaussian(link = "identity"), letter4)
summary(mod4)
##
## Call:
## glm(formula = fmla4, family = gaussian(link = "identity"), data = letter4)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.1203 -0.0163 -0.0107 -0.0064 1.0076
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 0.015337 0.000235 65.19
## woe_AddressStatus 0.023476 0.001731 13.56
## woe_AmountOfCasePayments 0.030426 0.001265 24.05
## woe_AmountOfCustomerOpenCases 0.008945 0.000819 10.92
## woe_NumberOfDaysFromCaseRegDate 0.035763 0.003454 10.36
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD 0.005958 0.000790 7.54
## woe_NumberOfOutgoingCalls 0.006107 0.001364 4.48
## woe_NumberOfOutgoingSMS NA NA NA
## woe_NumberOfRegisteredCases NA NA NA
## woe_OriginalCapitalOfCaseInvoices 0.012013 0.001485 8.09
## woe_PortfolioId 0.008070 0.000459 17.57
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## woe_AddressStatus < 0.0000000000000002 ***
## woe_AmountOfCasePayments < 0.0000000000000002 ***
## woe_AmountOfCustomerOpenCases < 0.0000000000000002 ***
## woe_NumberOfDaysFromCaseRegDate < 0.0000000000000002 ***
## woe_NumberOfDaysFromLastInvoiceDateToCaseRD 0.00000000000004556 ***
## woe_NumberOfOutgoingCalls 0.00000756363081084 ***
## woe_NumberOfOutgoingSMS NA
## woe_NumberOfRegisteredCases NA
## woe_OriginalCapitalOfCaseInvoices 0.00000000000000061 ***
## woe_PortfolioId < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.011845)
##
## Null deviance: 2863.0 on 239872 degrees of freedom
## Residual deviance: 2841.2 on 239864 degrees of freedom
## AIC: -383296
##
## Number of Fisher Scoring iterations: 2
Letter 5
mod5<-glm(fmla5, gaussian(link = "identity"), letter5)
summary(mod5)
##
## Call:
## glm(formula = fmla5, family = gaussian(link = "identity"), data = letter5)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.1257 -0.0164 -0.0103 -0.0038 1.0114
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.031206 0.000330 94.54
## woe_AmountOfCasePayments 0.037866 0.000470 80.52
## woe_AmountOfCustomerCosts 0.009076 0.002607 3.48
## woe_NumberOfDaysFromCaseRegDate 0.022748 0.001089 20.90
## woe_NumberOfOutgoingCalls 0.006610 0.000992 6.66
## woe_NumberOfOutgoingSMS 0.003006 0.002270 1.32
## woe_NumberOfRegisteredCases 0.008688 0.003207 2.71
## woe_OriginalCapitalOfCaseInvoices 0.007255 0.000727 9.98
## woe_PortfolioId 0.004841 0.000537 9.01
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## woe_AmountOfCasePayments < 0.0000000000000002 ***
## woe_AmountOfCustomerCosts 0.0005 ***
## woe_NumberOfDaysFromCaseRegDate < 0.0000000000000002 ***
## woe_NumberOfOutgoingCalls 0.000000000027 ***
## woe_NumberOfOutgoingSMS 0.1854
## woe_NumberOfRegisteredCases 0.0068 **
## woe_OriginalCapitalOfCaseInvoices < 0.0000000000000002 ***
## woe_PortfolioId < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.015064)
##
## Null deviance: 2992.5 on 190743 degrees of freedom
## Residual deviance: 2873.1 on 190735 degrees of freedom
## AIC: -258944
##
## Number of Fisher Scoring iterations: 2