# Session setup and data load.
# NOTE(review): setwd() points at a user-specific Windows folder and rm() wipes
# the caller's workspace; both are kept so the script behaves as before, but
# the script will only run unchanged on a machine that has this folder.
cat("\014") # emit a form-feed character (clears the console in RStudio)
rm(list = ls(all.names = TRUE)) # remove every object from the workspace, hidden ones included
setwd("C:/Users/Erik Ernesto Vazquez/Downloads") # folder that holds the input CSV
# Load the raw data export (the original comment describes it as coming from
# Qualtrics -- TODO confirm the source). stringsAsFactors = TRUE is spelled out
# because the recorded str()/summary() output below shows text columns as
# factors, which is no longer the read.csv() default from R 4.0 onwards.
MainStudy <- read.csv(
  "Andres-IngeInnova.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
str(MainStudy)
## 'data.frame': 780 obs. of 64 variables:
## $ Observación : int 1 2 3 4 5 6 9 10 11 12 ...
## $ ApellidoPaterno : int 47 48 49 50 51 336 713 714 715 716 ...
## $ ApellidoMaterno : int 47 48 49 50 51 336 713 714 715 716 ...
## $ Cliente : int 47 48 49 50 51 336 713 714 715 716 ...
## $ Edad : int 49 49 55 48 31 24 49 40 58 42 ...
## $ RFC : Factor w/ 691 levels "","1","155751105",..: 248 82 404 220 100 366 224 1 158 235 ...
## $ CURP : Factor w/ 698 levels "","0155751105MNLVRC14",..: 252 84 408 224 102 371 228 1 162 239 ...
## $ NumeroSeguridadSocial : logi NA NA NA NA NA NA ...
## $ Nacionalidad : Factor w/ 1 level "MX": 1 1 1 1 1 1 1 1 1 1 ...
## $ Residencia : int 1 2 1 1 1 1 2 2 2 2 ...
## $ NumeroLicenciaConducir : logi NA NA NA NA NA NA ...
## $ EstadoCivil : Factor w/ 3 levels "C","D","S": 1 1 1 2 1 1 1 1 2 1 ...
## $ Genero : Factor w/ 2 levels "F","M": 1 1 1 2 1 2 1 1 1 2 ...
## $ GeneroCliente : int 1 1 1 0 1 0 1 1 1 0 ...
## $ ClaveElectorIFE : Factor w/ 245 levels "","1.01E+11",..: 132 132 132 77 132 132 132 134 132 135 ...
## $ NumeroDependientes : int 1 0 0 0 0 1 0 1 0 1 ...
## $ TipoPersona : Factor w/ 1 level "PF": 1 1 1 1 1 1 1 1 1 1 ...
## $ Dirección : Factor w/ 676 levels "10 DE OCTUBRE #112",..: 445 107 440 513 355 444 441 61 439 502 ...
## $ ColoniaPoblacion : Factor w/ 243 levels ""," OBISPO","0",..: 164 164 164 186 129 164 164 201 164 3 ...
## $ DelegacionMunicipio : Factor w/ 16 levels "Abasolo","Apodaca",..: 14 14 14 15 14 14 14 14 14 14 ...
## $ Ciudad : Factor w/ 16 levels "Abasolo","Apodaca",..: 14 14 14 15 14 14 14 14 14 14 ...
## $ Estado : Factor w/ 1 level "Nuevo León": 1 1 1 1 1 1 1 1 1 1 ...
## $ CP : int 66218 66218 66218 66351 66231 66218 66216 66237 66218 66230 ...
## $ NumeroTelefono : Factor w/ 709 levels "","13426034",..: 228 438 114 442 561 540 474 463 369 454 ...
## $ TipoDomicilio : Factor w/ 1 level "C": 1 1 1 1 1 1 1 1 1 1 ...
## $ TipoAsentamiento : int 7 7 7 7 7 7 7 17 7 7 ...
## $ CuentaActual : Factor w/ 718 levels "G01259-005401",..: 78 79 80 81 82 411 647 648 649 650 ...
## $ Promotor : Factor w/ 40 levels "ANA MARIA DE JESUS DE LEON RAMIREZ",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ GeneroPromotor : int 1 1 1 1 1 1 1 1 1 1 ...
## $ SexoOpuesto : int 0 0 0 1 0 1 0 0 0 1 ...
## $ TipoResponsabilidad : Factor w/ 2 levels "I","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ TipoCuenta : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ TipoContrato : Factor w/ 1 level "PP": 1 1 1 1 1 1 1 1 1 1 ...
## $ ClaveUnidadMonetaria : Factor w/ 1 level "MX": 1 1 1 1 1 1 1 1 1 1 ...
## $ ValorActivoValuacion : Factor w/ 1 level "Activo": 1 1 1 1 1 1 1 1 1 1 ...
## $ NumeroPagos : int 16 16 16 16 16 16 16 16 16 16 ...
## $ FrecuenciaPagos : Factor w/ 1 level "S": 1 1 1 1 1 1 1 1 1 1 ...
## $ MontoPagar : num 1092 1014 936 1170 468 ...
## $ FechaAperturaCuenta : Factor w/ 18 levels "1/2/2018","12/2/2018",..: 7 7 7 7 7 4 11 11 11 11 ...
## $ FechaUltimoPago : Factor w/ 19 levels "12/2/2018","12/3/2018",..: 16 7 7 7 7 8 7 7 16 16 ...
## $ FechaUltimaCompra : Factor w/ 18 levels "1/2/2018","12/2/2018",..: 7 7 7 7 7 4 11 11 11 11 ...
## $ FechaCorte : Factor w/ 1 level "21/03/2018": 1 1 1 1 1 1 1 1 1 1 ...
## $ Garantia : Factor w/ 85 levels "","?LASMA","AIRE LAVADO",..: 26 26 59 53 52 19 39 59 61 26 ...
## $ CreditoMaximo : int 19422 19422 28422 45822 18222 38022 28422 15000 33222 19422 ...
## $ SaldoActual : int 13104 11154 10296 12570 5148 15000 20920 14644 16058 13764 ...
## $ LimiteCredito : int 19422 19422 28422 45822 18222 38022 28422 15000 33222 19422 ...
## $ SaldoVencido : int 2184 1014 936 870 468 4000 640 448 1862 1596 ...
## $ NumeroPagosVencidos : int 2 1 1 0 1 4 0 0 1 1 ...
## $ PagoActual : Factor w/ 8 levels "0","1","2","3",..: 3 2 2 1 2 5 1 1 2 2 ...
## $ TotalPagosReportados : int 4 5 5 5 5 1 2 2 1 1 ...
## $ FechaPrimerIncumplimiento : Factor w/ 8 levels "1/1/1901","10/2/2018",..: 3 5 5 5 5 7 5 5 3 3 ...
## $ MontoUltimoPago : num 1092 1014 936 1170 468 ...
## $ TotalAbonado : num 4368 5070 4680 6150 2340 ...
## $ TotalRecuperado : int NA NA NA NA NA NA NA NA NA NA ...
## $ PlazoMeses : int 4 4 4 4 4 4 4 4 4 4 ...
## $ MontoCreditoOriginacion : int 14000 13000 12000 15000 6000 12000 20000 14000 14000 12000 ...
## $ TotalSaldosActuales : int 13104 11154 10296 12570 5148 15000 20920 14644 16058 13764 ...
## $ TotalSaldosVencidos : int 2184 1014 936 870 468 4000 640 448 1862 1596 ...
## $ SaldoVencidoDummy : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TotalElementosNombreReportados : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TotalElementosDireccionReportados: int 1 1 1 1 1 1 1 1 1 1 ...
## $ TotalElementosEmpleoReportados : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TotalElementosCuentaReportados : int 1 1 1 1 1 1 1 1 1 1 ...
## $ SaldoInsoluto : Factor w/ 140 levels "1,483.07","1,487.21",..: 12 135 131 140 69 15 26 17 22 13 ...
# Per-column summary of the loaded data.
# NOTE(review): the recorded output below shows values worth checking before
# modelling: Edad ranges from 0 to 2019, CreditoMaximo and LimiteCredito have
# a minimum of -978, and FechaPrimerIncumplimiento is "1/1/1901" for 530 rows
# (possibly a placeholder date -- confirm with the data owner).
summary(MainStudy)
## Observación ApellidoPaterno ApellidoMaterno Cliente
## Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.:200.8 1st Qu.:196.8 1st Qu.:196.8 1st Qu.:196.8
## Median :396.5 Median :395.5 Median :395.5 Median :395.5
## Mean :397.3 Mean :397.0 Mean :397.0 Mean :397.0
## 3rd Qu.:595.2 3rd Qu.:597.2 3rd Qu.:597.2 3rd Qu.:597.2
## Max. :793.0 Max. :794.0 Max. :794.0 Max. :794.0
##
## Edad RFC CURP
## Min. : 0.00 : 28 : 21
## 1st Qu.: 35.00 AIRS791106: 2 AIRS791106HNLVMR04: 2
## Median : 46.00 AOHR921104: 2 AOHR921104MNLLRS05: 2
## Mean : 48.56 BAAO960812: 2 BAAO960812 : 2
## 3rd Qu.: 49.00 BAMA950907: 2 BAMA950907MNLNRL04: 2
## Max. :2019.00 BANZ890520: 2 BANZ890520 : 2
## (Other) :742 (Other) :749
## NumeroSeguridadSocial Nacionalidad Residencia NumeroLicenciaConducir
## Mode:logical MX:780 Min. :1.000 Mode:logical
## NA's:780 1st Qu.:1.000 NA's:780
## Median :1.000
## Mean :1.038
## 3rd Qu.:1.000
## Max. :2.000
##
## EstadoCivil Genero GeneroCliente ClaveElectorIFE NumeroDependientes
## C:551 F:612 Min. :0.0000 2.32E+11: 28 Min. :0.0000
## D: 21 M:168 1st Qu.:1.0000 2.15E+12: 23 1st Qu.:0.0000
## S:208 Median :1.0000 2.31E+12: 23 Median :0.0000
## Mean :0.7846 4.69E+11: 23 Mean :0.3359
## 3rd Qu.:1.0000 7.03E+11: 17 3rd Qu.:1.0000
## Max. :1.0000 1.63E+12: 16 Max. :2.0000
## (Other) :650
## TipoPersona Dirección ColoniaPoblacion
## PF:780 MIRAMAR #2305: 5 MartÍnez : 17
## LIRIO #7003 : 4 Residencial Terranova : 17
## MONCLOVA #338: 4 Vistas de San Juan : 17
## ACAPULCO #115: 3 Buenos Aires : 16
## CUARTA #308 : 3 Ciudad San Marcos Sector Pionero: 16
## PALMAS #197 : 3 Evolución : 16
## (Other) :758 (Other) :681
## DelegacionMunicipio Ciudad
## Guadalupe :142 Guadalupe :142
## Apodaca :127 Apodaca :127
## Monterrey :121 Monterrey :121
## Juárez :106 Juárez :106
## General Escobedo : 88 General Escobedo : 88
## San Nicolás de los Garza: 53 San Nicolás de los Garza: 53
## (Other) :143 (Other) :143
## Estado CP NumeroTelefono TipoDomicilio
## Nuevo León:780 Min. : 6428 8110061169: 2 C:780
## 1st Qu.:66001 8110063352: 2
## Median :66491 8110389567: 2
## Mean :66024 8110404027: 2
## 3rd Qu.:67121 8111167932: 2
## Max. :67493 8111167937: 2
## NA's :1 (Other) :768
## TipoAsentamiento CuentaActual Promotor
## Min. : 7.00 G01341-004373: 2 MARTHA GUADALUPE VALADEZ BANDA: 52
## 1st Qu.: 7.00 G01341-004374: 2 JOSE ADRIAN MEDINA CASAS : 48
## Median : 7.00 G01341-004375: 2 SOFIA ISELA ESPARZA PADILLA : 39
## Mean :13.54 G01341-004377: 2 MARIA PATRICIA BOLAÑOS LUNA : 38
## 3rd Qu.:24.00 G01341-004379: 2 MARIA DE JESUS SOTO ROJAS : 36
## Max. :40.00 G01341-004380: 2 JUANA HERRERA FRAUSTRO : 35
## (Other) :768 (Other) :532
## GeneroPromotor SexoOpuesto TipoResponsabilidad TipoCuenta
## Min. :0.0000 Min. :0.0000 I: 12 Mode :logical
## 1st Qu.:1.0000 1st Qu.:0.0000 M:768 FALSE:780
## Median :1.0000 Median :0.0000
## Mean :0.8013 Mean :0.3321
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
##
## TipoContrato ClaveUnidadMonetaria ValorActivoValuacion NumeroPagos
## PP:780 MX:780 Activo:780 Min. :12.00
## 1st Qu.:16.00
## Median :16.00
## Mean :15.94
## 3rd Qu.:16.00
## Max. :16.00
##
## FrecuenciaPagos MontoPagar FechaAperturaCuenta FechaUltimoPago
## S:780 Min. : 94.0 15/02/2018:124 20/03/2018:542
## 1st Qu.: 390.0 9/2/2018 :124 12/3/2018 : 39
## Median : 468.0 2/2/2018 : 90 14/03/2018: 34
## Mean : 500.3 23/02/2018: 70 16/03/2018: 34
## 3rd Qu.: 546.0 16/02/2018: 68 21/03/2018: 33
## Max. :2020.0 21/02/2018: 63 5/3/2018 : 25
## (Other) :241 (Other) : 73
## FechaUltimaCompra FechaCorte Garantia CreditoMaximo
## 15/02/2018:124 21/03/2018:780 REFRIGERADOR:234 Min. : -978
## 9/2/2018 :124 LAVADORA :114 1st Qu.: 17622
## 2/2/2018 : 90 TV : 65 Median : 22422
## 23/02/2018: 70 PANTALLA : 57 Mean : 24222
## 16/02/2018: 68 PLASMA : 37 3rd Qu.: 28797
## 21/02/2018: 63 ESTEREO : 33 Max. :119622
## (Other) :241 (Other) :240
## SaldoActual LimiteCredito SaldoVencido NumeroPagosVencidos
## Min. : 1104 Min. : -978 Min. : 0.0 Min. :0.000
## 1st Qu.: 4680 1st Qu.: 17622 1st Qu.: 0.0 1st Qu.:0.000
## Median : 5616 Median : 22422 Median : 0.0 Median :1.000
## Mean : 6102 Mean : 24222 Mean : 278.9 Mean :1.205
## 3rd Qu.: 6552 3rd Qu.: 28797 3rd Qu.: 390.0 3rd Qu.:2.000
## Max. :24240 Max. :119622 Max. :10100.0 Max. :6.000
## NA's :20
## PagoActual TotalPagosReportados FechaPrimerIncumplimiento
## V :530 Min. :0.000 1/1/1901 :530
## 1 :131 1st Qu.:3.000 17/03/2018:106
## 2 : 45 Median :4.000 10/3/2018 : 62
## 0 : 35 Mean :3.628 24/02/2018: 30
## 3 : 16 3rd Qu.:5.000 3/3/2018 : 24
## 4 : 10 Max. :6.000 10/2/2018 : 15
## (Other): 13 (Other) : 13
## MontoUltimoPago TotalAbonado TotalRecuperado PlazoMeses
## Min. : 4 Min. : 0 Min. : 0.0 Min. :3.000
## 1st Qu.: 390 1st Qu.:1170 1st Qu.: 2.0 1st Qu.:4.000
## Median : 468 Median :1872 Median : 300.0 Median :4.000
## Mean : 489 Mean :1856 Mean : 356.9 Mean :3.985
## 3rd Qu.: 546 3rd Qu.:2340 3rd Qu.: 546.0 3rd Qu.:4.000
## Max. :1560 Max. :6479 Max. :1102.0 Max. :4.000
## NA's :21 NA's :9 NA's :733
## MontoCreditoOriginacion TotalSaldosActuales TotalSaldosVencidos
## Min. : 1204 Min. : 1104 Min. : 0.0
## 1st Qu.: 5000 1st Qu.: 4680 1st Qu.: 0.0
## Median : 6000 Median : 5616 Median : 0.0
## Mean : 6362 Mean : 6102 Mean : 278.9
## 3rd Qu.: 7000 3rd Qu.: 6552 3rd Qu.: 390.0
## Max. :20000 Max. :24240 Max. :10100.0
##
## SaldoVencidoDummy TotalElementosNombreReportados
## Min. :0.0000 Min. :1
## 1st Qu.:0.0000 1st Qu.:1
## Median :0.0000 Median :1
## Mean :0.3205 Mean :1
## 3rd Qu.:1.0000 3rd Qu.:1
## Max. :1.0000 Max. :1
##
## TotalElementosDireccionReportados TotalElementosEmpleoReportados
## Min. :1 Min. :1
## 1st Qu.:1 1st Qu.:1
## Median :1 Median :1
## Mean :1 Mean :1
## 3rd Qu.:1 3rd Qu.:1
## Max. :1 Max. :1
##
## TotalElementosCuentaReportados SaldoInsoluto
## Min. :1 4,461.62: 86
## 1st Qu.:1 4,077.03: 75
## Median :1 4,846.22: 69
## Mean :1 5,230.81: 38
## 3rd Qu.:1 3,397.52: 33
## Max. :1 3,718.02: 30
## (Other) :449
## Variables independientes: Genero del cliente, Genero del promotor
## Variables de control (demográficos): Edad, Estado Civil
## Variable dependiente: Saldo vencido
## H1> Mujeres tienen menos saldo vencido que los hombres
## H2> Cuando el promotor es del sexo opuesto al del cliente, hay menos probabilidad de que el cliente tenga saldo vencido
# H1 descriptives: split the clients by Genero and compare overdue balances.
# SaldoVencidoDummy is a 0/1 column (presumably 1 = client has an overdue
# balance -- confirm), so its mean is the share of flagged clients.
MainStudyF <- subset(MainStudy, Genero == "F") # rows with Genero "F"
MainStudyM <- subset(MainStudy, Genero == "M") # rows with Genero "M"
with(MainStudyF, mean(TotalSaldosVencidos)) # mean overdue amount, Genero "F"
## [1] 265.6029
with(MainStudyF, mean(SaldoVencidoDummy)) # mean of the overdue dummy, Genero "F"
## [1] 0.2990196
with(MainStudyM, mean(TotalSaldosVencidos)) # mean overdue amount, Genero "M"
## [1] 327.2024
with(MainStudyM, mean(SaldoVencidoDummy)) # mean of the overdue dummy, Genero "M"
## [1] 0.3988095
# Welch t-test of the overdue amount, Genero "F" vs "M":
# no significant difference in the mean overdue balance.
t.test(x = MainStudyF$TotalSaldosVencidos, y = MainStudyM$TotalSaldosVencidos)
##
## Welch Two Sample t-test
##
## data: MainStudyF$TotalSaldosVencidos and MainStudyM$TotalSaldosVencidos
## t = -1.1136, df = 335.07, p-value = 0.2663
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -170.41163 47.21275
## sample estimates:
## mean of x mean of y
## 265.6029 327.2024
# H1: Welch t-test of the overdue dummy, Genero "F" vs "M":
# women show the lower overdue rate (women pay better).
t.test(x = MainStudyF$SaldoVencidoDummy, y = MainStudyM$SaldoVencidoDummy)
##
## Welch Two Sample t-test
##
## data: MainStudyF$SaldoVencidoDummy and MainStudyM$SaldoVencidoDummy
## t = -2.3661, df = 252.4, p-value = 0.01873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.18285004 -0.01672979
## sample estimates:
## mean of x mean of y
## 0.2990196 0.3988095
# H1: chi-squared test of independence between the overdue dummy and Genero;
# agrees with the t-test above (women pay better).
chisq.test(x = MainStudy$SaldoVencidoDummy, y = MainStudy$Genero)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: MainStudy$SaldoVencidoDummy and MainStudy$Genero
## X-squared = 5.5777, df = 1, p-value = 0.01819
# H2 descriptives: split the clients by the SexoOpuesto flag (presumably
# 1 = promoter and client are of opposite sex, 0 = same sex -- confirm) and
# compare overdue balances between the two groups.
MainStudySexoOpuesto <- subset(MainStudy, SexoOpuesto == "1") # flag equal to 1
MainStudySexoIgual <- subset(MainStudy, SexoOpuesto == "0") # flag equal to 0
with(MainStudySexoOpuesto, mean(TotalSaldosVencidos)) # mean overdue amount, flag = 1
## [1] 234.0734
with(MainStudySexoOpuesto, mean(SaldoVencidoDummy)) # mean of the overdue dummy, flag = 1
## [1] 0.3204633
with(MainStudySexoIgual, mean(TotalSaldosVencidos)) # mean overdue amount, flag = 0
## [1] 301.1401
with(MainStudySexoIgual, mean(SaldoVencidoDummy)) # mean of the overdue dummy, flag = 0
## [1] 0.3205374
# Welch t-test of the overdue amount, SexoOpuesto = 1 vs 0:
# no significant difference in the mean overdue balance.
t.test(
  x = MainStudySexoOpuesto$TotalSaldosVencidos,
  y = MainStudySexoIgual$TotalSaldosVencidos
)
##
## Welch Two Sample t-test
##
## data: MainStudySexoOpuesto$TotalSaldosVencidos and MainStudySexoIgual$TotalSaldosVencidos
## t = -1.4026, df = 743.23, p-value = 0.1611
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -160.9358 26.8023
## sample estimates:
## mean of x mean of y
## 234.0734 301.1401
# H2: Welch t-test of the overdue dummy, SexoOpuesto = 1 vs 0:
# no difference between the two groups.
t.test(
  x = MainStudySexoOpuesto$SaldoVencidoDummy,
  y = MainStudySexoIgual$SaldoVencidoDummy
)
##
## Welch Two Sample t-test
##
## data: MainStudySexoOpuesto$SaldoVencidoDummy and MainStudySexoIgual$SaldoVencidoDummy
## t = -0.0020854, df = 514.69, p-value = 0.9983
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06988983 0.06974161
## sample estimates:
## mean of x mean of y
## 0.3204633 0.3205374
# H2: chi-squared test of independence between the overdue dummy and the
# SexoOpuesto flag: no difference.
chisq.test(x = MainStudy$SaldoVencidoDummy, y = MainStudy$SexoOpuesto)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: MainStudy$SaldoVencidoDummy and MainStudy$SexoOpuesto
## X-squared = 1.186e-29, df = 1, p-value = 1
## linear probability model (ordinary least squares on the 0/1 overdue dummy)
# Predictors: client gender, age, marital status and promoter gender.
# NOTE(review): H2 is about the promoter being of the opposite sex to the
# client, but this formula enters GeneroPromotor as a main effect only; it does
# not include SexoOpuesto or a GeneroCliente:GeneroPromotor interaction --
# confirm this is the intended test of H2.
# NOTE(review): the summary() output earlier in this file shows Edad values of
# 0 and 2019, which may influence the Edad coefficient -- verify.
linearregression <- lm(
  SaldoVencidoDummy ~ GeneroCliente + Edad + EstadoCivil + GeneroPromotor,
  data = MainStudy
)
summary(linearregression)
##
## Call:
## lm(formula = SaldoVencidoDummy ~ GeneroCliente + Edad + EstadoCivil +
## GeneroPromotor, data = MainStudy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4601 -0.3312 -0.3132 0.6633 0.8103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4228678 0.0519129 8.146 1.51e-15 ***
## GeneroCliente -0.1219570 0.0407151 -2.995 0.00283 **
## Edad 0.0004310 0.0001648 2.616 0.00907 **
## EstadoCivilD -0.1110426 0.1027689 -1.081 0.28025
## EstadoCivilS -0.1236838 0.0380906 -3.247 0.00122 **
## GeneroPromotor 0.0104548 0.0416924 0.251 0.80207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4617 on 774 degrees of freedom
## Multiple R-squared: 0.02882, Adjusted R-squared: 0.02254
## F-statistic: 4.593 on 5 and 774 DF, p-value: 0.0003893
## logistic regression (binomial GLM) with the same predictors as the lm() fit above
logitregression <- glm(
  SaldoVencidoDummy ~ GeneroCliente + Edad + EstadoCivil + GeneroPromotor,
  family = binomial,
  data = MainStudy
)
summary(logitregression)
##
## Call:
## glm(formula = SaldoVencidoDummy ~ GeneroCliente + Edad + EstadoCivil +
## GeneroPromotor, family = binomial, data = MainStudy)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.159 -0.898 -0.834 1.452 1.862
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.482393 0.327493 -1.473 0.14075
## GeneroCliente -0.559775 0.185795 -3.013 0.00259 **
## Edad 0.006362 0.005243 1.213 0.22499
## EstadoCivilD -0.565959 0.523535 -1.081 0.27968
## EstadoCivilS -0.586017 0.190603 -3.075 0.00211 **
## GeneroPromotor 0.043718 0.196353 0.223 0.82381
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 978.52 on 779 degrees of freedom
## Residual deviance: 954.64 on 774 degrees of freedom
## AIC: 966.64
##
## Number of Fisher Scoring iterations: 6