library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("nycflights13") # Para join
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(RSQLite)
library(proto)
library(gsubfn)
library(ggplot2)
clientes <- read.csv("WA_Fn-UseC_-Telco-Customer-Churn.csv",
header = TRUE, sep = ",",
stringsAsFactors = TRUE)
head(clientes)
## customerID gender SeniorCitizen Partner Dependents tenure PhoneService
## 1 7590-VHVEG Female 0 Yes No 1 No
## 2 5575-GNVDE Male 0 No No 34 Yes
## 3 3668-QPYBK Male 0 No No 2 Yes
## 4 7795-CFOCW Male 0 No No 45 No
## 5 9237-HQITU Female 0 No No 2 Yes
## 6 9305-CDSKC Female 0 No No 8 Yes
## MultipleLines InternetService OnlineSecurity OnlineBackup
## 1 No phone service DSL No Yes
## 2 No DSL Yes No
## 3 No DSL Yes Yes
## 4 No phone service DSL Yes No
## 5 No Fiber optic No No
## 6 Yes Fiber optic No No
## DeviceProtection TechSupport StreamingTV StreamingMovies Contract
## 1 No No No No Month-to-month
## 2 Yes No No No One year
## 3 No No No No Month-to-month
## 4 Yes Yes No No One year
## 5 No No No No Month-to-month
## 6 Yes No Yes Yes Month-to-month
## PaperlessBilling PaymentMethod MonthlyCharges TotalCharges
## 1 Yes Electronic check 29.85 29.85
## 2 No Mailed check 56.95 1889.50
## 3 Yes Mailed check 53.85 108.15
## 4 No Bank transfer (automatic) 42.30 1840.75
## 5 Yes Electronic check 70.70 151.65
## 6 Yes Electronic check 99.65 820.50
## Churn
## 1 No
## 2 No
## 3 Yes
## 4 No
## 5 Yes
## 6 Yes
str(clientes)
## 'data.frame': 7043 obs. of 21 variables:
## $ customerID : Factor w/ 7043 levels "0002-ORFBO","0003-MKNFE",..: 5376 3963 2565 5536 6512 6552 1003 4771 5605 4535 ...
## $ gender : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 1 2 1 1 2 ...
## $ SeniorCitizen : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Partner : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 2 1 ...
## $ Dependents : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 2 ...
## $ tenure : int 1 34 2 45 2 8 22 10 28 62 ...
## $ PhoneService : Factor w/ 2 levels "No","Yes": 1 2 2 1 2 2 2 1 2 2 ...
## $ MultipleLines : Factor w/ 3 levels "No","No phone service",..: 2 1 1 2 1 3 3 2 3 1 ...
## $ InternetService : Factor w/ 3 levels "DSL","Fiber optic",..: 1 1 1 1 2 2 2 1 2 1 ...
## $ OnlineSecurity : Factor w/ 3 levels "No","No internet service",..: 1 3 3 3 1 1 1 3 1 3 ...
## $ OnlineBackup : Factor w/ 3 levels "No","No internet service",..: 3 1 3 1 1 1 3 1 1 3 ...
## $ DeviceProtection: Factor w/ 3 levels "No","No internet service",..: 1 3 1 3 1 3 1 1 3 1 ...
## $ TechSupport : Factor w/ 3 levels "No","No internet service",..: 1 1 1 3 1 1 1 1 3 1 ...
## $ StreamingTV : Factor w/ 3 levels "No","No internet service",..: 1 1 1 1 1 3 3 1 3 1 ...
## $ StreamingMovies : Factor w/ 3 levels "No","No internet service",..: 1 1 1 1 1 3 1 1 3 1 ...
## $ Contract : Factor w/ 3 levels "Month-to-month",..: 1 2 1 2 1 1 1 1 1 2 ...
## $ PaperlessBilling: Factor w/ 2 levels "No","Yes": 2 1 2 1 2 2 2 1 2 1 ...
## $ PaymentMethod : Factor w/ 4 levels "Bank transfer (automatic)",..: 3 4 4 1 3 3 2 4 3 1 ...
## $ MonthlyCharges : num 29.9 57 53.9 42.3 70.7 ...
## $ TotalCharges : num 29.9 1889.5 108.2 1840.8 151.7 ...
## $ Churn : Factor w/ 2 levels "No","Yes": 1 1 2 1 2 2 1 1 2 1 ...
colnames(clientes)
## [1] "customerID" "gender" "SeniorCitizen"
## [4] "Partner" "Dependents" "tenure"
## [7] "PhoneService" "MultipleLines" "InternetService"
## [10] "OnlineSecurity" "OnlineBackup" "DeviceProtection"
## [13] "TechSupport" "StreamingTV" "StreamingMovies"
## [16] "Contract" "PaperlessBilling" "PaymentMethod"
## [19] "MonthlyCharges" "TotalCharges" "Churn"
summary(clientes)
## customerID gender SeniorCitizen Partner Dependents
## 0002-ORFBO: 1 Female:3488 Min. :0.0000 No :3641 No :4933
## 0003-MKNFE: 1 Male :3555 1st Qu.:0.0000 Yes:3402 Yes:2110
## 0004-TLHLJ: 1 Median :0.0000
## 0011-IGKFF: 1 Mean :0.1621
## 0013-EXCHZ: 1 3rd Qu.:0.0000
## 0013-MHZWF: 1 Max. :1.0000
## (Other) :7037
## tenure PhoneService MultipleLines InternetService
## Min. : 0.00 No : 682 No :3390 DSL :2421
## 1st Qu.: 9.00 Yes:6361 No phone service: 682 Fiber optic:3096
## Median :29.00 Yes :2971 No :1526
## Mean :32.37
## 3rd Qu.:55.00
## Max. :72.00
##
## OnlineSecurity OnlineBackup
## No :3498 No :3088
## No internet service:1526 No internet service:1526
## Yes :2019 Yes :2429
##
##
##
##
## DeviceProtection TechSupport
## No :3095 No :3473
## No internet service:1526 No internet service:1526
## Yes :2422 Yes :2044
##
##
##
##
## StreamingTV StreamingMovies
## No :2810 No :2785
## No internet service:1526 No internet service:1526
## Yes :2707 Yes :2732
##
##
##
##
## Contract PaperlessBilling PaymentMethod
## Month-to-month:3875 No :2872 Bank transfer (automatic):1544
## One year :1473 Yes:4171 Credit card (automatic) :1522
## Two year :1695 Electronic check :2365
## Mailed check :1612
##
##
##
## MonthlyCharges TotalCharges Churn
## Min. : 18.25 Min. : 18.8 No :5174
## 1st Qu.: 35.50 1st Qu.: 401.4 Yes:1869
## Median : 70.35 Median :1397.5
## Mean : 64.76 Mean :2283.3
## 3rd Qu.: 89.85 3rd Qu.:3794.7
## Max. :118.75 Max. :8684.8
## NA's :11
frecuencias <- data.frame(table(clientes$Churn))
colnames(frecuencias) <- c("Retirado", "Frecuencia")
frecuencias <- arrange(frecuencias, desc(Frecuencia))
frecuencias
## Retirado Frecuencia
## 1 No 5174
## 2 Yes 1869
Fugas <- frecuencias$Retirado # Eje x
Frecuencia <- frecuencias$Frecuencia # Eje y
ggplot(data=frecuencias, aes(x=Fugas, y=Frecuencia,
fill=Fugas)) +
geom_bar(stat="identity") +
#scale_fill_grey(start = 0.2, end = 0.8,na.value = "red") +
ggtitle("Cantidad de clientes retirados de la empresa")
clientesfugados <- filter(clientes, Churn == 'Yes')
head(clientesfugados)
## customerID gender SeniorCitizen Partner Dependents tenure PhoneService
## 1 3668-QPYBK Male 0 No No 2 Yes
## 2 9237-HQITU Female 0 No No 2 Yes
## 3 9305-CDSKC Female 0 No No 8 Yes
## 4 7892-POOKP Female 0 Yes No 28 Yes
## 5 0280-XJGEX Male 0 No No 49 Yes
## 6 4190-MFLUW Female 0 Yes Yes 10 Yes
## MultipleLines InternetService OnlineSecurity OnlineBackup
## 1 No DSL Yes Yes
## 2 No Fiber optic No No
## 3 Yes Fiber optic No No
## 4 Yes Fiber optic No No
## 5 Yes Fiber optic No Yes
## 6 No DSL No No
## DeviceProtection TechSupport StreamingTV StreamingMovies Contract
## 1 No No No No Month-to-month
## 2 No No No No Month-to-month
## 3 Yes No Yes Yes Month-to-month
## 4 Yes Yes Yes Yes Month-to-month
## 5 Yes No Yes Yes Month-to-month
## 6 Yes Yes No No Month-to-month
## PaperlessBilling PaymentMethod MonthlyCharges TotalCharges
## 1 Yes Mailed check 53.85 108.15
## 2 Yes Electronic check 70.70 151.65
## 3 Yes Electronic check 99.65 820.50
## 4 Yes Electronic check 104.80 3046.05
## 5 Yes Bank transfer (automatic) 103.70 5036.30
## 6 No Credit card (automatic) 55.20 528.35
## Churn
## 1 Yes
## 2 Yes
## 3 Yes
## 4 Yes
## 5 Yes
## 6 Yes
summary(clientesfugados)
## customerID gender SeniorCitizen Partner Dependents
## 0004-TLHLJ: 1 Female:939 Min. :0.0000 No :1200 No :1543
## 0011-IGKFF: 1 Male :930 1st Qu.:0.0000 Yes: 669 Yes: 326
## 0013-EXCHZ: 1 Median :0.0000
## 0022-TCJCI: 1 Mean :0.2547
## 0023-HGHWL: 1 3rd Qu.:1.0000
## 0023-XUOPT: 1 Max. :1.0000
## (Other) :1863
## tenure PhoneService MultipleLines InternetService
## Min. : 1.00 No : 170 No :849 DSL : 459
## 1st Qu.: 2.00 Yes:1699 No phone service:170 Fiber optic:1297
## Median :10.00 Yes :850 No : 113
## Mean :17.98
## 3rd Qu.:29.00
## Max. :72.00
##
## OnlineSecurity OnlineBackup
## No :1461 No :1233
## No internet service: 113 No internet service: 113
## Yes : 295 Yes : 523
##
##
##
##
## DeviceProtection TechSupport
## No :1211 No :1446
## No internet service: 113 No internet service: 113
## Yes : 545 Yes : 310
##
##
##
##
## StreamingTV StreamingMovies
## No :942 No :938
## No internet service:113 No internet service:113
## Yes :814 Yes :818
##
##
##
##
## Contract PaperlessBilling PaymentMethod
## Month-to-month:1655 No : 469 Bank transfer (automatic): 258
## One year : 166 Yes:1400 Credit card (automatic) : 232
## Two year : 48 Electronic check :1071
## Mailed check : 308
##
##
##
## MonthlyCharges TotalCharges Churn
## Min. : 18.85 Min. : 18.85 No : 0
## 1st Qu.: 56.15 1st Qu.: 134.50 Yes:1869
## Median : 79.65 Median : 703.55
## Mean : 74.44 Mean :1531.80
## 3rd Qu.: 94.20 3rd Qu.:2331.30
## Max. :118.35 Max. :8684.80
##
summary(clientesfugados$gender)
## Female Male
## 939 930
summary(as.factor(clientesfugados$SeniorCitizen))
## 0 1
## 1393 476
summary(as.factor(clientesfugados$Partner))
## No Yes
## 1200 669
summary(as.factor(clientesfugados$Dependents))
## No Yes
## 1543 326
summary(clientesfugados$tenure)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 2.00 10.00 17.98 29.00 72.00
summary(clientesfugados$MonthlyCharges)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.85 56.15 79.65 74.44 94.20 118.35
# Otras variables
summary(clientesfugados$InternetService)
## DSL Fiber optic No
## 459 1297 113
summary(clientesfugados$OnlineSecurity)
## No No internet service Yes
## 1461 113 295
summary(clientesfugados$OnlineBackup)
## No No internet service Yes
## 1233 113 523
summary(clientesfugados$DeviceProtection)
## No No internet service Yes
## 1211 113 545
summary(clientesfugados$TechSupport)
## No No internet service Yes
## 1446 113 310
Con esto tal vez podemos argumentar una hipótesis de que una razón por la que los clientes se retiran de la empresas es por el alto costo de la cuota mensual o del pago mensual que tienen que pagar.
Existen variables tales como InternetService, OnlineSecurity ,OnlineBackup, DeviceProtection, TechSupport que tienen frecuencias en alguna de sus factores por encima de mil, sin embargo, existen variables con una mayor frecuencia que bajo esta interpretación, deben ser de mayor interés.
summary(clientesfugados$PhoneService)
## No Yes
## 170 1699
frecuencias <- data.frame(table(clientesfugados$PhoneService))
colnames(frecuencias) <- c("Serv.Telefonico", "Frecuencia")
frecuencias <- arrange(frecuencias, desc(Frecuencia))
frecuencias
## Serv.Telefonico Frecuencia
## 1 Yes 1699
## 2 No 170
ServTel <- frecuencias$Serv.Telefonico # Eje x
Frecuencia <- frecuencias$Frecuencia # Eje y
ggplot(data=frecuencias, aes(x=ServTel, y=Frecuencia,
fill=ServTel)) +
geom_bar(stat="identity") +
#scale_fill_grey(start = 0.2, end = 0.8,na.value = "red") +
ggtitle("Cantidad de clientes retirados con Servicio Telefonico")
summary(clientesfugados$Contract)
## Month-to-month One year Two year
## 1655 166 48
frecuencias <- data.frame(table(clientesfugados$Contract))
colnames(frecuencias) <- c("Contrato", "Frecuencia")
frecuencias <- arrange(frecuencias, desc(Frecuencia))
frecuencias
## Contrato Frecuencia
## 1 Month-to-month 1655
## 2 One year 166
## 3 Two year 48
Contratos <- frecuencias$Contrato # Eje x
Frecuencia <- frecuencias$Frecuencia # Eje y
ggplot(data=frecuencias, aes(x=Contratos, y=Frecuencia,
fill=Contratos)) +
geom_bar(stat="identity") +
#scale_fill_grey(start = 0.2, end = 0.8,na.value = "red") +
ggtitle("Cantidad de clientes retirados de acuerdo al tipo de contrato")