# Load the Telco customer churn data set from a comma-separated file.
# The first column supplies the row names and character columns are read
# in as factors.
Telco_Cusomer_Churn <- read.table(
  file = "Telco_Cusomer_Churn.csv",
  sep = ",",
  dec = ".",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = TRUE
)
# Per-column summary of the whole data frame.
summary(Telco_Cusomer_Churn)
## gender SeniorCitizen Partner Dependents tenure
## Female:3488 Min. :0.0000 No :3641 No :4933 Min. : 0.00
## Male :3555 1st Qu.:0.0000 Yes:3402 Yes:2110 1st Qu.: 9.00
## Median :0.0000 Median :29.00
## Mean :0.1621 Mean :32.37
## 3rd Qu.:0.0000 3rd Qu.:55.00
## Max. :1.0000 Max. :72.00
##
## PhoneService MultipleLines InternetService
## No : 682 No :3390 DSL :2421
## Yes:6361 No phone service: 682 Fiber optic:3096
## Yes :2971 No :1526
##
##
##
##
## OnlineSecurity OnlineBackup
## No :3498 No :3088
## No internet service:1526 No internet service:1526
## Yes :2019 Yes :2429
##
##
##
##
## DeviceProtection TechSupport
## No :3095 No :3473
## No internet service:1526 No internet service:1526
## Yes :2422 Yes :2044
##
##
##
##
## StreamingTV StreamingMovies Contract
## No :2810 No :2785 Month-to-month:3875
## No internet service:1526 No internet service:1526 One year :1473
## Yes :2707 Yes :2732 Two year :1695
##
##
##
##
## PaperlessBilling PaymentMethod MonthlyCharges
## No :2872 Bank transfer (automatic):1544 Min. : 18.25
## Yes:4171 Credit card (automatic) :1522 1st Qu.: 35.50
## Electronic check :2365 Median : 70.35
## Mailed check :1612 Mean : 64.76
## 3rd Qu.: 89.85
## Max. :118.75
##
## TotalCharges Churn
## Min. : 18.8 No :5174
## 1st Qu.: 401.4 Yes:1869
## Median :1397.5
## Mean :2283.3
## 3rd Qu.:3794.7
## Max. :8684.8
## NA's :11
# Descriptive statistics for MonthlyCharges.
with(Telco_Cusomer_Churn, summary(MonthlyCharges)) # full summary of MonthlyCharges
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.25 35.50 70.35 64.76 89.85 118.75
with(Telco_Cusomer_Churn, var(MonthlyCharges)) # variance of MonthlyCharges
## [1] 905.4109
with(Telco_Cusomer_Churn, sd(MonthlyCharges)) # standard deviation of MonthlyCharges
## [1] 30.09005
# Coefficient of variation of MonthlyCharges (standard deviation / mean).
with(Telco_Cusomer_Churn, sd(MonthlyCharges) / mean(MonthlyCharges))
## [1] 0.4646273
# Descriptive statistics for tenure.
with(Telco_Cusomer_Churn, summary(tenure)) # full summary of tenure
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.00 29.00 32.37 55.00 72.00
with(Telco_Cusomer_Churn, var(tenure)) # variance of tenure
## [1] 603.1681
with(Telco_Cusomer_Churn, sd(tenure)) # standard deviation of tenure
## [1] 24.55948
# Coefficient of variation of tenure (standard deviation / mean).
with(Telco_Cusomer_Churn, sd(tenure) / mean(tenure))
## [1] 0.7586843
# Descriptive statistics for TotalCharges, computed without removing NAs.
with(Telco_Cusomer_Churn, summary(TotalCharges)) # full summary of TotalCharges
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.8 401.4 1397.5 2283.3 3794.7 8684.8 11
# The three results below are NA: the summary above reports 11 NA's in the
# column and no na.rm argument is passed to var(), sd() or mean() here.
with(Telco_Cusomer_Churn, var(TotalCharges)) # variance of TotalCharges
## [1] NA
with(Telco_Cusomer_Churn, sd(TotalCharges)) # standard deviation of TotalCharges
## [1] NA
# Coefficient of variation of TotalCharges (standard deviation / mean).
with(Telco_Cusomer_Churn, sd(TotalCharges) / mean(TotalCharges))
## [1] NA
# TotalCharges statistics with the missing values dropped (na.rm = TRUE).
var(Telco_Cusomer_Churn$TotalCharges, na.rm = TRUE) # variance of TotalCharges, NAs removed
## [1] 5138252
sd(Telco_Cusomer_Churn$TotalCharges, na.rm = TRUE) # standard deviation of TotalCharges, NAs removed
## [1] 2266.771
# Coefficient of variation of TotalCharges (standard deviation / mean), NAs
# removed. Both sd() and mean() need na.rm = TRUE; if either one is left at its
# default the ratio is NA.
sd(Telco_Cusomer_Churn$TotalCharges, na.rm = TRUE) /
  mean(Telco_Cusomer_Churn$TotalCharges, na.rm = TRUE)
# Expected result: approximately 0.9928 (sd 2266.771 divided by mean 2283.3).
library(corrplot)
## corrplot 0.95 loaded
# Correlation matrix of the three numeric variables (tenure, MonthlyCharges,
# TotalCharges), drawn as a table of numbers with corrplot.
# The columns are picked up through with() instead of attach(): attach() leaves
# every column on the search path for the rest of the session, where a global
# variable of the same name silently masks it.
nuevosDatos <- with(
  Telco_Cusomer_Churn,
  data.frame(tenure, MonthlyCharges, TotalCharges)
)
# na.omit() drops the rows that contain a missing value before correlating
# (the data-frame summary reports 11 NA's in TotalCharges).
r <- cor(na.omit(nuevosDatos))
corrplot(r, method = "number")
## Box plot: MonthlyCharges vs. Churn
library(ggplot2)
# Box plot: monthly charges by churn status (base graphics), one colour per
# Churn level.
boxplot(
  MonthlyCharges ~ Churn,
  data = Telco_Cusomer_Churn,
  col = c("lightblue", "salmon"),
  xlab = "Rotación (Churn)",
  ylab = "Cargos mensuales (MonthlyCharges)",
  main = "Cargos Mensuales según Rotación"
)
## Box plot: total charges by churn status (base graphics), one colour per
## Churn level.
boxplot(
  TotalCharges ~ Churn,
  data = Telco_Cusomer_Churn,
  col = c("lightblue", "purple"),
  xlab = "Rotación (Churn)",
  ylab = "Cargos totales (TotalCharges)",
  main = "Cargos Totales según Rotación"
)
## Stacked bar chart: number of customers per Churn level, split by gender.
library(ggplot2)
ggplot(data = Telco_Cusomer_Churn, mapping = aes(x = Churn, fill = gender)) +
  geom_bar(position = "stack") +
  theme_minimal() +
  labs(
    fill = "Género",
    y = "Cantidad de clientes",
    x = "Rotación (Churn)",
    title = "Rotación de clientes según género"
  )
# Correlation between customer tenure and monthly charges, using only the
# observations where both values are present.
x <- Telco_Cusomer_Churn[["tenure"]] # customer tenure
y <- Telco_Cusomer_Churn[["MonthlyCharges"]] # monthly charges
cor(x = x, y = y, use = "complete.obs")
## [1] 0.2478999
library(ggplot2)
# Scatter plot of MonthlyCharges against tenure with a fitted linear-model line
# and its confidence band (se = TRUE).
ggplot(data = Telco_Cusomer_Churn, mapping = aes(x = tenure, y = MonthlyCharges)) +
  geom_point(alpha = 0.5, color = "pink") +
  geom_smooth(color = "red", se = TRUE, method = "lm") +
  theme_minimal() +
  labs(
    y = "Cargos mensuales ($)",
    x = "Antigüedad del cliente (meses)",
    title = "Relación entre antigüedad y cargos mensuales"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Simple linear regression of MonthlyCharges on tenure; summary() prints the
# residual quantiles, the coefficient table and the fit statistics.
modelo <- lm(
  formula = MonthlyCharges ~ tenure,
  data = Telco_Cusomer_Churn
)
summary(modelo)
##
## Call:
## lm(formula = MonthlyCharges ~ tenure, data = Telco_Cusomer_Churn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.498 -27.251 6.245 24.943 54.376
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.92978 0.57476 95.57 <2e-16 ***
## tenure 0.30372 0.01415 21.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.15 on 7041 degrees of freedom
## Multiple R-squared: 0.06145, Adjusted R-squared: 0.06132
## F-statistic: 461 on 1 and 7041 DF, p-value: < 2.2e-16
# Fitted line: MonthlyCharges = 54.92978 + 0.30372 * tenure (customer tenure in months)
# Item 4: box plot of TotalCharges by Churn, filled by Churn level and stored
# as p4 rather than printed.
p4 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = TotalCharges, fill = Churn)
) +
  geom_boxplot() +
  labs(
    y = "TotalCharges",
    x = "Rotación (Churn)",
    title = "Cargos Totales según Rotación"
  )
# Item 5: box plot of MonthlyCharges by Churn, filled by Churn level and stored
# as p5 rather than printed.
p5 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = MonthlyCharges, fill = Churn)
) +
  geom_boxplot() +
  labs(
    y = "MonthlyCharges",
    x = "Rotación (Churn)",
    title = "Cargos Mensuales según Rotación"
  )
# Item 6: stacked bar chart of customer counts per Churn level, split by
# gender, stored as p6 rather than printed.
# The legend title is set explicitly (fill = "Género") so it matches the other
# Spanish labels of the chart instead of showing the raw column name "gender".
p6 <- ggplot(Telco_Cusomer_Churn, aes(x = Churn, fill = gender)) +
  geom_bar(position = "stack") +
  labs(
    title = "Rotación de clientes según género",
    x = "Rotación (Churn)",
    y = "Cantidad de clientes",
    fill = "Género"
  )
# Item 7: scatter plot of MonthlyCharges against tenure with a linear-model
# line and no confidence band (se = FALSE), stored as p7 rather than printed.
p7 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = tenure, y = MonthlyCharges)
) +
  geom_point(alpha = 0.5, color = "pink") +
  geom_smooth(color = "red", se = FALSE, method = "lm") +
  labs(
    y = "Cargos mensuales ($)",
    x = "Antigüedad (meses)",
    title = "Relación entre antigüedad y cargos mensuales"
  )
library(gridExtra)
# Draw the four stored plots (p4, p5, p6, p7) on one page in a two-column grid.
grid.arrange(grobs = list(p4, p5, p6, p7), ncol = 2)
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## `geom_smooth()` using formula = 'y ~ x'