# General descriptive statistics: per-column summary of `data`
summary(data)
## customerID gender SeniorCitizen Partner Dependents
## 0002-ORFBO: 1 Female:3488 Min. :0.0000 No :3641 No :4933
## 0003-MKNFE: 1 Male :3555 1st Qu.:0.0000 Yes:3402 Yes:2110
## 0004-TLHLJ: 1 Median :0.0000
## 0011-IGKFF: 1 Mean :0.1621
## 0013-EXCHZ: 1 3rd Qu.:0.0000
## 0013-MHZWF: 1 Max. :1.0000
## (Other) :7037
## tenure PhoneService MultipleLines InternetService
## Min. : 0.00 No : 682 No :3390 DSL :2421
## 1st Qu.: 9.00 Yes:6361 No phone service: 682 Fiber optic:3096
## Median :29.00 Yes :2971 No :1526
## Mean :32.37
## 3rd Qu.:55.00
## Max. :72.00
##
## OnlineSecurity OnlineBackup
## No :3498 No :3088
## No internet service:1526 No internet service:1526
## Yes :2019 Yes :2429
##
##
##
##
## DeviceProtection TechSupport
## No :3095 No :3473
## No internet service:1526 No internet service:1526
## Yes :2422 Yes :2044
##
##
##
##
## StreamingTV StreamingMovies Contract
## No :2810 No :2785 Month-to-month:3875
## No internet service:1526 No internet service:1526 One year :1473
## Yes :2707 Yes :2732 Two year :1695
##
##
##
##
## PaperlessBilling PaymentMethod MonthlyCharges
## No :2872 Bank transfer (automatic):1544 Min. : 18.25
## Yes:4171 Credit card (automatic) :1522 1st Qu.: 35.50
## Electronic check :2365 Median : 70.35
## Mailed check :1612 Mean : 64.76
## 3rd Qu.: 89.85
## Max. :118.75
##
## TotalCharges Churn
## Min. : 18.8 No :5174
## 1st Qu.: 401.4 Yes:1869
## Median :1397.5
## Mean :2283.3
## 3rd Qu.:3794.7
## Max. :8684.8
## NA's :11
# Frequency tables for selected categorical variables
# Customer counts by gender
table(data$gender)
##
## Female Male
## 3488 3555
# Customer counts by the SeniorCitizen indicator (values 0 and 1)
table(data$SeniorCitizen)
##
## 0 1
## 5901 1142
# Customer counts by churn status
table(data$Churn)
##
## No Yes
## 5174 1869
# Keep only the three numeric columns used in the correlation analysis
num_vars <- select(data, tenure, MonthlyCharges, TotalCharges)

# Correlation matrix; "complete.obs" drops rows with a missing value
# (TotalCharges has NA entries)
cor_matrix <- cor(num_vars, use = "complete.obs")

# Show the coefficients as numbers in the upper triangle only
corrplot(cor_matrix, method = "number", type = "upper")
# Box plot of monthly charges for each churn group
p1 <- ggplot(data, aes(x = Churn, y = MonthlyCharges, fill = Churn)) +
  labs(title = "Cargos mensuales según fuga") +
  geom_boxplot()
p1
# Box plot of total charges for each churn group
p2 <- ggplot(data, aes(x = Churn, y = TotalCharges, fill = Churn)) +
  labs(title = "Cargos totales según fuga") +
  geom_boxplot()
p2
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Stacked bar chart of churn counts, split by gender
p3 <- ggplot(data, aes(x = Churn, fill = gender)) +
  labs(title = "Rotación por género") +
  geom_bar(position = "stack")
p3
# Scatter plot of tenure against monthly charges with a fitted
# linear trend line (red) and its confidence band
p4 <- ggplot(data, aes(x = tenure, y = MonthlyCharges)) +
  labs(title = "Antigüedad vs Cargos mensuales") +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = TRUE, color = "red")
p4
## `geom_smooth()` using formula = 'y ~ x'
# Correlation between tenure and monthly charges, using only
# observations where both values are present
with(data, cor(tenure, MonthlyCharges, use = "complete.obs"))
## [1] 0.2478999
# Simple linear regression of monthly charges on tenure,
# followed by the coefficient table and fit statistics
modelo <- lm(formula = MonthlyCharges ~ tenure, data = data)
summary(modelo)
##
## Call:
## lm(formula = MonthlyCharges ~ tenure, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.498 -27.251 6.245 24.943 54.376
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.92978 0.57476 95.57 <2e-16 ***
## tenure 0.30372 0.01415 21.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.15 on 7041 degrees of freedom
## Multiple R-squared: 0.06145, Adjusted R-squared: 0.06132
## F-statistic: 461 on 1 and 7041 DF, p-value: < 2.2e-16
# Lay the four plots out together on a two-column grid
grid.arrange(grobs = list(p1, p2, p3, p4), ncol = 2)
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## `geom_smooth()` using formula = 'y ~ x'
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk: per-column summary of the `cars` data frame
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.