Punto 1. Importación de datos

# Overall descriptive statistics for every column of the dataset
summary(object = data)
##       customerID      gender     SeniorCitizen    Partner    Dependents
##  0002-ORFBO:   1   Female:3488   Min.   :0.0000   No :3641   No :4933  
##  0003-MKNFE:   1   Male  :3555   1st Qu.:0.0000   Yes:3402   Yes:2110  
##  0004-TLHLJ:   1                 Median :0.0000                        
##  0011-IGKFF:   1                 Mean   :0.1621                        
##  0013-EXCHZ:   1                 3rd Qu.:0.0000                        
##  0013-MHZWF:   1                 Max.   :1.0000                        
##  (Other)   :7037                                                       
##      tenure      PhoneService          MultipleLines     InternetService
##  Min.   : 0.00   No : 682     No              :3390   DSL        :2421  
##  1st Qu.: 9.00   Yes:6361     No phone service: 682   Fiber optic:3096  
##  Median :29.00                Yes             :2971   No         :1526  
##  Mean   :32.37                                                          
##  3rd Qu.:55.00                                                          
##  Max.   :72.00                                                          
##                                                                         
##              OnlineSecurity              OnlineBackup 
##  No                 :3498   No                 :3088  
##  No internet service:1526   No internet service:1526  
##  Yes                :2019   Yes                :2429  
##                                                       
##                                                       
##                                                       
##                                                       
##             DeviceProtection              TechSupport  
##  No                 :3095    No                 :3473  
##  No internet service:1526    No internet service:1526  
##  Yes                :2422    Yes                :2044  
##                                                        
##                                                        
##                                                        
##                                                        
##               StreamingTV              StreamingMovies           Contract   
##  No                 :2810   No                 :2785   Month-to-month:3875  
##  No internet service:1526   No internet service:1526   One year      :1473  
##  Yes                :2707   Yes                :2732   Two year      :1695  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  PaperlessBilling                   PaymentMethod  MonthlyCharges  
##  No :2872         Bank transfer (automatic):1544   Min.   : 18.25  
##  Yes:4171         Credit card (automatic)  :1522   1st Qu.: 35.50  
##                   Electronic check         :2365   Median : 70.35  
##                   Mailed check             :1612   Mean   : 64.76  
##                                                    3rd Qu.: 89.85  
##                                                    Max.   :118.75  
##                                                                    
##   TotalCharges    Churn     
##  Min.   :  18.8   No :5174  
##  1st Qu.: 401.4   Yes:1869  
##  Median :1397.5             
##  Mean   :2283.3             
##  3rd Qu.:3794.7             
##  Max.   :8684.8             
##  NA's   :11
# Frequency tables
# Customer counts by gender
table(data[["gender"]])
## 
## Female   Male 
##   3488   3555
# Customer counts by the SeniorCitizen indicator
table(data[["SeniorCitizen"]])
## 
##    0    1 
## 5901 1142
# Customer counts by churn outcome
table(data[["Churn"]])
## 
##   No  Yes 
## 5174 1869
# Keep only the three numeric columns used in the correlation analysis
num_vars <- select(data, tenure, MonthlyCharges, TotalCharges)

# Correlation matrix; "complete.obs" restricts the computation to rows
# without missing values (TotalCharges contains NAs)
cor_matrix <- cor(x = num_vars, use = "complete.obs")

# Plot the upper triangle of the matrix, printing each coefficient as a number
corrplot(corr = cor_matrix, type = "upper", method = "number")

# Box plot of monthly charges for each churn group, filled by churn status
p1 <- ggplot(
  data = data,
  mapping = aes(Churn, MonthlyCharges, fill = Churn)
) +
  geom_boxplot() +
  labs(title = "Cargos mensuales según fuga")

p1

# Box plot of total charges for each churn group, filled by churn status.
# Rows with a missing TotalCharges value are dropped by the box plot
# statistic, which reports them in a warning when the plot is drawn.
p2 <- ggplot(
  data = data,
  mapping = aes(Churn, TotalCharges, fill = Churn)
) +
  geom_boxplot() +
  labs(title = "Cargos totales según fuga")

p2
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Stacked bar chart of customer counts per churn group, split by gender
p3 <- ggplot(data = data, mapping = aes(Churn, fill = gender)) +
  geom_bar(position = "stack") +
  labs(title = "Rotación por género")

p3

# Scatter plot of monthly charges against tenure with a fitted linear trend.
# Points are semi-transparent to reduce overplotting; the red line is the
# least-squares fit drawn together with its confidence band (se = TRUE).
p4 <- ggplot(data = data, mapping = aes(tenure, MonthlyCharges)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(title = "Antigüedad vs Cargos mensuales")

p4
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between tenure and monthly charges (complete pairs only)
with(data, cor(tenure, MonthlyCharges, use = "complete.obs"))
## [1] 0.2478999
# Simple linear regression of monthly charges on tenure
modelo <- lm(formula = MonthlyCharges ~ tenure, data = data)

# Coefficients, residual summary and goodness-of-fit statistics
summary(object = modelo)
## 
## Call:
## lm(formula = MonthlyCharges ~ tenure, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.498 -27.251   6.245  24.943  54.376 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 54.92978    0.57476   95.57   <2e-16 ***
## tenure       0.30372    0.01415   21.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.15 on 7041 degrees of freedom
## Multiple R-squared:  0.06145,    Adjusted R-squared:  0.06132 
## F-statistic:   461 on 1 and 7041 DF,  p-value: < 2.2e-16
# Lay out the four plots together in a two-column grid
grid.arrange(grobs = list(p1, p2, p3, p4), ncol = 2)
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## `geom_smooth()` using formula = 'y ~ x'

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Descriptive statistics for the built-in cars dataset
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.