# Librerías
library(ggplot2)
library(dplyr)
library(corrplot)
library(gridExtra)
# Load the Telco churn CSV. The first column of the file supplies the row
# names, and character columns are converted to factors.
Telco_Cusomer_Churn <- read.table(
  file = "Telco_Cusomer_Churn.csv",
  header = TRUE,
  sep = ",",
  dec = ".",
  row.names = 1,
  stringsAsFactors = TRUE
)
# Print a per-column summary of the loaded data frame.
summary(object = Telco_Cusomer_Churn)
##     gender     SeniorCitizen    Partner    Dependents     tenure     
##  Female:3488   Min.   :0.0000   No :3641   No :4933   Min.   : 0.00  
##  Male  :3555   1st Qu.:0.0000   Yes:3402   Yes:2110   1st Qu.: 9.00  
##                Median :0.0000                         Median :29.00  
##                Mean   :0.1621                         Mean   :32.37  
##                3rd Qu.:0.0000                         3rd Qu.:55.00  
##                Max.   :1.0000                         Max.   :72.00  
##                                                                      
##  PhoneService          MultipleLines     InternetService
##  No : 682     No              :3390   DSL        :2421  
##  Yes:6361     No phone service: 682   Fiber optic:3096  
##               Yes             :2971   No         :1526  
##                                                         
##                                                         
##                                                         
##                                                         
##              OnlineSecurity              OnlineBackup 
##  No                 :3498   No                 :3088  
##  No internet service:1526   No internet service:1526  
##  Yes                :2019   Yes                :2429  
##                                                       
##                                                       
##                                                       
##                                                       
##             DeviceProtection              TechSupport  
##  No                 :3095    No                 :3473  
##  No internet service:1526    No internet service:1526  
##  Yes                :2422    Yes                :2044  
##                                                        
##                                                        
##                                                        
##                                                        
##               StreamingTV              StreamingMovies           Contract   
##  No                 :2810   No                 :2785   Month-to-month:3875  
##  No internet service:1526   No internet service:1526   One year      :1473  
##  Yes                :2707   Yes                :2732   Two year      :1695  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  PaperlessBilling                   PaymentMethod  MonthlyCharges  
##  No :2872         Bank transfer (automatic):1544   Min.   : 18.25  
##  Yes:4171         Credit card (automatic)  :1522   1st Qu.: 35.50  
##                   Electronic check         :2365   Median : 70.35  
##                   Mailed check             :1612   Mean   : 64.76  
##                                                    3rd Qu.: 89.85  
##                                                    Max.   :118.75  
##                                                                    
##   TotalCharges    Churn     
##  Min.   :  18.8   No :5174  
##  1st Qu.: 401.4   Yes:1869  
##  Median :1397.5             
##  Mean   :2283.3             
##  3rd Qu.:3794.7             
##  Max.   :8684.8             
##  NA's   :11

# Histogramas (punto 2)

# Histogram of MonthlyCharges.
# The bin count is stated explicitly: 30 is the value stat_bin() falls back to
# when none is given, so the picture is unchanged but ggplot2 no longer emits
# its "Pick better value" message on every render.
ggplot(Telco_Cusomer_Churn, aes(x = MonthlyCharges)) +
  geom_histogram(bins = 30, fill = "tan") +
  labs(
    title = "Histograma: Cargos mensuales",
    x = "MonthlyCharges",
    y = "Frecuencia"
  )

# Histogram of TotalCharges.
# The bin count is stated explicitly: 30 is the value stat_bin() falls back to
# when none is given, so the picture is unchanged but ggplot2 no longer emits
# its "Pick better value" message. Rows whose TotalCharges is NA are still
# dropped by stat_bin() with a warning.
ggplot(Telco_Cusomer_Churn, aes(x = TotalCharges)) +
  geom_histogram(bins = 30, fill = "blue") +
  labs(
    title = "Histograma: Cargos totales (TotalCharges)",
    x = "TotalCharges",
    y = "Frecuencia"
  )
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Histogram of tenure.
# The bin count is stated explicitly: 30 is the value stat_bin() falls back to
# when none is given, so the picture is unchanged but ggplot2 no longer emits
# its "Pick better value" message.
# NOTE(review): tenure looks like a count of whole months; if so, an integer
# binwidth would avoid bins that cover unequal numbers of values -- confirm.
ggplot(Telco_Cusomer_Churn, aes(x = tenure)) +
  geom_histogram(bins = 30, fill = "green") +
  labs(
    title = "Histograma: Antigüedad (meses)",
    x = "tenure (meses)",
    y = "Frecuencia"
  )

# Bar chart of row counts for each Churn level.
ggplot(data = Telco_Cusomer_Churn, mapping = aes(x = Churn)) +
  geom_bar(fill = "gold") +
  ggtitle("Conteo por Rotación (Churn)") +
  xlab("Churn") +
  ylab("Conteo")

# Panel gráfico

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Correlation matrix of four numeric columns. With "pairwise.complete.obs"
# each coefficient is computed from the rows where both variables are present.
r <- cor(
  x = Telco_Cusomer_Churn[
    c("tenure", "MonthlyCharges", "TotalCharges", "SeniorCitizen")
  ],
  use = "pairwise.complete.obs"
)
# Draw the matrix with each coefficient printed as a number.
corrplot(corr = r, method = "number")

# Diagramas de cajas (comparativos)

# Point 4: box plot of MonthlyCharges for each Churn level, filled by Churn.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = MonthlyCharges, fill = Churn)
) +
  geom_boxplot() +
  ggtitle("Cargos mensuales por Rotación") +
  xlab("Churn") +
  ylab("MonthlyCharges")

# Point 5: box plot of TotalCharges for each Churn level, filled by Churn.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = TotalCharges, fill = Churn)
) +
  geom_boxplot() +
  ggtitle("Cargos totales por Rotación") +
  xlab("Churn") +
  ylab("TotalCharges")
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Extra: box plot of tenure for each Churn level, filled by Churn.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = tenure, fill = Churn)
) +
  geom_boxplot() +
  ggtitle("Antigüedad (tenure) por Rotación") +
  xlab("Churn") +
  ylab("tenure (meses)")

# Extra: box plot of MonthlyCharges for each Contract type, filled by Contract.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Contract, y = MonthlyCharges, fill = Contract)
) +
  geom_boxplot() +
  ggtitle("Cargos mensuales por tipo de contrato") +
  xlab("Contract") +
  ylab("MonthlyCharges")

# Point 6 (stacked bars): Churn on the x axis, bars split by gender via fill.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, fill = gender)
) +
  geom_bar() +
  ggtitle("Barras apiladas: Churn por Género") +
  xlab("Churn") +
  ylab("Conteo") +
  labs(fill = "Género")

# Point 7: jittered scatter of tenure against MonthlyCharges with a fitted
# straight line (method "lm") drawn in red.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = tenure, y = MonthlyCharges)
) +
  geom_jitter() +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("tenure vs MonthlyCharges") +
  xlab("tenure (meses)") +
  ylab("MonthlyCharges")
## `geom_smooth()` using formula = 'y ~ x'

# Pearson correlation between tenure and MonthlyCharges, using only the
# observations where both values are present.
with(
  Telco_Cusomer_Churn,
  cor(x = tenure, y = MonthlyCharges, use = "pairwise.complete.obs")
)
## [1] 0.2478999
# Two-sided Pearson correlation test between tenure and MonthlyCharges.
# The columns are passed as `$` expressions because cor.test() echoes the
# argument expressions in the "data:" line of its report.
cor.test(
  x = Telco_Cusomer_Churn$tenure,
  y = Telco_Cusomer_Churn$MonthlyCharges,
  alternative = "two.sided",
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  Telco_Cusomer_Churn$tenure and Telco_Cusomer_Churn$MonthlyCharges
## t = 21.472, df = 7041, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2258523 0.2696936
## sample estimates:
##       cor 
## 0.2478999
# Simple linear model: MonthlyCharges explained by tenure.
modelo <- lm(
  formula = MonthlyCharges ~ tenure,
  data = Telco_Cusomer_Churn
)
# Fit report: R^2, p-values, etc.
summary(object = modelo)
## 
## Call:
## lm(formula = MonthlyCharges ~ tenure, data = Telco_Cusomer_Churn)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -57.498 -27.251   6.245  24.943  54.376 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 54.92978    0.57476   95.57   <2e-16 ***
## tenure       0.30372    0.01415   21.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.15 on 7041 degrees of freedom
## Multiple R-squared:  0.06145,    Adjusted R-squared:  0.06132 
## F-statistic:   461 on 1 and 7041 DF,  p-value: < 2.2e-16
# Fitted coefficients: intercept (beta0) and tenure slope (beta1).
coef(object = modelo)
## (Intercept)      tenure 
##  54.9297785   0.3037246
# Extra: jittered scatter of MonthlyCharges against TotalCharges with a fitted
# straight line (method "lm") drawn in blue.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = MonthlyCharges, y = TotalCharges)
) +
  geom_jitter() +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle("MonthlyCharges vs TotalCharges") +
  xlab("MonthlyCharges") +
  ylab("TotalCharges")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Extra: jittered scatter of tenure against TotalCharges with a fitted
# straight line (method "lm") drawn in tan.
ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = tenure, y = TotalCharges)
) +
  geom_jitter() +
  geom_smooth(method = "lm", color = "tan") +
  ggtitle("tenure vs TotalCharges") +
  xlab("tenure (meses)") +
  ylab("TotalCharges")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Point 8: build the four plots from points 4-7 as objects and lay them out
# together on a two-column grid.

# P4: box plot of MonthlyCharges for each Churn level.
p4 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = MonthlyCharges, fill = Churn)
) +
  geom_boxplot() +
  ggtitle("P4: MonthlyCharges ~ Churn") +
  xlab("Churn") +
  ylab("MonthlyCharges")

# P5: box plot of TotalCharges for each Churn level.
p5 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, y = TotalCharges, fill = Churn)
) +
  geom_boxplot() +
  ggtitle("P5: TotalCharges ~ Churn") +
  xlab("Churn") +
  ylab("TotalCharges")

# P6: stacked bar chart of Churn, split by gender.
p6 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = Churn, fill = gender)
) +
  geom_bar() +
  ggtitle("P6: Churn × Género") +
  xlab("Churn") +
  ylab("Conteo") +
  labs(fill = "Género")

# P7: jittered scatter of tenure against MonthlyCharges with a red "lm" line.
p7 <- ggplot(
  data = Telco_Cusomer_Churn,
  mapping = aes(x = tenure, y = MonthlyCharges)
) +
  geom_jitter() +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("P7: tenure vs MonthlyCharges") +
  xlab("tenure (meses)") +
  ylab("MonthlyCharges")

# Arrange the four plots in two columns.
gridExtra::grid.arrange(grobs = list(p4, p5, p6, p7), ncol = 2)
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## `geom_smooth()` using formula = 'y ~ x'