Ejercicio 1 - Análisis exploratorio

#Análisis y preparación de los datos

library(ggplot2)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Adjuntando el paquete: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
churn_data= read.csv("555173.csv")
churn_data_limpio <- na.omit(churn_data)
summary(churn_data_limpio)
##   customerID           gender          SeniorCitizen      Partner         
##  Length:7032        Length:7032        Min.   :0.0000   Length:7032       
##  Class :character   Class :character   1st Qu.:0.0000   Class :character  
##  Mode  :character   Mode  :character   Median :0.0000   Mode  :character  
##                                        Mean   :0.1624                     
##                                        3rd Qu.:0.0000                     
##                                        Max.   :1.0000                     
##   Dependents            tenure      PhoneService       MultipleLines     
##  Length:7032        Min.   : 1.00   Length:7032        Length:7032       
##  Class :character   1st Qu.: 9.00   Class :character   Class :character  
##  Mode  :character   Median :29.00   Mode  :character   Mode  :character  
##                     Mean   :32.42                                        
##                     3rd Qu.:55.00                                        
##                     Max.   :72.00                                        
##  InternetService    OnlineSecurity     OnlineBackup       DeviceProtection  
##  Length:7032        Length:7032        Length:7032        Length:7032       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  TechSupport        StreamingTV        StreamingMovies      Contract        
##  Length:7032        Length:7032        Length:7032        Length:7032       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  PaperlessBilling   PaymentMethod      MonthlyCharges    TotalCharges   
##  Length:7032        Length:7032        Min.   : 18.25   Min.   :  18.8  
##  Class :character   Class :character   1st Qu.: 35.59   1st Qu.: 401.4  
##  Mode  :character   Mode  :character   Median : 70.35   Median :1397.5  
##                                        Mean   : 64.80   Mean   :2283.3  
##                                        3rd Qu.: 89.86   3rd Qu.:3794.7  
##                                        Max.   :118.75   Max.   :8684.8  
##     Churn          
##  Length:7032       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
str(churn_data_limpio)
## 'data.frame':    7032 obs. of  21 variables:
##  $ customerID      : chr  "7590-VHVEG" "5575-GNVDE" "3668-QPYBK" "7795-CFOCW" ...
##  $ gender          : chr  "Female" "Male" "Male" "Male" ...
##  $ SeniorCitizen   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Partner         : chr  "Yes" "No" "No" "No" ...
##  $ Dependents      : chr  "No" "No" "No" "No" ...
##  $ tenure          : int  1 34 2 45 2 8 22 10 28 62 ...
##  $ PhoneService    : chr  "No" "Yes" "Yes" "No" ...
##  $ MultipleLines   : chr  "No phone service" "No" "No" "No phone service" ...
##  $ InternetService : chr  "DSL" "DSL" "DSL" "DSL" ...
##  $ OnlineSecurity  : chr  "No" "Yes" "Yes" "Yes" ...
##  $ OnlineBackup    : chr  "Yes" "No" "Yes" "No" ...
##  $ DeviceProtection: chr  "No" "Yes" "No" "Yes" ...
##  $ TechSupport     : chr  "No" "No" "No" "Yes" ...
##  $ StreamingTV     : chr  "No" "No" "No" "No" ...
##  $ StreamingMovies : chr  "No" "No" "No" "No" ...
##  $ Contract        : chr  "Month-to-month" "One year" "Month-to-month" "One year" ...
##  $ PaperlessBilling: chr  "Yes" "No" "Yes" "No" ...
##  $ PaymentMethod   : chr  "Electronic check" "Mailed check" "Mailed check" "Bank transfer (automatic)" ...
##  $ MonthlyCharges  : num  29.9 57 53.9 42.3 70.7 ...
##  $ TotalCharges    : num  29.9 1889.5 108.2 1840.8 151.7 ...
##  $ Churn           : chr  "No" "No" "Yes" "No" ...
##  - attr(*, "na.action")= 'omit' Named int [1:11] 489 754 937 1083 1341 3332 3827 4381 5219 6671 ...
##   ..- attr(*, "names")= chr [1:11] "489" "754" "937" "1083" ...
churn_data_limpio$SeniorCitizen <- replace(churn_data_limpio$SeniorCitizen, churn_data_limpio$SeniorCitizen == 0, "No")
churn_data_limpio$SeniorCitizen <- replace(churn_data_limpio$SeniorCitizen, churn_data_limpio$SeniorCitizen == 1, "Yes")
variables_cualitativas <- c("gender", "SeniorCitizen", "Partner", "Dependents", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "Churn")

variables_cuantitativas<- c("tenure", "MonthlyCharges", "TotalCharges")

churn_data_limpio[variables_cualitativas] <- lapply(churn_data_limpio[variables_cualitativas], factor)
str(churn_data_limpio[variables_cualitativas])
## 'data.frame':    7032 obs. of  17 variables:
##  $ gender          : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 1 2 1 1 2 ...
##  $ SeniorCitizen   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Partner         : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 2 1 ...
##  $ Dependents      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 2 ...
##  $ PhoneService    : Factor w/ 2 levels "No","Yes": 1 2 2 1 2 2 2 1 2 2 ...
##  $ MultipleLines   : Factor w/ 3 levels "No","No phone service",..: 2 1 1 2 1 3 3 2 3 1 ...
##  $ InternetService : Factor w/ 3 levels "DSL","Fiber optic",..: 1 1 1 1 2 2 2 1 2 1 ...
##  $ OnlineSecurity  : Factor w/ 3 levels "No","No internet service",..: 1 3 3 3 1 1 1 3 1 3 ...
##  $ OnlineBackup    : Factor w/ 3 levels "No","No internet service",..: 3 1 3 1 1 1 3 1 1 3 ...
##  $ DeviceProtection: Factor w/ 3 levels "No","No internet service",..: 1 3 1 3 1 3 1 1 3 1 ...
##  $ TechSupport     : Factor w/ 3 levels "No","No internet service",..: 1 1 1 3 1 1 1 1 3 1 ...
##  $ StreamingTV     : Factor w/ 3 levels "No","No internet service",..: 1 1 1 1 1 3 3 1 3 1 ...
##  $ StreamingMovies : Factor w/ 3 levels "No","No internet service",..: 1 1 1 1 1 3 1 1 3 1 ...
##  $ Contract        : Factor w/ 3 levels "Month-to-month",..: 1 2 1 2 1 1 1 1 1 2 ...
##  $ PaperlessBilling: Factor w/ 2 levels "No","Yes": 2 1 2 1 2 2 2 1 2 1 ...
##  $ PaymentMethod   : Factor w/ 4 levels "Bank transfer (automatic)",..: 3 4 4 1 3 3 2 4 3 1 ...
##  $ Churn           : Factor w/ 2 levels "No","Yes": 1 1 2 1 2 2 1 1 2 1 ...
clean_data <- subset(churn_data_limpio, select = -customerID)
str(clean_data)
## 'data.frame':    7032 obs. of  20 variables:
##  $ gender          : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 1 2 1 1 2 ...
##  $ SeniorCitizen   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Partner         : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 2 1 ...
##  $ Dependents      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 2 ...
##  $ tenure          : int  1 34 2 45 2 8 22 10 28 62 ...
##  $ PhoneService    : Factor w/ 2 levels "No","Yes": 1 2 2 1 2 2 2 1 2 2 ...
##  $ MultipleLines   : Factor w/ 3 levels "No","No phone service",..: 2 1 1 2 1 3 3 2 3 1 ...
##  $ InternetService : Factor w/ 3 levels "DSL","Fiber optic",..: 1 1 1 1 2 2 2 1 2 1 ...
##  $ OnlineSecurity  : Factor w/ 3 levels "No","No internet service",..: 1 3 3 3 1 1 1 3 1 3 ...
##  $ OnlineBackup    : Factor w/ 3 levels "No","No internet service",..: 3 1 3 1 1 1 3 1 1 3 ...
##  $ DeviceProtection: Factor w/ 3 levels "No","No internet service",..: 1 3 1 3 1 3 1 1 3 1 ...
##  $ TechSupport     : Factor w/ 3 levels "No","No internet service",..: 1 1 1 3 1 1 1 1 3 1 ...
##  $ StreamingTV     : Factor w/ 3 levels "No","No internet service",..: 1 1 1 1 1 3 3 1 3 1 ...
##  $ StreamingMovies : Factor w/ 3 levels "No","No internet service",..: 1 1 1 1 1 3 1 1 3 1 ...
##  $ Contract        : Factor w/ 3 levels "Month-to-month",..: 1 2 1 2 1 1 1 1 1 2 ...
##  $ PaperlessBilling: Factor w/ 2 levels "No","Yes": 2 1 2 1 2 2 2 1 2 1 ...
##  $ PaymentMethod   : Factor w/ 4 levels "Bank transfer (automatic)",..: 3 4 4 1 3 3 2 4 3 1 ...
##  $ MonthlyCharges  : num  29.9 57 53.9 42.3 70.7 ...
##  $ TotalCharges    : num  29.9 1889.5 108.2 1840.8 151.7 ...
##  $ Churn           : Factor w/ 2 levels "No","Yes": 1 1 2 1 2 2 1 1 2 1 ...

#Diagramas e histogramas

for (var in variables_cualitativas) {
  if (var %in% names(clean_data)) {
    print(
      ggplot(clean_data, aes_string(x = var)) +
        geom_bar(fill = "steelblue") +
        labs(title = paste("Frecuencia de", var), x = var, y = "Frecuencia") +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

for (var in variables_cuantitativas) {
  if (var %in% names(clean_data)) {
    print(
      ggplot(clean_data, aes_string(x = var)) +
        geom_histogram(binwidth = 5, fill = "darkorange", color = "black") +
        labs(title = paste("Histograma de", var), x = var, y = "Frecuencia") +
        theme_minimal()
    )
  }
}

#Diagramas e histrogramas en función del Churn

for (var in variables_cualitativas) {
  if (var %in% names(churn_data_limpio) && var != "Churn") {
    print(
      ggplot(churn_data_limpio, aes_string(x = var, fill = "Churn")) +
        geom_bar(position = "dodge") +
        labs(title = paste("Frecuencia de", var, "según Churn"), x = var, y = "Frecuencia") +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }
}

for (var in variables_cuantitativas) {
  if (var %in% names(churn_data_limpio)) {
    print(
      ggplot(churn_data_limpio, aes_string(x = var, fill = "Churn")) +
        geom_histogram(position = "identity", alpha = 0.6, bins = 30) +
        labs(title = paste("Histograma de", var, "según Churn"), x = var, y = "Frecuencia") +
        theme_minimal()
    )
  }
}

#Boxplots

for (var in variables_cualitativas) {
  if (var %in% names(churn_data_limpio) && var != "MonthlyCharges" && var != "Churn") {
    print(
      ggplot(churn_data_limpio, aes_string(x = var, y = "MonthlyCharges")) +
        geom_boxplot(fill = "lightblue", outlier.color = "red", outlier.shape = 1) +
        labs(
          title = paste("Distribución de MonthlyCharges según", var),
          x = var,
          y = "MonthlyCharges"
        ) +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }
}

library(ggpubr)
var_par <- combn(variables_cuantitativas, 2, simplify = FALSE)

# Crear gráfico de dispersión para cada par
for (par in var_par) {
  x <- par[1]
  y <- par[2]
  
  if (x %in% names(churn_data_limpio) && y %in% names(churn_data_limpio)) {
    print(
      ggplot(churn_data_limpio, aes_string(x = x, y = y)) +
        geom_point(alpha = 0.6, color = "steelblue") +
        stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") +
        labs(title = paste("Dispersión entre", x, "y", y),
             x = x, y = y) +
        theme_minimal()
    )
  }
}

Ejercicio 2 - Modelo de regresión

churn_train1 <- clean_data[1:5000, ]
churn_test1 <- clean_data[5001:nrow(clean_data),]

#Diseño del modelo de regresión

lmodel <- lm(MonthlyCharges ~ . -TotalCharges -Churn, data= churn_train1)
summary(lmodel)
## 
## Call:
## lm(formula = MonthlyCharges ~ . - TotalCharges - Churn, data = churn_train1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1927 -0.5992  0.0026  0.6025  4.8593 
## 
## Coefficients: (7 not defined because of singularities)
##                                        Estimate Std. Error  t value Pr(>|t|)
## (Intercept)                           2.493e+01  6.918e-02  360.357   <2e-16
## genderMale                            7.461e-03  2.886e-02    0.259    0.796
## SeniorCitizenYes                      1.183e-03  4.200e-02    0.028    0.978
## PartnerYes                           -1.705e-02  3.518e-02   -0.485    0.628
## DependentsYes                         3.445e-02  3.710e-02    0.928    0.353
## tenure                                4.699e-04  1.002e-03    0.469    0.639
## PhoneServiceYes                       2.007e+01  5.755e-02  348.781   <2e-16
## MultipleLinesNo phone service                NA         NA       NA       NA
## MultipleLinesYes                      5.005e+00  3.460e-02  144.665   <2e-16
## InternetServiceFiber optic            2.498e+01  4.087e-02  611.186   <2e-16
## InternetServiceNo                    -2.505e+01  5.693e-02 -439.966   <2e-16
## OnlineSecurityNo internet service            NA         NA       NA       NA
## OnlineSecurityYes                     4.995e+00  3.811e-02  131.085   <2e-16
## OnlineBackupNo internet service              NA         NA       NA       NA
## OnlineBackupYes                       5.010e+00  3.576e-02  140.096   <2e-16
## DeviceProtectionNo internet service          NA         NA       NA       NA
## DeviceProtectionYes                   5.024e+00  3.700e-02  135.786   <2e-16
## TechSupportNo internet service               NA         NA       NA       NA
## TechSupportYes                        5.025e+00  3.899e-02  128.865   <2e-16
## StreamingTVNo internet service               NA         NA       NA       NA
## StreamingTVYes                        9.944e+00  3.805e-02  261.318   <2e-16
## StreamingMoviesNo internet service           NA         NA       NA       NA
## StreamingMoviesYes                    9.986e+00  3.798e-02  262.944   <2e-16
## ContractOne year                      7.533e-03  4.540e-02    0.166    0.868
## ContractTwo year                     -1.692e-04  5.533e-02   -0.003    0.998
## PaperlessBillingYes                  -1.526e-03  3.233e-02   -0.047    0.962
## PaymentMethodCredit card (automatic) -6.455e-02  4.352e-02   -1.483    0.138
## PaymentMethodElectronic check        -1.711e-02  4.284e-02   -0.399    0.690
## PaymentMethodMailed check            -1.150e-02  4.676e-02   -0.246    0.806
##                                         
## (Intercept)                          ***
## genderMale                              
## SeniorCitizenYes                        
## PartnerYes                              
## DependentsYes                           
## tenure                                  
## PhoneServiceYes                      ***
## MultipleLinesNo phone service           
## MultipleLinesYes                     ***
## InternetServiceFiber optic           ***
## InternetServiceNo                    ***
## OnlineSecurityNo internet service       
## OnlineSecurityYes                    ***
## OnlineBackupNo internet service         
## OnlineBackupYes                      ***
## DeviceProtectionNo internet service     
## DeviceProtectionYes                  ***
## TechSupportNo internet service          
## TechSupportYes                       ***
## StreamingTVNo internet service          
## StreamingTVYes                       ***
## StreamingMoviesNo internet service      
## StreamingMoviesYes                   ***
## ContractOne year                        
## ContractTwo year                        
## PaperlessBillingYes                     
## PaymentMethodCredit card (automatic)    
## PaymentMethodElectronic check           
## PaymentMethodMailed check               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.018 on 4978 degrees of freedom
## Multiple R-squared:  0.9989, Adjusted R-squared:  0.9989 
## F-statistic: 2.081e+05 on 21 and 4978 DF,  p-value: < 2.2e-16
predicciones <- predict(lmodel, newdata = churn_test1)

rmse <- sqrt(mean((churn_test1$MonthlyCharges - predicciones)^2))
print(rmse)
## [1] 1.044388

##Ejercicio 3 - Modelo de Clasificación

clean_data2 <- subset(clean_data, select = -TotalCharges )
churn_train2 <- clean_data2[1:5000, ]
churn_test2 <- clean_data2[5001:nrow (clean_data2), ]

Modelo de clasificación 1

clean_data2$Churn <- as.factor(clean_data2$Churn)
clean_data2[sapply(clean_data2, is.character)] <- lapply(clean_data2[sapply(clean_data2, is.character)], as.factor)
set.seed(123)
gmodel1 <- randomForest(Churn ~ ., data = churn_train2, importance = TRUE)
prob_preds <- predict(gmodel1, newdata = churn_test2, type = "prob")
prob_yes <- prob_preds[, "Yes"]
churn_pred <- ifelse(prob_yes > 0.5, "Yes", "No")
churn_pred <- factor(churn_pred, levels = c("No", "Yes"))
mc <- table(Predicted = churn_pred, Actual = churn_test2$Churn)
TN <- mc["No", "No"]
FP <- mc["Yes", "No"]
FN <- mc["No", "Yes"]
TP <- mc["Yes", "Yes"]
accuracy <- (TP + TN) / (TP + TN + FP + FN)
fpr <- FP / (FP + TN)
fnr <- FN / (FN + TP)
tpr <- TP / (TP + FN)
cat("Accuracy:", round(accuracy, 4), "\n")
## Accuracy: 0.7918
cat("FPR:", round(fpr, 4), "\n")
## FPR: 0.1034
cat("FNR:", round(fnr, 4), "\n")
## FNR: 0.4891
cat("TPR:", round(tpr, 4), "\n")
## TPR: 0.5109

Modelo de clasificación 2

gmodel2 <- randomForest(Churn ~ Contract + tenure + InternetService, data = churn_train2, importance = TRUE)
prob_preds2 <- predict(gmodel2, newdata = churn_test2, type = "prob")
prob_yes2 <- prob_preds2[, "Yes"]
churn_pred2 <- ifelse(prob_yes2 > 0.5, "Yes", "No")
churn_pred2 <- factor(churn_pred2, levels = c("No", "Yes"))
mc2 <- table(Predicted = churn_pred2, Actual = churn_test2$Churn)
TN2 <- mc2["No", "No"]
FP2 <- mc2["Yes", "No"]
FN2 <- mc2["No", "Yes"]
TP2 <- mc2["Yes", "Yes"]

accuracy2 <- (TP2 + TN2) / (TP2 + TN2 + FP2 + FN2)
fpr2 <- FP2 / (FP2 + TN2)
fnr2 <- FN2 / (FN2 + TP2)
tpr2 <- TP2 / (TP2 + FN2)
cat("Accuracy:", round(accuracy2, 4), "\n")
## Accuracy: 0.7904
cat("FPR:", round(fpr2, 4), "\n")
## FPR: 0.0595
cat("FNR:", round(fnr2, 4), "\n")
## FNR: 0.6123
cat("TPR:", round(tpr2, 4), "\n")
## TPR: 0.3877

Ejercicio 4 - Simulación de campaña

gmodel3 <- randomForest(Churn ~ ., data = churn_train2, importance = TRUE)
clean_data2$Churn <- as.factor(clean_data2$Churn)
probabilidades <- predict(gmodel3, newdata = churn_test2, type = "prob")
churn_pred <- ifelse(probabilidades[, "Yes"] > 0.5, "Yes", "No")
churn_pred <- factor(churn_pred, levels = c("No", "Yes"))
mc2 <- table(Predicted = churn_pred, Actual = churn_test2$Churn)
print(mc2)
##          Actual
## Predicted   No  Yes
##       No  1332  268
##       Yes  148  284

#Escenario 1

CT <- 200
AR <- 0.4
R <- 500
umbrales <- seq(0,1, by = 0.1)

real <- churn_test2$Churn
beneficios <- numeric(length(umbrales)) 

for (i in seq_along(umbrales)) {
  umbral <- umbrales[i]
pred_churn <- ifelse(probabilidades[, "Yes"] > umbral, "Yes", "No")
pred_churn <- factor(pred_churn, levels = c("No", "Yes"))
mc <- table(Predicted = pred_churn, Actual = real)
TP <- ifelse(!is.na(mc["Yes", "Yes"]), mc["Yes", "Yes"], 0)
FP <- ifelse(!is.na(mc["Yes", "No"]), mc["Yes", "No"], 0)
FN <- ifelse(!is.na(mc["No", "Yes"]), mc["No", "Yes"], 0)
resultado_con <- -FP * AR * CT - TP * AR * CT - TP * (1 - AR) * R - FN * R
resultado_sin <- -(FN + TP) * R
beneficio <- resultado_con - resultado_sin
beneficios[i] <- beneficio
}

resultados <- data.frame(Umbral = umbrales, Beneficio = beneficios)
optimo <- resultados[which.max(resultados$Beneficio), ]
print(resultados)
##    Umbral Beneficio
## 1     0.0    -44200
## 2     0.1       240
## 3     0.2     14120
## 4     0.3     20800
## 5     0.4     22280
## 6     0.5     22240
## 7     0.6     20240
## 8     0.7     17880
## 9     0.8     11000
## 10    0.9      4760
## 11    1.0         0
cat("\n✅ Umbral óptimo:", optimo$Umbral, "\n💶 Beneficio máximo:", round(optimo$Beneficio, 2), "€\n")
## 
## ✅ Umbral óptimo: 0.4 
## 💶 Beneficio máximo: 22280 €

#Escenario 2

CT <- 400
AR <- 0.8
R <- 500
umbrales <-  seq(0,1, by = 0.1)

real <- churn_test2$Churn
beneficios <- numeric(length(umbrales)) 

for (i in seq_along(umbrales)) {
  umbral <- umbrales[i]
pred_churn <- ifelse(probabilidades[, "Yes"] > umbral, "Yes", "No")
pred_churn <- factor(pred_churn, levels = c("No", "Yes"))
mc <- table(Predicted = pred_churn, Actual = real)
TP <- ifelse(!is.na(mc["Yes", "Yes"]), mc["Yes", "Yes"], 0)
FP <- ifelse(!is.na(mc["Yes", "No"]), mc["Yes", "No"], 0)
FN <- ifelse(!is.na(mc["No", "Yes"]), mc["No", "Yes"], 0)
resultado_con <- -FP * AR * CT - TP * AR * CT - TP * (1 - AR) * R - FN * R
resultado_sin <- -(FN + TP) * R
beneficio <- resultado_con - resultado_sin
beneficios[i] <- beneficio
}

resultados <- data.frame(Umbral = umbrales, Beneficio = beneficios)
optimo <- resultados[which.max(resultados$Beneficio), ]
print(resultados)
##    Umbral Beneficio
## 1     0.0   -397200
## 2     0.1   -199040
## 3     0.2   -122320
## 4     0.3    -76000
## 5     0.4    -48080
## 6     0.5    -24640
## 7     0.6    -11040
## 8     0.7     -1680
## 9     0.8       400
## 10    0.9      1040
## 11    1.0         0
cat("\n✅ Umbral óptimo:", optimo$Umbral, "\n💶 Beneficio máximo:", round(optimo$Beneficio, 2), "€\n")
## 
## ✅ Umbral óptimo: 0.9 
## 💶 Beneficio máximo: 1040 €