Paso 1: Cargar librerías y set de datos

#install.packages("naivebayes")
library(e1071)      
## Warning: package 'e1071' was built under R version 3.5.3
library(naivebayes) 
## Warning: package 'naivebayes' was built under R version 3.5.3
library(caret)       
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
library(C50)        # Package that provides the churn dataset; also a decision-tree algorithm
## Warning: package 'C50' was built under R version 3.5.3
data(churn)         # Telephone-operator dataset; makes churnTrain and churnTest available (used below)

Paso 2: Preparar set de datos

# Stack the two data frames loaded above into a single one (test rows first).
churn <- rbind(churnTest, churnTrain)
# The separate data frames are no longer needed once combined.
rm(list = c("churnTest", "churnTrain"))
# Keep only the seven columns used in the analysis, selected by position.
churn <- churn[, c(4, 7, 8, 16, 19, 17, 20)]
# Give the kept columns descriptive names, in the same order as the selection.
colnames(churn) <- c(
  "Tiene plan internacional",
  "Minutos/dia",
  "Llamadas/dia",
  "Minutos internacionales",
  "Reclamaciones",
  "Llamadas internacionales",
  "Cancelacion"
)
# Show the first rows of the prepared data as a formatted table.
knitr::kable(head(churn), caption = "Datos Cargados")
Datos Cargados
Tiene plan internacional Minutos/dia Llamadas/dia Minutos internacionales Reclamaciones Llamadas internacionales Cancelacion
no 70.9 123 10.6 3 3 no
no 223.6 86 9.5 0 7 no
no 294.7 95 13.7 1 6 no
no 216.8 123 15.7 1 2 no
no 197.4 78 7.7 2 4 no
no 226.5 85 6.9 1 5 no

Paso 3: Crear set de entrenamiento y de test

Se divide el set en 70% entrenamiento y 30% test

# Fix the RNG seed so the random partition, and every result derived from it,
# is reproducible from run to run. The seed value itself is arbitrary.
set.seed(123)
# Draw a group label (1 or 2) for every row with probabilities 0.7 / 0.3:
# roughly 70% of rows go to training and 30% to test. Because each row is
# assigned independently, the realised split is approximate, not exact.
ind <- sample(2, nrow(churn), replace = TRUE, prob = c(0.7, 0.3))
# Rows labelled 1 form the training set; rows labelled 2 form the test set.
trainData <- churn[ind == 1, ]
testData <- churn[ind == 2, ]

Paso 4: Crear modelo de Naive Bayes

# Fit a Naive Bayes classifier that predicts Cancelacion from all the
# remaining columns of the training set.
mod <- e1071::naiveBayes(Cancelacion ~ ., data = trainData)
# Display the fitted model (a-priori and conditional probability tables).
print(mod)
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##       yes        no 
## 0.1446233 0.8553767 
## 
## Conditional probabilities:
##      Tiene plan internacional
## Y             no        yes
##   yes 0.72962227 0.27037773
##   no  0.93781513 0.06218487
## 
##      Minutos/dia
## Y         [,1]     [,2]
##   yes 207.7773 68.98522
##   no  176.3009 48.92913
## 
##      Llamadas/dia
## Y          [,1]     [,2]
##   yes 100.60040 20.53161
##   no   99.96975 19.48133
## 
##      Minutos internacionales
## Y         [,1]     [,2]
##   yes 10.63419 2.741845
##   no  10.18571 2.799832
## 
##      Reclamaciones
## Y         [,1]     [,2]
##   yes 2.210736 1.791782
##   no  1.470588 1.155551
## 
##      Llamadas internacionales
## Y         [,1]     [,2]
##   yes 4.172962 2.624567
##   no  4.434958 2.392487

Paso 5: Probar modelo

# Predict the class of every observation in the test set with the fitted model.
pred <- predict(mod, testData)
# Cross-tabulate predictions against the actual classes. caret's
# confusionMatrix() expects PREDICTIONS in rows and the REFERENCE (actual)
# classes in columns; with the axes the other way round, sensitivity and
# positive predictive value are swapped, as are specificity and negative
# predictive value, and prevalence / no-information rate are computed from the
# predicted rather than the actual class frequencies.
tab <- table(pred, testData$Cancelacion, dnn = c("Predicha", "Actual"))
# Report the metrics with "yes" (the customer cancels) as the positive class.
confusionMatrix(tab, positive = "yes")
## Confusion Matrix and Statistics
## 
##       Predicha
## Actual  yes   no
##    yes   54  150
##    no    37 1281
##                                           
##                Accuracy : 0.8771          
##                  95% CI : (0.8596, 0.8932)
##     No Information Rate : 0.9402          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.309           
##  Mcnemar's Test P-Value : 2.607e-16       
##                                           
##             Sensitivity : 0.59341         
##             Specificity : 0.89518         
##          Pos Pred Value : 0.26471         
##          Neg Pred Value : 0.97193         
##              Prevalence : 0.05979         
##          Detection Rate : 0.03548         
##    Detection Prevalence : 0.13403         
##       Balanced Accuracy : 0.74429         
##                                           
##        'Positive' Class : yes             
##