# UNIVERSIDAD NACIONAL DEL ALTIPLANO
# FACULTAD DE INGENIERIA ESTADISTICA E INFORMATICA
# ESTADISTICA BAYESIANA
# NAIVE BAYES

library(e1071)
## Warning: package 'e1071' was built under R version 4.0.5
library(naivebayes) 
## Warning: package 'naivebayes' was built under R version 4.0.5
## naivebayes 0.9.7 loaded
library(caret)  
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.0.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.2
# Load the customer data set from the Excel workbook into a data frame.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running the script on another computer.
NaiveBayes <- read_excel("E:/ESTADISTICA BAYESIANA/TAREA 05/NaiveBayes.xlsx")
# attach() was removed on purpose: no statement in this script reads the
# columns by bare name, and an attached copy would keep the original column
# names after the data frame is renamed further down.
# Show the column names exactly as they come from the workbook.
names(NaiveBayes)
## [1] "TienePlanInternacional"           "MinutosDias"                     
## [3] "LlamadasDias"                     "MinutosInternacionalesReclamos"  
## [5] "Reclamos"                         "LlamadasInternacionalesCancelada"
## [7] "Cancelada"
#View(NaiveBayes)

# Merge two previously loaded data frames (disabled, kept for reference)
#NB<- rbind(NaiveBayesTest, NaiveBayesTrain) 
# Remove the two source data frames (disabled, kept for reference)
#rm(churnTest,churnTrain)         
# Keep six of the seven columns, reordered; column 7 is left out. The
# selection happens before the rename below, so NB keeps the original names.
NB <- NaiveBayes[, c(2, 4, 5, 1, 3, 6)]
# Give the full data frame readable column names.
NaiveBayes <- setNames(
  NaiveBayes,
  c(
    "Tiene plan internacional",
    "Minutos/dia",
    "Llamadas/dia",
    "Minutos internacionales",
    "Reclamaciones",
    "Llamadas internacionales",
    "Cancelacion"
  )
)
# Preview the first six rows as a formatted table.
knitr::kable(head(NaiveBayes, n = 6L), caption = "Datos Cargados")
## Datos Cargados
## Tiene plan internacional | Minutos/dia | Llamadas/dia | Minutos internacionales | Reclamaciones | Llamadas internacionales | Cancelacion
## Si | 87 | 107 | 15 | 3 | 6 | Si
## No | 80 | 107 | 9 | 1 | 5 | Si
## No | 174 | 115 | 13 | 4 | 9 | No
## No | 133 | 122 | 10 | 2 | 7 | No
## Si | 199 | 119 | 14 | 3 | 7 | Si
## Si | 170 | 85 | 14 | 0 | 8 | No
# Fix the RNG seed so the random partition, and every result derived from it,
# can be reproduced from run to run.
set.seed(123)
# Draw a label for every row: 1 = training, 2 = test, with probabilities
# 0.7 and 0.3. Labels are drawn independently per row, so the realised
# split is only approximately 70% / 30%.
ind <- sample(2, nrow(NaiveBayes), replace = TRUE, prob = c(0.7, 0.3))
# Rows labelled 1 form the training set, rows labelled 2 the test set.
trainData <- NaiveBayes[ind == 1, ]
testData <- NaiveBayes[ind == 2, ]

# Fit a naive Bayes classifier for Cancelacion using every other column of
# the training set as a predictor. The call is namespace-qualified because
# both e1071 and naivebayes are attached; laplace = 0 is the default
# (no smoothing), written out for clarity.
mod <- e1071::naiveBayes(Cancelacion ~ ., data = trainData, laplace = 0)
# Display the fitted prior and conditional distributions.
mod
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##        No        Si 
## 0.6037736 0.3962264 
## 
## Conditional probabilities:
##     Tiene plan internacional
## Y           No        Si
##   No 0.4375000 0.5625000
##   Si 0.4285714 0.5714286
## 
##     Minutos/dia
## Y        [,1]     [,2]
##   No 151.8594 43.05400
##   Si 139.0714 42.07507
## 
##     Llamadas/dia
## Y        [,1]     [,2]
##   No 101.0156 15.14637
##   Si 105.0476 13.85456
## 
##     Minutos internacionales
## Y        [,1]     [,2]
##   No 11.34375 2.094731
##   Si 11.69048 2.454345
## 
##     Reclamaciones
## Y        [,1]     [,2]
##   No 2.671875 1.727965
##   Si 2.595238 1.697337
## 
##     Llamadas internacionales
## Y        [,1]     [,2]
##   No 5.906250 2.473470
##   Si 5.642857 2.195577
# Predict the class of every row in the held-out test set.
pred <- predict(mod, testData)
# Cross-tabulate predictions against the observed classes. caret's
# confusionMatrix() reads a table with predictions in ROWS and the reference
# (actual) classes in COLUMNS, so the predictions must be the first argument.
# With the axes the other way round, sensitivity/specificity, the predictive
# values and prevalence are reported for the wrong roles.
tab <- table(pred, testData$Cancelacion, dnn = c("Predicha", "Actual"))
# Accuracy and per-class statistics; caret treats the first class level as
# the positive class unless told otherwise.
confusionMatrix(tab)
## Confusion Matrix and Statistics
## 
##       Predicha
## Actual No Si
##     No 14  1
##     Si 26  6
##                                           
##                Accuracy : 0.4255          
##                  95% CI : (0.2826, 0.5782)
##     No Information Rate : 0.8511          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0838          
##                                           
##  Mcnemar's Test P-Value : 3.86e-06        
##                                           
##             Sensitivity : 0.3500          
##             Specificity : 0.8571          
##          Pos Pred Value : 0.9333          
##          Neg Pred Value : 0.1875          
##              Prevalence : 0.8511          
##          Detection Rate : 0.2979          
##    Detection Prevalence : 0.3191          
##       Balanced Accuracy : 0.6036          
##                                           
##        'Positive' Class : No              
##