# UNIVERSIDAD NACIONAL DEL ALTIPLANO
# FACULTAD DE INGENIERIA ESTADISTICA E INFORMATICA
# ESTADISTICA BAYESIANA
# NAIVE BAYES
library(e1071)
## Warning: package 'e1071' was built under R version 4.0.5
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 4.0.5
## naivebayes 0.9.7 loaded
library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.0.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.2
# Load the customer data set from the Excel workbook.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where the file exists at exactly this location.
NaiveBayes <- read_excel("E:/ESTADISTICA BAYESIANA/TAREA 05/NaiveBayes.xlsx")
# attach() was removed on purpose: every column is accessed through the data
# frame itself, and an attached copy is not updated when the data frame is
# modified afterwards (e.g. when its columns are renamed), which invites
# silent mix-ups between stale and current data.
# Show the column names exactly as they were read from the workbook.
names(NaiveBayes)
## [1] "TienePlanInternacional" "MinutosDias"
## [3] "LlamadasDias" "MinutosInternacionalesReclamos"
## [5] "Reclamos" "LlamadasInternacionalesCancelada"
## [7] "Cancelada"
# View(NaiveBayes)
# Combine two loaded data frames
# NB <- rbind(NaiveBayesTest, NaiveBayesTrain)
# Remove the two data frames
# rm(churnTest, churnTrain)
# Pick, by position, the columns to work with and keep them in NB
NB <- NaiveBayes[c(2, 4, 5, 1, 3, 6)]
# Replace the column names of the full data set with readable labels
NaiveBayes <- setNames(
  NaiveBayes,
  c(
    "Tiene plan internacional",
    "Minutos/dia",
    "Llamadas/dia",
    "Minutos internacionales",
    "Reclamaciones",
    "Llamadas internacionales",
    "Cancelacion"
  )
)
# Render the first six rows as a captioned table
knitr::kable(head(NaiveBayes, n = 6L), caption = "Datos Cargados")
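## Table: Datos Cargados
##
## | Tiene plan internacional | Minutos/dia | Llamadas/dia | Minutos internacionales | Reclamaciones | Llamadas internacionales | Cancelacion |
## |:-------------------------|------------:|-------------:|------------------------:|--------------:|-------------------------:|:------------|
## | Si                       |          87 |          107 |                      15 |             3 |                        6 | Si          |
## | No                       |          80 |          107 |                       9 |             1 |                        5 | Si          |
## | No                       |         174 |          115 |                      13 |             4 |                        9 | No          |
## | No                       |         133 |          122 |                      10 |             2 |                        7 | No          |
## | Si                       |         199 |          119 |                      14 |             3 |                        7 | Si          |
## | Si                       |         170 |           85 |                      14 |             0 |                        8 | No          |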
# Fix the RNG seed so the random train/test partition, and every figure
# derived from it, can be reproduced. The seed value itself is arbitrary.
# Output recorded after this block came from an earlier, unseeded run and
# will not match a fresh run exactly.
set.seed(123)
# Assign each row independently to group 1 (training, probability 0.7) or
# group 2 (test, probability 0.3). Because the draw is per row, the realised
# split is only approximately 70% / 30%.
ind <- sample(2, nrow(NaiveBayes), replace = TRUE, prob = c(0.7, 0.3))
trainData <- NaiveBayes[ind == 1, ]
testData <- NaiveBayes[ind == 2, ]
# Fit a naive Bayes classifier for Cancelacion using all other columns as
# predictors.
mod <- naiveBayes(Cancelacion ~ ., data = trainData)
# Print the fitted model (a-priori and conditional probability tables).
mod
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## No Si
## 0.6037736 0.3962264
##
## Conditional probabilities:
## Tiene plan internacional
## Y No Si
## No 0.4375000 0.5625000
## Si 0.4285714 0.5714286
##
## Minutos/dia
## Y [,1] [,2]
## No 151.8594 43.05400
## Si 139.0714 42.07507
##
## Llamadas/dia
## Y [,1] [,2]
## No 101.0156 15.14637
## Si 105.0476 13.85456
##
## Minutos internacionales
## Y [,1] [,2]
## No 11.34375 2.094731
## Si 11.69048 2.454345
##
## Reclamaciones
## Y [,1] [,2]
## No 2.671875 1.727965
## Si 2.595238 1.697337
##
## Llamadas internacionales
## Y [,1] [,2]
## No 5.906250 2.473470
## Si 5.642857 2.195577
# Predict the class of every test row with the fitted model.
pred <- predict(mod, testData)
# confusionMatrix() interprets a table as predictions in ROWS and reference
# (observed) classes in COLUMNS. The table was previously built the other way
# round, which swaps the roles of prediction and truth in sensitivity,
# specificity, predictive values, prevalence and the no-information rate.
# The observed labels are converted to a factor with the same levels as the
# predictions so the table stays square even if a class is absent from the
# test set.
# Output recorded after this block was produced with the transposed table.
# NOTE(review): the first level ("No") is used as the positive class; pass
# positive = "Si" if cancellation is the event of interest - confirm.
tab <- table(
  pred,
  factor(testData$Cancelacion, levels = levels(pred)),
  dnn = c("Predicha", "Actual")
)
confusionMatrix(tab)
## Confusion Matrix and Statistics
##
## Predicha
## Actual No Si
## No 14 1
## Si 26 6
##
## Accuracy : 0.4255
## 95% CI : (0.2826, 0.5782)
## No Information Rate : 0.8511
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0838
##
## Mcnemar's Test P-Value : 3.86e-06
##
## Sensitivity : 0.3500
## Specificity : 0.8571
## Pos Pred Value : 0.9333
## Neg Pred Value : 0.1875
## Prevalence : 0.8511
## Detection Rate : 0.2979
## Detection Prevalence : 0.3191
## Balanced Accuracy : 0.6036
##
## 'Positive' Class : No
##