# One-time setup, if the package is missing: install.packages("naivebayes")
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.3
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.5.3
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
library(C50) # Package that ships the churn dataset (and a decision-tree algorithm)
## Warning: package 'C50' was built under R version 3.5.3
data(churn) # Telephone-operator dataset; the code below uses churnTest and churnTrain from it
# Stack both loaded data frames into a single one (test rows first).
churn <- rbind(churnTest, churnTrain)
# The two source data frames are no longer needed.
rm(list = c("churnTest", "churnTrain"))
# Keep only the columns used in the analysis (by position, in this order)
# and give them their display names in one step.
churn <- setNames(
  churn[, c(4, 7, 8, 16, 19, 17, 20)],
  c(
    "Tiene plan internacional",
    "Minutos/dia",
    "Llamadas/dia",
    "Minutos internacionales",
    "Reclamaciones",
    "Llamadas internacionales",
    "Cancelacion"
  )
)
# Render the first rows as a captioned table.
knitr::kable(head(churn), caption = "Datos Cargados")
| Tiene plan internacional | Minutos/dia | Llamadas/dia | Minutos internacionales | Reclamaciones | Llamadas internacionales | Cancelacion |
|---|---|---|---|---|---|---|
| no | 70.9 | 123 | 10.6 | 3 | 3 | no |
| no | 223.6 | 86 | 9.5 | 0 | 7 | no |
| no | 294.7 | 95 | 13.7 | 1 | 6 | no |
| no | 216.8 | 123 | 15.7 | 1 | 2 | no |
| no | 197.4 | 78 | 7.7 | 2 | 4 | no |
| no | 226.5 | 85 | 6.9 | 1 | 5 | no |
Se divide el set en aproximadamente 70% entrenamiento y 30% test.
# Fix the RNG seed so the random train/test partition (and therefore every
# number reported afterwards) can be reproduced on a re-run.
set.seed(123)
# Draw a group label per row: 1 (training) with probability 0.7, 2 (test) with
# probability 0.3. Rows are assigned independently, so the resulting split is
# approximately, not exactly, 70% / 30%.
ind <- sample(2, nrow(churn), replace = TRUE, prob = c(0.7, 0.3))
trainData <- churn[ind == 1, ]
testData <- churn[ind == 2, ]
# Fit a naive Bayes classifier that predicts Cancelacion from all remaining
# columns, using the training rows only.
mod <- naiveBayes(Cancelacion ~ ., data = trainData)
# Print the fitted model (a-priori and conditional probabilities).
mod
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## yes no
## 0.1446233 0.8553767
##
## Conditional probabilities:
## Tiene plan internacional
## Y no yes
## yes 0.72962227 0.27037773
## no 0.93781513 0.06218487
##
## Minutos/dia
## Y [,1] [,2]
## yes 207.7773 68.98522
## no 176.3009 48.92913
##
## Llamadas/dia
## Y [,1] [,2]
## yes 100.60040 20.53161
## no 99.96975 19.48133
##
## Minutos internacionales
## Y [,1] [,2]
## yes 10.63419 2.741845
## no 10.18571 2.799832
##
## Reclamaciones
## Y [,1] [,2]
## yes 2.210736 1.791782
## no 1.470588 1.155551
##
## Llamadas internacionales
## Y [,1] [,2]
## yes 4.172962 2.624567
## no 4.434958 2.392487
# Predict the class of every test row with the fitted model.
pred <- predict(mod, testData)
# Cross-tabulate predictions against the true labels. caret::confusionMatrix()
# interprets a table's ROWS as predictions and its COLUMNS as the reference
# (true) classes, so the predicted classes must go first; with the opposite
# orientation sensitivity/specificity, PPV/NPV and prevalence come out swapped.
tab <- table(pred, testData$Cancelacion, dnn = c("Predicha", "Actual"))
# Report accuracy and per-class statistics (positive class: first level).
confusionMatrix(tab)
## Confusion Matrix and Statistics
##
## Predicha
## Actual yes no
## yes 54 150
## no 37 1281
##
## Accuracy : 0.8771
## 95% CI : (0.8596, 0.8932)
## No Information Rate : 0.9402
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.309
## Mcnemar's Test P-Value : 2.607e-16
##
## Sensitivity : 0.59341
## Specificity : 0.89518
## Pos Pred Value : 0.26471
## Neg Pred Value : 0.97193
## Prevalence : 0.05979
## Detection Rate : 0.03548
## Detection Prevalence : 0.13403
## Balanced Accuracy : 0.74429
##
## 'Positive' Class : yes
##