El paquete CARET (Classification And REgression Training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático. # Instalar paquetes y llamar librerías
library(ggplot2)
library(lattice)
library(caret)
library(datasets)
library(DataExplorer)
df <- read.csv("C:\\Users\\Emili\\Downloads\\M1_data.csv", stringsAsFactors = TRUE)
summary(df)
## trust_apple interest_computers age_computer user_pcmac appleproducts_count
## No : 19 Min. :2.000 Min. :0.000 Apple:86 Min. :0.000
## Yes:114 1st Qu.:3.000 1st Qu.:1.000 Hp : 1 1st Qu.:1.000
## Median :4.000 Median :3.000 Other: 1 Median :3.000
## Mean :3.812 Mean :2.827 PC :45 Mean :2.609
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :9.000 Max. :8.000
##
## familiarity_m1 f_batterylife f_price f_size f_multitasking
## No :75 Min. :1.000 Min. :1.000 Min. :1.000 Min. :2.00
## Yes:58 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:4.00
## Median :5.000 Median :4.000 Median :3.000 Median :4.00
## Mean :4.526 Mean :3.872 Mean :3.158 Mean :4.12
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.00
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00
##
## f_noise f_performance f_neural f_synergy
## Min. :1.000 Min. :2.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:3.000
## Median :4.000 Median :5.000 Median :3.000 Median :4.000
## Mean :3.729 Mean :4.398 Mean :3.165 Mean :3.466
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
##
## f_performanceloss m1_consideration m1_purchase gender age_group
## Min. :1.000 Min. :1.000 No :45 Female:61 Min. : 1.00
## 1st Qu.:3.000 1st Qu.:3.000 Yes:88 Male :72 1st Qu.: 2.00
## Median :4.000 Median :4.000 Median : 2.00
## Mean :3.376 Mean :3.609 Mean : 2.97
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.: 3.00
## Max. :5.000 Max. :5.000 Max. :10.00
##
## income_group status domain
## Min. :1.00 Employed :41 IT & Technology:33
## 1st Qu.:1.00 Retired : 1 Marketing :21
## Median :2.00 Self-Employed : 5 Business :14
## Mean :2.97 Student :84 Engineering : 7
## 3rd Qu.:4.00 Student ant employed: 1 Finance : 7
## Max. :7.00 Unemployed : 1 Science : 7
## (Other) :44
str(df)
## 'data.frame': 133 obs. of 22 variables:
## $ trust_apple : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 1 2 2 ...
## $ interest_computers : int 4 2 5 2 4 3 3 3 4 5 ...
## $ age_computer : int 8 4 6 6 4 1 2 0 2 0 ...
## $ user_pcmac : Factor w/ 4 levels "Apple","Hp","Other",..: 4 4 4 1 1 1 1 4 1 1 ...
## $ appleproducts_count: int 0 1 0 4 7 2 7 0 6 7 ...
## $ familiarity_m1 : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 2 2 ...
## $ f_batterylife : int 5 5 3 4 5 5 4 5 4 5 ...
## $ f_price : int 4 5 4 3 3 5 3 5 4 3 ...
## $ f_size : int 3 5 2 3 3 4 4 4 3 5 ...
## $ f_multitasking : int 4 3 4 4 4 4 5 4 4 5 ...
## $ f_noise : int 4 4 1 4 4 5 5 3 4 5 ...
## $ f_performance : int 2 5 4 4 5 5 5 3 4 5 ...
## $ f_neural : int 2 2 2 4 3 5 3 2 3 3 ...
## $ f_synergy : int 1 2 2 4 4 4 3 2 3 5 ...
## $ f_performanceloss : int 1 4 2 3 4 2 2 3 4 5 ...
## $ m1_consideration : int 1 2 4 2 4 2 3 1 5 5 ...
## $ m1_purchase : Factor w/ 2 levels "No","Yes": 2 1 2 1 2 1 2 1 2 2 ...
## $ gender : Factor w/ 2 levels "Female","Male": 2 2 2 1 2 1 2 2 2 2 ...
## $ age_group : int 2 2 2 2 5 2 6 2 8 4 ...
## $ income_group : int 2 3 2 2 7 2 7 2 7 6 ...
## $ status : Factor w/ 6 levels "Employed","Retired",..: 4 1 4 4 1 4 1 4 1 1 ...
## $ domain : Factor w/ 22 levels "Administration & Public Services",..: 21 10 13 3 12 17 13 22 13 12 ...
plot_missing(df)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the DataExplorer package.
## Please report the issue at
## <https://github.com/boxuancui/DataExplorer/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot_histogram(df)
plot_correlation(df)
## 1 features with more than 20 categories ignored!
## domain: 22 categories
NOTA: La variable que queremos predecir debe tener formato de
FACTOR # Partir la base de datos
set.seed(123)
# Partición 80-20
renglones_entrenamiento <- createDataPartition(df$m1_purchase, p=0.8, list=FALSE)
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
Los métodos más utilizados para modelar aprendizaje automático son: * SVM: Support Vector Machine o Máquina de Vectores de Soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc. * Árbol de Decisión: rpart * Redes Neuronales: nnet * Random Forest o Bosques Aleatorios: rf # Modelo 1. SVM Lineal
modelo1 <- train(m1_purchase ~ ., data=entrenamiento,
method = "svmLinear",
preProcess = c("scale", "center"),
trControl = trainControl(method="cv", number=10),
tuneGrid = data.frame(C=1)
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusStudent ant employed, statusUnemployed,
## domainCommunication , domainRealestate, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetail, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLogistics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
resultado_entrenamiento1 <- predict(modelo1, entrenamiento)
resultado_prueba1 <- predict(modelo1, prueba)
mcre1 <- confusionMatrix(resultado_entrenamiento1, entrenamiento$m1_purchase)
mcrp1 <- confusionMatrix(resultado_prueba1, prueba$m1_purchase)
mcrp1
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 3 6
## Yes 6 11
##
## Accuracy : 0.5385
## 95% CI : (0.3337, 0.7341)
## No Information Rate : 0.6538
## P-Value [Acc > NIR] : 0.9231
##
## Kappa : -0.0196
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.3333
## Specificity : 0.6471
## Pos Pred Value : 0.3333
## Neg Pred Value : 0.6471
## Prevalence : 0.3462
## Detection Rate : 0.1154
## Detection Prevalence : 0.3462
## Balanced Accuracy : 0.4902
##
## 'Positive' Class : No
##
modelo2 <- train(m1_purchase ~ ., data=entrenamiento,
method = "svmRadial",
preProcess = c("scale", "center"),
trControl = trainControl(method="cv", number=10),
tuneGrid = data.frame(sigma=0.05, C=1)
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainAgriculture, domainCommunication ,
## domainLogistics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetail, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRealestate, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainEconomics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)
mcrp2 <- confusionMatrix(resultado_prueba2, prueba$m1_purchase)
mcrp2
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 1 1
## Yes 8 16
##
## Accuracy : 0.6538
## 95% CI : (0.4433, 0.8279)
## No Information Rate : 0.6538
## P-Value [Acc > NIR] : 0.5894
##
## Kappa : 0.064
##
## Mcnemar's Test P-Value : 0.0455
##
## Sensitivity : 0.11111
## Specificity : 0.94118
## Pos Pred Value : 0.50000
## Neg Pred Value : 0.66667
## Prevalence : 0.34615
## Detection Rate : 0.03846
## Detection Prevalence : 0.07692
## Balanced Accuracy : 0.52614
##
## 'Positive' Class : No
##
modelo3 <- train(m1_purchase ~ ., data=entrenamiento,
method = "svmPoly",
preProcess = c("scale", "center"),
trControl = trainControl(method="cv", number=10),
tuneGrid = data.frame(degree=2, scale=1, C=1)
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusSelf-Employed, statusStudent ant employed, statusUnemployed,
## domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLogistics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainLaw,
## domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
resultado_entrenamiento3 <- predict(modelo3, entrenamiento)
resultado_prueba3 <- predict(modelo3, prueba)
mcrp3 <- confusionMatrix(resultado_prueba3, prueba$m1_purchase)
mcrp3
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 1 4
## Yes 8 13
##
## Accuracy : 0.5385
## 95% CI : (0.3337, 0.7341)
## No Information Rate : 0.6538
## P-Value [Acc > NIR] : 0.9231
##
## Kappa : -0.1387
##
## Mcnemar's Test P-Value : 0.3865
##
## Sensitivity : 0.11111
## Specificity : 0.76471
## Pos Pred Value : 0.20000
## Neg Pred Value : 0.61905
## Prevalence : 0.34615
## Detection Rate : 0.03846
## Detection Prevalence : 0.19231
## Balanced Accuracy : 0.43791
##
## 'Positive' Class : No
##
modelo4 <- train(m1_purchase ~ ., data=entrenamiento,
method = "rpart",
preProcess = c("scale", "center"),
trControl = trainControl(method="cv", number=10),
tuneLength = 10
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLogistics, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetail, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusStudent ant employed, statusUnemployed,
## domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainLaw,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
resultado_entrenamiento4 <- predict(modelo4, entrenamiento)
resultado_prueba4 <- predict(modelo4, prueba)
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$m1_purchase)
mcrp4
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 4 6
## Yes 5 11
##
## Accuracy : 0.5769
## 95% CI : (0.3692, 0.7665)
## No Information Rate : 0.6538
## P-Value [Acc > NIR] : 0.8485
##
## Kappa : 0.0892
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4444
## Specificity : 0.6471
## Pos Pred Value : 0.4000
## Neg Pred Value : 0.6875
## Prevalence : 0.3462
## Detection Rate : 0.1538
## Detection Prevalence : 0.3846
## Balanced Accuracy : 0.5458
##
## 'Positive' Class : No
##
modelo5 <- train(m1_purchase ~ ., data=entrenamiento,
method = "nnet",
preProcess = c("scale", "center"),
trControl = trainControl(method="cv", number=10),
trace = FALSE
)
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$m1_purchase)
mcrp5
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 4 6
## Yes 5 11
##
## Accuracy : 0.5769
## 95% CI : (0.3692, 0.7665)
## No Information Rate : 0.6538
## P-Value [Acc > NIR] : 0.8485
##
## Kappa : 0.0892
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4444
## Specificity : 0.6471
## Pos Pred Value : 0.4000
## Neg Pred Value : 0.6875
## Prevalence : 0.3462
## Detection Rate : 0.1538
## Detection Prevalence : 0.3846
## Balanced Accuracy : 0.5458
##
## 'Positive' Class : No
##
modelo6 <- train(m1_purchase ~ ., data=entrenamiento,
method = "rf",
preProcess = c("scale", "center"),
trControl = trainControl(method="cv", number=10),
tuneGrid = expand.grid(mtry = c(2, 5, 10))
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainLogistics,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainLogistics,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainLogistics,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$m1_purchase)
mcrp6
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 5 8
## Yes 4 9
##
## Accuracy : 0.5385
## 95% CI : (0.3337, 0.7341)
## No Information Rate : 0.6538
## P-Value [Acc > NIR] : 0.9231
##
## Kappa : 0.0769
##
## Mcnemar's Test P-Value : 0.3865
##
## Sensitivity : 0.5556
## Specificity : 0.5294
## Pos Pred Value : 0.3846
## Neg Pred Value : 0.6923
## Prevalence : 0.3462
## Detection Rate : 0.1923
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.5425
##
## 'Positive' Class : No
##
resultados <- data.frame(
"svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
"svmRadial" = c(mcrp2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
"svmPoly" = c(mcrp3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
"rpart" = c(mcrp4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
"nnet" = c(mcrp5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
"rf" = c(mcrp6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)
rownames(resultados) <- c("Precisión de entrenamiento", "Precisión de prueba")
resultados
## svmLinear svmRadial svmPoly rpart nnet
## Precisión de entrenamiento 0.9065421 0.6538462 0.5384615 0.5769231 0.5769231
## Precisión de prueba 0.5384615 0.6538462 0.5384615 0.5769231 0.5769231
## rf
## Precisión de entrenamiento 0.5384615
## Precisión de prueba 0.5384615
Después de ver el código podemos ver que el modelo que obtuvo la mayor accuracy en el set de prueba fue el radial, se observa que la familiaridad previa con la marca y la valoración de la duración de la batería son los predictores determinantes. Esto sugiere que Apple debería enfocar su comunicación en la eficiencia energética para captar usuarios que actualmen