Teoría

El paquete CARET (Classification And REgression Training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático. # Instalar paquetes y llamar librerías

library(ggplot2)
library(lattice)
library(caret)
library(datasets)
library(DataExplorer)

Crear la base de datos

df <- read.csv("C:\\Users\\Emili\\Downloads\\M1_data.csv", stringsAsFactors = TRUE)

Entender la base de datos

summary(df)
##  trust_apple interest_computers  age_computer   user_pcmac appleproducts_count
##  No : 19     Min.   :2.000      Min.   :0.000   Apple:86   Min.   :0.000      
##  Yes:114     1st Qu.:3.000      1st Qu.:1.000   Hp   : 1   1st Qu.:1.000      
##              Median :4.000      Median :3.000   Other: 1   Median :3.000      
##              Mean   :3.812      Mean   :2.827   PC   :45   Mean   :2.609      
##              3rd Qu.:5.000      3rd Qu.:5.000              3rd Qu.:4.000      
##              Max.   :5.000      Max.   :9.000              Max.   :8.000      
##                                                                               
##  familiarity_m1 f_batterylife      f_price          f_size      f_multitasking
##  No :75         Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :2.00  
##  Yes:58         1st Qu.:4.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:4.00  
##                 Median :5.000   Median :4.000   Median :3.000   Median :4.00  
##                 Mean   :4.526   Mean   :3.872   Mean   :3.158   Mean   :4.12  
##                 3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.00  
##                 Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.00  
##                                                                               
##     f_noise      f_performance      f_neural       f_synergy    
##  Min.   :1.000   Min.   :2.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:4.000   1st Qu.:2.000   1st Qu.:3.000  
##  Median :4.000   Median :5.000   Median :3.000   Median :4.000  
##  Mean   :3.729   Mean   :4.398   Mean   :3.165   Mean   :3.466  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                 
##  f_performanceloss m1_consideration m1_purchase    gender     age_group    
##  Min.   :1.000     Min.   :1.000    No :45      Female:61   Min.   : 1.00  
##  1st Qu.:3.000     1st Qu.:3.000    Yes:88      Male  :72   1st Qu.: 2.00  
##  Median :4.000     Median :4.000                            Median : 2.00  
##  Mean   :3.376     Mean   :3.609                            Mean   : 2.97  
##  3rd Qu.:4.000     3rd Qu.:5.000                            3rd Qu.: 3.00  
##  Max.   :5.000     Max.   :5.000                            Max.   :10.00  
##                                                                            
##   income_group                   status               domain  
##  Min.   :1.00   Employed            :41   IT & Technology:33  
##  1st Qu.:1.00   Retired             : 1   Marketing      :21  
##  Median :2.00   Self-Employed       : 5   Business       :14  
##  Mean   :2.97   Student             :84   Engineering    : 7  
##  3rd Qu.:4.00   Student ant employed: 1   Finance        : 7  
##  Max.   :7.00   Unemployed          : 1   Science        : 7  
##                                           (Other)        :44
str(df)
## 'data.frame':    133 obs. of  22 variables:
##  $ trust_apple        : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 1 2 2 ...
##  $ interest_computers : int  4 2 5 2 4 3 3 3 4 5 ...
##  $ age_computer       : int  8 4 6 6 4 1 2 0 2 0 ...
##  $ user_pcmac         : Factor w/ 4 levels "Apple","Hp","Other",..: 4 4 4 1 1 1 1 4 1 1 ...
##  $ appleproducts_count: int  0 1 0 4 7 2 7 0 6 7 ...
##  $ familiarity_m1     : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 2 2 ...
##  $ f_batterylife      : int  5 5 3 4 5 5 4 5 4 5 ...
##  $ f_price            : int  4 5 4 3 3 5 3 5 4 3 ...
##  $ f_size             : int  3 5 2 3 3 4 4 4 3 5 ...
##  $ f_multitasking     : int  4 3 4 4 4 4 5 4 4 5 ...
##  $ f_noise            : int  4 4 1 4 4 5 5 3 4 5 ...
##  $ f_performance      : int  2 5 4 4 5 5 5 3 4 5 ...
##  $ f_neural           : int  2 2 2 4 3 5 3 2 3 3 ...
##  $ f_synergy          : int  1 2 2 4 4 4 3 2 3 5 ...
##  $ f_performanceloss  : int  1 4 2 3 4 2 2 3 4 5 ...
##  $ m1_consideration   : int  1 2 4 2 4 2 3 1 5 5 ...
##  $ m1_purchase        : Factor w/ 2 levels "No","Yes": 2 1 2 1 2 1 2 1 2 2 ...
##  $ gender             : Factor w/ 2 levels "Female","Male": 2 2 2 1 2 1 2 2 2 2 ...
##  $ age_group          : int  2 2 2 2 5 2 6 2 8 4 ...
##  $ income_group       : int  2 3 2 2 7 2 7 2 7 6 ...
##  $ status             : Factor w/ 6 levels "Employed","Retired",..: 4 1 4 4 1 4 1 4 1 1 ...
##  $ domain             : Factor w/ 22 levels "Administration & Public Services",..: 21 10 13 3 12 17 13 22 13 12 ...
plot_missing(df)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the DataExplorer package.
##   Please report the issue at
##   <https://github.com/boxuancui/DataExplorer/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot_histogram(df)

plot_correlation(df)
## 1 features with more than 20 categories ignored!
## domain: 22 categories

NOTA: La variable que queremos predecir debe tener formato de FACTOR # Partir la base de datos

set.seed(123)
# Partición 80-20
renglones_entrenamiento <- createDataPartition(df$m1_purchase, p=0.8, list=FALSE)
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]

Distintos tipos de Métodos para Modelar

Los métodos más utilizados para modelar aprendizaje automático son: * SVM: Support Vector Machine o Máquina de Vectores de Soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc. * Árbol de Decisión: rpart * Redes Neuronales: nnet * Random Forest o Bosques Aleatorios: rf # Modelo 1. SVM Lineal

modelo1 <- train(m1_purchase ~ ., data=entrenamiento,
                 method = "svmLinear",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(C=1) 
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusStudent ant employed, statusUnemployed,
## domainCommunication , domainRealestate, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetail, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLogistics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
resultado_entrenamiento1 <- predict(modelo1, entrenamiento)
resultado_prueba1 <- predict(modelo1, prueba)

mcre1 <- confusionMatrix(resultado_entrenamiento1, entrenamiento$m1_purchase)
mcrp1 <- confusionMatrix(resultado_prueba1, prueba$m1_purchase)
mcrp1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   3   6
##        Yes  6  11
##                                           
##                Accuracy : 0.5385          
##                  95% CI : (0.3337, 0.7341)
##     No Information Rate : 0.6538          
##     P-Value [Acc > NIR] : 0.9231          
##                                           
##                   Kappa : -0.0196         
##                                           
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.3333          
##             Specificity : 0.6471          
##          Pos Pred Value : 0.3333          
##          Neg Pred Value : 0.6471          
##              Prevalence : 0.3462          
##          Detection Rate : 0.1154          
##    Detection Prevalence : 0.3462          
##       Balanced Accuracy : 0.4902          
##                                           
##        'Positive' Class : No              
## 

Modelo 2. SVM Radial

modelo2 <- train(m1_purchase ~ ., data=entrenamiento,
                 method = "svmRadial",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(sigma=0.05, C=1)
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainAgriculture, domainCommunication ,
## domainLogistics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetail, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRealestate, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainEconomics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)

mcrp2 <- confusionMatrix(resultado_prueba2, prueba$m1_purchase)
mcrp2
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   1   1
##        Yes  8  16
##                                           
##                Accuracy : 0.6538          
##                  95% CI : (0.4433, 0.8279)
##     No Information Rate : 0.6538          
##     P-Value [Acc > NIR] : 0.5894          
##                                           
##                   Kappa : 0.064           
##                                           
##  Mcnemar's Test P-Value : 0.0455          
##                                           
##             Sensitivity : 0.11111         
##             Specificity : 0.94118         
##          Pos Pred Value : 0.50000         
##          Neg Pred Value : 0.66667         
##              Prevalence : 0.34615         
##          Detection Rate : 0.03846         
##    Detection Prevalence : 0.07692         
##       Balanced Accuracy : 0.52614         
##                                           
##        'Positive' Class : No              
## 

Modelo 3. SVM Polinómico

modelo3 <- train(m1_purchase ~ ., data=entrenamiento,
                 method = "svmPoly",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(degree=2, scale=1, C=1)
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusSelf-Employed, statusStudent ant employed, statusUnemployed,
## domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLogistics, domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainLaw,
## domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
resultado_entrenamiento3 <- predict(modelo3, entrenamiento)
resultado_prueba3 <- predict(modelo3, prueba)

mcrp3 <- confusionMatrix(resultado_prueba3, prueba$m1_purchase)
mcrp3
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   1   4
##        Yes  8  13
##                                           
##                Accuracy : 0.5385          
##                  95% CI : (0.3337, 0.7341)
##     No Information Rate : 0.6538          
##     P-Value [Acc > NIR] : 0.9231          
##                                           
##                   Kappa : -0.1387         
##                                           
##  Mcnemar's Test P-Value : 0.3865          
##                                           
##             Sensitivity : 0.11111         
##             Specificity : 0.76471         
##          Pos Pred Value : 0.20000         
##          Neg Pred Value : 0.61905         
##              Prevalence : 0.34615         
##          Detection Rate : 0.03846         
##    Detection Prevalence : 0.19231         
##       Balanced Accuracy : 0.43791         
##                                           
##        'Positive' Class : No              
## 

Modelo 4. Árbol de Decisión

modelo4 <- train(m1_purchase ~ ., data=entrenamiento,
                 method = "rpart",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneLength = 10
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLogistics, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetail, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusStudent ant employed, statusUnemployed,
## domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainLaw,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
resultado_entrenamiento4 <- predict(modelo4, entrenamiento)
resultado_prueba4 <- predict(modelo4, prueba)

mcrp4 <- confusionMatrix(resultado_prueba4, prueba$m1_purchase)
mcrp4
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   4   6
##        Yes  5  11
##                                           
##                Accuracy : 0.5769          
##                  95% CI : (0.3692, 0.7665)
##     No Information Rate : 0.6538          
##     P-Value [Acc > NIR] : 0.8485          
##                                           
##                   Kappa : 0.0892          
##                                           
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4444          
##             Specificity : 0.6471          
##          Pos Pred Value : 0.4000          
##          Neg Pred Value : 0.6875          
##              Prevalence : 0.3462          
##          Detection Rate : 0.1538          
##    Detection Prevalence : 0.3846          
##       Balanced Accuracy : 0.5458          
##                                           
##        'Positive' Class : No              
## 

Modelo 5. Redes Neuronales

modelo5 <- train(m1_purchase ~ ., data=entrenamiento,
                 method = "nnet",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 trace = FALSE
)

resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)

mcrp5 <- confusionMatrix(resultado_prueba5, prueba$m1_purchase)
mcrp5
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   4   6
##        Yes  5  11
##                                           
##                Accuracy : 0.5769          
##                  95% CI : (0.3692, 0.7665)
##     No Information Rate : 0.6538          
##     P-Value [Acc > NIR] : 0.8485          
##                                           
##                   Kappa : 0.0892          
##                                           
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4444          
##             Specificity : 0.6471          
##          Pos Pred Value : 0.4000          
##          Neg Pred Value : 0.6875          
##              Prevalence : 0.3462          
##          Detection Rate : 0.1538          
##    Detection Prevalence : 0.3846          
##       Balanced Accuracy : 0.5458          
##                                           
##        'Positive' Class : No              
## 

Modelo 6. Bosques Aleatorios

modelo6 <- train(m1_purchase ~ ., data=entrenamiento,
                 method = "rf",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = expand.grid(mtry = c(2, 5, 10))
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusStudent ant employed, statusUnemployed, domainCommunication ,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainAgriculture, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRealestate, domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacHp, user_pcmacOther,
## statusRetired, statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainConsulting , domainRetail,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainLogistics,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainLogistics,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainLaw, domainLogistics,
## domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: user_pcmacOther, statusRetired,
## statusUnemployed, domainCommunication , domainRetired
resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)

mcrp6 <- confusionMatrix(resultado_prueba6, prueba$m1_purchase)
mcrp6
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   5   8
##        Yes  4   9
##                                           
##                Accuracy : 0.5385          
##                  95% CI : (0.3337, 0.7341)
##     No Information Rate : 0.6538          
##     P-Value [Acc > NIR] : 0.9231          
##                                           
##                   Kappa : 0.0769          
##                                           
##  Mcnemar's Test P-Value : 0.3865          
##                                           
##             Sensitivity : 0.5556          
##             Specificity : 0.5294          
##          Pos Pred Value : 0.3846          
##          Neg Pred Value : 0.6923          
##              Prevalence : 0.3462          
##          Detection Rate : 0.1923          
##    Detection Prevalence : 0.5000          
##       Balanced Accuracy : 0.5425          
##                                           
##        'Positive' Class : No              
## 

Tabla de Resultados

resultados <- data.frame(
  "svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcrp2$overall["Accuracy"], mcrp2$overall["Accuracy"]), 
  "svmPoly"   = c(mcrp3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart"     = c(mcrp4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "nnet"      = c(mcrp5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "rf"        = c(mcrp6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)
rownames(resultados) <- c("Precisión de entrenamiento", "Precisión de prueba")
resultados
##                            svmLinear svmRadial   svmPoly     rpart      nnet
## Precisión de entrenamiento 0.9065421 0.6538462 0.5384615 0.5769231 0.5769231
## Precisión de prueba        0.5384615 0.6538462 0.5384615 0.5769231 0.5769231
##                                   rf
## Precisión de entrenamiento 0.5384615
## Precisión de prueba        0.5384615

Conclusiones

Después de ver el código podemos ver que el modelo que obtuvo la mayor accuracy en el set de prueba fue el radial, se observa que la familiaridad previa con la marca y la valoración de la duración de la batería son los predictores determinantes. Esto sugiere que Apple debería enfocar su comunicación en la eficiencia energética para captar usuarios que actualmen