Linear kernel: $K(x_i, x_j) = x_i^{T} x_j$
Polynomial kernel: $K(x_i, x_j) = (p + \gamma \, x_i^{T} x_j)^{d}, \; \gamma > 0$
Gaussian RBF kernel: $K(x_i, x_j) = \exp(-\gamma \lVert x_i - x_j \rVert^{2}), \; \gamma > 0$
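For reference, a minimal sketch (not executed in this report) of how these three kernels map onto the arguments of `e1071::svm()`: in that package's parameterization the polynomial kernel is (gamma * u'v + coef0)^degree and the RBF kernel is exp(-gamma * ||u - v||^2), so $p$ corresponds to `coef0` and $\gamma$ to `gamma`. The `cost`, `gamma`, `degree` and `coef0` values below are illustrative placeholders, not tuned.
library(ISLR)   # OJ dataset used throughout this document
library(e1071)  # svm()
# Linear kernel: K(xi, xj) = xi' xj
svm.lin <- svm(Purchase ~ ., data = OJ, kernel = "linear", cost = 1, scale = TRUE)
# Polynomial kernel: K(xi, xj) = (coef0 + gamma * xi' xj)^degree
svm.pol <- svm(Purchase ~ ., data = OJ, kernel = "polynomial",
               degree = 3, gamma = 0.05, coef0 = 1, cost = 1, scale = TRUE)
# Gaussian RBF kernel: K(xi, xj) = exp(-gamma * ||xi - xj||^2)
svm.rad <- svm(Purchase ~ ., data = OJ, kernel = "radial",
               gamma = 0.05, cost = 1, scale = TRUE)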
# install.packages("ISLR")
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.6.3
library(knitr)   # Nicer tables in HTML/markdown output
library(ggplot2) # Plots
library(dplyr)   # General data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret) # To partition the data into training and validation sets
## Loading required package: lattice
# install.packages("e1071") # Para SVM
library(e1071)
## Warning: package 'e1071' was built under R version 3.6.3
datos <- OJ
str(datos)
## 'data.frame': 1070 obs. of 18 variables:
## $ Purchase : Factor w/ 2 levels "CH","MM": 1 1 1 2 1 1 1 1 1 1 ...
## $ WeekofPurchase: num 237 239 245 227 228 230 232 234 235 238 ...
## $ StoreID : num 1 1 1 1 7 7 7 7 7 7 ...
## $ PriceCH : num 1.75 1.75 1.86 1.69 1.69 1.69 1.69 1.75 1.75 1.75 ...
## $ PriceMM : num 1.99 1.99 2.09 1.69 1.69 1.99 1.99 1.99 1.99 1.99 ...
## $ DiscCH : num 0 0 0.17 0 0 0 0 0 0 0 ...
## $ DiscMM : num 0 0.3 0 0 0 0 0.4 0.4 0.4 0.4 ...
## $ SpecialCH : num 0 0 0 0 0 0 1 1 0 0 ...
## $ SpecialMM : num 0 1 0 0 0 1 1 0 0 0 ...
## $ LoyalCH : num 0.5 0.6 0.68 0.4 0.957 ...
## $ SalePriceMM : num 1.99 1.69 2.09 1.69 1.69 1.99 1.59 1.59 1.59 1.59 ...
## $ SalePriceCH : num 1.75 1.75 1.69 1.69 1.69 1.69 1.69 1.75 1.75 1.75 ...
## $ PriceDiff : num 0.24 -0.06 0.4 0 0 0.3 -0.1 -0.16 -0.16 -0.16 ...
## $ Store7 : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 2 2 2 2 2 ...
## $ PctDiscMM : num 0 0.151 0 0 0 ...
## $ PctDiscCH : num 0 0 0.0914 0 0 ...
## $ ListPriceDiff : num 0.24 0.24 0.23 0 0 0.3 0.3 0.24 0.24 0.24 ...
## $ STORE : num 1 1 1 1 0 0 0 0 0 0 ...
kable(summary(datos))
| Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CH:653 | Min. :227.0 | Min. :1.00 | Min. :1.690 | Min. :1.690 | Min. :0.00000 | Min. :0.0000 | Min. :0.0000 | Min. :0.0000 | Min. :0.000011 | Min. :1.190 | Min. :1.390 | Min. :-0.6700 | No :714 | Min. :0.0000 | Min. :0.00000 | Min. :0.000 | Min. :0.000 | |
| MM:417 | 1st Qu.:240.0 | 1st Qu.:2.00 | 1st Qu.:1.790 | 1st Qu.:1.990 | 1st Qu.:0.00000 | 1st Qu.:0.0000 | 1st Qu.:0.0000 | 1st Qu.:0.0000 | 1st Qu.:0.325257 | 1st Qu.:1.690 | 1st Qu.:1.750 | 1st Qu.: 0.0000 | Yes:356 | 1st Qu.:0.0000 | 1st Qu.:0.00000 | 1st Qu.:0.140 | 1st Qu.:0.000 | |
| NA | Median :257.0 | Median :3.00 | Median :1.860 | Median :2.090 | Median :0.00000 | Median :0.0000 | Median :0.0000 | Median :0.0000 | Median :0.600000 | Median :2.090 | Median :1.860 | Median : 0.2300 | NA | Median :0.0000 | Median :0.00000 | Median :0.240 | Median :2.000 | |
| NA | Mean :254.4 | Mean :3.96 | Mean :1.867 | Mean :2.085 | Mean :0.05186 | Mean :0.1234 | Mean :0.1477 | Mean :0.1617 | Mean :0.565782 | Mean :1.962 | Mean :1.816 | Mean : 0.1465 | NA | Mean :0.0593 | Mean :0.02731 | Mean :0.218 | Mean :1.631 | |
| NA | 3rd Qu.:268.0 | 3rd Qu.:7.00 | 3rd Qu.:1.990 | 3rd Qu.:2.180 | 3rd Qu.:0.00000 | 3rd Qu.:0.2300 | 3rd Qu.:0.0000 | 3rd Qu.:0.0000 | 3rd Qu.:0.850873 | 3rd Qu.:2.130 | 3rd Qu.:1.890 | 3rd Qu.: 0.3200 | NA | 3rd Qu.:0.1127 | 3rd Qu.:0.00000 | 3rd Qu.:0.300 | 3rd Qu.:3.000 | |
| NA | Max. :278.0 | Max. :7.00 | Max. :2.090 | Max. :2.290 | Max. :0.50000 | Max. :0.8000 | Max. :1.0000 | Max. :1.0000 | Max. :0.999947 | Max. :2.290 | Max. :2.090 | Max. : 0.6400 | NA | Max. :0.4020 | Max. :0.25269 | Max. :0.440 | Max. :4.000 |
#kable(head(datos))
#kable(tail(datos))
ggplot(data = datos, aes(x = Purchase, y = ..count.., fill = Purchase)) +
  geom_bar() +
  labs(title = "Distribution of 'Purchase'") +
  scale_fill_manual(values = c("darkgreen", "orangered2"),
                    labels = c("Citrus Hill", "Minute Maid")) +
  theme_bw() + theme(plot.title = element_text(hjust = 0.5))
frecuencias <- prop.table(table(datos$Purchase)) %>%
round(digits = 2)
frecuencias
##
## CH MM
## 0.61 0.39
set.seed(123)
entrena <- createDataPartition(y = datos$Purchase, p = 0.8, list = FALSE, times = 1)
# Training data
datos.entrenamiento <- datos[entrena, ]
# Validation data
datos.validacion <- datos[-entrena, ]
kable(head(datos))
| Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CH | 237 | 1 | 1.75 | 1.99 | 0.00 | 0.0 | 0 | 0 | 0.500000 | 1.99 | 1.75 | 0.24 | No | 0.000000 | 0.000000 | 0.24 | 1 |
| CH | 239 | 1 | 1.75 | 1.99 | 0.00 | 0.3 | 0 | 1 | 0.600000 | 1.69 | 1.75 | -0.06 | No | 0.150754 | 0.000000 | 0.24 | 1 |
| CH | 245 | 1 | 1.86 | 2.09 | 0.17 | 0.0 | 0 | 0 | 0.680000 | 2.09 | 1.69 | 0.40 | No | 0.000000 | 0.091398 | 0.23 | 1 |
| MM | 227 | 1 | 1.69 | 1.69 | 0.00 | 0.0 | 0 | 0 | 0.400000 | 1.69 | 1.69 | 0.00 | No | 0.000000 | 0.000000 | 0.00 | 1 |
| CH | 228 | 7 | 1.69 | 1.69 | 0.00 | 0.0 | 0 | 0 | 0.956535 | 1.69 | 1.69 | 0.00 | Yes | 0.000000 | 0.000000 | 0.00 | 0 |
| CH | 230 | 7 | 1.69 | 1.99 | 0.00 | 0.0 | 0 | 1 | 0.965228 | 1.99 | 1.69 | 0.30 | Yes | 0.000000 | 0.000000 | 0.30 | 0 |
kable(tail(datos))
| | Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1065 | CH | 251 | 7 | 1.86 | 2.09 | 0.1 | 0.00 | 0 | 0 | 0.484778 | 2.09 | 1.76 | 0.33 | Yes | 0.000000 | 0.053763 | 0.23 | 0 |
| 1066 | CH | 252 | 7 | 1.86 | 2.09 | 0.1 | 0.00 | 0 | 0 | 0.587822 | 2.09 | 1.76 | 0.33 | Yes | 0.000000 | 0.053763 | 0.23 | 0 |
| 1067 | CH | 256 | 7 | 1.86 | 2.18 | 0.0 | 0.00 | 0 | 0 | 0.670258 | 2.18 | 1.86 | 0.32 | Yes | 0.000000 | 0.000000 | 0.32 | 0 |
| 1068 | MM | 257 | 7 | 1.86 | 2.18 | 0.0 | 0.00 | 0 | 0 | 0.736206 | 2.18 | 1.86 | 0.32 | Yes | 0.000000 | 0.000000 | 0.32 | 0 |
| 1069 | CH | 261 | 7 | 1.86 | 2.13 | 0.0 | 0.24 | 0 | 0 | 0.588965 | 1.89 | 1.86 | 0.03 | Yes | 0.112676 | 0.000000 | 0.27 | 0 |
| 1070 | CH | 270 | 1 | 1.86 | 2.18 | 0.0 | 0.00 | 0 | 0 | 0.671172 | 2.18 | 1.86 | 0.32 | No | 0.000000 | 0.000000 | 0.32 | 1 |
kable(head(datos.entrenamiento))
| | Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | CH | 239 | 1 | 1.75 | 1.99 | 0 | 0.3 | 0 | 1 | 0.600000 | 1.69 | 1.75 | -0.06 | No | 0.150754 | 0 | 0.24 | 1 |
| 4 | MM | 227 | 1 | 1.69 | 1.69 | 0 | 0.0 | 0 | 0 | 0.400000 | 1.69 | 1.69 | 0.00 | No | 0.000000 | 0 | 0.00 | 1 |
| 5 | CH | 228 | 7 | 1.69 | 1.69 | 0 | 0.0 | 0 | 0 | 0.956535 | 1.69 | 1.69 | 0.00 | Yes | 0.000000 | 0 | 0.00 | 0 |
| 6 | CH | 230 | 7 | 1.69 | 1.99 | 0 | 0.0 | 0 | 1 | 0.965228 | 1.99 | 1.69 | 0.30 | Yes | 0.000000 | 0 | 0.30 | 0 |
| 7 | CH | 232 | 7 | 1.69 | 1.99 | 0 | 0.4 | 1 | 1 | 0.972182 | 1.59 | 1.69 | -0.10 | Yes | 0.201005 | 0 | 0.30 | 0 |
| 8 | CH | 234 | 7 | 1.75 | 1.99 | 0 | 0.4 | 1 | 0 | 0.977746 | 1.59 | 1.75 | -0.16 | Yes | 0.201005 | 0 | 0.24 | 0 |
kable(tail(datos.entrenamiento))
| | Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1065 | CH | 251 | 7 | 1.86 | 2.09 | 0.1 | 0.00 | 0 | 0 | 0.484778 | 2.09 | 1.76 | 0.33 | Yes | 0.000000 | 0.053763 | 0.23 | 0 |
| 1066 | CH | 252 | 7 | 1.86 | 2.09 | 0.1 | 0.00 | 0 | 0 | 0.587822 | 2.09 | 1.76 | 0.33 | Yes | 0.000000 | 0.053763 | 0.23 | 0 |
| 1067 | CH | 256 | 7 | 1.86 | 2.18 | 0.0 | 0.00 | 0 | 0 | 0.670258 | 2.18 | 1.86 | 0.32 | Yes | 0.000000 | 0.000000 | 0.32 | 0 |
| 1068 | MM | 257 | 7 | 1.86 | 2.18 | 0.0 | 0.00 | 0 | 0 | 0.736206 | 2.18 | 1.86 | 0.32 | Yes | 0.000000 | 0.000000 | 0.32 | 0 |
| 1069 | CH | 261 | 7 | 1.86 | 2.13 | 0.0 | 0.24 | 0 | 0 | 0.588965 | 1.89 | 1.86 | 0.03 | Yes | 0.112676 | 0.000000 | 0.27 | 0 |
| 1070 | CH | 270 | 1 | 1.86 | 2.18 | 0.0 | 0.00 | 0 | 0 | 0.671172 | 2.18 | 1.86 | 0.32 | No | 0.000000 | 0.000000 | 0.32 | 1 |
kable(head(datos.validacion))
| | Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | CH | 237 | 1 | 1.75 | 1.99 | 0.00 | 0.0 | 0 | 0 | 0.500000 | 1.99 | 1.75 | 0.24 | No | 0.000000 | 0.000000 | 0.24 | 1 |
| 3 | CH | 245 | 1 | 1.86 | 2.09 | 0.17 | 0.0 | 0 | 0 | 0.680000 | 2.09 | 1.69 | 0.40 | No | 0.000000 | 0.091398 | 0.23 | 1 |
| 10 | CH | 238 | 7 | 1.75 | 1.99 | 0.00 | 0.4 | 0 | 0 | 0.985757 | 1.59 | 1.75 | -0.16 | Yes | 0.201005 | 0.000000 | 0.24 | 0 |
| 16 | CH | 278 | 7 | 2.06 | 2.13 | 0.00 | 0.0 | 0 | 0 | 0.795200 | 2.13 | 2.06 | 0.07 | Yes | 0.000000 | 0.000000 | 0.07 | 0 |
| 18 | MM | 268 | 2 | 1.86 | 2.18 | 0.00 | 0.0 | 0 | 1 | 0.400000 | 2.18 | 1.86 | 0.32 | No | 0.000000 | 0.000000 | 0.32 | 2 |
| 22 | CH | 258 | 1 | 1.76 | 2.18 | 0.00 | 0.0 | 0 | 0 | 0.680000 | 2.18 | 1.76 | 0.42 | No | 0.000000 | 0.000000 | 0.42 | 1 |
kable(tail(datos.validacion))
| | Purchase | WeekofPurchase | StoreID | PriceCH | PriceMM | DiscCH | DiscMM | SpecialCH | SpecialMM | LoyalCH | SalePriceMM | SalePriceCH | PriceDiff | Store7 | PctDiscMM | PctDiscCH | ListPriceDiff | STORE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1040 | CH | 237 | 7 | 1.75 | 1.99 | 0 | 0.4 | 0 | 0 | 0.320000 | 1.59 | 1.75 | -0.16 | Yes | 0.201005 | 0 | 0.24 | 0 |
| 1043 | CH | 239 | 1 | 1.75 | 1.99 | 0 | 0.3 | 0 | 1 | 0.651840 | 1.69 | 1.75 | -0.06 | No | 0.150754 | 0 | 0.24 | 1 |
| 1053 | CH | 237 | 7 | 1.75 | 1.99 | 0 | 0.4 | 0 | 0 | 0.740928 | 1.59 | 1.75 | -0.16 | Yes | 0.201005 | 0 | 0.24 | 0 |
| 1056 | MM | 227 | 1 | 1.69 | 1.69 | 0 | 0.0 | 0 | 0 | 0.320000 | 1.69 | 1.69 | 0.00 | No | 0.000000 | 0 | 0.00 | 1 |
| 1057 | CH | 228 | 7 | 1.69 | 1.69 | 0 | 0.0 | 0 | 0 | 0.256000 | 1.69 | 1.69 | 0.00 | Yes | 0.000000 | 0 | 0.00 | 0 |
| 1059 | CH | 233 | 7 | 1.75 | 1.99 | 0 | 0.4 | 1 | 0 | 0.523840 | 1.59 | 1.75 | -0.16 | Yes | 0.201005 | 0 | 0.24 | 0 |
set.seed(325)
tuning <- tune(svm, Purchase ~ ., data = datos.entrenamiento,
kernel = "linear",
ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 15, 20)),
scale = TRUE)
summary(tuning)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 15
##
## - best performance: 0.1656772
##
## - Detailed performance results:
## cost error dispersion
## 1 0.001 0.3009850 0.05276636
## 2 0.010 0.1691382 0.04602764
## 3 0.100 0.1679754 0.04037149
## 4 1.000 0.1703010 0.04099747
## 5 5.000 0.1703010 0.03755497
## 6 10.000 0.1668263 0.03560699
## 7 15.000 0.1656772 0.03338668
## 8 20.000 0.1668399 0.03479758
names(tuning)
## [1] "best.parameters" "best.performance" "method" "nparcomb"
## [5] "train.ind" "sampling" "performances" "best.model"
ggplot(data = tuning$performances, aes(x = cost, y = error)) +
geom_line() +
geom_point() +
labs(title = "Error de validación ~ hiperparámetro C") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
modelo <- tuning$best.model
summary(modelo)
##
## Call:
## best.tune(method = svm, train.x = Purchase ~ ., data = datos.entrenamiento,
## ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 15, 20)),
## kernel = "linear", scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 15
##
## Number of Support Vectors: 345
##
## ( 173 172 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
modelo.lin <- svm(Purchase ~ ., data = datos.entrenamiento, kernel = "linear", cost = 15, scale = TRUE)
summary(modelo.lin)
##
## Call:
## svm(formula = Purchase ~ ., data = datos.entrenamiento, kernel = "linear",
## cost = 15, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 15
##
## Number of Support Vectors: 345
##
## ( 173 172 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
prediccion <- predict(modelo, datos.validacion)
kable(head(prediccion, 10))
| | x |
|---|---|
| 1 | CH |
| 3 | CH |
| 10 | CH |
| 16 | CH |
| 18 | CH |
| 22 | CH |
| 26 | CH |
| 41 | CH |
| 45 | CH |
| 49 | CH |
kable(tail(prediccion, 10))
| | x |
|---|---|
| 1011 | CH |
| 1014 | CH |
| 1020 | CH |
| 1039 | MM |
| 1040 | MM |
| 1043 | CH |
| 1053 | CH |
| 1056 | MM |
| 1057 | MM |
| 1059 | CH |
length(prediccion)
## [1] 213
nrow(datos.validacion)
## [1] 213
mat.confusion <- table(predicho = prediccion, real = datos.validacion$Purchase)
mat.confusion
## real
## predicho CH MM
## CH 108 17
## MM 22 66
The models are evaluated with the accuracy metric: the ratio of correct predictions to the total number of observations.
Accuracy ("exactitud" in Spanish) describes how close a result is to the true value; in statistical terms it is related to the bias of an estimate.
It should not be confused with the true positive rate (sensitivity), TP / (TP + FN), which looks only at the positive class, nor with precision, TP / (TP + FP), the share of positive predictions that are correct. Note that the English term "precision" and the Spanish "precisión" are easily mixed up with accuracy, which can lead to confusion.
In practical terms, accuracy is the fraction of all predictions, positive and negative, that the model gets right.
TP (true positives): positive cases correctly classified as positive by the model.
TN (true negatives): negative cases correctly classified as negative by the model.
FN (false negatives): positive cases incorrectly classified as negative (type II error).
FP (false positives): negative cases incorrectly classified as positive (type I error).
TP + TN = correctly classified cases
TP + FN + FP + TN = all cases

$\text{accuracy} = \dfrac{\text{correctly classified cases}}{\text{all cases}} = \dfrac{TP + TN}{TP + FN + FP + TN}$

In R: `round((mat.confusion[1, 1] + mat.confusion[2, 2]) / nrow(datos.validacion) * 100, 4)`
An SVM model with a linear kernel and cost = 15 was used.
The model classifies 174 of the 213 validation observations correctly,
which corresponds to an accuracy of 81.6901 %.
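As an optional cross-check (a sketch, not part of the original workflow), the `caret` package loaded above provides `confusionMatrix()`, which reports the same accuracy together with sensitivity, specificity and related metrics in a single call:
# Sketch: assumes 'prediccion' still holds the linear-kernel predictions computed above.
confusionMatrix(data = prediccion,
                reference = datos.validacion$Purchase,
                positive = "CH")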
ggplot(datos.validacion, aes(x = SalePriceCH, y = PriceCH)) +
geom_point(aes(colour = factor(prediccion)))
ggplot(datos.validacion, aes(x = SalePriceMM, y = PriceMM)) +
geom_point(aes(colour = factor(prediccion)))
modelo.pol <- svm(Purchase ~ ., data = datos.entrenamiento, kernel = "polynomial", cost = 15, scale = TRUE)
summary(modelo.pol)
##
## Call:
## svm(formula = Purchase ~ ., data = datos.entrenamiento, kernel = "polynomial",
## cost = 15, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 15
## degree: 3
## coef.0: 0
##
## Number of Support Vectors: 342
##
## ( 177 165 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
prediccion <- predict(modelo.pol, datos.validacion)
kable(head(prediccion, 10))
| | x |
|---|---|
| 1 | CH |
| 3 | CH |
| 10 | CH |
| 16 | CH |
| 18 | CH |
| 22 | CH |
| 26 | CH |
| 41 | CH |
| 45 | CH |
| 49 | CH |
kable(tail(prediccion, 10))
| | x |
|---|---|
| 1011 | CH |
| 1014 | CH |
| 1020 | CH |
| 1039 | MM |
| 1040 | MM |
| 1043 | CH |
| 1053 | CH |
| 1056 | MM |
| 1057 | CH |
| 1059 | MM |
length(prediccion)
## [1] 213
nrow(datos.validacion)
## [1] 213
mat.confusion <- table(predicho = prediccion, real = datos.validacion$Purchase)
mat.confusion
## real
## predicho CH MM
## CH 108 23
## MM 22 60
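Applying the same accuracy calculation used for the linear kernel to the matrix above gives (108 + 60) / 213 ≈ 78.87 %, slightly below the linear model:
round((mat.confusion[1, 1] + mat.confusion[2, 2]) / nrow(datos.validacion) * 100, 4)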
ggplot(datos.validacion, aes(x = SalePriceCH, y = PriceCH)) +
geom_point(aes(colour = factor(prediccion)))
ggplot(datos.validacion, aes(x = SalePriceMM, y = PriceMM)) +
geom_point(aes(colour = factor(prediccion)))
modelo.rad <- svm(Purchase ~ ., data = datos.entrenamiento, kernel = "radial", cost = 15, scale = TRUE)
summary(modelo.rad)
##
## Call:
## svm(formula = Purchase ~ ., data = datos.entrenamiento, kernel = "radial",
## cost = 15, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 15
##
## Number of Support Vectors: 340
##
## ( 173 167 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
prediccion <- predict(modelo.rad, datos.validacion)
kable(head(prediccion, 10))
| | x |
|---|---|
| 1 | MM |
| 3 | CH |
| 10 | CH |
| 16 | CH |
| 18 | CH |
| 22 | CH |
| 26 | CH |
| 41 | CH |
| 45 | CH |
| 49 | CH |
kable(tail(prediccion, 10))
| | x |
|---|---|
| 1011 | CH |
| 1014 | CH |
| 1020 | CH |
| 1039 | CH |
| 1040 | MM |
| 1043 | CH |
| 1053 | CH |
| 1056 | MM |
| 1057 | CH |
| 1059 | CH |
length(prediccion)
## [1] 213
nrow(datos.validacion)
## [1] 213
mat.confusion <- table(predicho = prediccion, real = datos.validacion$Purchase)
mat.confusion
## real
## predicho CH MM
## CH 108 21
## MM 22 62
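For the radial kernel the same calculation gives (108 + 62) / 213 ≈ 79.81 %, so on this validation split the linear kernel with cost = 15 remains the most accurate of the three models:
round((mat.confusion[1, 1] + mat.confusion[2, 2]) / nrow(datos.validacion) * 100, 4)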
ggplot(datos.validacion, aes(x = SalePriceCH, y = PriceCH)) +
geom_point(aes(colour = factor(prediccion)))
ggplot(datos.validacion, aes(x = SalePriceMM, y = PriceMM)) +
geom_point(aes(colour = factor(prediccion)))