Support Vector Regression

Dasha Ivanova

20/12/2019

DATOS

Cargamos las librerías:

# install.packages("hydroGOF") # Para la función mse
library(hydroGOF)
library(e1071)
library(caret)
library(knitr)
library(kableExtra)

Variables:

Preparamos los datos:

# Data: take the Boston data set straight from the MASS namespace
# (MASS itself is not attached) and preview its first rows.
boston <- MASS::Boston
head(boston)
##      crim zn indus chas   nox    rm  age    dis rad tax ptratio  black
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12
##   lstat medv
## 1  4.98 24.0
## 2  9.14 21.6
## 3  4.03 34.7
## 4  2.94 33.4
## 5  5.33 36.2
## 6  5.21 28.7
# Train / test split.
# The seed is fixed so the random partition is reproducible between runs.
set.seed(123)
# Row indices of the training set: 90% of the observations, drawn using
# medv as the outcome. `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
indexes <- createDataPartition(boston$medv, p = 0.9, list = FALSE)
train <- boston[indexes, ]
# Every row that was not selected for training is held out for testing.
test <- boston[-indexes, ]

SVR LINEAL

######### LINEAR SVR MODEL ############

# Fit a support vector regression of medv on all remaining columns of the
# training set with a linear kernel; other hyper-parameters keep the
# e1071 defaults. Then show the fitted model's summary.
model_lin <- svm(formula = medv ~ ., data = train, kernel = "linear")
print(model_lin)
## 
## Call:
## svm(formula = medv ~ ., data = train, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.07692308 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  350
# Predictions of the linear model on the held-out test set.
pred_lin <- predict(model_lin, test)

# Plot: observed medv as red points, predictions as a blue line, both
# against the position of the observation within the test set.
x <- seq_along(test$medv)
plot(x, test$medv, pch = 18, col = "red")
lines(x, pred_lin, lwd = 1, col = "blue")

# Prediction accuracy on the test set: MSE, MAE, RMSE and R squared.
# The hydroGOF functions take (sim, obs); these three metrics are symmetric,
# so naming the arguments only makes the roles explicit.
mse_lin <- mse(sim = pred_lin, obs = test$medv)
mae_lin <- mae(sim = pred_lin, obs = test$medv)
rmse_lin <- rmse(sim = pred_lin, obs = test$medv)
# caret::R2 takes (pred, obs) and the "traditional" formula is NOT symmetric:
# it normalises by the spread of `obs`, so the observed values must be passed
# as `obs`. The caret:: prefix avoids picking up an R2 from another package.
r2_lin <- caret::R2(pred = pred_lin, obs = test$medv, formula = "traditional")

SVR POLINÓMICO

######### POLYNOMIAL SVR MODEL ############

# Fit a support vector regression of medv on all remaining columns of the
# training set with a polynomial kernel; other hyper-parameters keep the
# e1071 defaults. Then show the fitted model's summary.
model_pol <- svm(formula = medv ~ ., data = train, kernel = "polynomial")
print(model_pol)
## 
## Call:
## svm(formula = medv ~ ., data = train, kernel = "polynomial")
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  3 
##       gamma:  0.07692308 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  335
# Predictions of the polynomial model on the held-out test set.
pred_pol <- predict(model_pol, test)

# Plot: observed medv as red points, predictions as a blue line, both
# against the position of the observation within the test set.
x <- seq_along(test$medv)
plot(x, test$medv, pch = 18, col = "red")
lines(x, pred_pol, lwd = 1, col = "blue")

# Prediction accuracy on the test set: MSE, MAE, RMSE and R squared.
# The hydroGOF functions take (sim, obs); these three metrics are symmetric,
# so naming the arguments only makes the roles explicit.
mse_pol <- mse(sim = pred_pol, obs = test$medv)
mae_pol <- mae(sim = pred_pol, obs = test$medv)
rmse_pol <- rmse(sim = pred_pol, obs = test$medv)
# caret::R2 takes (pred, obs) and the "traditional" formula is NOT symmetric:
# it normalises by the spread of `obs`, so the observed values must be passed
# as `obs`. The caret:: prefix avoids picking up an R2 from another package.
r2_pol <- caret::R2(pred = pred_pol, obs = test$medv, formula = "traditional")

SVR RADIAL

######### RADIAL SVR MODEL ############

# Fit a support vector regression of medv on all remaining columns of the
# training set with a radial kernel; other hyper-parameters keep the
# e1071 defaults. Then show the fitted model's summary.
model_rad <- svm(formula = medv ~ ., data = train, kernel = "radial")
print(model_rad)
## 
## Call:
## svm(formula = medv ~ ., data = train, kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.07692308 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  306
# Predictions of the radial model on the held-out test set.
pred_rad <- predict(model_rad, test)

# Plot: observed medv as red points, predictions as a blue line, both
# against the position of the observation within the test set.
x <- seq_along(test$medv)
plot(x, test$medv, pch = 18, col = "red")
lines(x, pred_rad, lwd = 1, col = "blue")

# Prediction accuracy on the test set: MSE, MAE, RMSE and R squared.
# The hydroGOF functions take (sim, obs); these three metrics are symmetric,
# so naming the arguments only makes the roles explicit.
mse_rad <- mse(sim = pred_rad, obs = test$medv)
mae_rad <- mae(sim = pred_rad, obs = test$medv)
rmse_rad <- rmse(sim = pred_rad, obs = test$medv)
# caret::R2 takes (pred, obs) and the "traditional" formula is NOT symmetric:
# it normalises by the spread of `obs`, so the observed values must be passed
# as `obs`. The caret:: prefix avoids picking up an R2 from another package.
r2_rad <- caret::R2(pred = pred_rad, obs = test$medv, formula = "traditional")

Comparativa entre los 3 modelos

# One vector of test-set metrics per model, in the order MSE, MAE, RMSE, R2.
# R2 is multiplied by 100 so the table shows it as a percentage.
prec_lin <- c(mse_lin, mae_lin, rmse_lin, r2_lin * 100)
prec_pol <- c(mse_pol, mae_pol, rmse_pol, r2_pol * 100)
prec_rad <- c(mse_rad, mae_rad, rmse_rad, r2_rad * 100)

# Comparison table: one row per metric, one column per kernel.
precisiones <- data.frame(
  prec_lin, prec_pol, prec_rad,
  row.names = c("MSE", "MAE", "RMSE", "R2")
)
colnames(precisiones) <- c("Lineal", "Polinómico", "Radial")

# Render the table rounded to two decimals with striped rows.
kable_styling(
  kable(precisiones, digits = 2),
  bootstrap_options = "striped"
)
|      | Lineal | Polinómico | Radial |
|------|-------:|-----------:|-------:|
| MSE  |  17.89 |      12.42 |   6.03 |
| MAE  |   3.21 |       2.72 |   1.88 |
| RMSE |   4.23 |       3.52 |   2.46 |
| R2   |  65.89 |      82.63 |  91.41 |

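Donde, siendo $y_i$ el valor observado, $\hat{y}_i$ el valor predicho, $\bar{y}$ la media de los valores observados y $n$ el número de observaciones del conjunto de test:

- MSE (error cuadrático medio): $\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2$
- MAE (error absoluto medio): $\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}\left|y_i - \hat{y}_i\right|$
- RMSE (raíz del error cuadrático medio): $\mathrm{RMSE} = \sqrt{\mathrm{MSE}}$
- R2 (coeficiente de determinación, expresado en porcentaje en la tabla): $R^2 = 1 - \dfrac{\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}{\sum_{i=1}^{n}\left(y_i - \bar{y}\right)^2}$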

Bibliografía