# Synthetic data: 75 consecutive integer x values and, for y, the running
# sum of one standard-normal draw per x (a random walk).
x <- seq_len(75)
y <- cumsum(rnorm(n = length(x)))
# plot
# Draw a scatter plot of y against x, connect the points with a dashed
# line, and overlay a reference grid.
#
# x, y: coordinates of the points to draw.
# The value of the final grid() call is passed through as the result.
# Note: the arguments are forwarded to plot() as `x` and `y`, so the axis
# labels are always "x" and "y".
makePlot <- function(x, y) {
  plot(x, y, pch = 5, col = "black", lwd = 1)
  lines(x, y, lwd = 2, lty = 2)
  grid()
}
# Show the raw series
makePlot(x, y)
title("original data")

# Bundle both vectors into a data frame named `Data`
Data <- data.frame(cbind(x, y))

# Fit a linear regression of y on x
linregress_model <- lm(y ~ x, data = Data)

# Linear-model predictions for every x value in `Data`
predictYlinregress <- predict(linregress_model, newdata = Data)

# Redraw the series and overlay the fitted regression line in red
makePlot(x, y)
title("original data + linear regression")
abline(linregress_model, col = "red")
# Root-mean-square error of a vector of errors.
#
# errval: numeric vector of errors (e.g. observed minus predicted values).
# na.rm:  if TRUE, missing values are dropped before averaging; the default
#         FALSE keeps the original behaviour, where any NA in `errval`
#         makes the result NA.
# Returns a single numeric value: sqrt(mean(errval^2)).
rmse <- function(errval, na.rm = FALSE) {
  sqrt(mean(errval^2, na.rm = na.rm))
}
# Residuals of the linear fit (equivalent to Data$y - predictYlinregress)
errval <- linregress_model$residuals
linregress_RMSE <- rmse(errval)
# Report the RMSE of the linear regression model
print(paste("linregress RMSE = ", linregress_RMSE))
## [1] "linregress RMSE = 1.4337480655808"
#install.packages("e1071")
library(e1071)
## Warning: package 'e1071' was built under R version 3.2.5
# Fit an SVM model of y on x (no tuning parameters supplied)
svm_model <- svm(y ~ x, data = Data)

# SVM predictions for every x value in `Data`
predictYsvm <- predict(svm_model, newdata = Data)

# Visual comparison: raw data, linear fit (red) and SVM fit (blue)
makePlot(x, y)
title("original data + linear regression + svr")
abline(linregress_model, col = "red")
points(Data$x, predictYsvm, pch = 4, col = "blue")
lines(Data$x, predictYsvm, col = "blue")

#### Error in svr
errval <- Data$y - predictYsvm
svr_RMSE <- rmse(errval)
print(paste("svr RMSE = ", svr_RMSE))
## [1] "svr RMSE = 1.03189964199952"
# Coarse grid search over epsilon (0 to 1 in steps of 0.1) and cost
# (2^0.5 to 2^8 in half-power steps).
# (This may take a few seconds; use a coarser grid if it is too slow.)
tuneResult1 <- tune(
  svm, y ~ x,
  data = Data,
  ranges = list(
    epsilon = seq(from = 0, to = 1, by = 0.1),
    cost = 2^seq(from = 0.5, to = 8, by = 0.5)
  )
)

# Map the tuning results over the searched grid
plot(tuneResult1)
#### Finer grid
# Second search centred on the best parameters from the coarse search:
# epsilon within +/- 0.15 of the best value in steps of 0.01, and six
# evenly spaced cost values between half and double the best cost.
tuneResult <- tune(
  svm, y ~ x,
  data = Data,
  ranges = list(
    epsilon = seq(
      # The coarse grid includes epsilon = 0 and 0.1, so subtracting 0.15
      # could produce negative values; clamp the lower bound at 0.
      max(0, tuneResult1$best.model$epsilon - 0.15),
      tuneResult1$best.model$epsilon + 0.15,
      by = 0.01
    ),
    cost = seq(
      2^(log2(tuneResult1$best.model$cost) - 1),
      2^(log2(tuneResult1$best.model$cost) + 1),
      length.out = 6
    )
  )
)
plot(tuneResult)
print(tuneResult)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 0.26 512
##
## - best performance: 0.8322872
# Predictions of the tuned model for every x value in `Data`
tunedVals <- tuneResult$best.model
predictYsvm2 <- predict(tunedVals, Data)

# Visual comparison: raw data, linear fit (red), untuned SVM (blue) and
# tuned SVM (green)
makePlot(x, y)
title("original data + linear regression + svr + tuned svm")
abline(linregress_model, col = "red")
points(Data$x, predictYsvm, col = "blue", pch = 4)
lines(Data$x, predictYsvm, col = "blue")
points(Data$x, predictYsvm2, col = "green", pch = 5)
lines(Data$x, predictYsvm2, col = "green")

# Legend keys repeat the colour, symbol and line type used for each series
# above; without them the legend would show bare labels only.
legend(
  "bottomleft",
  legend = c("Data", "Linear regress", "SVM regress", "tuned SVM regress"),
  col = c("black", "red", "blue", "green"),
  pch = c(5, NA, 4, 5),  # the regression line has no point symbol
  lty = c(2, 1, 1, 1),
  lwd = c(2, 1, 1, 1)
)
# Note: The example shown here is for illustrative purposes only, as in most cases the regression example shown in the previous plot would be a severely overfitted model.
# Error of the tuned SVM model on the same data
errval2 <- Data$y - predictYsvm2
svr_RMSE2 <- rmse(errval2)

# One-row summary table with the RMSE of each of the three models
vals <- matrix(
  c(linregress_RMSE, svr_RMSE, svr_RMSE2),
  nrow = 1,
  dimnames = list(
    "RMSE of model",
    c("Lin regress ", "SVM model ", "Tuned SVM model ")
  )
)
as.table(vals)
## Lin regress SVM model Tuned SVM model
## RMSE of model 1.4337481 1.0318996 0.8043095