# Start from a clean workspace
ls()
## character(0)
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 358331 19.2 592000 31.7 460000 24.6
## Vcells 547652 4.2 1023718 7.9 839610 6.5
memory.size() # Windows-only; stubbed out in R >= 4.2
## [1] 30.09
memory.limit() # Windows-only; stubbed out in R >= 4.2
## [1] 4027
library(MASS)
dtBoston <- Boston # Boston housing data shipped with MASS
str(dtBoston)
## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
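# Only two of these columns are used below: lstat (% lower-status
# population) and medv (median home value, the response). An optional
# quick look at their ranges:
summary(dtBoston[, c("lstat", "medv")])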
attach(dtBoston)
# Plot the data
plot(lstat, medv, pch = 16)
# Create a linear regression model
# (a multivariate alternative: lm(medv ~ lstat + rm + ptratio + dis, dtBoston))
model <- lm(medv ~ lstat, data = dtBoston)
# Add the fitted line
abline(model)
# Make a prediction for each observation
predictedY <- predict(model, dtBoston)
# Display the predictions on the scatter plot
points(lstat, predictedY, col = "blue", pch = 4)
rmse <- function(error) {
  sqrt(mean(error^2))
}
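# Quick sanity check with toy values: sqrt(mean(c(1, 1, 4, 4))) = sqrt(2.5)
rmse(c(1, -1, 2, -2))
## [1] 1.581139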
error <- medv - predictedY # observed response minus predicted values
predictionRMSE <- rmse(error)
predictionRMSE
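# Equivalently, the RMSE of the linear fit can be computed straight from
# its residuals; this should match predictionRMSE above:
sqrt(mean(residuals(model)^2))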
library(e1071)
model <- svm(medv ~ lstat, data = dtBoston)
predictedY <- predict(model, dtBoston)
points(lstat, predictedY, col = "red", pch = 4)

# Error computation
error <- medv - predictedY
svrPredictionRMSE <- rmse(error)
svrPredictionRMSE
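# For reference, the call above relies on e1071's documented defaults for a
# numeric response: eps-regression with a radial kernel, cost = 1 and
# epsilon = 0.1 (gamma defaults to 1/number of predictors, i.e. 1 here).
# An equivalent explicit sketch:
model <- svm(medv ~ lstat, data = dtBoston, type = "eps-regression",
             kernel = "radial", cost = 1, epsilon = 0.1)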
# Perform a grid search over epsilon and cost
tuneResult <- tune(svm, medv ~ lstat, data = dtBoston,
                   ranges = list(epsilon = seq(0, 1, 0.1), cost = 2^(2:9)))
print(tuneResult)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 0.4 8
##
## - best performance: 27.81627
plot(tuneResult)
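# Note that tune() reports mean squared error for regression, so the best
# performance of about 27.8 corresponds to a cross-validated RMSE of
# roughly 5.3:
sqrt(tuneResult$best.performance)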

# Retry with a finer epsilon grid around the previous optimum
tuneResult <- tune(svm, medv ~ lstat, data = dtBoston,
                   ranges = list(epsilon = seq(0.38, 0.42, 0.01), cost = 2^(2:9)))
print(tuneResult)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 0.38 8
##
## - best performance: 27.78382
plot(tuneResult)

# Extract the best model found by the grid search
tunedModel <- tuneResult$best.model
tunedModelY <- predict(tunedModel, dtBoston)
error <- medv - tunedModelY
# this value can be different on your computer
# because the tune method randomly shuffles the data
tunedModelRMSE <- rmse(error)
tunedModelRMSE
## [1] 5.16833
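# For a visual comparison, the tuned predictions can be overlaid on the
# same scatter plot (the colour choice here is arbitrary):
points(lstat, tunedModelY, col = "green", pch = 4)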