# Start from a clean workspace
ls()
## character(0)
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 358331 19.2     592000 31.7   460000 24.6
## Vcells 547652  4.2    1023718  7.9   839610  6.5
# memory.size() and memory.limit() are Windows-only helpers
memory.size()
## [1] 30.09
memory.limit()
## [1] 4027
library(MASS)

dtBoston <- Boston
str(dtBoston)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
attach(dtBoston)
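# Note: attach() lets us refer to columns as lstat, medv, etc. below;
# the safer alternative is explicit indexing, e.g. dtBoston$lstat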

# Plot the data
plot(medv ~ lstat, data = dtBoston, pch = 16)

# Create a simple linear regression model
## model <- lm(medv ~ lstat + rm + ptratio + dis, data = dtBoston)
model <- lm(medv ~ lstat, data = dtBoston)

## plot(model) # would draw the regression diagnostic plots, not the data

# Add the fitted line
abline(model)



# Make a prediction for each observation
predictedY <- predict(model, dtBoston)
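# On the training data these predictions equal fitted(model)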

# Display the predictions on the scatter plot
points(lstat, predictedY, col = "blue", pch = 4)


# Root mean squared error of a vector of errors
rmse <- function(error) {
  sqrt(mean(error^2))
}
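# Quick sanity check: rmse(c(3, -4)) is sqrt((9 + 16) / 2), about 3.54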

error <- medv - predictedY # i.e. dtBoston$medv - predictedY
predictionRMSE <- rmse(error)

predictionRMSE
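
# A minimal sketch of a holdout evaluation; the RMSE above is measured on
# the training data, so it is optimistic. The 80/20 split and the seed are
# arbitrary choices, not part of the original analysis:
set.seed(42)
idx <- sample(nrow(dtBoston), round(0.8 * nrow(dtBoston)))
holdoutFit <- lm(medv ~ lstat, data = dtBoston[idx, ])
holdoutPred <- predict(holdoutFit, dtBoston[-idx, ])
rmse(dtBoston$medv[-idx] - holdoutPred) # out-of-sample RMSE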
library(e1071)


model <- svm(medv ~ lstat, data = dtBoston)
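# For a numeric response, svm() defaults to eps-regression with a
# radial kernel (cost = 1, epsilon = 0.1)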

predictedY <- predict(model, dtBoston)

points(lstat, predictedY, col = "red", pch = 4)
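
# Optional: label the two sets of predictions (colors as used above)
legend("topright", legend = c("lm predictions", "svm predictions"),
       col = c("blue", "red"), pch = 4)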

# Error computation
error <- medv - predictedY
svrPredictionRMSE <- rmse(error)

svrPredictionRMSE
# Perform a grid search over epsilon and cost (11 x 8 = 88 combinations)
tuneResult <- tune(svm, medv ~ lstat, data = dtBoston,
                   ranges = list(epsilon = seq(0, 1, 0.1), cost = 2^(2:9)))
print(tuneResult)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  epsilon cost
##      0.4    8
## 
## - best performance: 27.81627
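
# tune() reports mean squared error for regression, so the best
# cross-validated RMSE here is sqrt(27.82), roughly 5.3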
plot(tuneResult)
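
# In the tune plot, darker regions mark lower cross-validation error,
# i.e. better parameter combinations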

# Refine the search on a narrower epsilon grid around the previous optimum
tuneResult <- tune(svm, medv ~ lstat, data = dtBoston,
                   ranges = list(epsilon = seq(0.38, 0.42, 0.01), cost = 2^(2:9)))

print(tuneResult)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  epsilon cost
##     0.38    8
## 
## - best performance: 27.78382
plot(tuneResult)

# Extract the best model found by the grid search

tunedModel <- tuneResult$best.model
tunedModelY <- predict(tunedModel, dtBoston) 
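
# Optional: overlay the tuned model's predictions on the scatter plot
points(lstat, tunedModelY, col = "green", pch = 4)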

error <- medv - tunedModelY  

# this value can differ on your machine because tune() builds
# its cross-validation folds by randomly shuffling the data
tunedModelRMSE <- rmse(error)  

tunedModelRMSE
## [1] 5.16833
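
# An equivalent direct fit with the tuned parameters (tunedModel2 is a
# hypothetical name; your best epsilon/cost may differ because the
# cross-validation folds are random):
tunedModel2 <- svm(medv ~ lstat, data = dtBoston, epsilon = 0.38, cost = 8)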