# Load the data
library(MASS)   # the Boston housing data ships with MASS
data(Boston)
attach(Boston)
# Plot the data: medv is the response (median home value, in $1000s);
# rm (average rooms per dwelling) and ptratio (pupil-teacher ratio) are the predictors
plot(medv,rm, pch=16)

plot(medv,ptratio, pch=16)
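# Optional aside: a scatterplot matrix shows all three variables at once;
# this is just a convenience and is not required for anything that follows.
pairs(Boston[, c("medv", "rm", "ptratio")], pch=16)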

# Create a linear regression model
model <- lm(medv ~ rm+ptratio, data = Boston)
summary(model)
##
## Call:
## lm(formula = medv ~ rm + ptratio, data = Boston)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -17.672  -2.821   0.102   2.770  39.819
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -2.5612     4.1889  -0.611    0.541
## rm            7.7141     0.4136  18.650   <2e-16 ***
## ptratio      -1.2672     0.1342  -9.440   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.104 on 503 degrees of freedom
## Multiple R-squared:  0.5613, Adjusted R-squared:  0.5595
## F-statistic: 321.7 on 2 and 503 DF,  p-value: < 2.2e-16
# Plot the observed medv values (against their index) and try to add the fitted line.
# abline() can only draw a line from an intercept and a single slope, so with two
# predictors it warns and uses just the intercept and the rm coefficient:
plot(medv, pch=16)
abline(model)
## Warning in abline(model): only using the first two of 3 regression
## coefficients
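# Aside: if you want abline() to draw the actual fitted line, a minimal sketch is to
# fit a single-predictor model (model_rm is just an illustrative name, not part of
# the analysis above):
model_rm <- lm(medv ~ rm, data = Boston)
plot(rm, medv, pch=16)
abline(model_rm, col = "blue")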
# Make a prediction for every observation in Boston
predictedY <- predict(model, Boston)
# Overlay the linear model's predictions as blue crosses (plotted against medv,
# so the overlay on the index-based plot above is only indicative)
points(medv, predictedY, col = "blue", pch=4)
# Helper: root mean squared error
rmse <- function(error) {
  sqrt(mean(error^2))
}

error <- model$residuals   # same as medv - predictedY
predictionRMSE <- rmse(error)
predictionRMSE
## [1] 6.08595
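# Note that 6.08595 is an in-sample RMSE: the model is scored on the same data it was
# fit on. A rough sketch of an out-of-sample check with a random 80/20 split
# (trainIdx and lmTrain are illustrative names, not part of the original analysis):
set.seed(1)
trainIdx <- sample(nrow(Boston), round(0.8 * nrow(Boston)))
lmTrain <- lm(medv ~ rm+ptratio, data = Boston[trainIdx, ])
rmse(Boston$medv[-trainIdx] - predict(lmTrain, Boston[-trainIdx, ]))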
# Fit a support vector regression (SVR) on the same formula, using e1071's defaults
library(e1071)
model2 <- svm(medv ~ rm+ptratio, Boston)
predictedY2 <- predict(model2, Boston)
# Overlay the SVR predictions as red crosses
points(medv, predictedY2, col = "red", pch=4)

error2 <- model2$residuals   # same as medv - predictedY2
predictionRMSE <- rmse(error2)
predictionRMSE
## [1] 5.167693
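# For reference, with a numeric response svm() falls back on e1071's defaults; as far
# as I know the fit above is roughly equivalent to spelling them out like this
# (model2Explicit is just an illustrative name):
model2Explicit <- svm(medv ~ rm+ptratio, data = Boston,
                      type = "eps-regression", kernel = "radial",
                      cost = 1, epsilon = 0.1)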
# Tune the SVR: grid search over epsilon and cost, scored by cross-validation
tuneResult <- tune(svm, medv ~ rm+ptratio, data = Boston,
                   ranges = list(epsilon = seq(0,0.2,0.01), cost = 2^(2:9))
)
print(tuneResult)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 0.2 4
##
## - best performance: 28.32703
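# The reported "best performance" is, by default for regression, a cross-validated
# mean squared error, so its square root is on the same scale as the RMSE values above:
sqrt(tuneResult$best.performance)   # roughly 5.32 for the run shown here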
plot(tuneResult)

tunedModel <- tuneResult$best.model
tunedModelY <- predict(tunedModel, Boston)
error <- tuneResult$best.model$residuals
# this value can be different on your computer
# because the tune method randomly shuffles the data
tunedModelRMSE <- rmse(error)
tunedModelRMSE
## [1] 5.059262
# On the tuning plot, darker regions mean a better model (the cross-validation error
# is closer to zero there), so we can refine the search with a second, narrower grid.
# Here the darker region lies roughly between cost 0 and 150, so we tune again over
# a narrower range. Choosing these parameters is called hyperparameter optimization,
# or model selection.
# Tune again over the narrower grid
tuneResult <- tune(svm, medv ~ rm+ptratio, data = Boston,
                   ranges = list(epsilon = seq(0.1,0.2,0.01), cost = 2^(2:7))
)
print(tuneResult)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 0.2 4
##
## - best performance: 28.32387
plot(tuneResult)

tunedModel <- tuneResult$best.model
tunedModelY <- predict(tunedModel, Boston)
error <- tuneResult$best.model$residuals
# this value can be different on your computer
# because the tune method randomly shuffles the data
tunedModelRMSE <- rmse(error)
tunedModelRMSE
## [1] 5.059262
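# To close the loop, the tuned model's predictions can be overlaid on the earlier
# scatterplot the same way as the lm and default SVR predictions were (green is an
# arbitrary choice here):
points(medv, tunedModelY, col = "green", pch=4)
# In-sample RMSE so far: lm about 6.09, default SVR about 5.17, tuned SVR about 5.06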