In this part, we build and tune four models (SVM, neural network, linear least squares, and random forest) separately using the “caret” package, with the leave-one-out cross-validation method of trainControl (number = 5, repeats = 5), based on the P10 database.
# Load the saved P10 workspace from disk.
# NOTE(review): presumably this provides InputsTrain, TargetTrain, InputsTest
# and TargetTest, which the code below uses without defining them - confirm.
load("/home/bing/training/P10/P 10 TrainingAndTesting.RData")
# Packages used below: doMC supplies registerDoMC(); caret supplies
# trainControl(), train(), extractPrediction() and plotObsVsPred().
# NOTE(review): kernlab is presumably the backend for the "svmRadial" method;
# no direct use of PerformanceAnalytics is visible in this section - confirm
# it is still needed.
library(doMC)
library(kernlab)
library(PerformanceAnalytics)
library(caret)
# Register the doMC parallel backend with 3 cores.
registerDoMC(cores = 3)
# SVM with a radial (RBF) kernel ----
# Resampling settings; this object is also passed to the other train() calls
# in this script.
# NOTE(review): the method is "LOOCV", so number/repeats are presumably ignored
# by caret - confirm whether repeated 5-fold CV ("repeatedcv") was intended.
cvcontrol <- trainControl(
  method = "LOOCV",
  number = 5,
  repeats = 5
)
# Train only when no cached fit exists on disk; otherwise restore the cache.
if (!file.exists("P10.svmFit.RData")) {
  P10.svmFit <- train(
    InputsTrain, TargetTrain,
    method = "svmRadial",
    tuneLength = 4,
    trControl = cvcontrol,
    scaled = TRUE
  )
  save(P10.svmFit, file = "P10.svmFit.RData")
} else {
  load("P10.svmFit.RData")
}
# Neural network (caret method "nnet") ----
# NOTE(review): linout is not passed on to nnet; if the target is not scaled
# to [0, 1], confirm whether linout = TRUE is needed for regression.
# Train only when no cached fit exists on disk; otherwise restore the cache.
if (!file.exists("P10.nnetFit.RData")) {
  # Tuning grid: every combination of the size (7 to 15) and decay values.
  nnet.grid <- expand.grid(
    .size = 7:15,
    .decay = c(1e-04, 2e-04, 0.005, 0.01)
  )
  P10.nnetFit <- train(
    InputsTrain, TargetTrain,
    method = "nnet",
    trControl = cvcontrol,
    tuneGrid = nnet.grid
  )
  save(P10.nnetFit, file = "P10.nnetFit.RData")
} else {
  load("P10.nnetFit.RData")
}
# Random forest (caret method "rf") ----
# Train only when no cached fit exists on disk; otherwise restore the cache.
if (!file.exists("P10.rfFit.RData")) {
  # randomForest is attached only when a fresh fit is actually needed.
  library(randomForest)
  P10.rfFit <- train(
    InputsTrain, TargetTrain,
    method = "rf",
    trControl = cvcontrol,
    tuneLength = 3
  )
  save(P10.rfFit, file = "P10.rfFit.RData")
} else {
  load("P10.rfFit.RData")
}
# Linear least squares (caret method "lm") ----
# NOTE(review): tuneLength = 4 is kept from the original call; "lm" presumably
# has nothing to tune, so it likely has no effect - confirm.
# Train only when no cached fit exists on disk; otherwise restore the cache.
if (!file.exists("P10.lmFit.RData")) {
  P10.lmFit <- train(
    InputsTrain, TargetTrain,
    method = "lm",
    trControl = cvcontrol,
    tuneLength = 4
  )
  save(P10.lmFit, file = "P10.lmFit.RData")
} else {
  load("P10.lmFit.RData")
}
In this part, I will make predictions with the different models, plot them, and calculate the errors.
#' Compute error metrics for a set of predictions
#'
#' @param predicted Numeric vector of predicted values.
#' @param actual Numeric vector of observed values, the same length as
#'   `predicted`.
#' @return Named numeric vector with elements `MAE` (mean absolute error),
#'   `RMSE` (root mean squared error) and `RELE` (mean relative error). All
#'   three are `NaN` when the inputs are empty.
modelErrors <- function(predicted, actual) {
  # Fail loudly instead of silently recycling a shorter vector.
  stopifnot(length(predicted) == length(actual))
  abs_err <- abs(predicted - actual)
  # The relative error divides by |actual| so that negative targets cannot
  # produce negative "errors" that cancel out in the mean. Where the target is
  # exactly zero the ratio is undefined, so |predicted| is used instead.
  rel_err <- ifelse(actual == 0, abs(predicted), abs_err / abs(actual))
  c(
    MAE = mean(abs_err),
    RMSE = sqrt(sum(abs_err^2) / length(actual)),
    RELE = mean(rel_err)
  )
}
# Collect the predictions of every fitted model and plot the observed values
# against the predicted ones.
models <- list(
  svm = P10.svmFit,
  nnet = P10.nnetFit,
  randomForest = P10.rfFit,
  lm = P10.lmFit
)
P10.preValues <- extractPrediction(
  models,
  testX = InputsTest,
  testY = TargetTest
)
plotObsVsPred(P10.preValues)
#' Compute the error metrics of one fitted model on a test set
#'
#' @param model A fitted model object accepted by `predict()`.
#' @param inputs Predictor data passed to `predict()` as `newdata`. Defaults
#'   to the global `InputsTest`.
#' @param targets Observed values to compare the predictions against.
#'   Defaults to the global `TargetTest`.
#' @return The value of `modelErrors()` for the model's predictions.
P10.error <- function(model, inputs = InputsTest, targets = TargetTest) {
  # The defaults are evaluated lazily at call time, so P10.error(model) still
  # reads the global test set exactly as before.
  pd <- predict(model, newdata = inputs)
  modelErrors(pd, targets)
}
# Error metrics of each fitted model.
rf.error <- P10.error(P10.rfFit)
nnet.error <- P10.error(P10.nnetFit)
svm.error <- P10.error(P10.svmFit)
lm.error <- P10.error(P10.lmFit)
# One row per model, one column per metric.
P10.errorAll <- rbind(
  rf.error = rf.error,
  nnet.error = nnet.error,
  svm.error = svm.error,
  lm.error = lm.error
)
P10.errorAll
# Output recorded from a previous run:
## MAE RMSE RELE
## rf.error 0.1190 0.1508 0.6466
## nnet.error 0.1233 0.1583 0.6973
## svm.error 0.1177 0.1511 0.6087
## lm.error 0.1245 0.1577 0.6879
# Persist the error table for later use.
save(P10.errorAll, file = "P10.errorAll.RData")