In this part we build and tune four models (SVM, neural network, random forest, and linear least squares) separately with the caret package, using 5-fold cross-validation repeated 5 times on the davg data set.
load("/home/bing/training/davg/davgTrainingAndTesting.RData")
setwd("/home/bing/training/davg")
library(doMC)
library(kernlab)
library(caret)
library(PerformanceAnalytics)
registerDoMC(cores = 3)  # run model training on 3 cores in parallel
# SVM with an RBF kernel
# 5-fold cross-validation repeated 5 times
cvcontrol <- trainControl(method = "repeatedcv", number = 5, repeats = 5)
if (file.exists("davg.svmFit.RData")) {
load("davg.svmFit.RData")
} else {
davg.svmFit <- train(davgInputsTrain, davgTargetTrain, method = "svmRadial",
tuneLength = 4, trControl = cvcontrol, scaled = TRUE)
save(davg.svmFit, file = "davg.svmFit.RData")
}
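Once a fit is available, caret makes it easy to inspect the tuning results; a minimal sketch (the same calls work for the other three fits below):

print(davg.svmFit)    # resampled performance for each candidate parameter value
plot(davg.svmFit)     # performance profile across the tuning grid
davg.svmFit$bestTune  # the parameter combination caret selected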
# neural network
if (file.exists("davg.nnetFit.RData")) {
    load("davg.nnetFit.RData")
} else {
    nnet.grid <- expand.grid(.size = 7:15,
        .decay = c(1e-04, 2e-04, 0.005, 0.01))
    davg.nnetFit <- train(davgInputsTrain, davgTargetTrain, method = "nnet",
        trControl = cvcontrol, tuneGrid = nnet.grid)
    save(davg.nnetFit, file = "davg.nnetFit.RData")
}
# random forest
if (file.exists("davg.rfFit.RData")) {
    load("davg.rfFit.RData")
} else {
    library(randomForest)
    davg.rfFit <- train(davgInputsTrain, davgTargetTrain, method = "rf",
        trControl = cvcontrol, tuneLength = 4)
    save(davg.rfFit, file = "davg.rfFit.RData")
}
# linear least squares (lm has no tuning parameters)
if (file.exists("davg.lmFit.RData")) {
    load("davg.lmFit.RData")
} else {
    davg.lmFit <- train(davgInputsTrain, davgTargetTrain, method = "lm",
        trControl = cvcontrol)
    save(davg.lmFit, file = "davg.lmFit.RData")
}
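Because all four fits share the same trainControl, their cross-validated performance can be compared directly with caret's resamples() before looking at the test set. A minimal sketch (for strictly paired comparisons the resampling indices should be fixed, e.g. via the index argument of trainControl; here we assume the defaults are acceptable):

davg.resamps <- resamples(list(svm = davg.svmFit, nnet = davg.nnetFit,
    lm = davg.lmFit, rf = davg.rfFit))
summary(davg.resamps)  # per-model summaries of the resampled RMSE and R-squared
bwplot(davg.resamps)   # box-and-whisker comparison across the resamples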
Errors and Plots
# function to calculate the model errors: MAE, RMSE and RELE
# (relative error; when the actual value is 0, |predicted| is used instead)
modelErrors <- function(predicted, actual) {
    sal <- vector(mode = "numeric", length = 3)
    names(sal) <- c("MAE", "RMSE", "RELE")
    n <- length(actual)
    # element-wise relative error, guarding against division by zero
    p3 <- ifelse(actual == 0, abs(predicted), abs(predicted - actual)/actual)
    sal[1] <- mean(abs(predicted - actual))
    sal[2] <- sqrt(sum((predicted - actual)^2)/n)
    sal[3] <- mean(p3)
    sal
}
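A quick sanity check of the function on toy vectors (the expected values in the comment are hand-computed and unrelated to the davg models):

# predicted = c(1, 2, 3) vs. actual = c(1, 2, 4):
# MAE = 1/3 = 0.333, RMSE = sqrt(1/3) = 0.577, RELE = 0.25/3 = 0.083
modelErrors(predicted = c(1, 2, 3), actual = c(1, 2, 4))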
# predict with the svm, nnet, linear least squares and random forest models and plot
models <- list(svm = davg.svmFit, nnet = davg.nnetFit, linearLeastSquares = davg.lmFit,
    randomForest = davg.rfFit)
davg.preValues <- extractPrediction(models, testX = davgInputsTest, testY = davgTargetTest)
plotObsVsPred(davg.preValues)
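extractPrediction returns one long data frame with a row per prediction; the test-set rows can be pulled out for a closer look (a minimal sketch, assuming the usual obs/pred/model/dataType columns):

head(subset(davg.preValues, dataType == "Test"))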
# build a function that predicts on the test set with a given model and
# calculates that model's errors
davg.error <- function(model) {
pd <- predict(model, newdata = davgInputsTest)
modelErrors(pd, davgTargetTest)
}
rf.error <- davg.error(davg.rfFit)
svm.error <- davg.error(davg.svmFit)
nnet.error <- davg.error(davg.nnetFit)
lm.error <- davg.error(davg.lmFit)
errorAll <- rbind(svm.error, nnet.error, lm.error, rf.error)
errorAll
## MAE RMSE RELE
## svm.error 0.1065 0.1410 0.4464
## nnet.error 0.1079 0.1409 0.4430
## lm.error 0.1094 0.1421 0.4669
## rf.error 0.1045 0.1354 0.4449
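To compare the three error measures across the models at a glance, the table can also be plotted; a minimal sketch using base graphics:

# one group of bars per model, one bar per error measure
barplot(t(errorAll), beside = TRUE, legend.text = TRUE,
    args.legend = list(x = "topleft"), ylab = "error")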
On the davg data set, random forest performs best, with the lowest MAE and RMSE; SVM and the neural network are close behind and nearly tied (SVM has the slightly lower MAE, the neural network the slightly lower RMSE and RELE), and linear least squares comes last on every measure.