PhD Report 8 – Using the “caret” package to build models on the 5-hour delay (P5) database

In this part, four models (SVM, neural network, linear least squares, random forest) are built and tuned separately with the “caret” package on the P5 database. Resampling is configured through trainControl using the leave-one-out cross-validation method, with number set to 5 and repeats set to 5.

# Load the saved P5 workspace image. The objects used further down
# (P5InputsTrain, P5TargetTrain, P5InputsTest, P5TargetTest) are not defined
# anywhere else in this script, so they presumably come from this file --
# TODO confirm.
load("/home/bing/training/P5/P5TrainingAndTesting.RData")
# Work inside the P5 directory so that the relative *.RData paths used below
# for caching fitted models and saving results resolve there.
setwd("/home/bing/training/P5")
library(doMC)
library(kernlab)
library(caret)
# Register the doMC parallel backend with 3 cores.
registerDoMC(cores = 3)
# Resampling setup shared by the train() calls in this report.
# NOTE(review): in caret, number/repeats describe k-fold / repeated CV and
# look unused when method = "LOOCV" -- confirm which scheme was intended.
cvcontrol <- trainControl(method = "LOOCV", number = 5, repeats = 5)

# SVM with a radial basis function kernel: fit once and cache the result on
# disk; later runs simply reload the cached model.
if (!file.exists("P5.svmFit.RData")) {
    P5.svmFit <- train(
        x = P5InputsTrain,
        y = P5TargetTrain,
        method = "svmRadial",
        trControl = cvcontrol,
        tuneLength = 4,
        scaled = TRUE
    )
    save(P5.svmFit, file = "P5.svmFit.RData")
} else {
    load("P5.svmFit.RData")
}
# Neural network (nnet): tune the hidden-layer size and the weight decay over
# an explicit grid; reuse the cached fit when it already exists on disk.
# NOTE(review): nnet() defaults to linout = FALSE (logistic output unit);
# confirm the target lies in [0, 1] or whether linout = TRUE was wanted.
if (!file.exists("P5.nnetFit.RData")) {
    nnet.grid <- expand.grid(
        .size = 7:15,
        .decay = c(1e-04, 2e-04, 0.005, 0.01)
    )
    P5.nnetFit <- train(
        x = P5InputsTrain,
        y = P5TargetTrain,
        method = "nnet",
        tuneGrid = nnet.grid,
        trControl = cvcontrol
    )
    save(P5.nnetFit, file = "P5.nnetFit.RData")
} else {
    load("P5.nnetFit.RData")
}
# Random forest: train with tuneLength = 3 candidate tuning values unless a
# cached fit is already on disk. randomForest is attached only when a fresh
# fit is actually needed.
if (!file.exists("P5.rfFit.RData")) {
    library(randomForest)
    P5.rfFit <- train(
        x = P5InputsTrain,
        y = P5TargetTrain,
        method = "rf",
        tuneLength = 3,
        trControl = cvcontrol
    )
    save(P5.rfFit, file = "P5.rfFit.RData")
} else {
    load("P5.rfFit.RData")
}
# Linear least squares (lm): fit through caret and cache the result on disk;
# an existing cache file is loaded instead of refitting.
# NOTE(review): tuneLength probably has no effect for "lm" -- confirm.
if (!file.exists("P5.lmFit.RData")) {
    P5.lmFit <- train(
        x = P5InputsTrain,
        y = P5TargetTrain,
        method = "lm",
        tuneLength = 4,
        trControl = cvcontrol
    )
    save(P5.lmFit, file = "P5.lmFit.RData")
} else {
    load("P5.lmFit.RData")
}

In this part, I make predictions with the different models, plot the predicted values against the observed values, and calculate the errors.

# Compute error metrics between model predictions and observed values.
#
# Args:
#   predicted: numeric vector of model predictions.
#   actual:    numeric vector of observed values, same length as `predicted`.
#
# Returns:
#   A named numeric vector with three elements:
#     MAE  - mean absolute error,
#     RMSE - root mean squared error,
#     RELE - mean relative error, |predicted - actual| / |actual|; where the
#            observed value is exactly 0 the term falls back to |predicted|.
#   For empty input every metric is NaN.
#
# Raises:
#   An error when `predicted` and `actual` differ in length (they would
#   otherwise be silently recycled).
modelErrors <- function(predicted, actual) {
    if (length(predicted) != length(actual)) {
        stop("'predicted' and 'actual' must have the same length", call. = FALSE)
    }
    n <- length(actual)
    absError <- abs(predicted - actual)
    # Divide by the magnitude of the observation: dividing by the signed value
    # would turn errors on negative observations into negative terms that
    # cancel out positive ones in the mean.
    relError <- ifelse(actual == 0, abs(predicted), absError / abs(actual))
    c(
        MAE = mean(absError),
        RMSE = sqrt(sum(absError^2) / n),
        RELE = mean(relError)
    )
}
# Collect the four fitted models under display names, extract their
# predictions (passing the test inputs and targets), and plot observed
# against predicted values.
models <- list(
    svm = P5.svmFit,
    nnet = P5.nnetFit,
    randomForest = P5.rfFit,
    lm = P5.lmFit
)
P5.preValues <- extractPrediction(
    models,
    testX = P5InputsTest,
    testY = P5TargetTest
)
plotObsVsPred(P5.preValues)

plot of chunk delay5H.2

# Score one fitted model on the P5 test data.
#
# Args:
#   model: a fitted model object accepted by predict().
#
# Returns:
#   The result of modelErrors() for the model's predictions on P5InputsTest
#   compared with P5TargetTest (both are read from the enclosing environment).
P5.error <- function(model) {
    modelErrors(predict(model, newdata = P5InputsTest), P5TargetTest)
}
# Error metrics for each fitted model, computed with P5.error().
rf.error <- P5.error(P5.rfFit)
nnet.error <- P5.error(P5.nnetFit)
svm.error <- P5.error(P5.svmFit)
lm.error <- P5.error(P5.lmFit)
# Stack the per-model results into one table, one row per model. Row names
# are spelled out so the labels do not depend on how rbind() deparses its
# arguments.
P5.errorAll <- rbind(
    rf.error = rf.error,
    nnet.error = nnet.error,
    svm.error = svm.error,
    lm.error = lm.error
)
P5.errorAll

##               MAE   RMSE   RELE
## rf.error   0.1227 0.1557 0.5199
## nnet.error 0.1276 0.1615 0.5318
## svm.error  0.1206 0.1568 0.4678
## lm.error   0.1318 0.1648 0.5567

# Persist the error table (relative path, so it lands in the current
# working directory).
save(list = "P5.errorAll", file = "P5.errorAll.RData")