PhD Report 4: Training models based on the mn database

In this part, we build and tune four models (SVM, neural network, random forest, linear least squares) separately using the “caret” package, with leave-one-out cross-validation (the resampling control is created with number = 5 and repeats = 5).

# Load the saved workspace image. The chunks below use mnInputsTrain,
# mnTargetTrain, mnInputsTest and mnTargetTest, presumably provided by this
# file (TODO confirm).
load("/home/bing/training/mn/mnTrainAndTest.RData")
# Switch to the project directory so that the relative "*.RData" cache paths
# used below resolve there.
setwd("/home/bing/training/mn")
library(doMC)
library(kernlab)
library(caret)
# Register a doMC parallel backend with 3 cores.
registerDoMC(cores = 3)
# SVM with a radial basis function (RBF) kernel.
# Resampling settings handed to train() through trControl.
# NOTE(review): number/repeats are supplied together with method = "LOOCV";
# confirm in the caret documentation whether they have any effect for it.
mn.cvcontrol <- trainControl(method = "LOOCV", number = 5, repeats = 5)
# Train and cache the model only when no cached fit exists on disk; otherwise
# restore the previously saved object.
if (!file.exists("mn.svmFit.RData")) {
    mn.svmFit <- train(
        mnInputsTrain, mnTargetTrain,
        method = "svmRadial",
        trControl = mn.cvcontrol,
        tuneLength = 4,
        scaled = TRUE
    )
    save(mn.svmFit, file = "mn.svmFit.RData")
} else {
    load("mn.svmFit.RData")
}
# Neural network (caret method "nnet").
# Train and cache the model only when no cached fit exists on disk; otherwise
# restore the previously saved object.
if (!file.exists("mn.nnetFit.RData")) {
    # Tuning grid: every .size in 7..15 crossed with four .decay values.
    nnet.grid <- expand.grid(
        .size = 7:15,
        .decay = c(1e-04, 2e-04, 0.005, 0.01)
    )
    mn.nnetFit <- train(
        mnInputsTrain, mnTargetTrain,
        method = "nnet",
        tuneGrid = nnet.grid,
        trControl = mn.cvcontrol
    )
    save(mn.nnetFit, file = "mn.nnetFit.RData")
} else {
    load("mn.nnetFit.RData")
}
# Random forest (caret method "rf").
# Train and cache the model only when no cached fit exists on disk; otherwise
# restore the previously saved object.
if (!file.exists("mn.rfFit.RData")) {
    # randomForest is attached only when the model actually has to be trained.
    library(randomForest)
    mn.rfFit <- train(
        mnInputsTrain, mnTargetTrain,
        method = "rf",
        tuneLength = 4,
        trControl = mn.cvcontrol
    )
    save(mn.rfFit, file = "mn.rfFit.RData")
} else {
    load("mn.rfFit.RData")
}
# Linear least squares (caret method "lm").
# Train and cache the model only when no cached fit exists on disk; otherwise
# restore the previously saved object.
if (!file.exists("mn.lmFit.RData")) {
    # NOTE(review): tuneLength is passed for symmetry with the other models;
    # confirm whether method = "lm" has anything to tune.
    mn.lmFit <- train(
        mnInputsTrain, mnTargetTrain,
        method = "lm",
        tuneLength = 4,
        trControl = mn.cvcontrol
    )
    save(mn.lmFit, file = "mn.lmFit.RData")
} else {
    load("mn.lmFit.RData")
}

Errors and Plots

# Compute summary error metrics for a set of predictions.
#
# Arguments:
#   predicted  numeric vector of model predictions.
#   actual     numeric vector of observed values, same length as `predicted`.
#
# Returns a named numeric vector with three elements:
#   MAE   mean absolute error, mean(|predicted - actual|).
#   RMSE  root mean squared error, sqrt(sum((predicted - actual)^2) / n),
#         where n = length(actual).
#   RELE  mean relative error, mean(|predicted - actual| / |actual|); for
#         observations whose actual value is exactly 0 the term falls back to
#         |predicted| so that no division by zero occurs.
#
# The relative error is scaled by |actual| (not the signed value) so that
# negative targets contribute a positive error instead of cancelling out.
# Empty input yields NaN for all three metrics instead of raising an error.
modelErrors <- function(predicted, actual) {
    n <- length(actual)
    abs_err <- abs(predicted - actual)

    # Relative error per observation; zero targets are patched afterwards.
    rel_err <- abs_err / abs(actual)
    # which() drops NA comparisons, so NA targets simply stay NA in rel_err.
    zero_idx <- which(actual == 0)
    rel_err[zero_idx] <- abs(predicted[zero_idx])

    c(
        MAE = mean(abs_err),
        RMSE = sqrt(sum((predicted - actual)^2) / n),
        RELE = mean(rel_err)
    )
}
# Gather the four fitted models (SVM, neural network, linear least squares,
# random forest) in a named list, extract their predictions with the test
# inputs/targets supplied, and plot observed versus predicted values.
models <- list(
    svm = mn.svmFit,
    nnet = mn.nnetFit,
    linearLeatSquare = mn.lmFit,
    randomForest = mn.rfFit
)
mn.preValues <- extractPrediction(
    models,
    testX = mnInputsTest,
    testY = mnTargetTest
)
plotObsVsPred(mn.preValues)

plot of chunk caret for mn.2

# Predict with a fitted model and compute its error metrics.
#
# Arguments:
#   model    a fitted model object accepted by predict().
#   inputs   predictor data passed to predict() as `newdata`; defaults to the
#            global test inputs mnInputsTest.
#   targets  observed values the predictions are compared against; defaults to
#            the global test targets mnTargetTest.
#
# Returns whatever modelErrors() returns for the predictions and `targets`.
# The defaults are evaluated lazily, so the globals are only looked up when
# the corresponding argument is not supplied.
mn.error <- function(model, inputs = mnInputsTest, targets = mnTargetTest) {
    predictions <- predict(model, newdata = inputs)
    modelErrors(predictions, targets)
}
# Error metrics of each fitted model, computed in the same order in which
# they are stacked below.
svm.error <- mn.error(mn.svmFit)
nnet.error <- mn.error(mn.nnetFit)
lm.error <- mn.error(mn.lmFit)
rf.error <- mn.error(mn.rfFit)
# One row per model; the row names are taken from the variable names.
errorAll <- rbind(svm.error, nnet.error, lm.error, rf.error)
errorAll

##                MAE   RMSE   RELE
## svm.error  0.08734 0.1194 0.3504
## nnet.error 0.09312 0.1224 0.3764
## lm.error   0.10303 0.1293 0.4484
## rf.error   0.09127 0.1181 0.3771

# Plot the errors of the different models.
# Column 1 of errorAll, one bar per row, titled "MAE", with the row names
# shown in the legend. The stray empty argument was removed and the legend
# argument is spelled out in full instead of relying on partial matching.
barplot(errorAll[, 1], main = "MAE", col = rainbow(6), beside = TRUE,
    ylim = c(0, 0.2), legend.text = rownames(errorAll))

plot of chunk caret for mn.2

# Column 2 of errorAll, one bar per row, titled "RMSE", with the row names
# shown in the legend. The stray empty argument was removed and the legend
# argument is spelled out in full instead of relying on partial matching.
barplot(errorAll[, 2], main = "RMSE", col = rainbow(6), beside = TRUE,
    ylim = c(0, 0.2), legend.text = rownames(errorAll))

plot of chunk caret for mn.2

# Column 3 of errorAll, one bar per row, titled "RELE", with the row names
# shown in the legend. The legend argument is spelled out in full instead of
# relying on partial matching.
barplot(errorAll[, 3], main = "RELE", col = rainbow(6), beside = TRUE,
    ylim = c(0, 0.6), legend.text = rownames(errorAll))

plot of chunk caret for mn.2

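For the models trained on the mn database, SVM performs best, followed by random forest, then the neural network, with linear least squares last. This ranking follows MAE; random forest has a slightly lower RMSE than SVM, and SVM has the lowest relative error.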