Genetic algorithm (GA) feature selection

library(foreach)
library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
library(doMC)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(GA)
## Package 'GA' version 2.2
## Type 'citation("GA")' for citing this R package in publications.
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
# Register a 4-core doParallel backend so foreach-based code can run in
# parallel.
registerDoParallel(cores = 4)

# Restore the objects stored in the normalised PED dataset file.
# NOTE(review): the rest of the script uses inputsTrain, targetsTrain,
# inputsTest and targetsTest; presumably they come from this file - confirm.
load(file = "~/PED/datasets/PEDdataNorm.RData")

# Source the error-metric helpers (modelErrors, allModelErrors), echoing the
# sourced code as it is evaluated.
source(file = "~/functions/calculateErrors.R", echo = TRUE)
## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > modelsErrorsTotal <- 0
## 
## > allModelErrors <- function(models, inputsTest, targetsTest, 
## +     dataset) {
## +     error <- function(model) {
## +         pd <- predict(model, newdat .... [TRUNCATED]
# Run the GA-driven random-forest feature search once and cache the result:
# if "rf_ga.RData" already exists the fitted `rf_ga` object is reloaded,
# otherwise the search is run on a 3% subsample of the training data and saved.
if (file.exists("rf_ga.RData")) {
  load("rf_ga.RData")
} else {
  # Seed BEFORE drawing the subsample so the training subset is reproducible;
  # previously only the GA search itself was seeded.
  set.seed(10)
  index <- sample(
    seq_len(nrow(inputsTrain)),
    floor(nrow(inputsTrain) * 0.03),
    replace = FALSE
  )
  # drop = FALSE keeps the two-dimensional shape even for a one-row or
  # one-column subset.
  inputs <- inputsTrain[index, , drop = FALSE]
  targets <- targetsTrain[index]
  # NOTE(review): despite the file name, this stores the TRAINING subsample
  # that is fed to the GA search.
  save(inputs, targets, file = "test.RData")

  # 2-fold cross-validation; RMSE is used both for the internal (subset
  # search) and the external (resampled) fitness.
  ga_ctrl <- gafsControl(
    functions = rfGA,
    method = "cv",
    number = 2,
    metric = c(internal = "RMSE", external = "RMSE"),
    allowParallel = TRUE
  )

  # data.frame() makes the column names syntactic (e.g. spaces become dots),
  # so rf_ga$optVariables is expressed in these converted names.
  inputs <- data.frame(inputs)

  # Re-seed right before the search so the GA starts from the same RNG state
  # as before this block was changed.
  set.seed(10)
  rf_ga <- gafs(
    inputs, targets,
    iters = 20,
    gafsControl = ga_ctrl,
    popSize = 30
  )
  save(rf_ga, file = "rf_ga.RData")
}


# Final model stored in the GA search result.
rfFit <- rf_ga$fit
# Show the model's variable-importance table.
rfFit[["importance"]]
##             IncNodePurity
## MONTH.end          0.1980
## WEEKDAY.end        0.1227
## HORA.end           5.6603
## O3.MAX             3.2141
## NO2.MAX            0.9094
## WSP.MAX            0.4767
## SO2.MAX            0.1788
## O3.MIN             0.7615
## TMP.MIN            0.3821
## WDR.MIN            0.3763
## CO.MIN             0.3504
## NO2.MEAN           0.7639
## RH.MEAN            0.2115
## WSP.MEAN           0.6167
## CO.MEAN            0.4538
## SO2.MEAN           0.2064
## RH.MEDIAN          0.1972
## TMP.MEDIAN         0.2987
## CO.MEDIAN          0.3887
## SO2.MEDIAN         0.1980
## O3.SUM             1.5650
## TMP.SUM            0.2994
## WDR.SUM            0.9794
## SO2.SUM            0.1966
# Names of the predictors selected by the GA search.
n <- rf_ga$optVariables

# Build the test predictors from the selected variable names rather than a
# hand-typed column list: data.frame() applies the same column-name conversion
# that was applied to the training inputs, so columns are matched by NAME and
# the selection stays correct if the GA search is re-run and picks a
# different subset.
test <- data.frame(inputsTest)
if (!all(n %in% colnames(test))) {
  stop(
    "Selected variables missing from inputsTest: ",
    paste(setdiff(n, colnames(test)), collapse = ", "),
    call. = FALSE
  )
}
test <- test[, n, drop = FALSE]

#### Predict on the test data set ####
rfPred <- predict(rfFit, test)

#### Errors on the test data (MAE, RMSE, RELE) ####
modelErrors(rfPred, targetsTest)
##     MAE    RMSE    RELE 
## 0.02595 0.03727 0.15713
#### First 300 test observations: the black line is the actual target value,
#### while the red line is the random-forest prediction.
plot(targetsTest[1:300],type="l")
lines(rfPred[1:300],col="red")

plot of chunk unnamed-chunk-1

Plot the performance of the GA search

# Draw caret's plot of the GA search result `rf_ga` with ggplot2's
# black-and-white theme applied.
# NOTE(review): presumably this shows the internal/external RMSE per GA
# iteration - confirm against the caret documentation.
plot(rf_ga) + theme_bw()

plot of chunk unnamed-chunk-2