GA feature selection
library(foreach)
library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
library(doMC)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(GA)
## Package 'GA' version 2.2
## Type 'citation("GA")' for citing this R package in publications.
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
# Register a doParallel backend with 4 cores for foreach-based computations.
registerDoParallel(cores = 4)
# Restore the saved workspace objects. Presumably this provides inputsTrain,
# targetsTrain, inputsTest and targetsTest, which are used below but not
# defined in this script -- TODO confirm.
load(file = "~/PED/datasets/PEDdataNorm.RData")
# Evaluate the error-metric helpers (modelErrors, allModelErrors, ...),
# echoing each expression as it is run.
source(file = "~/functions/calculateErrors.R", echo = TRUE)
##
## > modelErrors <- function(predicted, actual) {
## + sal <- vector(mode = "numeric", length = 3)
## + names(sal) <- c("MAE", "RMSE", "RELE")
## + me .... [TRUNCATED]
##
## > modelsErrorsTotal <- 0
##
## > allModelErrors <- function(models, inputsTest, targetsTest,
## + dataset) {
## + error <- function(model) {
## + pd <- predict(model, newdat .... [TRUNCATED]
# Genetic-algorithm feature selection with random-forest helpers (caret::gafs).
# A cached result in "rf_ga.RData" is reused when present; otherwise the search
# is run on a 3% subsample of the training data and the result is cached.
if (file.exists("rf_ga.RData")) {
  load("rf_ga.RData")
} else {
  # Seed before drawing the subsample so that the rows used for the search
  # (and therefore the selected variables) are reproducible across runs.
  set.seed(10)
  n_train <- nrow(inputsTrain)
  index <- sample(seq_len(n_train), size = floor(n_train * 0.03),
                  replace = FALSE)
  inputs <- inputsTrain[index, , drop = FALSE]
  targets <- targetsTrain[index]
  # Keep a copy of the subsample the search is run on.
  save(inputs, targets, file = "test.RData")

  ga_ctrl <- gafsControl(
    functions = rfGA,
    method = "cv",
    number = 2,
    metric = c(internal = "RMSE", external = "RMSE"),
    # RMSE is an error measure: state explicitly that both the internal and
    # external metrics are minimized rather than relying on package defaults.
    maximize = c(internal = FALSE, external = FALSE),
    allowParallel = TRUE
  )
  # data.frame() makes the column names syntactic (it appears spaces in the
  # original names become dots), which is the form rf_ga$optVariables uses.
  inputs <- data.frame(inputs)
  # Re-seed immediately before the search so the GA itself is reproducible.
  set.seed(10)
  rf_ga <- gafs(inputs, targets,
                iters = 20,
                gafsControl = ga_ctrl,
                popSize = 30)
  save(rf_ga, file = "rf_ga.RData")
}
# Pull the fitted model out of the GA result and show its variable-importance
# table (printed below as IncNodePurity per selected variable).
rfFit <- rf_ga$fit
print(rfFit$importance)
## IncNodePurity
## MONTH.end 0.1980
## WEEKDAY.end 0.1227
## HORA.end 5.6603
## O3.MAX 3.2141
## NO2.MAX 0.9094
## WSP.MAX 0.4767
## SO2.MAX 0.1788
## O3.MIN 0.7615
## TMP.MIN 0.3821
## WDR.MIN 0.3763
## CO.MIN 0.3504
## NO2.MEAN 0.7639
## RH.MEAN 0.2115
## WSP.MEAN 0.6167
## CO.MEAN 0.4538
## SO2.MEAN 0.2064
## RH.MEDIAN 0.1972
## TMP.MEDIAN 0.2987
## CO.MEDIAN 0.3887
## SO2.MEDIAN 0.1980
## O3.SUM 1.5650
## TMP.SUM 0.2994
## WDR.SUM 0.9794
## SO2.SUM 0.1966
# Names of the variables selected by the GA (syntactic, dot-separated names,
# because the training inputs were passed through data.frame()).
n <- rf_ga$optVariables
# Locate the selected variables in the test inputs by applying the same name
# conversion that data.frame() uses (make.names with unique = TRUE), instead of
# hard-coding the column list: a hard-coded list silently goes stale, or
# mislabels columns, whenever the GA result changes.
test_cols <- match(n, make.names(colnames(inputsTest), unique = TRUE))
if (anyNA(test_cols)) {
  stop("Selected variables missing from inputsTest: ",
       paste(n[is.na(test_cols)], collapse = ", "),
       call. = FALSE)
}
test <- inputsTest[, test_cols, drop = FALSE]
# Rename so the columns match the names the model was fitted with.
colnames(test) <- n
#### Predict on the test data set ####
rfPred <- predict(object = rfFit, newdata = test)
#### Error metrics (MAE, RMSE, RELE) for the test predictions ####
modelErrors(predicted = rfPred, actual = targetsTest)
## MAE RMSE RELE
## 0.02595 0.03727 0.15713
#### The black line is the real value while the red line is the prediction ####
# Only the first 300 test observations are drawn.
plot(targetsTest[1:300],type="l")
# Overlay the corresponding predictions on the same axes.
lines(rfPred[1:300],col="red")
Plot the performance
# Draw the plot produced by the plot method for the gafs result, using
# ggplot2's black-and-white theme.
print(plot(rf_ga) + theme_bw())