Rolling time: 3 hours; prediction time: 2 hours later; training set: 2000 samples; features: 95.

#' Compute error metrics between predictions and observations.
#'
#' @param predicted Numeric vector of model predictions.
#' @param actual Numeric vector of observed values, matched element-wise
#'   with `predicted`.
#' @return Named numeric vector with the elements `MAE` (mean absolute
#'   error), `RMSE` (root mean squared error) and `RELE` (mean relative
#'   error). Empty input yields `NaN` for every metric, and `NA` values
#'   propagate to the result instead of raising an error.
modelErrors <- function(predicted, actual) {
  n <- length(actual)
  residual <- predicted - actual
  # The relative error is undefined where the observation is exactly zero;
  # in that case the absolute prediction is used instead. The denominator
  # is abs(actual) so that negative observations cannot produce a negative
  # relative error.
  relative <- ifelse(
    actual == 0,
    abs(predicted),
    abs(residual) / abs(actual)
  )
  c(
    MAE = mean(abs(residual)),
    RMSE = sqrt(sum(residual^2) / n),
    RELE = mean(relative)
  )
}
#' Map scaled values back onto the range of a reference series.
#'
#' Inverts the scaling `y = 0.1 + 0.8 * (v - min(x)) / (max(x) - min(x))`.
#'
#' @param x Numeric reference data whose minimum and maximum define the
#'   target range.
#' @param y Numeric scaled values to convert back.
#' @return Numeric values with the same shape as `y`.
unormalized <- function(x, y) {
  lower <- min(x)
  spread <- max(x) - lower
  lower + spread * (y - 0.1) / 0.8
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
library(xtable)
# Load two saved workspaces, then plot the rf_sa object.
# NOTE(review): presumably the first file supplies inputsTest/targetsTest and
# the second supplies rf_sa -- confirm against the files' contents.
load("~/PED/newWayPrepareData/finalData/PED_r3_p2_fea0.RData")
load("~/PED/SA/r3_p2_training2000_fea0/dataset_PED_r3_p2_train2000_fea0_rf_sa.RData")
# The plot is combined with theme_bw() and auto-printed at top level.
plot(rf_sa) + theme_bw()

plot of chunk unnamed-chunk-1

# Print the optVariables element of rf_sa.
rf_sa$optVariables
##  [1] "MONTH end"       "DAY end"         "WEEKDAY end"    
##  [4] "HORA end"        "O3 MAX"          "NOx MAX"        
##  [7] "NO2 MAX"         "RH MAX"          "TMP MAX"        
## [10] "WSP MAX"         "CO MAX"          "SO2 MAX"        
## [13] "O3 MIN"          "NOx MIN"         "TMP MIN"        
## [16] "CO MIN"          "SO2 MIN"         "WSP MEAN"       
## [19] "CO MEAN"         "NOx MEDIAN"      "RH MEDIAN"      
## [22] "TMP MEDIAN"      "WSP MEDIAN"      "CO MEDIAN"      
## [25] "NOX_NO2 MEDIAN"  "WDR SUM"         "CO SUM"         
## [28] "SO2 SUM"         "NOX_NO2 SUM"     "MAXO3P"         
## [31] "MINO3P"          "AVGO3P"          "SUMO3P"         
## [34] "MINNO2P"         "SUMNO2P"         "MINNOXP"        
## [37] "MAXRHP"          "MINRHP"          "AVGRHP"         
## [40] "SUMRHP"          "SUMTMPP"         "MINWDRP"        
## [43] "MAXCOP"          "MINCOP"          "MAXNOXP_MAXNO2P"
## [46] "MINNOXP_MINNO2P" "AVGNOXP_AVGNO2P"
######### variable importance #########
# Build the importance table of the model stored in rf_sa$fit and rank it so
# that the largest importance value receives rank 1.
imp <- data.frame(rf_sa$fit$importance)
# Rank on the importance column itself. Calling rank() on the whole data
# frame only works while the table has exactly one column.
imp$rank <- rank(-imp[[1L]])
imp_rank <- imp[order(imp[, "rank"]), ]
save(imp_rank, file = "r3_p2_training2000_fea0_imp_rank.RData")
# Reuse the sorted table rather than sorting a second time; the call is left
# at top level so the LaTeX table is still auto-printed.
xtable(
  imp_rank,
  caption = "rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 95",
  digits = c(3, 3, 0)
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Mon Mar 23 17:50:02 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
##   \hline
##  & IncNodePurity & rank \\ 
##   \hline
## HORA end & 5.762 & 1 \\ 
##   O3 MAX & 4.140 & 2 \\ 
##   WDR SUM & 1.299 & 3 \\ 
##   O3 MIN & 1.230 & 4 \\ 
##   NO2 MAX & 0.835 & 5 \\ 
##   TMP MAX & 0.497 & 6 \\ 
##   WSP MAX & 0.455 & 7 \\ 
##   NOx MIN & 0.423 & 8 \\ 
##   WSP MEAN & 0.404 & 9 \\ 
##   TMP MIN & 0.367 & 10 \\ 
##   NOx MEDIAN & 0.349 & 11 \\ 
##   SUMO3P & 0.313 & 12 \\ 
##   TMP MEDIAN & 0.276 & 13 \\ 
##   NOX\_NO2 SUM & 0.258 & 14 \\ 
##   AVGO3P & 0.257 & 15 \\ 
##   NOX\_NO2 MEDIAN & 0.249 & 16 \\ 
##   NOx MAX & 0.248 & 17 \\ 
##   MAXO3P & 0.234 & 18 \\ 
##   SO2 SUM & 0.219 & 19 \\ 
##   CO MEAN & 0.202 & 20 \\ 
##   CO SUM & 0.199 & 21 \\ 
##   WSP MEDIAN & 0.199 & 22 \\ 
##   CO MIN & 0.196 & 23 \\ 
##   AVGNOXP\_AVGNO2P & 0.195 & 24 \\ 
##   MINO3P & 0.195 & 25 \\ 
##   MAXCOP & 0.167 & 26 \\ 
##   SUMTMPP & 0.166 & 27 \\ 
##   SO2 MAX & 0.155 & 28 \\ 
##   CO MEDIAN & 0.153 & 29 \\ 
##   SUMNO2P & 0.148 & 30 \\ 
##   MINWDRP & 0.146 & 31 \\ 
##   RH MEDIAN & 0.144 & 32 \\ 
##   MINNOXP\_MINNO2P & 0.144 & 33 \\ 
##   MAXNOXP\_MAXNO2P & 0.143 & 34 \\ 
##   SUMRHP & 0.138 & 35 \\ 
##   SO2 MIN & 0.131 & 36 \\ 
##   DAY end & 0.129 & 37 \\ 
##   CO MAX & 0.127 & 38 \\ 
##   AVGRHP & 0.125 & 39 \\ 
##   MAXRHP & 0.123 & 40 \\ 
##   RH MAX & 0.123 & 41 \\ 
##   MINRHP & 0.119 & 42 \\ 
##   MINCOP & 0.112 & 43 \\ 
##   MINNO2P & 0.112 & 44 \\ 
##   MINNOXP & 0.109 & 45 \\ 
##   MONTH end & 0.090 & 46 \\ 
##   WEEKDAY end & 0.070 & 47 \\ 
##    \hline
## \end{tabular}
## \caption{rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 95} 
## \end{table}
# Keep only the test columns named in the importance table of rf_sa$fit.
inputsTestImp <- subset(
  inputsTest,
  select = rownames(rf_sa$fit$importance)
)
######### predict sa+rf ############
r3_p2_train2000_fea0_sa_pred <- rfSA$pred(rf_sa$fit, inputsTestImp)
############# predict rf ######################

load("~/PED/SA/r3_p2_training2000_fea0/dataset_PED_r3_p2_train2000_fea0_rfFit.RData")
rfFit_imp <- as.data.frame(rfFit$finalModel$importance)
# Rank on the first importance column. Calling rank() on the whole data frame
# only works while the table has exactly one column.
rfFit_imp$rank <- rank(-rfFit_imp[[1L]])
rfFit_imp_rank <- rfFit_imp[order(rfFit_imp[, "rank"]), ]

r3_p2_train2000_fea0_pred <- predict(rfFit, inputsTest)
# Put both prediction sets next to the observed targets.
r3_p2_train2000_fea0_predVsReal <- cbind(
  r3_p2_train2000_fea0_pred,
  r3_p2_train2000_fea0_sa_pred,
  targetsTest
)
colnames(r3_p2_train2000_fea0_predVsReal) <- c("RF", "SA+RF", "Real")
############ errors of normalization data set ##############################
# Error metrics of each prediction column against the "Real" column.
error_norm_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal[, "RF"],
  r3_p2_train2000_fea0_predVsReal[, "Real"]
)
error_norm_sa_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal[, "SA+RF"],
  r3_p2_train2000_fea0_predVsReal[, "Real"]
)
error_norm_rf
##     MAE    RMSE    RELE 
## 0.02489 0.03541 0.15192
error_norm_sa_rf
##     MAE    RMSE    RELE 
## 0.02416 0.03364 0.15009
##### denormalize ##############
# NOTE(review): O3 is presumably provided by this file -- confirm.
load("~/PED/newWayPrepareData/finalData/O3.RData")
# Apply unormalized() to every column, with O3 as the reference series.
r3_p2_train2000_fea0_predVsReal_denorm <- apply(
  r3_p2_train2000_fea0_predVsReal,
  2,
  function(x) unormalized(O3, x)
)
colnames(r3_p2_train2000_fea0_predVsReal_denorm) <- c("RF", "SA+RF", "Real")
save(
  r3_p2_train2000_fea0_predVsReal_denorm,
  file = "r3_p2_train2000_fea0_predVsReal_denorm.RData"
)
error_denorm_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal_denorm[, "RF"],
  r3_p2_train2000_fea0_predVsReal_denorm[, "Real"]
)
# error between SA+RF and the real value
error_denorm_sa_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal_denorm[, "SA+RF"],
  r3_p2_train2000_fea0_predVsReal_denorm[, "Real"]
)
error_denorm_rf
##     MAE    RMSE    RELE 
## 0.01251 0.01780 1.08181
error_denorm_sa_rf
##     MAE    RMSE    RELE 
## 0.01214 0.01690 1.10195
# Store all four error vectors together.
save(
  error_norm_rf, error_norm_sa_rf, error_denorm_rf, error_denorm_sa_rf,
  file = "error_r3_p2_training2000_fea0.RData"
)