rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 155

modelErrors <- function(predicted, actual) {
  sal <- vector(mode="numeric", length=3)
  names(sal) <- c( "MAE", "RMSE", "RELE")
  meanPredicted <- mean(predicted)
  meanActual <- mean(actual)
  sumPred <- sum((predicted - meanPredicted)^2)
  sumActual <- sum((actual - meanActual)^2)
  n<- length(actual)
  p3<-vector(mode="numeric", length=n)
  for (i in c(1:n)) {
    if (actual[i]==0) {p3[i]<-abs(predicted[i])
    } else { p3[i]<-((abs(predicted[i]-actual[i]))/actual[i])
    }}
  sal[1] <- mean(abs(predicted - actual))
  sal[2] <- sqrt(sum((predicted - actual)^2)/n)
  sal[3] <- mean(p3)
  sal
}
unormalized<-function(x,y){
        ((y-0.1)*(max(x)-min(x))/0.8) + min(x)
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
library(xtable)
load("~/PED/newWayPrepareData/finalData/PED_r3_p2_feaOrig_100.RData")
load("~/PED/SA/r3_p2_training2000_feaOrig_100/dataset_PED_r3_p2_train2000_feaOrig_100_rf_sa.RData")
plot(rf_sa) + theme_bw()

plot of chunk unnamed-chunk-1

rf_sa$optVariables
##  [1] "DAY.end"                "HORA.end"              
##  [3] "WDR.MAX"                "NOX_NO2.MAX"           
##  [5] "NOx.MIN"                "RH.MIN"                
##  [7] "TMP.MIN"                "WDR.MIN"               
##  [9] "WSP.MIN"                "CO.MIN"                
## [11] "NOx.MEAN"               "NO2.MEAN"              
## [13] "WDR.MEAN"               "O3.MEDIAN"             
## [15] "NOx.MEDIAN"             "NO2.MEDIAN"            
## [17] "WSP.MEDIAN"             "CO.MEDIAN"             
## [19] "NO2.SUM"                "RH.SUM"                
## [21] "CO.SUM"                 "NOX_NO2.SUM"           
## [23] "NOx.SUM._.TMP.SUM"      "NOx.MEAN._.RH.SUM"     
## [25] "WDR.MAX._.RH.MEDIAN"    "NO2.MEDIAN._.NO2.MAX"  
## [27] "WSP.MIN._.NO2.MEAN"     "NOx.MEAN._.CO.SUM"     
## [29] "RH.MAX._.TMP.MEDIAN"    "O3.MAX._.NO2.MAX"      
## [31] "RH.MEDIAN._.TMP.MEAN"   "SO2.MEDIAN._.WDR.SUM"  
## [33] "RH.MAX._.WDR.MIN"       "NO2.MEAN._.WEEKDAY.end"
## [35] "NOx.MIN._.CO.MIN"       "O3.SUM._.WSP.MEDIAN"   
## [37] "NO2.MIN._.TMP.MIN"      "O3.SUM._.WDR.MEAN"     
## [39] "HORA.end._.WDR.MAX"     "SO2.MEAN._.NO2.MEAN"   
## [41] "CO.MEDIAN._.WDR.MEDIAN" "NO2.MAX._.WSP.SUM"     
## [43] "TMP.MAX._.RH.MAX"       "MONTH.end._.WDR.SUM"   
## [45] "TMP.MAX._.SEASON.end"   "NOX_NO2.MAX._.TMP.MAX" 
## [47] "SO2.SUM._.HORA.end"     "CO.MEAN._.RH.MIN"      
## [49] "NOx.MEDIAN._.RH.MAX"    "RH.SUM._.CO.MIN"       
## [51] "O3.MEDIAN._.NO2.MEAN"   "HORA.end._.SO2.SUM"    
## [53] "NO2.MEAN._.SO2.MEDIAN"  "CO.SUM._.WSP.MAX"      
## [55] "CO.MEDIAN._.TMP.SUM"    "TMP.MEAN._.WDR.MAX"    
## [57] "SO2.MIN._.O3.MEAN"      "DAY.end._.RH.MIN"
#########variable importance#########
data.frame(rf_sa$fit$importance)->imp
imp$rank<-rank(-imp)
imp[ order(imp[,"rank"]), ]
##                        IncNodePurity rank
## HORA.end                     4.24206    1
## O3.SUM._.WSP.MEDIAN          2.51220    2
## O3.SUM._.WDR.MEAN            2.38739    3
## O3.MEDIAN                    1.21585    4
## HORA.end._.WDR.MAX           0.63823    5
## SO2.SUM._.HORA.end           0.57238    6
## HORA.end._.SO2.SUM           0.51401    7
## NO2.MAX._.WSP.SUM            0.49037    8
## CO.MEDIAN._.WDR.MEDIAN       0.46285    9
## WDR.MEAN                     0.44170   10
## NO2.MEDIAN._.NO2.MAX         0.34984   11
## O3.MAX._.NO2.MAX             0.32758   12
## SO2.MIN._.O3.MEAN            0.31595   13
## WDR.MAX                      0.28278   14
## NO2.MEAN                     0.27195   15
## CO.SUM._.WSP.MAX             0.25817   16
## NO2.SUM                      0.25487   17
## SO2.MEDIAN._.WDR.SUM         0.25308   18
## WSP.MIN._.NO2.MEAN           0.23973   19
## NOx.MIN                      0.21987   20
## NOx.MEDIAN                   0.20623   21
## O3.MEDIAN._.NO2.MEAN         0.20423   22
## CO.MEAN._.RH.MIN             0.19820   23
## NO2.MEDIAN                   0.19630   24
## NO2.MIN._.TMP.MIN            0.18881   25
## TMP.MEAN._.WDR.MAX           0.18569   26
## NOX_NO2.SUM                  0.18225   27
## TMP.MIN                      0.18040   28
## TMP.MAX._.SEASON.end         0.17844   29
## NOX_NO2.MAX                  0.17639   30
## WSP.MEDIAN                   0.16656   31
## NOX_NO2.MAX._.TMP.MAX        0.16462   32
## NO2.MEAN._.WEEKDAY.end       0.15933   33
## NOx.SUM._.TMP.SUM            0.15701   34
## SO2.MEAN._.NO2.MEAN          0.15635   35
## NOx.MEAN._.RH.SUM            0.15535   36
## WSP.MIN                      0.15440   37
## NOx.MEAN                     0.15267   38
## WDR.MAX._.RH.MEDIAN          0.15192   39
## NO2.MEAN._.SO2.MEDIAN        0.15165   40
## NOx.MEDIAN._.RH.MAX          0.15097   41
## RH.MIN                       0.14976   42
## WDR.MIN                      0.14475   43
## CO.MIN                       0.14006   44
## RH.SUM._.CO.MIN              0.13839   45
## NOx.MIN._.CO.MIN             0.13626   46
## MONTH.end._.WDR.SUM          0.13447   47
## CO.SUM                       0.12816   48
## RH.MAX._.WDR.MIN             0.12375   49
## DAY.end._.RH.MIN             0.12313   50
## CO.MEDIAN._.TMP.SUM          0.11873   51
## NOx.MEAN._.CO.SUM            0.11819   52
## DAY.end                      0.11465   53
## RH.MEDIAN._.TMP.MEAN         0.10335   54
## CO.MEDIAN                    0.10268   55
## TMP.MAX._.RH.MAX             0.10084   56
## RH.SUM                       0.09682   57
## RH.MAX._.TMP.MEDIAN          0.09198   58
xtable(imp[ order(imp[,"rank"]), ],caption="rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 155",digits=c(3,3,0))
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Mon Mar 23 11:31:20 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
##   \hline
##  & IncNodePurity & rank \\ 
##   \hline
## HORA.end & 4.242 & 1 \\ 
##   O3.SUM.\_.WSP.MEDIAN & 2.512 & 2 \\ 
##   O3.SUM.\_.WDR.MEAN & 2.387 & 3 \\ 
##   O3.MEDIAN & 1.216 & 4 \\ 
##   HORA.end.\_.WDR.MAX & 0.638 & 5 \\ 
##   SO2.SUM.\_.HORA.end & 0.572 & 6 \\ 
##   HORA.end.\_.SO2.SUM & 0.514 & 7 \\ 
##   NO2.MAX.\_.WSP.SUM & 0.490 & 8 \\ 
##   CO.MEDIAN.\_.WDR.MEDIAN & 0.463 & 9 \\ 
##   WDR.MEAN & 0.442 & 10 \\ 
##   NO2.MEDIAN.\_.NO2.MAX & 0.350 & 11 \\ 
##   O3.MAX.\_.NO2.MAX & 0.328 & 12 \\ 
##   SO2.MIN.\_.O3.MEAN & 0.316 & 13 \\ 
##   WDR.MAX & 0.283 & 14 \\ 
##   NO2.MEAN & 0.272 & 15 \\ 
##   CO.SUM.\_.WSP.MAX & 0.258 & 16 \\ 
##   NO2.SUM & 0.255 & 17 \\ 
##   SO2.MEDIAN.\_.WDR.SUM & 0.253 & 18 \\ 
##   WSP.MIN.\_.NO2.MEAN & 0.240 & 19 \\ 
##   NOx.MIN & 0.220 & 20 \\ 
##   NOx.MEDIAN & 0.206 & 21 \\ 
##   O3.MEDIAN.\_.NO2.MEAN & 0.204 & 22 \\ 
##   CO.MEAN.\_.RH.MIN & 0.198 & 23 \\ 
##   NO2.MEDIAN & 0.196 & 24 \\ 
##   NO2.MIN.\_.TMP.MIN & 0.189 & 25 \\ 
##   TMP.MEAN.\_.WDR.MAX & 0.186 & 26 \\ 
##   NOX\_NO2.SUM & 0.182 & 27 \\ 
##   TMP.MIN & 0.180 & 28 \\ 
##   TMP.MAX.\_.SEASON.end & 0.178 & 29 \\ 
##   NOX\_NO2.MAX & 0.176 & 30 \\ 
##   WSP.MEDIAN & 0.167 & 31 \\ 
##   NOX\_NO2.MAX.\_.TMP.MAX & 0.165 & 32 \\ 
##   NO2.MEAN.\_.WEEKDAY.end & 0.159 & 33 \\ 
##   NOx.SUM.\_.TMP.SUM & 0.157 & 34 \\ 
##   SO2.MEAN.\_.NO2.MEAN & 0.156 & 35 \\ 
##   NOx.MEAN.\_.RH.SUM & 0.155 & 36 \\ 
##   WSP.MIN & 0.154 & 37 \\ 
##   NOx.MEAN & 0.153 & 38 \\ 
##   WDR.MAX.\_.RH.MEDIAN & 0.152 & 39 \\ 
##   NO2.MEAN.\_.SO2.MEDIAN & 0.152 & 40 \\ 
##   NOx.MEDIAN.\_.RH.MAX & 0.151 & 41 \\ 
##   RH.MIN & 0.150 & 42 \\ 
##   WDR.MIN & 0.145 & 43 \\ 
##   CO.MIN & 0.140 & 44 \\ 
##   RH.SUM.\_.CO.MIN & 0.138 & 45 \\ 
##   NOx.MIN.\_.CO.MIN & 0.136 & 46 \\ 
##   MONTH.end.\_.WDR.SUM & 0.134 & 47 \\ 
##   CO.SUM & 0.128 & 48 \\ 
##   RH.MAX.\_.WDR.MIN & 0.124 & 49 \\ 
##   DAY.end.\_.RH.MIN & 0.123 & 50 \\ 
##   CO.MEDIAN.\_.TMP.SUM & 0.119 & 51 \\ 
##   NOx.MEAN.\_.CO.SUM & 0.118 & 52 \\ 
##   DAY.end & 0.115 & 53 \\ 
##   RH.MEDIAN.\_.TMP.MEAN & 0.103 & 54 \\ 
##   CO.MEDIAN & 0.103 & 55 \\ 
##   TMP.MAX.\_.RH.MAX & 0.101 & 56 \\ 
##   RH.SUM & 0.097 & 57 \\ 
##   RH.MAX.\_.TMP.MEDIAN & 0.092 & 58 \\ 
##    \hline
## \end{tabular}
## \caption{rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 155} 
## \end{table}
subset(inputsTest,select=rownames(rf_sa$fit$importance))->inputsTestImp
#########predict sa+rf############
rfSA$pred(rf_sa$fit,inputsTestImp)->r3_p2_train2000_feaOrg_100_sa_pred
#############predict rf######################
load("~/PED/SA/r3_p2_training2000_feaOrig_100/dataset_PED_r3_p2_train2000_feaOrig_100_rfFit.RData")
predict(rfFit,inputsTest)->r3_p2_train2000_feaOrg_100_pred
cbind(r3_p2_train2000_feaOrg_100_pred,r3_p2_train2000_feaOrg_100_sa_pred,targetsTest)->r3_p2_train2000_feaOrg_100_predVsReal
colnames(r3_p2_train2000_feaOrg_100_predVsReal)<-c("RF","SA+RF","Real")
############errors of normalization data set##############################
modelErrors(r3_p2_train2000_feaOrg_100_predVsReal[,"RF"],r3_p2_train2000_feaOrg_100_predVsReal[,"Real"])->error_norm_rf
modelErrors(r3_p2_train2000_feaOrg_100_predVsReal[,"SA+RF"],r3_p2_train2000_feaOrg_100_predVsReal[,"Real"])->error_norm_sa_rf
error_norm_rf
##     MAE    RMSE    RELE 
## 0.02596 0.03571 0.16221
error_norm_sa_rf
##     MAE    RMSE    RELE 
## 0.02679 0.03685 0.16722
#####denormalize##############
load("~/PED/newWayPrepareData/finalData/O3.RData")
apply(r3_p2_train2000_feaOrg_100_predVsReal,2,function(x) unormalized(O3,x))->r3_p2_train2000_feaOrg_100_predVsReal_denorm
colnames(r3_p2_train2000_feaOrg_100_predVsReal_denorm)<-c("RF","SA+RF","Real")
save(r3_p2_train2000_feaOrg_100_predVsReal_denorm,file="r3_p2_train2000_feaOrg_100_predVsReal_denorm.RData")
modelErrors(r3_p2_train2000_feaOrg_100_predVsReal_denorm[,"RF"],r3_p2_train2000_feaOrg_100_predVsReal_denorm[,"Real"])->error_denorm_rf
#error between SA+RF with real value#####
modelErrors(r3_p2_train2000_feaOrg_100_predVsReal_denorm[,"SA+RF"],r3_p2_train2000_feaOrg_100_predVsReal_denorm[,"Real"])->error_denorm_sa_rf
error_denorm_rf
##     MAE    RMSE    RELE 
## 0.01304 0.01794 1.20173
error_denorm_sa_rf
##     MAE    RMSE    RELE 
## 0.01346 0.01852 1.22292
save(error_denorm_rf,error_denorm_sa_rf,error_norm_rf,error_norm_sa_rf,file="error_r3_p2_training2000_feaOrig_100.RData")
##############reshape###############
# data.frame(r3_p2_train2000_feaOrg_100_predVsReal_denorm)->r3_p2_train2000_feaOrg_100_predVsReal_denorm
# reshape(r3_p2_train2000_feaOrg_100_predVsReal_denorm[1:400,],varying=list(names(r3_p2_train2000_feaOrg_100_predVsReal_denorm)),v.names="Ozone",timevar="modelType",times=names(r3_p2_train2000_feaOrg_100_predVsReal_denorm),direction = "long")->r3_p2_train2000_feaOrg_100_predVsReal_denorm_reshape




# pdf("r3_p2_train2000_feaOrg_100_test400.pdf",width=11,height=6,bg="transparent")
# ggplot(r3_p2_train2000_feaOrg_100_predVsReal_denorm_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
#         geom_line(aes(linetype=modelType),size=0.8)+
#         geom_point(size=2,fill="white")+
#         xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize2000,features 155,testSize400")->r3_p2_plot
# 
# r3_p2_plot<-r3_p2_plot+theme(
#     panel.background = element_rect(fill = "transparent"), # or theme_blank()
#     panel.grid.minor = element_blank(), 
#     panel.grid.major = element_blank(),
#     plot.background = element_rect(fill = "transparent"),
#     axis.line=element_line(colour="black") 
# )
# 
# r3_p2_plot<-r3_p2_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
# r3_p2_plot<-r3_p2_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
# r3_p2_plot<-r3_p2_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
# r3_p2_plot
# dev.off()