rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 195

modelErrors <- function(predicted, actual) {
  sal <- vector(mode="numeric", length=3)
  names(sal) <- c( "MAE", "RMSE", "RELE")
  meanPredicted <- mean(predicted)
  meanActual <- mean(actual)
  sumPred <- sum((predicted - meanPredicted)^2)
  sumActual <- sum((actual - meanActual)^2)
  n<- length(actual)
  p3<-vector(mode="numeric", length=n)
  for (i in c(1:n)) {
    if (actual[i]==0) {p3[i]<-abs(predicted[i])
    } else { p3[i]<-((abs(predicted[i]-actual[i]))/actual[i])
    }}
  sal[1] <- mean(abs(predicted - actual))
  sal[2] <- sqrt(sum((predicted - actual)^2)/n)
  sal[3] <- mean(p3)
  sal
}
unormalized<-function(x,y){
        ((y-0.1)*(max(x)-min(x))/0.8) + min(x)
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
library(xtable)
load("~/PED/newWayPrepareData/finalData/PED_r3_p2_fea100.RData")
load("~/PED/SA/r3_p2_training2000_fea100/dataset_PED_r3_p2_train2000_fea100_rf_sa.RData")
plot(rf_sa) + theme_bw()

plot of chunk unnamed-chunk-1

rf_sa$optVariables
##  [1] "MONTH.end"                 "SEASON.end"               
##  [3] "HORA.end"                  "O3.MAX"                   
##  [5] "RH.MAX"                    "WDR.MAX"                  
##  [7] "WSP.MAX"                   "O3.MIN"                   
##  [9] "RH.MIN"                    "TMP.MIN"                  
## [11] "WSP.MIN"                   "CO.MIN"                   
## [13] "NOX_NO2.MIN"               "RH.MEAN"                  
## [15] "WDR.MEAN"                  "O3.MEDIAN"                
## [17] "NO2.MEDIAN"                "TMP.MEDIAN"               
## [19] "SO2.MEDIAN"                "SO2.SUM"                  
## [21] "MAXO3P"                    "SUMO3P"                   
## [23] "AVGNO2P"                   "SUMNO2P"                  
## [25] "MAXNOXP"                   "AVGNOXP"                  
## [27] "SUMNOXP"                   "MAXRHP"                   
## [29] "AVGRHP"                    "SUMRHP"                   
## [31] "MINTMPP"                   "AVGTMPP"                  
## [33] "MINWDRP"                   "AVGWDRP"                  
## [35] "SUMWDRP"                   "MAXWSPP"                  
## [37] "MAXCOP"                    "AVGSO2P"                  
## [39] "MAXNOXP_MAXNO2P"           "TMP.MEDIAN._.WEEKDAY.end" 
## [41] "MINO3P._.NOX_NO2.MAX"      "MINNOXP_MINNO2P._.NO2.MIN"
## [43] "AVGWDRP._.TMP.SUM"         "AVGRHP._.WDR.MEDIAN"      
## [45] "CO.SUM._.SEASON.end"       "WEEKDAY.end._.DAY.end"    
## [47] "AVGRHP._.NOx.MEDIAN"       "O3.SUM._.RH.MEDIAN"       
## [49] "SUMWSPP._.MINSO2P"         "NOx.MEDIAN._.NOX_NO2.MEAN"
## [51] "RH.MAX._.WEEKDAY.end"      "TMP.MAX._.MAXO3P"         
## [53] "RH.SUM._.MINNO2P"          "MINWDRP._.MAXWDRP"        
## [55] "WDR.MAX._.WDR.MEDIAN"      "WSP.MAX._.MAXNOXP"        
## [57] "MINNOXP._.AVGRHP"          "AVGO3P._.SUMCOP"          
## [59] "TMP.MEAN._.CO.MEAN"        "SUMNO2P._.RH.MEAN"        
## [61] "MINNOXP_MINNO2P._.O3.SUM"  "MONTH.end._.CO.SUM"       
## [63] "WDR.SUM._.MONTH.end"       "TMP.SUM._.NO2.MEAN"       
## [65] "SUMO3P._.AVGNO2P"          "AVGTMPP._.RH.MIN"         
## [67] "NO2.MEDIAN._.NO2.MAX"      "TMP.SUM._.NOx.MIN"        
## [69] "SUMNOXP._.NO2.MIN"         "SUMNO2P._.SUMNOXP"        
## [71] "AVGNOXP._.WEEKDAY.end"     "NOX_NO2.MIN._.CO.MEAN"    
## [73] "WSP.MEDIAN._.RH.MAX"       "AVGNO2P._.WDR.SUM"        
## [75] "AVGO3P._.SUMNOXP"          "TMP.MAX._.AVGWDRP"        
## [77] "NOx.MEAN._.WDR.MIN"        "SO2.MIN._.SUMNOXP"        
## [79] "CO.MEAN._.AVGNO2P"         "WSP.MIN._.NO2.MIN"
#########variable importance#########
data.frame(rf_sa$fit$importance)->imp
imp$rank<-rank(-imp)
imp[ order(imp[,"rank"]), ]
##                           IncNodePurity rank
## HORA.end                        5.07029    1
## O3.MAX                          3.26165    2
## MINNOXP_MINNO2P._.O3.SUM        1.41351    3
## O3.MEDIAN                       1.05932    4
## WDR.MEAN                        0.64890    5
## O3.MIN                          0.52258    6
## O3.SUM._.RH.MEDIAN              0.51157    7
## TMP.SUM._.NOx.MIN               0.49041    8
## NO2.MEDIAN._.NO2.MAX            0.37768    9
## WSP.MAX                         0.36550   10
## WSP.MIN._.NO2.MIN               0.31092   11
## NOx.MEDIAN._.NOX_NO2.MEAN       0.29965   12
## WDR.MAX                         0.28613   13
## TMP.SUM._.NO2.MEAN              0.27605   14
## NOx.MEAN._.WDR.MIN              0.23266   15
## NO2.MEDIAN                      0.22961   16
## MINNOXP_MINNO2P._.NO2.MIN       0.20603   17
## TMP.MIN                         0.20355   18
## NOX_NO2.MIN._.CO.MEAN           0.19936   19
## SUMO3P                          0.18760   20
## TMP.MEAN._.CO.MEAN              0.17574   21
## MINO3P._.NOX_NO2.MAX            0.16488   22
## NOX_NO2.MIN                     0.16187   23
## WDR.MAX._.WDR.MEDIAN            0.16181   24
## AVGRHP._.NOx.MEDIAN             0.15846   25
## AVGO3P._.SUMCOP                 0.15784   26
## AVGNO2P._.WDR.SUM               0.15095   27
## WSP.MEDIAN._.RH.MAX             0.14740   28
## MAXO3P                          0.14424   29
## SUMNOXP._.NO2.MIN               0.13691   30
## WSP.MIN                         0.13624   31
## SUMO3P._.AVGNO2P                0.12972   32
## CO.MIN                          0.12667   33
## TMP.MEDIAN                      0.12519   34
## AVGRHP._.WDR.MEDIAN             0.12197   35
## AVGTMPP                         0.11426   36
## SO2.SUM                         0.11402   37
## SUMNO2P._.SUMNOXP               0.11199   38
## WDR.SUM._.MONTH.end             0.11171   39
## AVGO3P._.SUMNOXP                0.11110   40
## TMP.MAX._.MAXO3P                0.10706   41
## SUMWSPP._.MINSO2P               0.09899   42
## AVGSO2P                         0.09819   43
## MAXWSPP                         0.09573   44
## CO.MEAN._.AVGNO2P               0.09403   45
## CO.SUM._.SEASON.end             0.09233   46
## AVGTMPP._.RH.MIN                0.09028   47
## RH.MIN                          0.08603   48
## MINWDRP._.MAXWDRP               0.08389   49
## SUMWDRP                         0.08383   50
## MINTMPP                         0.08284   51
## MAXNOXP_MAXNO2P                 0.08160   52
## MAXRHP                          0.08068   53
## RH.SUM._.MINNO2P                0.08057   54
## MAXCOP                          0.08035   55
## TMP.MEDIAN._.WEEKDAY.end        0.08015   56
## AVGNOXP._.WEEKDAY.end           0.08007   57
## WEEKDAY.end._.DAY.end           0.07848   58
## MINWDRP                         0.07690   59
## SO2.MIN._.SUMNOXP               0.07656   60
## MINNOXP._.AVGRHP                0.07654   61
## RH.MAX._.WEEKDAY.end            0.07473   62
## TMP.MAX._.AVGWDRP               0.07375   63
## SUMRHP                          0.07332   64
## AVGRHP                          0.07299   65
## WSP.MAX._.MAXNOXP               0.07295   66
## AVGWDRP._.TMP.SUM               0.07099   67
## SO2.MEDIAN                      0.06959   68
## RH.MEAN                         0.06592   69
## MONTH.end._.CO.SUM              0.06435   70
## SUMNO2P._.RH.MEAN               0.06309   71
## RH.MAX                          0.06132   72
## AVGWDRP                         0.06043   73
## SUMNO2P                         0.06041   74
## AVGNOXP                         0.05885   75
## SUMNOXP                         0.05863   76
## MAXNOXP                         0.05630   77
## AVGNO2P                         0.05585   78
## MONTH.end                       0.03835   79
## SEASON.end                      0.01350   80
xtable(imp[ order(imp[,"rank"]), ],caption="rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 195",digits=c(3,3,0))
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Mon Mar 23 11:21:03 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
##   \hline
##  & IncNodePurity & rank \\ 
##   \hline
## HORA.end & 5.070 & 1 \\ 
##   O3.MAX & 3.262 & 2 \\ 
##   MINNOXP\_MINNO2P.\_.O3.SUM & 1.414 & 3 \\ 
##   O3.MEDIAN & 1.059 & 4 \\ 
##   WDR.MEAN & 0.649 & 5 \\ 
##   O3.MIN & 0.523 & 6 \\ 
##   O3.SUM.\_.RH.MEDIAN & 0.512 & 7 \\ 
##   TMP.SUM.\_.NOx.MIN & 0.490 & 8 \\ 
##   NO2.MEDIAN.\_.NO2.MAX & 0.378 & 9 \\ 
##   WSP.MAX & 0.366 & 10 \\ 
##   WSP.MIN.\_.NO2.MIN & 0.311 & 11 \\ 
##   NOx.MEDIAN.\_.NOX\_NO2.MEAN & 0.300 & 12 \\ 
##   WDR.MAX & 0.286 & 13 \\ 
##   TMP.SUM.\_.NO2.MEAN & 0.276 & 14 \\ 
##   NOx.MEAN.\_.WDR.MIN & 0.233 & 15 \\ 
##   NO2.MEDIAN & 0.230 & 16 \\ 
##   MINNOXP\_MINNO2P.\_.NO2.MIN & 0.206 & 17 \\ 
##   TMP.MIN & 0.204 & 18 \\ 
##   NOX\_NO2.MIN.\_.CO.MEAN & 0.199 & 19 \\ 
##   SUMO3P & 0.188 & 20 \\ 
##   TMP.MEAN.\_.CO.MEAN & 0.176 & 21 \\ 
##   MINO3P.\_.NOX\_NO2.MAX & 0.165 & 22 \\ 
##   NOX\_NO2.MIN & 0.162 & 23 \\ 
##   WDR.MAX.\_.WDR.MEDIAN & 0.162 & 24 \\ 
##   AVGRHP.\_.NOx.MEDIAN & 0.158 & 25 \\ 
##   AVGO3P.\_.SUMCOP & 0.158 & 26 \\ 
##   AVGNO2P.\_.WDR.SUM & 0.151 & 27 \\ 
##   WSP.MEDIAN.\_.RH.MAX & 0.147 & 28 \\ 
##   MAXO3P & 0.144 & 29 \\ 
##   SUMNOXP.\_.NO2.MIN & 0.137 & 30 \\ 
##   WSP.MIN & 0.136 & 31 \\ 
##   SUMO3P.\_.AVGNO2P & 0.130 & 32 \\ 
##   CO.MIN & 0.127 & 33 \\ 
##   TMP.MEDIAN & 0.125 & 34 \\ 
##   AVGRHP.\_.WDR.MEDIAN & 0.122 & 35 \\ 
##   AVGTMPP & 0.114 & 36 \\ 
##   SO2.SUM & 0.114 & 37 \\ 
##   SUMNO2P.\_.SUMNOXP & 0.112 & 38 \\ 
##   WDR.SUM.\_.MONTH.end & 0.112 & 39 \\ 
##   AVGO3P.\_.SUMNOXP & 0.111 & 40 \\ 
##   TMP.MAX.\_.MAXO3P & 0.107 & 41 \\ 
##   SUMWSPP.\_.MINSO2P & 0.099 & 42 \\ 
##   AVGSO2P & 0.098 & 43 \\ 
##   MAXWSPP & 0.096 & 44 \\ 
##   CO.MEAN.\_.AVGNO2P & 0.094 & 45 \\ 
##   CO.SUM.\_.SEASON.end & 0.092 & 46 \\ 
##   AVGTMPP.\_.RH.MIN & 0.090 & 47 \\ 
##   RH.MIN & 0.086 & 48 \\ 
##   MINWDRP.\_.MAXWDRP & 0.084 & 49 \\ 
##   SUMWDRP & 0.084 & 50 \\ 
##   MINTMPP & 0.083 & 51 \\ 
##   MAXNOXP\_MAXNO2P & 0.082 & 52 \\ 
##   MAXRHP & 0.081 & 53 \\ 
##   RH.SUM.\_.MINNO2P & 0.081 & 54 \\ 
##   MAXCOP & 0.080 & 55 \\ 
##   TMP.MEDIAN.\_.WEEKDAY.end & 0.080 & 56 \\ 
##   AVGNOXP.\_.WEEKDAY.end & 0.080 & 57 \\ 
##   WEEKDAY.end.\_.DAY.end & 0.078 & 58 \\ 
##   MINWDRP & 0.077 & 59 \\ 
##   SO2.MIN.\_.SUMNOXP & 0.077 & 60 \\ 
##   MINNOXP.\_.AVGRHP & 0.077 & 61 \\ 
##   RH.MAX.\_.WEEKDAY.end & 0.075 & 62 \\ 
##   TMP.MAX.\_.AVGWDRP & 0.074 & 63 \\ 
##   SUMRHP & 0.073 & 64 \\ 
##   AVGRHP & 0.073 & 65 \\ 
##   WSP.MAX.\_.MAXNOXP & 0.073 & 66 \\ 
##   AVGWDRP.\_.TMP.SUM & 0.071 & 67 \\ 
##   SO2.MEDIAN & 0.070 & 68 \\ 
##   RH.MEAN & 0.066 & 69 \\ 
##   MONTH.end.\_.CO.SUM & 0.064 & 70 \\ 
##   SUMNO2P.\_.RH.MEAN & 0.063 & 71 \\ 
##   RH.MAX & 0.061 & 72 \\ 
##   AVGWDRP & 0.060 & 73 \\ 
##   SUMNO2P & 0.060 & 74 \\ 
##   AVGNOXP & 0.059 & 75 \\ 
##   SUMNOXP & 0.059 & 76 \\ 
##   MAXNOXP & 0.056 & 77 \\ 
##   AVGNO2P & 0.056 & 78 \\ 
##   MONTH.end & 0.038 & 79 \\ 
##   SEASON.end & 0.014 & 80 \\ 
##    \hline
## \end{tabular}
## \caption{rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 195} 
## \end{table}
subset(inputsTest,select=rownames(rf_sa$fit$importance))->inputsTestImp
#########predict sa+rf############
rfSA$pred(rf_sa$fit,inputsTestImp)->r3_p2_train2000_fea100_sa_pred
#############predict rf######################
load("~/PED/SA/r3_p2_training2000_fea100/dataset_PED_r3_p2_train2000_fea100_rfFit.RData")
predict(rfFit,inputsTest)->r3_p2_train2000_fea100_pred
cbind(r3_p2_train2000_fea100_pred,r3_p2_train2000_fea100_sa_pred,targetsTest)->r3_p2_train2000_fea100_predVsReal
colnames(r3_p2_train2000_fea100_predVsReal)<-c("RF","SA+RF","Real")
############errors of normalization data set##############################
modelErrors(r3_p2_train2000_fea100_predVsReal[,"RF"],r3_p2_train2000_fea100_predVsReal[,"Real"])->error_norm_rf
modelErrors(r3_p2_train2000_fea100_predVsReal[,"SA+RF"],r3_p2_train2000_fea100_predVsReal[,"Real"])->error_norm_sa_rf
error_norm_rf
##     MAE    RMSE    RELE 
## 0.02749 0.03693 0.17613
error_norm_sa_rf
##     MAE    RMSE    RELE 
## 0.02483 0.03423 0.15604
#####denormalize##############
load("~/PED/newWayPrepareData/finalData/O3.RData")

apply(r3_p2_train2000_fea100_predVsReal,2,function(x) unormalized(O3,x))->r3_p2_train2000_fea100_predVsReal_denorm
colnames(r3_p2_train2000_fea100_predVsReal_denorm)<-c("RF","SA+RF","Real")
save(r3_p2_train2000_fea100_predVsReal_denorm,file="r3_p2_training2000_fea100_predVsReal_denorm.RData")
modelErrors(r3_p2_train2000_fea100_predVsReal_denorm[,"RF"],r3_p2_train2000_fea100_predVsReal_denorm[,"Real"])->error_denorm_rf
#error between SA+RF with real value#####
modelErrors(r3_p2_train2000_fea100_predVsReal_denorm[,"SA+RF"],r3_p2_train2000_fea100_predVsReal_denorm[,"Real"])->error_denorm_sa_rf
error_denorm_rf
##     MAE    RMSE    RELE 
## 0.01381 0.01856 1.37691
error_denorm_sa_rf
##     MAE    RMSE    RELE 
## 0.01248 0.01720 1.19082
##############reshape###############
# data.frame(r3_p2_train2000_fea100_predVsReal_denorm)->r3_p2_train2000_fea100_predVsReal_denorm
# reshape(r3_p2_train2000_fea100_predVsReal_denorm[1:400,],varying=list(names(r3_p2_train2000_fea100_predVsReal_denorm)),v.names="Ozone",timevar="modelType",times=names(r3_p2_train2000_fea100_predVsReal_denorm),direction = "long")->r3_p2_train2000_fea100_predVsReal_denorm_reshape



# 
# pdf("r3_p2_train2000_fea100_test400.pdf",width=11,height=6,bg="transparent")
# ggplot(r3_p2_train2000_fea100_predVsReal_denorm_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
#         geom_line(aes(linetype=modelType),size=0.8)+
#         geom_point(size=2,fill="white")+
#         xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize2000,features 195,testSize400")->r3_p2_plot
# 
# r3_p2_plot<-r3_p2_plot+theme(
#     panel.background = element_rect(fill = "transparent"), # or theme_blank()
#     panel.grid.minor = element_blank(), 
#     panel.grid.major = element_blank(),
#     plot.background = element_rect(fill = "transparent"),
#     axis.line=element_line(colour="black") 
# )
# 
# r3_p2_plot<-r3_p2_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
# r3_p2_plot<-r3_p2_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
# r3_p2_plot<-r3_p2_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
# r3_p2_plot
# dev.off()
# r3_p2_plot

# #############################high level############################
# r3_p2_train2000_fea100_predVsReal_denorm$level<-ifelse(r3_p2_train2000_fea100_predVsReal_denorm[,"Real"]>0.11,"H","L")
# r3_p2_train2000_fea100_predVsReal_denorm[r3_p2_train2000_fea100_predVsReal_denorm[,"Real"]>0.11,]->r3_p2_train2000_fea100_predVsReal_denorm_H
# r3_p2_train2000_fea100_predVsReal_denorm[r3_p2_train2000_fea100_predVsReal_denorm[,"Real"]<=0.11,]->r3_p2_train2000_predVsReal_denorm_L
# ####errors between RF and Real
# modelErrors(r3_p2_train2000_fea100_predVsReal_denorm_H[,"RF"],r3_p2_train2000_fea100_predVsReal_denorm_H[,"Real"])
# ###errors between SA + RF Real####
# modelErrors(r3_p2_train2000_fea100_predVsReal_denorm_H[,"SA.RF"],r3_p2_train2000_fea100_predVsReal_denorm_H[,"Real"])
# ####################plot high level
# reshape(r3_p2_train2000_fea100_predVsReal_denorm_H[1:400,],varying=list(names(r3_p2_train2000_fea100_predVsReal_denorm_H[,1:3])),v.names="Ozone",timevar="modelType",times=names(r3_p2_train2000_fea100_predVsReal_denorm_H[,1:3]),direction = "long")->r3_p2_train2000_fea100_test400_H_reshape
# 
# pdf("r3_p2_train2000_fea100_test400_H.pdf",width=11,height=6,bg="transparent")
# ggplot(r3_p2_train2000_fea100_test400_H_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
#         geom_line(aes(linetype=modelType),size=0.8)+
#         geom_point(size=2,fill="white")+
#         xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize1000,feature 95,testSize400,high Level")->r3_p2_H_plot
# 
# r3_p2_H_plot<-r3_p2_H_plot+theme(
#     panel.background = element_rect(fill = "transparent"), # or theme_blank()
#     panel.grid.minor = element_blank(), 
#     panel.grid.major = element_blank(),
#     plot.background = element_rect(fill = "transparent"),
#     axis.line=element_line(colour="black") 
# )
# 
# r3_p2_H_plot<-r3_p2_H_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
# r3_p2_H_plot<-r3_p2_H_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
# r3_p2_H_plot<-r3_p2_H_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
# r3_p2_H_plot
# dev.off()
# r3_p2_H_plot