rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 55

modelErrors <- function(predicted, actual) {
  sal <- vector(mode="numeric", length=3)
  names(sal) <- c( "MAE", "RMSE", "RELE")
  meanPredicted <- mean(predicted)
  meanActual <- mean(actual)
  sumPred <- sum((predicted - meanPredicted)^2)
  sumActual <- sum((actual - meanActual)^2)
  n<- length(actual)
  p3<-vector(mode="numeric", length=n)
  for (i in c(1:n)) {
    if (actual[i]==0) {p3[i]<-abs(predicted[i])
    } else { p3[i]<-((abs(predicted[i]-actual[i]))/actual[i])
    }}
  sal[1] <- mean(abs(predicted - actual))
  sal[2] <- sqrt(sum((predicted - actual)^2)/n)
  sal[3] <- mean(p3)
  sal
}
unormalized<-function(x,y){
        ((y-0.1)*(max(x)-min(x))/0.8) + min(x)
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
library(xtable)
load("~/PED/newWayPrepareData/finalData/PED_r3_p2_feaOrig.RData")
load("~/PED/SA/r3_p2_training2000_feaOrg/dataset_PED_r3_p2_train2000_feaOrig_rf_sa.RData")
plot(rf_sa) + theme_bw()

plot of chunk unnamed-chunk-1

rf_sa$optVariables
##  [1] "WEEKDAY end"  "HORA end"     "O3 MAX"       "TMP MAX"     
##  [5] "CO MAX"       "SO2 MAX"      "O3 MIN"       "NO2 MIN"     
##  [9] "CO MIN"       "NOX_NO2 MIN"  "NO2 MEAN"     "RH MEAN"     
## [13] "TMP MEAN"     "SO2 MEAN"     "NOX_NO2 MEAN" "NO2 MEDIAN"  
## [17] "WSP MEDIAN"   "CO MEDIAN"    "SO2 MEDIAN"   "O3 SUM"      
## [21] "NOx SUM"      "WDR SUM"      "WSP SUM"      "SO2 SUM"     
## [25] "NOX_NO2 SUM"
#########variable importance#########
data.frame(rf_sa$fit$importance)->imp
imp$rank<-rank(-imp)
imp[ order(imp[,"rank"]), ]
##              IncNodePurity rank
## HORA end            5.8112    1
## O3 MAX              3.4161    2
## O3 SUM              2.5019    3
## WDR SUM             1.2323    4
## O3 MIN              1.0717    5
## WSP SUM             0.7506    6
## NO2 MEAN            0.6908    7
## NOx SUM             0.6246    8
## NO2 MEDIAN          0.4780    9
## TMP MEAN            0.4604   10
## NO2 MIN             0.4351   11
## TMP MAX             0.4266   12
## WSP MEDIAN          0.4045   13
## NOX_NO2 SUM         0.3880   14
## NOX_NO2 MEAN        0.3832   15
## NOX_NO2 MIN         0.3541   16
## RH MEAN             0.3373   17
## CO MEDIAN           0.3307   18
## CO MIN              0.3235   19
## CO MAX              0.3162   20
## SO2 SUM             0.2756   21
## SO2 MEAN            0.2554   22
## SO2 MAX             0.2204   23
## SO2 MEDIAN          0.2066   24
## WEEKDAY end         0.1329   25
xtable(imp[ order(imp[,"rank"]), ],caption="rolling time 3 hour, predict time 2 hours later,training set is 1000 samples,features are 155",digits=c(3,3,0))
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Mon Mar 23 10:48:10 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
##   \hline
##  & IncNodePurity & rank \\ 
##   \hline
## HORA end & 5.811 & 1 \\ 
##   O3 MAX & 3.416 & 2 \\ 
##   O3 SUM & 2.502 & 3 \\ 
##   WDR SUM & 1.232 & 4 \\ 
##   O3 MIN & 1.072 & 5 \\ 
##   WSP SUM & 0.751 & 6 \\ 
##   NO2 MEAN & 0.691 & 7 \\ 
##   NOx SUM & 0.625 & 8 \\ 
##   NO2 MEDIAN & 0.478 & 9 \\ 
##   TMP MEAN & 0.460 & 10 \\ 
##   NO2 MIN & 0.435 & 11 \\ 
##   TMP MAX & 0.427 & 12 \\ 
##   WSP MEDIAN & 0.405 & 13 \\ 
##   NOX\_NO2 SUM & 0.388 & 14 \\ 
##   NOX\_NO2 MEAN & 0.383 & 15 \\ 
##   NOX\_NO2 MIN & 0.354 & 16 \\ 
##   RH MEAN & 0.337 & 17 \\ 
##   CO MEDIAN & 0.331 & 18 \\ 
##   CO MIN & 0.323 & 19 \\ 
##   CO MAX & 0.316 & 20 \\ 
##   SO2 SUM & 0.276 & 21 \\ 
##   SO2 MEAN & 0.255 & 22 \\ 
##   SO2 MAX & 0.220 & 23 \\ 
##   SO2 MEDIAN & 0.207 & 24 \\ 
##   WEEKDAY end & 0.133 & 25 \\ 
##    \hline
## \end{tabular}
## \caption{rolling time 3 hour, predict time 2 hours later,training set is 1000 samples,features are 155} 
## \end{table}
subset(inputsTest,select=rownames(rf_sa$fit$importance))->inputsTestImp
#########predict sa+rf############
rfSA$pred(rf_sa$fit,inputsTestImp)->r3_p2_train2000_feaOrg_sa_pred
#############predict rf######################

load("~/PED/SA/r3_p2_training2000_feaOrg/dataset_PED_r3_p2_train2000_feaOrig_rfFit.RData")
predict(rfFit,inputsTest)->r3_p2_train2000_feaOrg_pred
cbind(r3_p2_train2000_feaOrg_pred,r3_p2_train2000_feaOrg_sa_pred,targetsTest)->r3_p2_train2000_feaOrg_predVsReal
colnames(r3_p2_train2000_feaOrg_predVsReal)<-c("RF","SA+RF","Real")
############errors of normalization data set##############################
modelErrors(r3_p2_train2000_feaOrg_predVsReal[,"RF"],r3_p2_train2000_feaOrg_predVsReal[,"Real"])->error_norm_rf
modelErrors(r3_p2_train2000_feaOrg_predVsReal[,"SA+RF"],r3_p2_train2000_feaOrg_predVsReal[,"Real"])->error_norm_sa_rf
error_norm_rf
##     MAE    RMSE    RELE 
## 0.02533 0.03730 0.15027
error_norm_sa_rf
##     MAE    RMSE    RELE 
## 0.02498 0.03571 0.15229
#####denormalize##############
load("~/PED/newWayPrepareData/finalData/O3.RData")
apply(r3_p2_train2000_feaOrg_predVsReal,2,function(x) unormalized(O3,x))->r3_p2_train2000_feaOrg_predVsReal_denorm
colnames(r3_p2_train2000_feaOrg_predVsReal_denorm)<-c("RF","SA+RF","Real")
save(r3_p2_train2000_feaOrg_predVsReal_denorm,file="r3_p2_train2000_feaOrg_predVsReal_denorm.RData")
modelErrors(r3_p2_train2000_feaOrg_predVsReal_denorm[,"RF"],r3_p2_train2000_feaOrg_predVsReal_denorm[,"Real"])->error_denorm_rf
#error between SA+RF with real value#####
modelErrors(r3_p2_train2000_feaOrg_predVsReal_denorm[,"SA+RF"],r3_p2_train2000_feaOrg_predVsReal_denorm[,"Real"])->error_denorm_sa_rf
error_denorm_rf
##     MAE    RMSE    RELE 
## 0.01273 0.01874 0.96879
error_denorm_sa_rf
##     MAE    RMSE    RELE 
## 0.01255 0.01794 1.05791
save(error_norm_rf,error_norm_sa_rf,error_denorm_rf,error_denorm_sa_rf,file="error_r3_p2_training2000_feaOrg.RData")
##############reshape###############
# data.frame(r3_p2_train2000_feaOrg_predVsReal_denorm)->r3_p2_train2000_feaOrg_predVsReal_denorm
# reshape(r3_p2_train2000_feaOrg_predVsReal_denorm[1:400,],varying=list(names(r3_p2_train2000_feaOrg_predVsReal_denorm)),v.names="Ozone",timevar="modelType",times=names(r3_p2_train2000_feaOrg_predVsReal_denorm),direction = "long")->r3_p2_train2000_feaOrg_predVsReal_denorm_reshape
# 
# 
# 
# 
# pdf("r3_p2_train2000_feaOrg_test400.pdf",width=11,height=6,bg="transparent")
# ggplot(r3_p2_train2000_feaOrg_predVsReal_denorm_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
#         geom_line(aes(linetype=modelType),size=0.8)+
#         geom_point(size=2,fill="white")+
#         xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize2000,features 55,testSize400")->r3_p2_plot
# 
# r3_p2_plot<-r3_p2_plot+theme(
#     panel.background = element_rect(fill = "transparent"), # or theme_blank()
#     panel.grid.minor = element_blank(), 
#     panel.grid.major = element_blank(),
#     plot.background = element_rect(fill = "transparent"),
#     axis.line=element_line(colour="black") 
# )
# 
# r3_p2_plot<-r3_p2_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
# r3_p2_plot<-r3_p2_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
# r3_p2_plot<-r3_p2_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
# r3_p2_plot
# dev.off()