Rolling window of 3 hours, prediction 2 hours ahead, training set of 1000 samples

# Compute three error measures between predictions and observations.
#
# Args:
#   predicted: numeric vector of predicted values.
#   actual:    numeric vector of observed values; its length is used as the
#              sample count n.
#
# Returns:
#   A named numeric vector with the elements "MAE" (mean absolute error),
#   "RMSE" (root mean squared error) and "RELE" (mean relative error).
#   For zero-length input every element is NaN.
modelErrors <- function(predicted, actual) {
  n <- length(actual)
  absErr <- abs(predicted - actual)
  # Per-sample relative error. Where the observation is exactly zero the
  # ratio is undefined, so the absolute prediction is used instead. The
  # denominator is abs(actual) so that a negative observation cannot yield a
  # negative "error" that cancels against the others in the mean.
  relErr <- ifelse(actual == 0, abs(predicted), absErr / abs(actual))
  c(
    MAE = mean(absErr),
    RMSE = sqrt(sum((predicted - actual)^2) / n),
    RELE = mean(relErr)
  )
}
# Map values from the [0.1, 0.9] normalised scale back onto the range of x:
# y = 0.1 maps to min(x) and y = 0.9 maps to max(x).
#
# Args:
#   x: numeric vector whose minimum and maximum define the target range.
#   y: numeric vector of normalised values to convert.
#
# Returns:
#   Numeric vector the same length as y, expressed on the scale of x.
unormalized <- function(x, y) {
  lower <- min(x)
  span <- max(x) - lower
  ((y - 0.1) * span / 0.8) + lower
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
# Restore the saved workspace objects used by the rest of the script.
# NOTE(review): presumably the first file provides inputsTest/targetsTest and
# the second provides rf_sa -- the object names are not visible here; confirm.
load("~/PED/newWayPrepareData/PED_r3_p2_fea0.RData")
load("~/PED/SA/dataset_ PED_r3_p2_train1000_fea0 _rf_sa.RData")
# Draw the rf_sa object with the black-and-white ggplot2 theme.
plot(rf_sa) + theme_bw()

plot of chunk unnamed-chunk-1

######### variable importance #########
# Print the importance table stored in the fitted model inside rf_sa.
rf_sa$fit$importance
##                 IncNodePurity
## MONTH end             0.07030
## DAY end               0.10428
## HORA end              2.87993
## O3 MAX                1.98430
## RH MAX                0.08843
## TMP MAX               0.22421
## WSP MAX               0.30190
## CO MAX                0.19210
## NOx MIN               0.39950
## NO2 MIN               0.31407
## RH MIN                0.07615
## CO MIN                0.13444
## O3 MEAN               1.17542
## RH MEAN               0.07862
## SO2 MEAN              0.23890
## NOX_NO2 MEAN          0.17239
## RH MEDIAN             0.07325
## TMP MEDIAN            0.16185
## WDR MEDIAN            0.33309
## WSP MEDIAN            0.10283
## CO MEDIAN             0.16553
## SO2 MEDIAN            0.23799
## TMP SUM               0.19082
## WSP SUM               0.21848
## NOX_NO2 SUM           0.17062
## MINNO2P               0.11210
## MAXNOXP               0.12635
## MAXRHP                0.11030
## SUMRHP                0.11868
## MINTMPP               0.08854
## AVGTMPP               0.10697
## MAXWDRP               0.11499
## SUMWDRP               0.12562
## MINWSPP               0.06687
## MINCOP                0.09507
## MINNOXP_MINNO2P       0.10698
## AVGNOXP_AVGNO2P       0.12955
# Restrict the test inputs to the columns named in the importance table.
inputsTestImp <- subset(
  inputsTest,
  select = rownames(rf_sa$fit$importance)
)
######### predict SA + RF ############
r3_p2_train1000_fea0_sa_pred <- rfSA$pred(rf_sa$fit, inputsTestImp)
############# predict RF ######################
load("~/PED/SA/dataset_ PED_r3_p2_train1000_fea0 _rfFit.RData")
r3_p2_train1000_fea0_pred <- predict(rfFit, inputsTest)
# Put both prediction vectors next to the test targets, one column each.
r3_p2_train1000_fea0_predVsReal <- cbind(
  r3_p2_train1000_fea0_pred,
  r3_p2_train1000_fea0_sa_pred,
  targetsTest
)
colnames(r3_p2_train1000_fea0_predVsReal) <- c("RF", "SA+RF", "Real")
############ errors on the normalised data set ##############################
# RF against the real values.
modelErrors(
  r3_p2_train1000_fea0_predVsReal[, "RF"],
  r3_p2_train1000_fea0_predVsReal[, "Real"]
)
##     MAE    RMSE    RELE 
## 0.02776 0.03715 0.18073
# SA+RF against the real values.
modelErrors(
  r3_p2_train1000_fea0_predVsReal[, "SA+RF"],
  r3_p2_train1000_fea0_predVsReal[, "Real"]
)
##     MAE    RMSE    RELE 
## 0.02619 0.03527 0.16861
##### denormalise ##############
load("~/PED/newWayPrepareData/r3_p2_ext.RData")
O3 <- r3_p2_ext[, "O3"]
# Rescale every column with unormalized(), using the range of the O3 column.
r3_p2_train1000_fea0_predVsReal_denorm <- apply(
  r3_p2_train1000_fea0_predVsReal,
  MARGIN = 2,
  FUN = function(column) unormalized(O3, column)
)
colnames(r3_p2_train1000_fea0_predVsReal_denorm) <- c("RF", "SA+RF", "Real")
save(
  r3_p2_train1000_fea0_predVsReal_denorm,
  file = "r3_p2_training1000_fea0_predVsReal_denorm.RData"
)
# Error between RF and the real values on the denormalised scale.
modelErrors(
  r3_p2_train1000_fea0_predVsReal_denorm[, "RF"],
  r3_p2_train1000_fea0_predVsReal_denorm[, "Real"]
)
##     MAE    RMSE    RELE 
## 0.01395 0.01867 1.47699
# Error between SA+RF and the real values on the denormalised scale.
modelErrors(
  r3_p2_train1000_fea0_predVsReal_denorm[, "SA+RF"],
  r3_p2_train1000_fea0_predVsReal_denorm[, "Real"]
)
##     MAE    RMSE    RELE 
## 0.01316 0.01772 1.33952
############## reshape ###############
# Convert the prediction matrix to a data frame. data.frame() sanitises the
# column names, so "SA+RF" is called "SA.RF" from here on.
r3_p2_train1000_fea0_predVsReal_denorm <- data.frame(
  r3_p2_train1000_fea0_predVsReal_denorm
)
# Long format: one row per (sample, model) pair, with the value in "Ozone" and
# the source column in "modelType". head() keeps at most the first 400 rows;
# unlike [1:400, ] it does not pad with all-NA rows when fewer are available.
r3_p2_train1000_fea0_predVsReal_denorm_reshape <- reshape(
  head(r3_p2_train1000_fea0_predVsReal_denorm, 400),
  varying = list(names(r3_p2_train1000_fea0_predVsReal_denorm)),
  v.names = "Ozone",
  timevar = "modelType",
  times = names(r3_p2_train1000_fea0_predVsReal_denorm),
  direction = "long"
)

# Build the complete plot object before opening the PDF device, so that an
# error while constructing it cannot leave a device open.
r3_p2_plot <- ggplot(
  r3_p2_train1000_fea0_predVsReal_denorm_reshape,
  aes(x = id, y = Ozone, group = modelType, color = modelType, shape = modelType)
) +
  geom_line(aes(linetype = modelType), size = 0.8) +
  geom_point(size = 2, fill = "white") +
  xlab("Samples") +
  ylab("Ozone") +
  ggtitle("Rolling time 3, predict next 2 hours, trainSize1000,features 195,testSize400") +
  theme(
    # transparent backgrounds and no grid lines
    panel.background = element_rect(fill = "transparent"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent"),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(colour = "black", size = 17),
    axis.title.y = element_text(colour = "black", size = 17),
    axis.text.x = element_text(colour = "black", size = 15),
    axis.text.y = element_text(colour = "black", size = 13),
    legend.title = element_text(colour = "black", size = 17, face = "bold")
  )

# Write the figure to a PDF file, then draw it again on the current device.
pdf("r3_p2_train1000_fea0_test400.pdf", width = 11, height = 6, bg = "transparent")
r3_p2_plot
dev.off()
## pdf 
##   2
r3_p2_plot

plot of chunk unnamed-chunk-1

############################# high level ############################
# Label each sample "H" when its real value exceeds 0.11 and "L" otherwise,
# then split the data frame into the two groups.
r3_p2_train1000_fea0_predVsReal_denorm$level <- ifelse(
  r3_p2_train1000_fea0_predVsReal_denorm[, "Real"] > 0.11, "H", "L"
)
r3_p2_train1000_fea0_predVsReal_denorm_H <-
  r3_p2_train1000_fea0_predVsReal_denorm[
    r3_p2_train1000_fea0_predVsReal_denorm[, "Real"] > 0.11,
  ]
# NOTE(review): this name lacks the "fea0" part used by the "_H" sibling; it is
# left unchanged in case code outside this section refers to it.
r3_p2_train1000_predVsReal_denorm_L <-
  r3_p2_train1000_fea0_predVsReal_denorm[
    r3_p2_train1000_fea0_predVsReal_denorm[, "Real"] <= 0.11,
  ]
#### errors between RF and Real
modelErrors(
  r3_p2_train1000_fea0_predVsReal_denorm_H[, "RF"],
  r3_p2_train1000_fea0_predVsReal_denorm_H[, "Real"]
)
##     MAE    RMSE    RELE 
## 0.02273 0.02960 0.16574
### errors between SA + RF and Real ####
modelErrors(
  r3_p2_train1000_fea0_predVsReal_denorm_H[, "SA.RF"],
  r3_p2_train1000_fea0_predVsReal_denorm_H[, "Real"]
)
##     MAE    RMSE    RELE 
## 0.02442 0.03058 0.17768
#################### plot high level
# Long format over the first three columns (the "level" column is carried
# along unchanged). head() keeps at most the first 400 rows; unlike [1:400, ]
# it does not pad with all-NA rows when the subset has fewer than 400 rows.
r3_p2_train1000_fea0_test400_H_reshape <- reshape(
  head(r3_p2_train1000_fea0_predVsReal_denorm_H, 400),
  varying = list(names(r3_p2_train1000_fea0_predVsReal_denorm_H[, 1:3])),
  v.names = "Ozone",
  timevar = "modelType",
  times = names(r3_p2_train1000_fea0_predVsReal_denorm_H[, 1:3]),
  direction = "long"
)

# Build the complete plot object before opening the PDF device, so that an
# error while constructing it cannot leave a device open.
r3_p2_H_plot <- ggplot(
  r3_p2_train1000_fea0_test400_H_reshape,
  aes(x = id, y = Ozone, group = modelType, color = modelType, shape = modelType)
) +
  geom_line(aes(linetype = modelType), size = 0.8) +
  geom_point(size = 2, fill = "white") +
  xlab("Samples") +
  ylab("Ozone") +
  ggtitle("Rolling time 3, predict next 2 hours, trainSize1000,feature 95,testSize400,high Level") +
  theme(
    # transparent backgrounds and no grid lines
    panel.background = element_rect(fill = "transparent"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    plot.background = element_rect(fill = "transparent"),
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(colour = "black", size = 17),
    axis.title.y = element_text(colour = "black", size = 17),
    axis.text.x = element_text(colour = "black", size = 15),
    axis.text.y = element_text(colour = "black", size = 13),
    legend.title = element_text(colour = "black", size = 17, face = "bold")
  )

# Write the figure to a PDF file, then draw it again on the current device.
pdf("r3_p2_train1000_fea0_test400_H.pdf", width = 11, height = 6, bg = "transparent")
r3_p2_H_plot
dev.off()
## pdf 
##   2
r3_p2_H_plot

plot of chunk unnamed-chunk-1