rolling time 3 hour, predict time 2 hours later,training set is 1000 samples,features are 195
modelErrors <- function(predicted, actual) {
sal <- vector(mode="numeric", length=3)
names(sal) <- c( "MAE", "RMSE", "RELE")
meanPredicted <- mean(predicted)
meanActual <- mean(actual)
sumPred <- sum((predicted - meanPredicted)^2)
sumActual <- sum((actual - meanActual)^2)
n<- length(actual)
p3<-vector(mode="numeric", length=n)
for (i in c(1:n)) {
if (actual[i]==0) {p3[i]<-abs(predicted[i])
} else { p3[i]<-((abs(predicted[i]-actual[i]))/actual[i])
}}
sal[1] <- mean(abs(predicted - actual))
sal[2] <- sqrt(sum((predicted - actual)^2)/n)
sal[3] <- mean(p3)
sal
}
unormalized<-function(x,y){
((y-0.1)*(max(x)-min(x))/0.8) + min(x)
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
load("~/PED/newWayPrepareData/PED_r3_p2_fea100.RData")
load("~/PED/SA/dataset_ PED_r3_p2_train1000_fea100 _rf_sa.RData")
plot(rf_sa) + theme_bw()
#########variable importance#########
rf_sa$fit$importance
## IncNodePurity
## MONTH.end 0.022958
## SEASON.end 0.006223
## HORA.end 1.913777
## O3.MAX 0.912410
## NOx.MAX 0.081623
## TMP.MAX 0.060516
## WDR.MAX 0.059109
## NOX_NO2.MAX 0.068610
## O3.MIN 0.339177
## RH.MIN 0.036629
## WSP.MIN 0.035357
## CO.MIN 0.036234
## SO2.MIN 0.219026
## NOX_NO2.MIN 0.061174
## NO2.MEAN 0.165850
## WDR.MEAN 0.071972
## WSP.MEAN 0.054254
## SO2.MEAN 0.121142
## NOX_NO2.MEAN 0.053668
## RH.MEDIAN 0.041109
## NOX_NO2.MEDIAN 0.073764
## NOx.SUM 0.115834
## WSP.SUM 0.067668
## CO.SUM 0.065650
## NOX_NO2.SUM 0.050019
## SUMO3P 0.102877
## MINNOXP 0.055243
## SUMNOXP 0.039542
## MINRHP 0.037669
## AVGTMPP 0.050249
## MINWDRP 0.040194
## SUMWDRP 0.069609
## MINWSPP 0.023907
## AVGWSPP 0.046536
## AVGCOP 0.050489
## MAXSO2P 0.056650
## MAXNOXP_MAXNO2P 0.039780
## SUMNOXP_SUMNO2P 0.050187
## MAXWSPP._.WDR.MEAN 0.053375
## AVGNOXP_AVGNO2P._.MAXWSPP 0.058424
## WSP.SUM._.CO.MEDIAN 0.097036
## WDR.MAX._.NO2.MEDIAN 0.121565
## NOX_NO2.SUM._.NOx.MAX 0.064296
## SUMWSPP._.CO.MIN 0.053129
## MAXWDRP._.WSP.MAX 0.082542
## SEASON.end._.AVGRHP 0.045545
## WDR.MIN._.WSP.MEDIAN 0.046440
## AVGNOXP_AVGNO2P._.WSP.MAX 0.082424
## NO2.MEDIAN._.WSP.MAX 0.224976
## O3.MEAN._.WSP.MEAN 0.965881
## AVGO3P._.HORA.end 0.413058
## AVGWDRP._.MINNOXP 0.042922
## WDR.MEAN._.RH.MAX 0.063617
## NOx.SUM._.RH.SUM 0.042198
## SO2.MEDIAN._.CO.MEAN 0.050157
## NOx.MIN._.SUMRHP 0.061242
## SUMCOP._.AVGO3P 0.076199
## CO.MIN._.NO2.MEDIAN 0.044083
## RH.MEDIAN._.NOX_NO2.MAX 0.047333
## NOX_NO2.MEDIAN._.MAXWSPP 0.054167
## MAXRHP._.NOx.MIN 0.084783
## SUMNOXP._.SUMO3P 0.046415
## MAXWDRP._.MAXNOXP 0.040257
## NOX_NO2.MAX._.NOx.MAX 0.062920
## TMP.SUM._.TMP.MAX 0.663618
## O3.SUM._.MAXNO2P 0.093855
## WDR.MAX._.NO2.MIN 0.100709
## NO2.MAX._.SO2.SUM 0.068683
## CO.MEAN._.HORA.end 0.315204
## DAY.end._.NO2.MEAN 0.073993
## MAXWDRP._.O3.MAX 0.365217
## NO2.MAX._.TMP.MEDIAN 0.155069
## MINNOXP_MINNO2P._.O3.MAX 1.155869
## WDR.MIN._.NOx.MAX 0.065634
subset(inputsTest,select=rownames(rf_sa$fit$importance))->inputsTestImp
#########predict sa+rf############
rfSA$pred(rf_sa$fit,inputsTestImp)->r3_p2_train1000_fea100_sa_pred
#############predict rf######################
load("~/PED/SA/dataset_ PED_r3_p2_train1000_fea100 _rfFit.RData")
predict(rfFit,inputsTest)->r3_p2_train1000_fea100_pred
cbind(r3_p2_train1000_fea100_pred,r3_p2_train1000_fea100_sa_pred,targetsTest)->r3_p2_train1000_fea100_predVsReal
colnames(r3_p2_train1000_fea100_predVsReal)<-c("RF","SA+RF","Real")
############errors of normalization data set##############################
modelErrors(r3_p2_train1000_fea100_predVsReal[,"RF"],r3_p2_train1000_fea100_predVsReal[,"Real"])
## MAE RMSE RELE
## 0.02923 0.03802 0.19271
modelErrors(r3_p2_train1000_fea100_predVsReal[,"SA+RF"],r3_p2_train1000_fea100_predVsReal[,"Real"])
## MAE RMSE RELE
## 0.02810 0.03675 0.18378
#####denormalize##############
load("~/PED/newWayPrepareData/r3_p2_ext.RData")
r3_p2_ext[,"O3"]->O3
apply(r3_p2_train1000_fea100_predVsReal,2,function(x) unormalized(O3,x))->r3_p2_train1000_fea100_predVsReal_denorm
colnames(r3_p2_train1000_fea100_predVsReal_denorm)<-c("RF","SA+RF","Real")
save(r3_p2_train1000_fea100_predVsReal_denorm,file="r3_p2_training1000_fea100_predVsReal_denorm.RData")
modelErrors(r3_p2_train1000_fea100_predVsReal_denorm[,"RF"],r3_p2_train1000_fea100_predVsReal_denorm[,"Real"])
## MAE RMSE RELE
## 0.01469 0.01911 1.62175
#error between SA+RF with real value#####
modelErrors(r3_p2_train1000_fea100_predVsReal_denorm[,"SA+RF"],r3_p2_train1000_fea100_predVsReal_denorm[,"Real"])
## MAE RMSE RELE
## 0.01412 0.01846 1.53220
##############reshape###############
data.frame(r3_p2_train1000_fea100_predVsReal_denorm)->r3_p2_train1000_fea100_predVsReal_denorm
reshape(r3_p2_train1000_fea100_predVsReal_denorm[1:400,],varying=list(names(r3_p2_train1000_fea100_predVsReal_denorm)),v.names="Ozone",timevar="modelType",times=names(r3_p2_train1000_fea100_predVsReal_denorm),direction = "long")->r3_p2_train1000_fea100_predVsReal_denorm_reshape
pdf("r3_p2_train1000_fea100_test400.pdf",width=11,height=6,bg="transparent")
ggplot(r3_p2_train1000_fea100_predVsReal_denorm_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
geom_line(aes(linetype=modelType),size=0.8)+
geom_point(size=2,fill="white")+
xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize1000,features 195,testSize400")->r3_p2_plot
r3_p2_plot<-r3_p2_plot+theme(
panel.background = element_rect(fill = "transparent"), # or theme_blank()
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = "transparent"),
axis.line=element_line(colour="black")
)
r3_p2_plot<-r3_p2_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
r3_p2_plot<-r3_p2_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
r3_p2_plot<-r3_p2_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
r3_p2_plot
dev.off()
## pdf
## 2
r3_p2_plot
#############################jhigh level############################
r3_p2_train1000_fea100_predVsReal_denorm$level<-ifelse(r3_p2_train1000_fea100_predVsReal_denorm[,"Real"]>0.11,"H","L")
r3_p2_train1000_fea100_predVsReal_denorm[r3_p2_train1000_fea100_predVsReal_denorm[,"Real"]>0.11,]->r3_p2_train1000_fea100_predVsReal_denorm_H
r3_p2_train1000_fea100_predVsReal_denorm[r3_p2_train1000_fea100_predVsReal_denorm[,"Real"]<=0.11,]->r3_p2_train1000_predVsReal_denorm_L
####errors between RF and Real
modelErrors(r3_p2_train1000_fea100_predVsReal_denorm_H[,"RF"],r3_p2_train1000_fea100_predVsReal_denorm_H[,"Real"])
## MAE RMSE RELE
## 0.02472 0.03167 0.18064
###errors between SA + RF Real####
modelErrors(r3_p2_train1000_fea100_predVsReal_denorm_H[,"SA.RF"],r3_p2_train1000_fea100_predVsReal_denorm_H[,"Real"])
## MAE RMSE RELE
## 0.02656 0.03317 0.19470
####################plot high level
reshape(r3_p2_train1000_fea100_predVsReal_denorm_H[1:400,],varying=list(names(r3_p2_train1000_fea100_predVsReal_denorm_H[,1:3])),v.names="Ozone",timevar="modelType",times=names(r3_p2_train1000_fea100_predVsReal_denorm_H[,1:3]),direction = "long")->r3_p2_train1000_fea100_test400_H_reshape
pdf("r3_p2_train1000_fea100_test400_H.pdf",width=11,height=6,bg="transparent")
ggplot(r3_p2_train1000_fea100_test400_H_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
geom_line(aes(linetype=modelType),size=0.8)+
geom_point(size=2,fill="white")+
xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize1000,feature 95,testSize400,high Level")->r3_p2_H_plot
r3_p2_H_plot<-r3_p2_H_plot+theme(
panel.background = element_rect(fill = "transparent"), # or theme_blank()
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = "transparent"),
axis.line=element_line(colour="black")
)
r3_p2_H_plot<-r3_p2_H_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
r3_p2_H_plot<-r3_p2_H_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
r3_p2_H_plot<-r3_p2_H_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
r3_p2_H_plot
dev.off()
## pdf
## 2
r3_p2_H_plot