rolling time 3 hour, predict time 2 hours later,training set is 100 samples
modelErrors <- function(predicted, actual) {
sal <- vector(mode="numeric", length=3)
names(sal) <- c( "MAE", "RMSE", "RELE")
meanPredicted <- mean(predicted)
meanActual <- mean(actual)
sumPred <- sum((predicted - meanPredicted)^2)
sumActual <- sum((actual - meanActual)^2)
n<- length(actual)
p3<-vector(mode="numeric", length=n)
for (i in c(1:n)) {
if (actual[i]==0) {p3[i]<-abs(predicted[i])
} else { p3[i]<-((abs(predicted[i]-actual[i]))/actual[i])
}}
sal[1] <- mean(abs(predicted - actual))
sal[2] <- sqrt(sum((predicted - actual)^2)/n)
sal[3] <- mean(p3)
sal
}
unormalized<-function(x,y){
((y-0.1)*(max(x)-min(x))/0.8) + min(x)
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
load("~/PED/newWayPrepareData/PED_r3_p2_2000s.RData")
##########################100samples##################################
##########without sa feature selection########################
load("~/PED/SA/dataset_ PED_r3_p2_train100 _rfFit.RData")
predict(rfFit,inputsTest)->r3_p2_train100_pred
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
###############sa feature selection#####################
load("~/PED/SA/dataset_ PED_r3_p2_train100 _rf_sa.RData")
plot(rf_sa) + theme_bw()
#########variable importance#########
rf_sa$fit$importance
## IncNodePurity
## SEASON end 0.01085
## HORA end 0.19037
## O3 MAX 0.37894
## RH MAX 0.03532
## TMP MAX 0.07399
## WDR MAX 0.07595
## CO MAX 0.05346
## NO2 MIN 0.04074
## WDR MIN 0.03040
## SO2 MIN 0.01590
## NOX_NO2 MIN 0.03409
## O3 MEAN 0.17096
## TMP MEAN 0.05241
## WSP MEAN 0.06014
## O3 MEDIAN 0.17710
## WDR MEDIAN 0.04957
## CO MEDIAN 0.03872
## O3 SUM 0.19692
## NOx SUM 0.09621
## RH SUM 0.03167
## TMP SUM 0.04610
## WSP SUM 0.06294
## SO2 SUM 0.02882
######select the varialbes which the SA selected###########
subset(inputsTest,select=rownames(rf_sa$fit$importance))->inputsTestImp
###predict ######
rfSA$pred(rf_sa$fit,inputsTestImp)->r3_p2_train100_sa_pred
cbind(r3_p2_train100_pred,r3_p2_train100_sa_pred,targetsTest)->r3_p2_train100_predVsReal
####denormalized########
load("~/PED/newWayPrepareData/r3_p2.RData")
r3_p2[,"O3"]->O3
apply(r3_p2_train100_predVsReal,2,function(x) unormalized(O3,x))->r3_p2_train100_predVsReal_denorm
##############the limition is 0.11ppm/m3#############
####select the rows that targetsTest is more than 0.11
r3_p2_train100_predVsReal_denorm[r3_p2_train100_predVsReal_denorm[,"targetsTest"]>0.11,]->r3_p2_train100_predVsReal_denorm_H
r3_p2_train100_predVsReal_denorm[r3_p2_train100_predVsReal_denorm[,"targetsTest"]<=0.11,]->r3_p2_train100_predVsReal_denorm_L
####1722 rows that higher than 0.11####
######35027 rows that lower than 0.11###
###########errors #########
#########change the coloum names############
colnames(r3_p2_train100_predVsReal_denorm)<-c("RF","SA+RF","Real")
#error between RF with real value#####
modelErrors(r3_p2_train100_predVsReal_denorm[,"RF"],r3_p2_train100_predVsReal_denorm[,"Real"])
## MAE RMSE RELE
## 0.01971 0.02708 1.82803
#error between SA+RF with real value#####
modelErrors(r3_p2_train100_predVsReal_denorm[,"SA+RF"],r3_p2_train100_predVsReal_denorm[,"Real"])
## MAE RMSE RELE
## 0.02021 0.02803 1.84661
data.frame(r3_p2_train100_predVsReal_denorm)->r3_p2_train100_predVsReal_denorm
r3_p2_train100_predVsReal_denorm$level<-ifelse(r3_p2_train100_predVsReal_denorm[,"Real"]>0.11,"H","L")
###############reshpae all the level data################
reshape(r3_p2_train100_predVsReal_denorm[1:400,],varying=list(names(r3_p2_train100_predVsReal_denorm[,1:3])),v.names="Ozone",timevar="modelType",times=names(r3_p2_train100_predVsReal_denorm[,1:3]),direction = "long")->r3_p2_train100_test400_reshape
pdf("r3_p2_train100_test400.pdf",width=11,height=6,bg="transparent")
ggplot(r3_p2_train100_test400_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
geom_line(aes(linetype=modelType),size=0.8)+
geom_point(size=2,fill="white")+
xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize100,testSize400")->r3_p2_plot
r3_p2_plot<-r3_p2_plot+theme(
panel.background = element_rect(fill = "transparent"), # or theme_blank()
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = "transparent"),
axis.line=element_line(colour="black")
)
r3_p2_plot<-r3_p2_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
r3_p2_plot<-r3_p2_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
r3_p2_plot<-r3_p2_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
r3_p2_plot
dev.off()
## pdf
## 2
r3_p2_plot
#################high level##################################
colnames(r3_p2_train100_predVsReal_denorm_H)<-c("RF","SA+RF","Real")
####errors between RF and Real
modelErrors(r3_p2_train100_predVsReal_denorm_H[,"RF"],r3_p2_train100_predVsReal_denorm_H[,"Real"])
## MAE RMSE RELE
## 0.02672 0.03357 0.19849
###errors between SA + RF Real####
modelErrors(r3_p2_train100_predVsReal_denorm_H[,"SA+RF"],r3_p2_train100_predVsReal_denorm_H[,"Real"])
## MAE RMSE RELE
## 0.02839 0.03497 0.20997
data.frame(r3_p2_train100_predVsReal_denorm_H)->r3_p2_train100_predVsReal_denorm_H
###reshape####
reshape(r3_p2_train100_predVsReal_denorm_H[1:300,],varying=list(names(r3_p2_train100_predVsReal_denorm_H[,1:3])),v.names="Ozone",timevar="modelType",times=names(r3_p2_train100_predVsReal_denorm_H[,1:3]),direction = "long")->r3_p2_train100_test300_H_reshape
pdf("r3_p2_train100_test300_H.pdf",width=11,height=6,bg="transparent")
ggplot(r3_p2_train100_test300_H_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
geom_line(aes(linetype=modelType),size=0.8)+
geom_point(size=2,fill="white")+
xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize100,testSize300,high Level")->r3_p2_H_plot
r3_p2_H_plot<-r3_p2_H_plot+theme(
panel.background = element_rect(fill = "transparent"), # or theme_blank()
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = "transparent"),
axis.line=element_line(colour="black")
)
r3_p2_H_plot<-r3_p2_H_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
r3_p2_H_plot<-r3_p2_H_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
r3_p2_H_plot<-r3_p2_H_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
r3_p2_H_plot
dev.off()
## pdf
## 2
r3_p2_H_plot
#################low level##################################
colnames(r3_p2_train100_predVsReal_denorm_L)<-c("RF","SA+RF","Real")
####errors between RF and Real
modelErrors(r3_p2_train100_predVsReal_denorm_L[,"RF"],r3_p2_train100_predVsReal_denorm_L[,"Real"])
## MAE RMSE RELE
## 0.01936 0.02672 1.90815
###errors between SA + RF Real####
modelErrors(r3_p2_train100_predVsReal_denorm_L[,"SA+RF"],r3_p2_train100_predVsReal_denorm_L[,"Real"])
## MAE RMSE RELE
## 0.01981 0.02765 1.92707
data.frame(r3_p2_train100_predVsReal_denorm_L)->r3_p2_train100_predVsReal_denorm_L
reshape(r3_p2_train100_predVsReal_denorm_L[1:300,],varying=list(names(r3_p2_train100_predVsReal_denorm_L[,1:3])),v.names="Ozone",timevar="modelType",times=names(r3_p2_train100_predVsReal_denorm_L[,1:3]),direction = "long")->r3_p2_train100_test300_L_reshape
pdf("r3_p2_train100_test300_L.pdf",width=11,height=6,bg="transparent")
ggplot(r3_p2_train100_test300_L_reshape,aes(x=id,y=Ozone,group=modelType,color=modelType,shape=modelType))+
geom_line(aes(linetype=modelType),size=0.8)+
geom_point(size=2,fill="white")+
xlab("Samples")+ylab("Ozone")+ggtitle("Rolling time 3, predict next 2 hours, trainSize100,testSize300,low Level")->r3_p2_L_plot
r3_p2_L_plot<-r3_p2_L_plot+theme(
panel.background = element_rect(fill = "transparent"), # or theme_blank()
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
plot.background = element_rect(fill = "transparent"),
axis.line=element_line(colour="black")
)
r3_p2_L_plot<-r3_p2_L_plot+theme(axis.title.x=element_text(colour="black",size=17),axis.title.y=element_text(colour="black",size=17))
r3_p2_L_plot<-r3_p2_L_plot+theme(axis.text.x=element_text(colour="black",size=15),axis.text.y=element_text(colour="black",size=13))
r3_p2_L_plot<-r3_p2_L_plot+theme(legend.title = element_text(colour="black", size=17, face="bold"))
r3_p2_L_plot
dev.off()
## pdf
## 2
r3_p2_L_plot
rolling time 3 hour, predict time 2 hours later,training set is 100 samples
load("~/PED/SA/dataset_ PED_r3_p2_train1000 _rfFit.RData")
predict(rfFit,inputsTest)->r3_p2_train1000_pred
###############sa feature selection#####################
load("~/PED/SA/dataset_ PED_r3_p2_train100 _rf_sa.RData")
plot(rf_sa) + theme_bw()
#########variable importance#########
rf_sa$fit$importance
## IncNodePurity
## SEASON end 0.01085
## HORA end 0.19037
## O3 MAX 0.37894
## RH MAX 0.03532
## TMP MAX 0.07399
## WDR MAX 0.07595
## CO MAX 0.05346
## NO2 MIN 0.04074
## WDR MIN 0.03040
## SO2 MIN 0.01590
## NOX_NO2 MIN 0.03409
## O3 MEAN 0.17096
## TMP MEAN 0.05241
## WSP MEAN 0.06014
## O3 MEDIAN 0.17710
## WDR MEDIAN 0.04957
## CO MEDIAN 0.03872
## O3 SUM 0.19692
## NOx SUM 0.09621
## RH SUM 0.03167
## TMP SUM 0.04610
## WSP SUM 0.06294
## SO2 SUM 0.02882