# Rolling time: 3 hours; prediction time: 2 hours later; training set: 2000 samples; features: 95
#' Compute summary error metrics for a set of predictions.
#'
#' @param predicted Numeric vector of predicted values.
#' @param actual Numeric vector of observed values, same length as `predicted`.
#' @return Named numeric vector with elements `MAE` (mean absolute error),
#'   `RMSE` (root mean squared error) and `RELE` (mean relative error). Where
#'   `actual` is zero the relative error falls back to `abs(predicted)`.
#'   Zero-length input yields `NaN` for every metric; `NA` values propagate.
modelErrors <- function(predicted, actual) {
  if (length(predicted) != length(actual)) {
    stop("'predicted' and 'actual' must have the same length", call. = FALSE)
  }
  n <- length(actual)
  abs_err <- abs(predicted - actual)

  # Scale by the magnitude of the observation so that negative observations
  # cannot contribute negative terms that cancel out in the mean.
  rel_err <- abs_err / abs(actual)
  # A zero observation has no meaningful ratio; use the absolute prediction.
  # which() skips NA comparisons, so NA observations simply stay NA.
  zero_idx <- which(actual == 0)
  rel_err[zero_idx] <- abs(predicted[zero_idx])

  c(
    MAE = mean(abs_err),
    RMSE = sqrt(sum(abs_err^2) / n),
    RELE = mean(rel_err)
  )
}
#' Rescale values onto the range spanned by a reference vector.
#'
#' Evaluates `(y - 0.1) * (max(x) - min(x)) / 0.8 + min(x)`, i.e. the algebraic
#' inverse of the mapping `0.1 + 0.8 * (v - min(x)) / (max(x) - min(x))`.
#'
#' @param x Reference values; only their minimum and maximum are used.
#' @param y Values to transform.
#' @return The transformed `y`.
unormalized <- function(x, y) {
  lower <- min(x)
  spread <- max(x) - lower
  shifted <- y - 0.1
  shifted * spread / 0.8 + lower
}
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
library(xtable)
# Restore the saved workspace objects this script relies on.
# NOTE(review): `rf_sa`, `inputsTest` and `targetsTest` are never created in
# this script, so they presumably come from these files — confirm.
load("~/PED/newWayPrepareData/finalData/PED_r3_p2_fea0.RData")
load("~/PED/SA/r3_p2_training2000_fea0/dataset_PED_r3_p2_train2000_fea0_rf_sa.RData")

# Plot the `rf_sa` object with the black-and-white ggplot2 theme, then print
# its selected variables.
plot(rf_sa) + theme_bw()
rf_sa[["optVariables"]]
## [1] "MONTH end" "DAY end" "WEEKDAY end"
## [4] "HORA end" "O3 MAX" "NOx MAX"
## [7] "NO2 MAX" "RH MAX" "TMP MAX"
## [10] "WSP MAX" "CO MAX" "SO2 MAX"
## [13] "O3 MIN" "NOx MIN" "TMP MIN"
## [16] "CO MIN" "SO2 MIN" "WSP MEAN"
## [19] "CO MEAN" "NOx MEDIAN" "RH MEDIAN"
## [22] "TMP MEDIAN" "WSP MEDIAN" "CO MEDIAN"
## [25] "NOX_NO2 MEDIAN" "WDR SUM" "CO SUM"
## [28] "SO2 SUM" "NOX_NO2 SUM" "MAXO3P"
## [31] "MINO3P" "AVGO3P" "SUMO3P"
## [34] "MINNO2P" "SUMNO2P" "MINNOXP"
## [37] "MAXRHP" "MINRHP" "AVGRHP"
## [40] "SUMRHP" "SUMTMPP" "MINWDRP"
## [43] "MAXCOP" "MINCOP" "MAXNOXP_MAXNO2P"
## [46] "MINNOXP_MINNO2P" "AVGNOXP_AVGNO2P"
# ---- Variable importance ----
# Rank the predictors of the fitted forest by importance (rank 1 = largest
# IncNodePurity). The ranking is computed on the IncNodePurity column itself
# instead of on the whole data frame, so it no longer relies on rank()
# implicitly flattening a data frame into a vector.
imp <- data.frame(rf_sa$fit$importance)
imp$rank <- rank(-imp[["IncNodePurity"]])
imp_rank <- imp[order(imp[["rank"]]), ]

# Persist the ranked table and print it as a LaTeX table.
save(imp_rank, file = "r3_p2_training2000_fea0_imp_rank.RData")
xtable(
  imp_rank,
  caption = "rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 95",
  digits = c(3, 3, 0)
)
## % latex table generated in R 3.1.2 by xtable 1.7-1 package
## % Mon Mar 23 17:50:02 2015
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrr}
## \hline
## & IncNodePurity & rank \\
## \hline
## HORA end & 5.762 & 1 \\
## O3 MAX & 4.140 & 2 \\
## WDR SUM & 1.299 & 3 \\
## O3 MIN & 1.230 & 4 \\
## NO2 MAX & 0.835 & 5 \\
## TMP MAX & 0.497 & 6 \\
## WSP MAX & 0.455 & 7 \\
## NOx MIN & 0.423 & 8 \\
## WSP MEAN & 0.404 & 9 \\
## TMP MIN & 0.367 & 10 \\
## NOx MEDIAN & 0.349 & 11 \\
## SUMO3P & 0.313 & 12 \\
## TMP MEDIAN & 0.276 & 13 \\
## NOX\_NO2 SUM & 0.258 & 14 \\
## AVGO3P & 0.257 & 15 \\
## NOX\_NO2 MEDIAN & 0.249 & 16 \\
## NOx MAX & 0.248 & 17 \\
## MAXO3P & 0.234 & 18 \\
## SO2 SUM & 0.219 & 19 \\
## CO MEAN & 0.202 & 20 \\
## CO SUM & 0.199 & 21 \\
## WSP MEDIAN & 0.199 & 22 \\
## CO MIN & 0.196 & 23 \\
## AVGNOXP\_AVGNO2P & 0.195 & 24 \\
## MINO3P & 0.195 & 25 \\
## MAXCOP & 0.167 & 26 \\
## SUMTMPP & 0.166 & 27 \\
## SO2 MAX & 0.155 & 28 \\
## CO MEDIAN & 0.153 & 29 \\
## SUMNO2P & 0.148 & 30 \\
## MINWDRP & 0.146 & 31 \\
## RH MEDIAN & 0.144 & 32 \\
## MINNOXP\_MINNO2P & 0.144 & 33 \\
## MAXNOXP\_MAXNO2P & 0.143 & 34 \\
## SUMRHP & 0.138 & 35 \\
## SO2 MIN & 0.131 & 36 \\
## DAY end & 0.129 & 37 \\
## CO MAX & 0.127 & 38 \\
## AVGRHP & 0.125 & 39 \\
## MAXRHP & 0.123 & 40 \\
## RH MAX & 0.123 & 41 \\
## MINRHP & 0.119 & 42 \\
## MINCOP & 0.112 & 43 \\
## MINNO2P & 0.112 & 44 \\
## MINNOXP & 0.109 & 45 \\
## MONTH end & 0.090 & 46 \\
## WEEKDAY end & 0.070 & 47 \\
## \hline
## \end{tabular}
## \caption{rolling time 3 hour, predict time 2 hours later,training set is 2000 samples,features are 95}
## \end{table}
# ---- Predictions: SA + RF ----
# Keep only the test-set columns named in the fitted forest's importance table,
# then predict with the `pred` function of `rfSA`.
inputsTestImp <- subset(inputsTest, select = rownames(rf_sa$fit$importance))
r3_p2_train2000_fea0_sa_pred <- rfSA$pred(rf_sa$fit, inputsTestImp)
# ---- Predictions: plain RF ----
load("~/PED/SA/r3_p2_training2000_fea0/dataset_PED_r3_p2_train2000_fea0_rfFit.RData")

# Importance ranking of the final model (rank 1 = largest IncNodePurity).
# Ranking an explicit column avoids calling rank() on a whole data frame.
# NOTE(review): assumes `rfFit$finalModel` is a regression forest, whose
# importance table carries an IncNodePurity column — confirm.
rfFit_imp <- as.data.frame(rfFit$finalModel$importance)
rfFit_imp$rank <- rank(-rfFit_imp[["IncNodePurity"]])
rfFit_imp_rank <- rfFit_imp[order(rfFit_imp[["rank"]]), ]

# Predict on the full test inputs, then place both models' predictions next to
# the observed targets under short column labels.
r3_p2_train2000_fea0_pred <- predict(rfFit, inputsTest)
r3_p2_train2000_fea0_predVsReal <- cbind(
  r3_p2_train2000_fea0_pred,
  r3_p2_train2000_fea0_sa_pred,
  targetsTest
)
colnames(r3_p2_train2000_fea0_predVsReal) <- c("RF", "SA+RF", "Real")
# ---- Errors on the normalised data set ----
# Score each model's column against the "Real" column.
error_norm_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal[, "RF"],
  r3_p2_train2000_fea0_predVsReal[, "Real"]
)
error_norm_sa_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal[, "SA+RF"],
  r3_p2_train2000_fea0_predVsReal[, "Real"]
)
error_norm_rf
## MAE RMSE RELE
## 0.02489 0.03541 0.15192
error_norm_sa_rf
## MAE RMSE RELE
## 0.02416 0.03364 0.15009
# ---- Denormalise ----
load("~/PED/newWayPrepareData/finalData/O3.RData")

# Apply unormalized() to every column, using the minimum and maximum of `O3`.
r3_p2_train2000_fea0_predVsReal_denorm <- apply(
  r3_p2_train2000_fea0_predVsReal,
  2,
  function(column) unormalized(O3, column)
)
colnames(r3_p2_train2000_fea0_predVsReal_denorm) <- c("RF", "SA+RF", "Real")
save(
  r3_p2_train2000_fea0_predVsReal_denorm,
  file = "r3_p2_train2000_fea0_predVsReal_denorm.RData"
)

# Error between RF and the real values.
error_denorm_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal_denorm[, "RF"],
  r3_p2_train2000_fea0_predVsReal_denorm[, "Real"]
)
# Error between SA+RF and the real values.
error_denorm_sa_rf <- modelErrors(
  r3_p2_train2000_fea0_predVsReal_denorm[, "SA+RF"],
  r3_p2_train2000_fea0_predVsReal_denorm[, "Real"]
)
error_denorm_rf
## MAE RMSE RELE
## 0.01251 0.01780 1.08181
error_denorm_sa_rf
## MAE RMSE RELE
## 0.01214 0.01690 1.10195

# Store all four error summaries together.
save(
  error_norm_rf,
  error_norm_sa_rf,
  error_denorm_rf,
  error_denorm_sa_rf,
  file = "error_r3_p2_training2000_fea0.RData"
)