Working on feature set 14

# Bring the dataset-14 feature tables into the workspace. The objects used
# below (feature_new_train14, feature_new_test14) presumably come from this
# file -- confirm.
load("~/PED/prepareDataDay/feature_new_norm14.RData")
# Column names of the training table
colnames(feature_new_train14)
##  [1] "MAXO3P"                "AVGO3P"               
##  [3] "MAXO3P_MAXRHP"         "MAXO3P_MEDIANO2P"     
##  [5] "MEDIANWSPP_MEDIANRHP"  "MAXWDRP_MEDIANNOxP"   
##  [7] "MAXTMPP_MAXRHP"        "MAXNOXP"              
##  [9] "MAXNO2P"               "STDO3P"               
## [11] "STDTMPP"               "STDWSPP"              
## [13] "STDRHP"                "WEEKDAYC"             
## [15] "SEASONC"               "TMPpoint"             
## [17] "RHpoint"               "WSPpoint"             
## [19] "MAXTMPP_MAXNO2P"       "MEDIANTMPP_MEDIANNO2P"
## [21] "RHpoint_MEDIANNO2P"    "MAXO3C"
# Row counts of the training table and of the test table
nrow(feature_new_train14)
## [1] 2316
nrow(feature_new_test14)
## [1] 409
# Run the shared helper script, echoing each expression as it is evaluated
source("~/PED/nnetAnalysis/function.R", echo = TRUE)
## 
## > library(nnet)
## 
## > library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > train_testErrors <- function(model, inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest) {
## +     trainPredict <- predict(model, newdata = as.d .... [TRUNCATED] 
## 
## > error_distri <- function(model, inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest) {
## +     trainPredict <- predict(model, newdata = as.data. .... [TRUNCATED] 
## 
## > lm_nnet_rf_error <- function(feature_new_train, feature_new_test, 
## +     dataset) {
## +     inputsTrain <- feature_new_train[, -c(ncol(feature_new_tra .... [TRUNCATED] 
## 
## > denorm <- function(norm, orig) {
## +     ((norm - 0.1) * (max(orig) - min(orig))/0.8) + min(orig)
## + }
# Train the models and calculate the errors.
# The training call below is left disabled; previously saved results are
# loaded instead.
##lm_nnet_rf_error(feature_new_train14,feature_new_test14,14)##
# NOTE(review): paste() separates its arguments with a single space by
# default, so these resolve to names such as "dataset_ 14 MAE.RData".
# Presumably that matches how the files were written -- confirm before
# switching to paste0(). Paths are relative to the working directory.
load(paste("dataset_", 14, "MAE.RData"))
load(paste("dataset_", 14, "RMSE.RData"))
load(paste("lm_size_", "dataset_", 14, ".RData"))
load(paste("dataset_", 14, "MAE.rf.RData"))
load(paste("dataset_", 14, "RMSE.rf.RData"))

Showing the results

# Print the stored error summary (presumably for the linear model, going by
# the object name). The output below shows $train and $test entries, each
# holding MAE, RMSE and RELE.
error_lm
## $train
##     MAE    RMSE    RELE 
## 0.07208 0.09352 0.22690 
## 
## $test
##     MAE    RMSE    RELE 
## 0.07649 0.10204 0.22812

ANN

# ANN results
# decay == 1e-4: keep size plus train/test MAE and list the rows by size
MAE4 <- MAE[MAE[, "decay"] == 1e-4, c("size", "trainMAE", "testMAE")]
MAE4[order(MAE4[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.05972 0.07908
## MAE   13  0.05227 0.08723
## MAE   15  0.05630 0.08026
## MAE   17  0.05023 0.08822
## MAE   19  0.05391 0.08077
## MAE   21  0.05809 0.07963
## MAE   23  0.05210 0.08697
## MAE   25  0.05422 0.08684
# decay == 1e-4: keep size plus train/test RMSE and list the rows by size
RMSE4 <- RMSE[RMSE[, "decay"] == 1e-4, c("size", "trainRMSE", "testRMSE")]
RMSE4[order(RMSE4[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.07777   0.1049
## RMSE   13   0.06721   0.1207
## RMSE   15   0.07370   0.1067
## RMSE   17   0.06492   0.1154
## RMSE   19   0.06968   0.1071
## RMSE   21   0.07474   0.1061
## RMSE   23   0.06742   0.1155
## RMSE   25   0.07080   0.1133
# decay == 1e-3: keep size plus train/test MAE and list the rows by size
MAE3 <- MAE[MAE[, "decay"] == 1e-3, c("size", "trainMAE", "testMAE")]
MAE3[order(MAE3[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.05813 0.07835
## MAE   13  0.06177 0.07745
## MAE   15  0.05538 0.07941
## MAE   17  0.05867 0.07878
## MAE   19  0.05363 0.07840
## MAE   21  0.05605 0.08078
## MAE   23  0.06035 0.07392
## MAE   25  0.05436 0.08088
# decay == 1e-3: keep size plus train/test RMSE and list the rows by size
RMSE3 <- RMSE[RMSE[, "decay"] == 1e-3, c("size", "trainRMSE", "testRMSE")]
RMSE3[order(RMSE3[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.07521  0.10323
## RMSE   13   0.08019  0.10081
## RMSE   15   0.07194  0.10815
## RMSE   17   0.07635  0.10302
## RMSE   19   0.06963  0.10279
## RMSE   21   0.07339  0.10717
## RMSE   23   0.07784  0.09895
## RMSE   25   0.07038  0.10537
# decay == 1e-2: keep size plus train/test MAE and list the rows by size
MAE2 <- MAE[MAE[, "decay"] == 1e-2, c("size", "trainMAE", "testMAE")]
MAE2[order(MAE2[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.06837 0.07378
## MAE   13  0.06802 0.07280
## MAE   15  0.06792 0.07299
## MAE   17  0.06837 0.07379
## MAE   19  0.06802 0.07284
## MAE   21  0.06837 0.07378
## MAE   23  0.06837 0.07378
## MAE   25  0.06803 0.07285
# decay == 1e-2: keep size plus train/test RMSE and list the rows by size
RMSE2 <- RMSE[RMSE[, "decay"] == 1e-2, c("size", "trainRMSE", "testRMSE")]
RMSE2[order(RMSE2[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.08948  0.09839
## RMSE   13   0.08894  0.09760
## RMSE   15   0.08875  0.09770
## RMSE   17   0.08948  0.09839
## RMSE   19   0.08900  0.09775
## RMSE   21   0.08948  0.09839
## RMSE   23   0.08948  0.09839
## RMSE   25   0.08901  0.09775
# decay == 1e-1: keep size plus train/test MAE and list the rows by size
MAE1 <- MAE[MAE[, "decay"] == 1e-1, c("size", "trainMAE", "testMAE")]
MAE1[order(MAE1[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.07285  0.0771
## MAE   13  0.07285  0.0771
## MAE   15  0.07285  0.0771
## MAE   17  0.07284  0.0771
## MAE   19  0.07285  0.0771
## MAE   21  0.07285  0.0771
## MAE   23  0.07285  0.0771
## MAE   25  0.07285  0.0771
# decay == 1e-1: keep size plus train/test RMSE and list the rows by size
RMSE1 <- RMSE[RMSE[, "decay"] == 1e-1, c("size", "trainRMSE", "testRMSE")]
RMSE1[order(RMSE1[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.09448   0.1021
## RMSE   13   0.09449   0.1021
## RMSE   15   0.09449   0.1021
## RMSE   17   0.09448   0.1021
## RMSE   19   0.09449   0.1021
## RMSE   21   0.09449   0.1021
## RMSE   23   0.09449   0.1021
## RMSE   25   0.09449   0.1021
# decay == 1: keep size plus train/test MAE and list the rows by size
MAE0 <- MAE[MAE[, "decay"] == 1, c("size", "trainMAE", "testMAE")]
MAE0[order(MAE0[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.07979 0.08325
## MAE   13  0.07986 0.08332
## MAE   15  0.07976 0.08322
## MAE   17  0.07981 0.08327
## MAE   19  0.07990 0.08336
## MAE   21  0.07977 0.08323
## MAE   23  0.07984 0.08329
## MAE   25  0.07975 0.08320
# decay == 1: keep size plus train/test RMSE and list the rows by size
RMSE0 <- RMSE[RMSE[, "decay"] == 1, c("size", "trainRMSE", "testRMSE")]
RMSE0[order(RMSE0[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11    0.1017   0.1080
## RMSE   13    0.1018   0.1080
## RMSE   15    0.1017   0.1079
## RMSE   17    0.1018   0.1080
## RMSE   19    0.1019   0.1081
## RMSE   21    0.1017   0.1080
## RMSE   23    0.1018   0.1080
## RMSE   25    0.1017   0.1079

randomForest

# Random-forest errors for ntree == 500: mtry plus train/test MAE and RMSE
MAE500 <- MAE_rf[MAE_rf[, "ntree"] == 500, c("mtry", "trainMAE", "testMAE")]
RMSE500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 500,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE500
##     mtry trainMAE testMAE
## MAE    2  0.03153 0.07618
## MAE    3  0.03045 0.07592
## MAE    4  0.02987 0.07583
## MAE    5  0.02940 0.07523
## MAE    6  0.02922 0.07575
## MAE    7  0.02902 0.07506
RMSE500
##      mtry trainRMSE testRMSE
## RMSE    2   0.04209  0.10068
## RMSE    3   0.04067  0.10036
## RMSE    4   0.04001  0.10051
## RMSE    5   0.03947  0.09976
## RMSE    6   0.03909  0.10037
## RMSE    7   0.03913  0.09994
# Random-forest errors for ntree == 2000: mtry plus train/test MAE and RMSE
MAE2000 <- MAE_rf[MAE_rf[, "ntree"] == 2000, c("mtry", "trainMAE", "testMAE")]
RMSE2000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2000,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE2000
##     mtry trainMAE testMAE
## MAE    2  0.03156 0.07608
## MAE    3  0.03043 0.07599
## MAE    4  0.02980 0.07550
## MAE    5  0.02943 0.07539
## MAE    6  0.02914 0.07530
## MAE    7  0.02898 0.07506
RMSE2000
##      mtry trainRMSE testRMSE
## RMSE    2   0.04210  0.10064
## RMSE    3   0.04074  0.10057
## RMSE    4   0.03994  0.10005
## RMSE    5   0.03947  0.10010
## RMSE    6   0.03903  0.09999
## RMSE    7   0.03893  0.09971

Error distribution

# Load the saved linear-model artefacts and the train/test split for dataset
# 14. The objects used further down (trainPredict, testPredict, targetsTrain,
# targetsTest) presumably come from these files -- confirm.
# NOTE(review): paste() inserts a space between its arguments, so the file
# names contain spaces (e.g. "lmFit_error_dis_dataset 14 .RData").
load(paste("lmFit_error_dis_dataset", 14, ".RData"))
load(paste("lmFit_trainPredict_dataset", 14, ".RData"))
load(paste("lmFit_testPredict_dataset", 14, ".RData"))
load(paste("dataset_", 14, "_sepdata.RData"))
load("~/PED/prepareDataDay/PP_new7.RData")
# MAXO3C column of PP_new7; passed to denorm() as the reference for min/max
orig <- PP_new7[, "MAXO3C"]

####################### linear regression #########################
# Training set
breaks <- seq(0, 0.5, by = 0.025)
# Map targets and predictions back onto the scale of `orig` via denorm()
trainTargets_orig <- denorm(targetsTrain, orig)
testTargets_orig <- denorm(targetsTest, orig)
trainPredict_orig <- denorm(trainPredict, orig)
testPredict_orig <- denorm(testPredict, orig)
# Histogram of the denormalized training targets
hist(
  trainTargets_orig,
  col = "red",
  main = "ozone distribution of training set for dataset 14",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone (PPM)"
)

plot of chunk unnamed-chunk-8

# Histogram of the denormalized training-set predictions, using the same bins
# and axis limits as the training-target histogram
hist(
  trainPredict_orig,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 14(linear regression)",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone(PPM)"
)

plot of chunk unnamed-chunk-8

# Testing set: histogram of the denormalized test targets
hist(
  testTargets_orig,
  col = "blue",
  main = "ozone distribution of testing set for dataset 14",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 100),
  xlab = "daily maximum ozone (PPM)"
)

plot of chunk unnamed-chunk-8

# Histogram of the denormalized test-set predictions. The bins start at -0.1
# here rather than 0 (presumably because some predictions fall below zero --
# confirm).
hist(
  testPredict_orig,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 14(linear regression)",
  xlab = "daily maximum ozone (PPM)",
  breaks = seq(-0.1, 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 100)
)

plot of chunk unnamed-chunk-8

# Absolute error on the training set, plotted against the observed value
trainError <- abs(trainTargets_orig - trainPredict_orig)
plot(trainTargets_orig, trainError, type = "p")

plot of chunk unnamed-chunk-8

# Absolute error on the test set, plotted against the observed value
testError <- abs(testTargets_orig - testPredict_orig)
plot(testTargets_orig, testError, type = "p")

plot of chunk unnamed-chunk-8

###### randomForest ###########################
# Load the stored random-forest predictions for mtry = 2, ntree = 500. Judging
# by the calls below, these files replace `trainPredict` and `testPredict`.
load(paste("rfFit_trainPredict_dataset",14,"_mtry_",2,"_ntree_",500,".RData"))
load(paste("rfFit_testPredict_dataset",14,"_mtry_",2,"_ntree_",500,".RData"))
## mtry is 2, ntree is 500 ###
# These predictions are plotted without denorm(), on the x range c(0, 1), so
# the bins have to cover that interval. Reusing `breaks`
# (seq(0, 0.5, by = 0.025), built for the denormalized ozone values) made
# hist() stop with
#   "some 'x' not counted; maybe 'breaks' do not span range of 'x'".
# NOTE(review): this assumes the predictions lie within [0, 1] -- confirm.
hist(
  trainPredict,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 14(RF)",
  breaks = seq(0, 1, by = 0.025),
  xlim = c(0, 1),
  ylim = c(0, 500)
)
hist(
  testPredict,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 14(RF)",
  breaks = seq(0, 1, by = 0.025),
  xlim = c(0, 1),
  ylim = c(0, 100)
)

Importance factors

# Importance table of the fit stored for mtry = 2, ntree = 500
load(paste("rfFit_dataset_", 14, "_mtry_", 2, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                2.632e-03        3.3492
## AVGO3P                2.010e-03        3.1924
## MAXO3P_MAXRHP         2.015e-03        2.6779
## MAXO3P_MEDIANO2P      1.088e-03        1.6263
## MEDIANWSPP_MEDIANRHP  3.554e-04        0.9757
## MAXWDRP_MEDIANNOxP    6.903e-04        1.2401
## MAXTMPP_MAXRHP        4.673e-04        1.1210
## MAXNOXP               6.731e-04        1.0891
## MAXNO2P               9.389e-04        1.3369
## STDO3P                1.933e-03        2.9389
## STDTMPP               2.722e-04        1.1775
## STDWSPP               2.032e-04        0.9448
## STDRHP                3.348e-04        1.1781
## WEEKDAYC              8.194e-05        0.5826
## SEASONC               1.740e-04        0.3968
## TMPpoint              3.548e-04        1.2170
## RHpoint               4.590e-04        1.2009
## WSPpoint              3.512e-04        1.1281
## MAXTMPP_MAXNO2P       1.136e-03        1.3477
## MEDIANTMPP_MEDIANNO2P 9.935e-04        1.3176
## RHpoint_MEDIANNO2P    9.211e-04        1.3944
# Importance table of the fit stored for mtry = 7, ntree = 500
load(paste("rfFit_dataset_", 14, "_mtry_", 7, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                0.0046674        5.2980
## AVGO3P                0.0030817        4.0206
## MAXO3P_MAXRHP         0.0021039        2.3329
## MAXO3P_MEDIANO2P      0.0006264        1.0042
## MEDIANWSPP_MEDIANRHP  0.0003861        0.9231
## MAXWDRP_MEDIANNOxP    0.0006221        1.0525
## MAXTMPP_MAXRHP        0.0005792        1.1441
## MAXNOXP               0.0005350        0.8761
## MAXNO2P               0.0007329        0.9743
## STDO3P                0.0024737        3.5111
## STDTMPP               0.0003202        1.2549
## STDWSPP               0.0001728        0.9208
## STDRHP                0.0002787        1.1904
## WEEKDAYC              0.0001210        0.5801
## SEASONC               0.0001146        0.2899
## TMPpoint              0.0005167        1.3542
## RHpoint               0.0006185        1.3072
## WSPpoint              0.0004359        1.2877
## MAXTMPP_MAXNO2P       0.0007886        0.9371
## MEDIANTMPP_MEDIANNO2P 0.0005901        0.9279
## RHpoint_MEDIANNO2P    0.0006277        1.0748
# Importance table of the fit stored for mtry = 2, ntree = 2000
load(paste("rfFit_dataset_", 14, "_mtry_", 2, "_ntree_", 2000, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                2.644e-03        3.4191
## AVGO3P                2.182e-03        3.2360
## MAXO3P_MAXRHP         1.833e-03        2.6320
## MAXO3P_MEDIANO2P      1.078e-03        1.6330
## MEDIANWSPP_MEDIANRHP  3.440e-04        0.9735
## MAXWDRP_MEDIANNOxP    6.934e-04        1.2227
## MAXTMPP_MAXRHP        4.994e-04        1.1453
## MAXNOXP               6.961e-04        1.0978
## MAXNO2P               9.070e-04        1.2900
## STDO3P                1.868e-03        2.9571
## STDTMPP               2.562e-04        1.1553
## STDWSPP               1.952e-04        0.9547
## STDRHP                2.851e-04        1.1674
## WEEKDAYC              9.436e-05        0.5829
## SEASONC               1.622e-04        0.3872
## TMPpoint              4.442e-04        1.2215
## RHpoint               4.970e-04        1.2277
## WSPpoint              2.972e-04        1.0868
## MAXTMPP_MAXNO2P       1.050e-03        1.3670
## MEDIANTMPP_MEDIANNO2P 9.290e-04        1.3216
## RHpoint_MEDIANNO2P    8.419e-04        1.3479
# Importance table of the fit stored for mtry = 7, ntree = 2000
load(paste("rfFit_dataset_", 14, "_mtry_", 7, "_ntree_", 2000, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                4.718e-03        5.3113
## AVGO3P                3.145e-03        4.1956
## MAXO3P_MAXRHP         2.018e-03        2.2169
## MAXO3P_MEDIANO2P      6.006e-04        0.9662
## MEDIANWSPP_MEDIANRHP  3.737e-04        0.9359
## MAXWDRP_MEDIANNOxP    6.350e-04        1.0547
## MAXTMPP_MAXRHP        5.611e-04        1.1288
## MAXNOXP               4.955e-04        0.8837
## MAXNO2P               7.480e-04        0.9576
## STDO3P                2.578e-03        3.4891
## STDTMPP               3.079e-04        1.2461
## STDWSPP               1.815e-04        0.9245
## STDRHP                3.244e-04        1.2079
## WEEKDAYC              8.922e-05        0.5911
## SEASONC               9.507e-05        0.2926
## TMPpoint              5.369e-04        1.3553
## RHpoint               6.098e-04        1.2961
## WSPpoint              4.531e-04        1.3058
## MAXTMPP_MAXNO2P       9.067e-04        0.9580
## MEDIANTMPP_MEDIANNO2P 6.070e-04        0.9012
## RHpoint_MEDIANNO2P    6.052e-04        1.0516