Working on feature dataset 15

## Restore the prepared data for feature set 15. The objects it provides are
## not visible here (presumably feature_new_train15 and feature_new_test15,
## which are used below -- confirm).
load("~/PED/prepareDataDay/feature_new_norm15.RData")
## the variables: column names of the training set
colnames(feature_new_train15)
##  [1] "MAXO3P"                "AVGO3P"               
##  [3] "MAXO3P_MAXRHP"         "MAXO3P_MEDIANO2P"     
##  [5] "MEDIANWSPP_MEDIANRHP"  "MAXWDRP_MEDIANNOxP"   
##  [7] "MAXTMPP_MAXRHP"        "MAXNOXP"              
##  [9] "MAXNO2P"               "STDO3P"               
## [11] "STDTMPP"               "STDWSPP"              
## [13] "STDRHP"                "WEEKDAYC"             
## [15] "SEASONC"               "TMPpoint"             
## [17] "RHpoint"               "WSPpoint"             
## [19] "MAXTMPP_MAXNO2P"       "MEDIANTMPP_MEDIANNO2P"
## [21] "RHpoint_MEDIANNO2P"    "MAXO3C"
## Number of samples (rows) in the training and the testing set
nrow(feature_new_train15)
## [1] 2136
nrow(feature_new_test15)
## [1] 377
## Run the helper script, echoing each expression as it is evaluated
source("~/PED/nnetAnalysis/function.R", echo = TRUE)
## 
## > library(nnet)
## 
## > library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > train_testErrors <- function(model, inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest) {
## +     trainPredict <- predict(model, newdata = as.d .... [TRUNCATED] 
## 
## > error_distri <- function(model, inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest) {
## +     trainPredict <- predict(model, newdata = as.data. .... [TRUNCATED] 
## 
## > lm_nnet_rf_error <- function(feature_new_train, feature_new_test, 
## +     dataset) {
## +     inputsTrain <- feature_new_train[, -c(ncol(feature_new_tra .... [TRUNCATED] 
## 
## > denorm <- function(norm, orig) {
## +     ((norm - 0.1) * (max(orig) - min(orig))/0.8) + min(orig)
## + }
### Train the models and calculate the errors.
## The training call is kept commented out; the files loaded below are
## presumably the results it saved earlier -- confirm.
## lm_nnet_rf_error(feature_new_train15, feature_new_test15, 15)
## NOTE: paste() joins its arguments with a single space by default, so the
## file names contain spaces, e.g. "dataset_ 15 MAE.RData".
load(paste("dataset_", 15, "MAE.RData"))
load(paste("dataset_", 15, "RMSE.RData"))
load(paste("lm_size_", "dataset_", 15, ".RData"))
load(paste("dataset_", 15, "MAE.rf.RData"))
load(paste("dataset_", 15, "RMSE.rf.RData"))

Showing the results

## Print the train/test error summaries (MAE, RMSE, RELE) held in error_lm.
## error_lm is not defined in this file; presumably it is restored by one of
## the load() calls above -- confirm.
error_lm
## $train
##    MAE   RMSE   RELE 
## 0.1128 0.1422 0.2574 
## 
## $test
##    MAE   RMSE   RELE 
## 0.1183 0.1477 0.2583

ANN

### ANN
### decay = 1e-4: train/test MAE, ordered by size
## The decay column is floating point, so it is matched with a tolerance
## instead of `==`. drop = FALSE keeps the result a matrix even when a single
## row matches, so the ordering step below cannot fail on a plain vector.
MAE4 <- MAE[
  abs(MAE[, "decay"] - 1e-4) < 1e-8,
  c("size", "trainMAE", "testMAE"),
  drop = FALSE
]
MAE4[order(MAE4[, "size"]), , drop = FALSE]
##     size trainMAE testMAE
## MAE   11  0.09228  0.1183
## MAE   13  0.07833  0.1340
## MAE   15  0.08741  0.1276
## MAE   17  0.07422  0.1373
## MAE   19  0.08184  0.1308
## MAE   21  0.09030  0.1211
## MAE   23  0.07470  0.1381
## MAE   25  0.08445  0.1319
## decay = 1e-4: train/test RMSE, ordered by size.
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
RMSE4 <- RMSE[
  abs(RMSE[, "decay"] - 1e-4) < 1e-8,
  c("size", "trainRMSE", "testRMSE"),
  drop = FALSE
]
RMSE4[order(RMSE4[, "size"]), , drop = FALSE]
##      size trainRMSE testRMSE
## RMSE   11   0.11715   0.1531
## RMSE   13   0.10065   0.1764
## RMSE   15   0.11093   0.1619
## RMSE   17   0.09646   0.1799
## RMSE   19   0.10394   0.1699
## RMSE   21   0.11471   0.1536
## RMSE   23   0.09518   0.1787
## RMSE   25   0.10747   0.1701
### decay = 1e-3: train/test MAE, ordered by size
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
MAE3 <- MAE[
  abs(MAE[, "decay"] - 1e-3) < 1e-8,
  c("size", "trainMAE", "testMAE"),
  drop = FALSE
]
MAE3[order(MAE3[, "size"]), , drop = FALSE]
##     size trainMAE testMAE
## MAE   11  0.08751  0.1177
## MAE   13  0.09462  0.1154
## MAE   15  0.08318  0.1248
## MAE   17  0.09069  0.1191
## MAE   19  0.08047  0.1261
## MAE   21  0.08589  0.1242
## MAE   23  0.09171  0.1215
## MAE   25  0.08326  0.1242
## decay = 1e-3: train/test RMSE, ordered by size.
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
RMSE3 <- RMSE[
  abs(RMSE[, "decay"] - 1e-3) < 1e-8,
  c("size", "trainRMSE", "testRMSE"),
  drop = FALSE
]
RMSE3[order(RMSE3[, "size"]), , drop = FALSE]
##      size trainRMSE testRMSE
## RMSE   11    0.1115   0.1549
## RMSE   13    0.1203   0.1461
## RMSE   15    0.1057   0.1588
## RMSE   17    0.1150   0.1534
## RMSE   19    0.1021   0.1644
## RMSE   21    0.1093   0.1633
## RMSE   23    0.1166   0.1571
## RMSE   25    0.1054   0.1625
### decay = 1e-2: train/test MAE, ordered by size
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
MAE2 <- MAE[
  abs(MAE[, "decay"] - 1e-2) < 1e-8,
  c("size", "trainMAE", "testMAE"),
  drop = FALSE
]
MAE2[order(MAE2[, "size"]), , drop = FALSE]
##     size trainMAE testMAE
## MAE   11   0.1028  0.1121
## MAE   13   0.1022  0.1120
## MAE   15   0.1025  0.1131
## MAE   17   0.1031  0.1121
## MAE   19   0.1027  0.1118
## MAE   21   0.1027  0.1121
## MAE   23   0.1028  0.1128
## MAE   25   0.1027  0.1126
## decay = 1e-2: train/test RMSE, ordered by size.
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
RMSE2 <- RMSE[
  abs(RMSE[, "decay"] - 1e-2) < 1e-8,
  c("size", "trainRMSE", "testRMSE"),
  drop = FALSE
]
RMSE2[order(RMSE2[, "size"]), , drop = FALSE]
##      size trainRMSE testRMSE
## RMSE   11    0.1304   0.1416
## RMSE   13    0.1298   0.1419
## RMSE   15    0.1300   0.1423
## RMSE   17    0.1308   0.1413
## RMSE   19    0.1305   0.1413
## RMSE   21    0.1304   0.1417
## RMSE   23    0.1304   0.1418
## RMSE   25    0.1306   0.1421
### decay = 1e-1: train/test MAE, ordered by size
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
MAE1 <- MAE[
  abs(MAE[, "decay"] - 1e-1) < 1e-8,
  c("size", "trainMAE", "testMAE"),
  drop = FALSE
]
MAE1[order(MAE1[, "size"]), , drop = FALSE]
##     size trainMAE testMAE
## MAE   11   0.1129  0.1174
## MAE   13   0.1129  0.1174
## MAE   15   0.1129  0.1174
## MAE   17   0.1129  0.1174
## MAE   19   0.1129  0.1174
## MAE   21   0.1129  0.1174
## MAE   23   0.1129  0.1174
## MAE   25   0.1129  0.1174
## decay = 1e-1: train/test RMSE, ordered by size.
## Tolerance match on the floating-point decay column (not `==`);
## drop = FALSE keeps a matrix even if only one row matches.
RMSE1 <- RMSE[
  abs(RMSE[, "decay"] - 1e-1) < 1e-8,
  c("size", "trainRMSE", "testRMSE"),
  drop = FALSE
]
RMSE1[order(RMSE1[, "size"]), , drop = FALSE]
##      size trainRMSE testRMSE
## RMSE   11    0.1424   0.1468
## RMSE   13    0.1424   0.1468
## RMSE   15    0.1424   0.1468
## RMSE   17    0.1424   0.1468
## RMSE   19    0.1424   0.1468
## RMSE   21    0.1424   0.1468
## RMSE   23    0.1424   0.1468
## RMSE   25    0.1424   0.1468
### decay = 1: train/test MAE, ordered by size
## Matched with the same tolerance as the other decay values for
## consistency; drop = FALSE keeps a matrix even if only one row matches,
## so the ordering step cannot fail on a plain vector.
MAE0 <- MAE[
  abs(MAE[, "decay"] - 1) < 1e-8,
  c("size", "trainMAE", "testMAE"),
  drop = FALSE
]
MAE0[order(MAE0[, "size"]), , drop = FALSE]
##     size trainMAE testMAE
## MAE   11   0.1208  0.1251
## MAE   13   0.1208  0.1251
## MAE   15   0.1208  0.1251
## MAE   17   0.1208  0.1251
## MAE   19   0.1209  0.1251
## MAE   21   0.1208  0.1251
## MAE   23   0.1208  0.1251
## MAE   25   0.1207  0.1251
## decay = 1: train/test RMSE, ordered by size.
## Matched with the same tolerance as the other decay values for
## consistency; drop = FALSE keeps a matrix even if only one row matches.
RMSE0 <- RMSE[
  abs(RMSE[, "decay"] - 1) < 1e-8,
  c("size", "trainRMSE", "testRMSE"),
  drop = FALSE
]
RMSE0[order(RMSE0[, "size"]), , drop = FALSE]
##      size trainRMSE testRMSE
## RMSE   11    0.1504   0.1540
## RMSE   13    0.1504   0.1540
## RMSE   15    0.1504   0.1540
## RMSE   17    0.1504   0.1540
## RMSE   19    0.1505   0.1541
## RMSE   21    0.1504   0.1540
## RMSE   23    0.1504   0.1540
## RMSE   25    0.1504   0.1540

randomForest

## Random forest, ntree = 500: train/test MAE and RMSE for each mtry
MAE500 <- MAE_rf[MAE_rf[, "ntree"] == 500, c("mtry", "trainMAE", "testMAE")]
RMSE500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 500,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE500
##     mtry trainMAE testMAE
## MAE    7  0.04541  0.1148
## MAE    8  0.04535  0.1148
## MAE    9  0.04497  0.1146
## MAE   10  0.04489  0.1147
## MAE   11  0.04482  0.1144
## MAE   12  0.04463  0.1145
## MAE   13  0.04468  0.1144
## MAE   14  0.04452  0.1143
RMSE500
##      mtry trainRMSE testRMSE
## RMSE    7   0.05885   0.1451
## RMSE    8   0.05867   0.1448
## RMSE    9   0.05817   0.1451
## RMSE   10   0.05826   0.1449
## RMSE   11   0.05801   0.1447
## RMSE   12   0.05796   0.1447
## RMSE   13   0.05792   0.1447
## RMSE   14   0.05767   0.1444
## Random forest, ntree = 2000: train/test MAE and RMSE for each mtry
MAE2000 <- MAE_rf[MAE_rf[, "ntree"] == 2000, c("mtry", "trainMAE", "testMAE")]
RMSE2000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2000,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE2000
##     mtry trainMAE testMAE
## MAE    7  0.04546  0.1145
## MAE    8  0.04526  0.1147
## MAE    9  0.04508  0.1143
## MAE   10  0.04487  0.1147
## MAE   11  0.04464  0.1146
## MAE   12  0.04468  0.1144
## MAE   13  0.04440  0.1141
## MAE   14  0.04437  0.1141
RMSE2000
##      mtry trainRMSE testRMSE
## RMSE    7   0.05871   0.1446
## RMSE    8   0.05857   0.1450
## RMSE    9   0.05834   0.1445
## RMSE   10   0.05815   0.1448
## RMSE   11   0.05785   0.1448
## RMSE   12   0.05782   0.1447
## RMSE   13   0.05764   0.1443
## RMSE   14   0.05751   0.1443

Error distribution

## Restore the saved linear-regression results and the separated data set.
## paste() joins with a space by default, so these file names contain spaces.
load(paste("lmFit_error_dis_dataset", 15, ".RData"))
load(paste("lmFit_trainPredict_dataset", 15, ".RData"))
load(paste("lmFit_testPredict_dataset", 15, ".RData"))
load(paste("dataset_", 15, "_sepdata.RData"))
load("~/PED/prepareDataDay/PP_new8.RData")
## MAXO3C column of PP_new8; passed to denorm() below as the reference whose
## min/max define the original scale
orig <- PP_new8[, "MAXO3C"]

####################### linear regression #########################
# training set
breaks <- seq(from = 0, to = 0.5, by = 0.025)
# Map the normalised targets and predictions back onto the scale of `orig`
trainTargets_orig <- denorm(targetsTrain, orig)
testTargets_orig <- denorm(targetsTest, orig)
trainPredict_orig <- denorm(trainPredict, orig)
testPredict_orig <- denorm(testPredict, orig)
# Histogram of the observed training targets
hist(
  trainTargets_orig,
  col = "red",
  main = "ozone distribution of training set for dataset 15",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone (PPM)"
)

plot of chunk unnamed-chunk-8

# Histogram of the training-set predictions; the bins start at -0.1, below
# the 0 used for the observed targets.
hist(
  trainPredict_orig,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 15(linear regression)",
  breaks = seq(from = -0.1, to = 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone(PPM)"
)

plot of chunk unnamed-chunk-8

### testing set
# Histogram of the observed testing targets, using the shared `breaks`
hist(
  testTargets_orig,
  col = "blue",
  main = "ozone distribution of testing set for dataset 15",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 100),
  xlab = "daily maximum ozone (PPM)"
)

plot of chunk unnamed-chunk-8

# Histogram of the testing-set predictions; bins start at -0.1
hist(
  testPredict_orig,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 15(linear regression)",
  xlab = "daily maximum ozone (PPM)",
  breaks = seq(from = -0.1, to = 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 100)
)

plot of chunk unnamed-chunk-8

## Error distribution of the training set: absolute error plotted against
## the observed value
trainError <- abs(trainTargets_orig - trainPredict_orig)
plot(trainTargets_orig, trainError, type = "p")

plot of chunk unnamed-chunk-8

## Error distribution of the testing set: absolute error plotted against
## the observed value
testError <- abs(testTargets_orig - testPredict_orig)
plot(testTargets_orig, testError, type = "p")

plot of chunk unnamed-chunk-8

###### randomForest ###########################
## Restore the saved predictions for mtry = 7, ntree = 500. The calls below
## read trainPredict / testPredict afterwards, so the files presumably
## replace those objects -- confirm.
load(paste("rfFit_trainPredict_dataset", 15, "_mtry_", 7, "_ntree_", 500, ".RData"))
load(paste("rfFit_testPredict_dataset", 15, "_mtry_", 7, "_ntree_", 500, ".RData"))
trainPredict_orig <- denorm(trainPredict, orig)
testPredict_orig <- denorm(testPredict, orig)
## mtry is 7, ntree is 500 (as in the file names loaded above)
hist(
  trainPredict_orig,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 15(Rf)",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone(PPM)"
)

plot of chunk unnamed-chunk-8

## Histogram of the testing-set predictions; bins start at -0.1
hist(
  testPredict_orig,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 15(RF)",
  xlab = "daily maximum ozone (PPM)",
  breaks = seq(from = -0.1, to = 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 100)
)

plot of chunk unnamed-chunk-8

## Error distribution of the training set: absolute error plotted against
## the observed value
trainError <- abs(trainTargets_orig - trainPredict_orig)
plot(trainTargets_orig, trainError, type = "p")

plot of chunk unnamed-chunk-8

## Error distribution of the testing set: absolute error plotted against
## the observed value
testError <- abs(testTargets_orig - testPredict_orig)
plot(testTargets_orig, testError, type = "p")

plot of chunk unnamed-chunk-8

Variable importance (random forest)

## Restore the saved fit for mtry = 7, ntree = 500 and print its importance
## table (rfFit is presumably the object stored in this file -- confirm)
load(paste("rfFit_dataset_", 15, "_mtry_", 7, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                8.118e-03       10.5068
## AVGO3P                6.675e-03        7.9920
## MAXO3P_MAXRHP         3.293e-03        4.2115
## MAXO3P_MEDIANO2P      1.135e-03        2.1079
## MEDIANWSPP_MEDIANRHP  7.175e-04        1.9972
## MAXWDRP_MEDIANNOxP    7.756e-04        2.0520
## MAXTMPP_MAXRHP        9.400e-04        2.3018
## MAXNOXP               7.190e-04        1.7583
## MAXNO2P               9.964e-04        1.8498
## STDO3P                3.721e-03        6.7559
## STDTMPP               6.011e-04        2.4958
## STDWSPP               2.953e-04        1.9395
## STDRHP                3.693e-04        2.2699
## WEEKDAYC              8.719e-05        0.9633
## SEASONC               2.147e-04        0.5800
## TMPpoint              9.932e-04        3.0335
## RHpoint               2.123e-03        3.4106
## WSPpoint              1.543e-03        3.1916
## MAXTMPP_MAXNO2P       1.314e-03        1.9687
## MEDIANTMPP_MEDIANNO2P 1.217e-03        1.9592
## RHpoint_MEDIANNO2P    1.407e-03        2.5606
## Restore the saved fit for mtry = 14, ntree = 500 and print its importance
## table (rfFit is presumably the object stored in this file -- confirm)
load(paste("rfFit_dataset_", 15, "_mtry_", 14, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                1.024e-02       14.9357
## AVGO3P                6.468e-03        7.4511
## MAXO3P_MAXRHP         2.510e-03        2.7596
## MAXO3P_MEDIANO2P      1.058e-03        1.6859
## MEDIANWSPP_MEDIANRHP  8.179e-04        2.0343
## MAXWDRP_MEDIANNOxP    4.563e-04        2.0481
## MAXTMPP_MAXRHP        9.222e-04        2.2049
## MAXNOXP               7.032e-04        1.6340
## MAXNO2P               8.688e-04        1.3027
## STDO3P                2.975e-03        5.5053
## STDTMPP               7.009e-04        2.7755
## STDWSPP               2.400e-04        2.0343
## STDRHP                4.839e-04        2.2882
## WEEKDAYC              2.922e-05        0.9949
## SEASONC               1.972e-04        0.5738
## TMPpoint              1.048e-03        3.2820
## RHpoint               2.428e-03        3.8643
## WSPpoint              1.987e-03        3.7236
## MAXTMPP_MAXNO2P       1.128e-03        1.5388
## MEDIANTMPP_MEDIANNO2P 1.139e-03        1.6069
## RHpoint_MEDIANNO2P    9.587e-04        2.2097
## Restore the saved fit for mtry = 7, ntree = 2000 and print its importance
## table (rfFit is presumably the object stored in this file -- confirm)
load(paste("rfFit_dataset_", 15, "_mtry_", 7, "_ntree_", 2000, ".RData"))
rfFit[["importance"]]
##                          %IncMSE IncNodePurity
## MAXO3P                 8.386e-03       10.0463
## AVGO3P                 6.626e-03        8.1436
## MAXO3P_MAXRHP          3.347e-03        4.3028
## MAXO3P_MEDIANO2P       1.147e-03        2.0562
## MEDIANWSPP_MEDIANRHP   6.907e-04        2.0148
## MAXWDRP_MEDIANNOxP     7.170e-04        2.0705
## MAXTMPP_MAXRHP         9.427e-04        2.2880
## MAXNOXP                6.688e-04        1.7298
## MAXNO2P                1.136e-03        1.8479
## STDO3P                 3.692e-03        6.9403
## STDTMPP                6.138e-04        2.5092
## STDWSPP                1.823e-04        1.9876
## STDRHP                 4.194e-04        2.2575
## WEEKDAYC              -9.313e-08        0.9541
## SEASONC                1.845e-04        0.6005
## TMPpoint               9.339e-04        3.1017
## RHpoint                2.084e-03        3.4605
## WSPpoint               1.560e-03        3.2678
## MAXTMPP_MAXNO2P        1.359e-03        1.9696
## MEDIANTMPP_MEDIANNO2P  1.281e-03        1.9518
## RHpoint_MEDIANNO2P     1.273e-03        2.4835
## Restore the saved fit for mtry = 14, ntree = 2000 and print its importance
## table (rfFit is presumably the object stored in this file -- confirm)
load(paste("rfFit_dataset_", 15, "_mtry_", 14, "_ntree_", 2000, ".RData"))
rfFit[["importance"]]
##                         %IncMSE IncNodePurity
## MAXO3P                1.071e-02       14.7284
## AVGO3P                6.711e-03        7.5566
## MAXO3P_MAXRHP         2.303e-03        2.6347
## MAXO3P_MEDIANO2P      9.404e-04        1.6608
## MEDIANWSPP_MEDIANRHP  7.880e-04        2.0721
## MAXWDRP_MEDIANNOxP    5.915e-04        2.0135
## MAXTMPP_MAXRHP        8.964e-04        2.2458
## MAXNOXP               5.941e-04        1.6180
## MAXNO2P               7.873e-04        1.3365
## STDO3P                3.207e-03        5.7512
## STDTMPP               6.131e-04        2.7275
## STDWSPP               1.831e-04        2.0111
## STDRHP                3.949e-04        2.3078
## WEEKDAYC              4.743e-06        0.9846
## SEASONC               1.767e-04        0.5779
## TMPpoint              9.887e-04        3.2428
## RHpoint               2.474e-03        3.9041
## WSPpoint              2.070e-03        3.7111
## MAXTMPP_MAXNO2P       1.131e-03        1.5715
## MEDIANTMPP_MEDIANNO2P 1.026e-03        1.6088
## RHpoint_MEDIANNO2P    9.254e-04        2.1461