Working on feature set 12

## Restore the saved objects for feature set 12; feature_new_train12 and
## feature_new_test12 used below are presumably defined by this file.
load("~/PED/prepareDataDay/feature_new_norm12.RData")
## Column names of the training set
colnames(feature_new_train12)
##  [1] "MAXO3P"               "AVGO3P"               "MAXO3P_MAXRHP"       
##  [4] "MAXO3P_MEDIANO2P"     "MEDIANWSPP_MEDIANRHP" "MAXWDRP_MEDIANNOxP"  
##  [7] "MAXTMPP_MAXRHP"       "MAXNOXP"              "MAXNO2P"             
## [10] "STDO3P"               "STDTMPP"              "STDWSPP"             
## [13] "STDRHP"               "WEEKDAYC"             "SEASONC"             
## [16] "TMPpoint"             "RHpoint"              "WSPpoint"            
## [19] "MAXO3C"
## Number of rows in the training set and in the test set
nrow(feature_new_train12)
## [1] 2316
nrow(feature_new_test12)
## [1] 409
## Source the helper functions, echoing each expression as it is evaluated
source("~/PED/nnetAnalysis/function.R", echo = TRUE)
## 
## > library(nnet)
## 
## > library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## 
## > modelErrors <- function(predicted, actual) {
## +     sal <- vector(mode = "numeric", length = 3)
## +     names(sal) <- c("MAE", "RMSE", "RELE")
## +     me .... [TRUNCATED] 
## 
## > train_testErrors <- function(model, inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest) {
## +     trainPredict <- predict(model, newdata = as.d .... [TRUNCATED] 
## 
## > error_distri <- function(model, inputsTrain, targetsTrain, 
## +     inputsTest, targetsTest) {
## +     trainPredict <- predict(model, newdata = as.data. .... [TRUNCATED] 
## 
## > lm_nnet_rf_error <- function(feature_new_train, feature_new_test, 
## +     dataset) {
## +     inputsTrain <- feature_new_train[, -c(ncol(feature_new_tra .... [TRUNCATED]
### Train the models and calculate the errors.
## NOTE(review): the signature echoed above is
## lm_nnet_rf_error(feature_new_train, feature_new_test, dataset), but this
## commented-out call passes the test set first -- confirm the argument order
## before re-enabling it.
##lm_nnet_rf_error(feature_new_test12,feature_new_train12,12)##
## Restore previously saved results instead of retraining. paste() joins its
## arguments with the default sep = " ", so the file names contain spaces
## (e.g. "dataset_ 12 MAE.RData"); presumably they were saved the same way,
## so do not switch to paste0() without renaming the files.
load(paste("dataset_", 12, "MAE.RData"))
load(paste("dataset_", 12, "RMSE.RData"))
load(paste("lm_size_", "dataset_", 12, ".RData"))
load(paste("dataset_", 12, "MAE.rf.RData"))
load(paste("dataset_", 12, "RMSE.rf.RData"))

showing the results

## Print error_lm; the output below shows it holds MAE/RMSE/RELE for the
## train and test sets. It is not defined in this file -- presumably it is
## restored by one of the load() calls above (verify).
error_lm
## $train
##     MAE    RMSE    RELE 
## 0.06737 0.08958 0.21313 
## 
## $test
##     MAE    RMSE    RELE 
## 0.07433 0.09708 0.23070

ANN

### ANN
### Train/test errors ordered by the size column, for decay 1e-4.
### The decay column is matched with a relative tolerance rather than `==`,
### so the rows are still selected if the stored value is not bit-identical
### to the literal 1e-4.
MAE4 <- MAE[
  abs(MAE[, "decay"] / 1e-4 - 1) < 1e-8,
  c("size", "trainMAE", "testMAE")
]
MAE4[order(MAE4[, "size"]), ]
##     size trainMAE testMAE
## MAE   11 0.032526  0.1051
## MAE   13 0.010278  0.1321
## MAE   15 0.021433  0.1254
## MAE   17 0.006646  0.1252
## MAE   19 0.011659  0.1351
## MAE   21 0.026674  0.1196
## MAE   23 0.008527  0.1324
## MAE   25 0.016380  0.1409
RMSE4 <- RMSE[
  abs(RMSE[, "decay"] / 1e-4 - 1) < 1e-8,
  c("size", "trainRMSE", "testRMSE")
]
RMSE4[order(RMSE4[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11  0.044370   0.1401
## RMSE   13  0.014336   0.1720
## RMSE   15  0.028156   0.1681
## RMSE   17  0.009313   0.1636
## RMSE   19  0.015892   0.1781
## RMSE   21  0.036245   0.1569
## RMSE   23  0.011862   0.1723
## RMSE   25  0.021750   0.1813
### Train/test errors ordered by the size column, for decay 1e-3.
### The decay column is matched with a relative tolerance rather than `==`,
### so the rows are still selected if the stored value is not bit-identical
### to the literal 1e-3.
MAE3 <- MAE[
  abs(MAE[, "decay"] / 1e-3 - 1) < 1e-8,
  c("size", "trainMAE", "testMAE")
]
MAE3[order(MAE3[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.03868 0.09164
## MAE   13  0.04435 0.08567
## MAE   15  0.04100 0.08923
## MAE   17  0.03807 0.09033
## MAE   19  0.03827 0.09073
## MAE   21  0.04078 0.08832
## MAE   23  0.04151 0.08928
## MAE   25  0.04159 0.08923
RMSE3 <- RMSE[
  abs(RMSE[, "decay"] / 1e-3 - 1) < 1e-8,
  c("size", "trainRMSE", "testRMSE")
]
RMSE3[order(RMSE3[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.05126   0.1227
## RMSE   13   0.05877   0.1133
## RMSE   15   0.05444   0.1187
## RMSE   17   0.05025   0.1190
## RMSE   19   0.05100   0.1209
## RMSE   21   0.05326   0.1174
## RMSE   23   0.05429   0.1187
## RMSE   25   0.05484   0.1195
### Train/test errors ordered by the size column, for decay 1e-2.
### The decay column is matched with a relative tolerance rather than `==`,
### so the rows are still selected if the stored value is not bit-identical
### to the literal 1e-2.
MAE2 <- MAE[
  abs(MAE[, "decay"] / 1e-2 - 1) < 1e-8,
  c("size", "trainMAE", "testMAE")
]
MAE2[order(MAE2[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.06766 0.07432
## MAE   13  0.06766 0.07432
## MAE   15  0.06767 0.07432
## MAE   17  0.06766 0.07432
## MAE   19  0.06766 0.07432
## MAE   21  0.06766 0.07432
## MAE   23  0.06765 0.07431
## MAE   25  0.06767 0.07432
RMSE2 <- RMSE[
  abs(RMSE[, "decay"] / 1e-2 - 1) < 1e-8,
  c("size", "trainRMSE", "testRMSE")
]
RMSE2[order(RMSE2[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.08956  0.09684
## RMSE   13   0.08956  0.09684
## RMSE   15   0.08957  0.09684
## RMSE   17   0.08956  0.09684
## RMSE   19   0.08956  0.09684
## RMSE   21   0.08956  0.09684
## RMSE   23   0.08955  0.09683
## RMSE   25   0.08957  0.09684
### Train/test errors ordered by the size column, for decay 1e-1.
### The decay column is matched with a relative tolerance rather than `==`,
### so the rows are still selected if the stored value is not bit-identical
### to the literal 1e-1.
MAE1 <- MAE[
  abs(MAE[, "decay"] / 1e-1 - 1) < 1e-8,
  c("size", "trainMAE", "testMAE")
]
MAE1[order(MAE1[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.07233 0.07697
## MAE   13  0.07228 0.07696
## MAE   15  0.07230 0.07696
## MAE   17  0.07234 0.07697
## MAE   19  0.07229 0.07696
## MAE   21  0.07231 0.07696
## MAE   23  0.07227 0.07696
## MAE   25  0.07229 0.07696
RMSE1 <- RMSE[
  abs(RMSE[, "decay"] / 1e-1 - 1) < 1e-8,
  c("size", "trainRMSE", "testRMSE")
]
RMSE1[order(RMSE1[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11   0.09468  0.09954
## RMSE   13   0.09466  0.09953
## RMSE   15   0.09467  0.09953
## RMSE   17   0.09468  0.09953
## RMSE   19   0.09467  0.09954
## RMSE   21   0.09466  0.09953
## RMSE   23   0.09467  0.09954
## RMSE   25   0.09467  0.09953
### Train/test errors ordered by the size column, for decay 1.
### The decay column is matched with a tolerance rather than `==`, so the
### rows are still selected if the stored value is not bit-identical to 1.
MAE0 <- MAE[
  abs(MAE[, "decay"] - 1) < 1e-8,
  c("size", "trainMAE", "testMAE")
]
MAE0[order(MAE0[, "size"]), ]
##     size trainMAE testMAE
## MAE   11  0.09558 0.09618
## MAE   13  0.09558 0.09617
## MAE   15  0.09559 0.09618
## MAE   17  0.09558 0.09617
## MAE   19  0.09558 0.09617
## MAE   21  0.09559 0.09618
## MAE   23  0.09558 0.09617
## MAE   25  0.09559 0.09618
RMSE0 <- RMSE[
  abs(RMSE[, "decay"] - 1) < 1e-8,
  c("size", "trainRMSE", "testRMSE")
]
RMSE0[order(RMSE0[, "size"]), ]
##      size trainRMSE testRMSE
## RMSE   11    0.1203   0.1207
## RMSE   13    0.1202   0.1206
## RMSE   15    0.1203   0.1207
## RMSE   17    0.1203   0.1206
## RMSE   19    0.1202   0.1206
## RMSE   21    0.1203   0.1207
## RMSE   23    0.1202   0.1206
## RMSE   25    0.1203   0.1207

randomForest

## Random forest train/test errors per mtry when ntree is 500
MAE500 <- MAE_rf[
  MAE_rf[, "ntree"] == 500,
  c("mtry", "trainMAE", "testMAE")
]
MAE500
##     mtry trainMAE testMAE
## MAE    3  0.03064 0.07394
## MAE    5  0.02999 0.07416
## MAE    7  0.02947 0.07403
## MAE    9  0.02967 0.07398
## MAE   11  0.02924 0.07401
## MAE   13  0.02929 0.07423
## MAE   15  0.02908 0.07406
## MAE   17  0.02874 0.07389
## MAE   19  0.02900 0.07400
RMSE500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 500,
  c("mtry", "trainRMSE", "testRMSE")
]
RMSE500
##      mtry trainRMSE testRMSE
## RMSE    3   0.04119  0.09736
## RMSE    5   0.04058  0.09771
## RMSE    7   0.03981  0.09771
## RMSE    9   0.04004  0.09760
## RMSE   11   0.03920  0.09763
## RMSE   13   0.03961  0.09776
## RMSE   15   0.03947  0.09776
## RMSE   17   0.03907  0.09742
## RMSE   19   0.03980  0.09761
## Random forest train/test errors per mtry when ntree is 1000
MAE1000 <- MAE_rf[
  MAE_rf[, "ntree"] == 1000,
  c("mtry", "trainMAE", "testMAE")
]
MAE1000
##     mtry trainMAE testMAE
## MAE    3  0.03084 0.07418
## MAE    5  0.02989 0.07398
## MAE    7  0.02975 0.07415
## MAE    9  0.02936 0.07417
## MAE   11  0.02930 0.07405
## MAE   13  0.02907 0.07405
## MAE   15  0.02916 0.07395
## MAE   17  0.02904 0.07380
## MAE   19  0.02914 0.07386
RMSE1000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 1000,
  c("mtry", "trainRMSE", "testRMSE")
]
RMSE1000
##      mtry trainRMSE testRMSE
## RMSE    3   0.04169  0.09748
## RMSE    5   0.04037  0.09755
## RMSE    7   0.04019  0.09771
## RMSE    9   0.03966  0.09777
## RMSE   11   0.03958  0.09770
## RMSE   13   0.03939  0.09760
## RMSE   15   0.03952  0.09748
## RMSE   17   0.03929  0.09744
## RMSE   19   0.03941  0.09747
## Random forest train/test errors per mtry when ntree is 1500
MAE1500 <- MAE_rf[
  MAE_rf[, "ntree"] == 1500,
  c("mtry", "trainMAE", "testMAE")
]
MAE1500
##     mtry trainMAE testMAE
## MAE    3  0.03059 0.07414
## MAE    5  0.03003 0.07399
## MAE    7  0.02947 0.07411
## MAE    9  0.02935 0.07409
## MAE   11  0.02940 0.07401
## MAE   13  0.02917 0.07386
## MAE   15  0.02919 0.07383
## MAE   17  0.02910 0.07389
## MAE   19  0.02889 0.07389
RMSE1500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 1500,
  c("mtry", "trainRMSE", "testRMSE")
]
RMSE1500
##      mtry trainRMSE testRMSE
## RMSE    3   0.04137  0.09751
## RMSE    5   0.04064  0.09746
## RMSE    7   0.03977  0.09766
## RMSE    9   0.03978  0.09767
## RMSE   11   0.03984  0.09757
## RMSE   13   0.03961  0.09749
## RMSE   15   0.03974  0.09744
## RMSE   17   0.03960  0.09752
## RMSE   19   0.03929  0.09756
## Random forest train/test errors per mtry when ntree is 2000
MAE2000 <- MAE_rf[
  MAE_rf[, "ntree"] == 2000,
  c("mtry", "trainMAE", "testMAE")
]
MAE2000
##     mtry trainMAE testMAE
## MAE    3  0.03077 0.07398
## MAE    5  0.02999 0.07409
## MAE    7  0.02948 0.07404
## MAE    9  0.02932 0.07392
## MAE   11  0.02923 0.07402
## MAE   13  0.02920 0.07385
## MAE   15  0.02889 0.07390
## MAE   17  0.02900 0.07392
## MAE   19  0.02896 0.07380
RMSE2000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2000,
  c("mtry", "trainRMSE", "testRMSE")
]
RMSE2000
##      mtry trainRMSE testRMSE
## RMSE    3   0.04151  0.09737
## RMSE    5   0.04063  0.09756
## RMSE    7   0.03980  0.09753
## RMSE    9   0.03961  0.09749
## RMSE   11   0.03970  0.09762
## RMSE   13   0.03960  0.09744
## RMSE   15   0.03939  0.09749
## RMSE   17   0.03930  0.09745
## RMSE   19   0.03928  0.09738
## Random forest train/test errors per mtry when ntree is 2500
MAE2500 <- MAE_rf[
  MAE_rf[, "ntree"] == 2500,
  c("mtry", "trainMAE", "testMAE")
]
MAE2500
##     mtry trainMAE testMAE
## MAE    3  0.03076 0.07410
## MAE    5  0.02977 0.07402
## MAE    7  0.02961 0.07405
## MAE    9  0.02948 0.07408
## MAE   11  0.02926 0.07394
## MAE   13  0.02911 0.07398
## MAE   15  0.02905 0.07392
## MAE   17  0.02902 0.07379
## MAE   19  0.02904 0.07384
RMSE2500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2500,
  c("mtry", "trainRMSE", "testRMSE")
]
RMSE2500
##      mtry trainRMSE testRMSE
## RMSE    3   0.04157  0.09752
## RMSE    5   0.04036  0.09754
## RMSE    7   0.04000  0.09754
## RMSE    9   0.03994  0.09761
## RMSE   11   0.03967  0.09753
## RMSE   13   0.03935  0.09757
## RMSE   15   0.03945  0.09751
## RMSE   17   0.03955  0.09739
## RMSE   19   0.03946  0.09746

Error distribution

####################### linear regression #########################
## Restore the saved linear-regression errors (the code below expects the
## file to provide error_dis). paste() uses its default sep = " ", so the
## file name contains spaces.
load(paste("lmFit_error_dis_dataset", 12, ".RData"))
## Error distribution of the training set (title typo "distrubion" fixed)
hist(
  error_dis[[2]],
  col = "red",
  main = "linear regression error distribution of training set",
  xlab = "error"
)

plot of chunk unnamed-chunk-8

## O3 errors of the training set, with a horizontal line at their mean.
## NOTE(review): only the errors are passed to plot(); if error_dis[[2]] is a
## plain numeric vector they are drawn against their index, not against
## expected O3 as the title says -- confirm.
plot(
  error_dis[[2]],
  main = "linear regression error_O3 ~ expected O3(training set)",
  ylab = "errors",
  col = "red"
)
abline(h = mean(error_dis[[2]]))

plot of chunk unnamed-chunk-8

## Error distribution of the test set (title typo "distrubion" fixed)
hist(
  error_dis[[1]],
  col = "red",
  main = "linear regression error distribution of testing set",
  xlab = "error"
)

plot of chunk unnamed-chunk-8

## O3 errors of the testing set, with a horizontal line at their mean.
## NOTE(review): only the errors are passed to plot(); if error_dis[[1]] is a
## plain numeric vector they are drawn against their index, not against
## expected O3 as the title says -- confirm.
plot(
  error_dis[[1]],
  main = "linear regression error_O3 ~ expected O3(testing set)",
  ylab = "errors",
  col = "red"
)
abline(h = mean(error_dis[[1]]))

plot of chunk unnamed-chunk-8

############################ ANN #######################
## Restore the saved ANN errors for size 19 and decay 1e-2 (the code below
## expects the file to provide error_dis). paste() uses its default
## sep = " ", so the file name contains spaces.
load(paste(
  "netFit_error_dis_dataset", 12, "_size_", 19, "_decay_", 1e-2, ".RData"
))
## Error distribution of the training set (title typo "distrubion" fixed)
hist(
  error_dis[[2]],
  col = "red",
  main = "ANN error distribution of training set",
  xlab = "error"
)

plot of chunk unnamed-chunk-8

## ANN errors of the training set, with a horizontal line at their mean.
## NOTE(review): only the errors are passed to plot(); if error_dis[[2]] is a
## plain numeric vector they are drawn against their index, not against
## expected O3 as the title says -- confirm.
plot(
  error_dis[[2]],
  main = "ANN error_O3 ~ expected O3(training set)",
  ylab = "errors",
  col = "red"
)
abline(h = mean(error_dis[[2]]))

plot of chunk unnamed-chunk-8

## Error distribution of the test set (title typo "distrubion" fixed)
hist(
  error_dis[[1]],
  col = "red",
  main = "ANN error distribution of testing set",
  xlab = "error"
)

plot of chunk unnamed-chunk-8

## ANN errors of the testing set, with a horizontal line at their mean.
## NOTE(review): only the errors are passed to plot(); if error_dis[[1]] is a
## plain numeric vector they are drawn against their index, not against
## expected O3 as the title says -- confirm.
plot(
  error_dis[[1]],
  main = "ANN error_O3 ~ expected O3(testing set)",
  ylab = "errors",
  col = "red"
)
abline(h = mean(error_dis[[1]]))

plot of chunk unnamed-chunk-8

Importance factors (random forest variable importance)

## Importance table of the random forest saved for mtry = 15, ntree = 500
## (the code assumes the file restores an object named rfFit).
load(paste("rfFit_dataset_", 12, "_mtry_", 15, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
##                        %IncMSE IncNodePurity
## MAXO3P               5.117e-03        1.3939
## AVGO3P               3.384e-03        0.9851
## MAXO3P_MAXRHP        7.507e-04        0.2839
## MAXO3P_MEDIANO2P     2.263e-04        0.1492
## MEDIANWSPP_MEDIANRHP 2.157e-04        0.1597
## MAXWDRP_MEDIANNOxP   5.718e-04        0.1945
## MAXTMPP_MAXRHP       1.832e-04        0.1696
## MAXNOXP              3.522e-04        0.1530
## MAXNO2P              3.668e-04        0.1899
## STDO3P               1.025e-03        0.5282
## STDTMPP              2.116e-04        0.1979
## STDWSPP              1.729e-04        0.1816
## STDRHP               1.200e-04        0.1986
## WEEKDAYC             7.198e-05        0.1378
## SEASONC              4.867e-05        0.0581
## TMPpoint             3.982e-04        0.2911
## RHpoint              3.954e-04        0.2975
## WSPpoint             3.017e-04        0.2269
## Importance table of the random forest saved for mtry = 15, ntree = 1500
## (the code assumes the file restores an object named rfFit).
load(paste("rfFit_dataset_", 12, "_mtry_", 15, "_ntree_", 1500, ".RData"))
rfFit[["importance"]]
##                        %IncMSE IncNodePurity
## MAXO3P               5.191e-03       1.35213
## AVGO3P               3.317e-03       0.99835
## MAXO3P_MAXRHP        7.488e-04       0.23662
## MAXO3P_MEDIANO2P     2.200e-04       0.15343
## MEDIANWSPP_MEDIANRHP 1.879e-04       0.15507
## MAXWDRP_MEDIANNOxP   5.575e-04       0.19659
## MAXTMPP_MAXRHP       1.676e-04       0.18237
## MAXNOXP              3.083e-04       0.14891
## MAXNO2P              3.787e-04       0.19301
## STDO3P               1.393e-03       0.56127
## STDTMPP              1.550e-04       0.20073
## STDWSPP              1.393e-04       0.17687
## STDRHP               1.305e-04       0.19654
## WEEKDAYC             1.761e-05       0.13631
## SEASONC              5.949e-05       0.05453
## TMPpoint             3.927e-04       0.29759
## RHpoint              3.478e-04       0.29993
## WSPpoint             3.614e-04       0.23225
## Importance table of the random forest saved for mtry = 15, ntree = 2500
## (the code assumes the file restores an object named rfFit).
load(paste("rfFit_dataset_", 12, "_mtry_", 15, "_ntree_", 2500, ".RData"))
rfFit[["importance"]]
##                        %IncMSE IncNodePurity
## MAXO3P               5.292e-03       1.35186
## AVGO3P               3.218e-03       1.01371
## MAXO3P_MAXRHP        6.991e-04       0.23921
## MAXO3P_MEDIANO2P     2.005e-04       0.14942
## MEDIANWSPP_MEDIANRHP 2.081e-04       0.15541
## MAXWDRP_MEDIANNOxP   5.280e-04       0.19629
## MAXTMPP_MAXRHP       1.780e-04       0.17368
## MAXNOXP              2.810e-04       0.14792
## MAXNO2P              3.863e-04       0.19627
## STDO3P               1.273e-03       0.55893
## STDTMPP              1.672e-04       0.20345
## STDWSPP              1.055e-04       0.17719
## STDRHP               8.328e-05       0.20102
## WEEKDAYC             8.598e-07       0.13340
## SEASONC              6.459e-05       0.05547
## TMPpoint             4.064e-04       0.29983
## RHpoint              3.593e-04       0.29313
## WSPpoint             3.316e-04       0.22890
## Importance table of the random forest saved for mtry = 19, ntree = 1500
## (the code assumes the file restores an object named rfFit).
load(paste("rfFit_dataset_", 12, "_mtry_", 19, "_ntree_", 1500, ".RData"))
rfFit[["importance"]]
##                        %IncMSE IncNodePurity
## MAXO3P               5.369e-03       1.42030
## AVGO3P               3.344e-03       0.97444
## MAXO3P_MAXRHP        6.509e-04       0.22339
## MAXO3P_MEDIANO2P     2.087e-04       0.15116
## MEDIANWSPP_MEDIANRHP 1.885e-04       0.14930
## MAXWDRP_MEDIANNOxP   5.799e-04       0.18988
## MAXTMPP_MAXRHP       1.822e-04       0.17349
## MAXNOXP              2.719e-04       0.14747
## MAXNO2P              4.057e-04       0.19356
## STDO3P               1.110e-03       0.51789
## STDTMPP              1.419e-04       0.20065
## STDWSPP              7.355e-05       0.17765
## STDRHP               9.899e-05       0.20020
## WEEKDAYC             5.104e-06       0.14453
## SEASONC              7.789e-05       0.05872
## TMPpoint             3.890e-04       0.31145
## RHpoint              3.785e-04       0.29986
## WSPpoint             3.543e-04       0.23254