Working on feature set 12
## Load the saved feature tables for dataset 12 (feature_new_train12 and
## feature_new_test12 are used right below, so presumably they come from
## this file -- confirm).
load(file = "~/PED/prepareDataDay/feature_new_norm12.RData")
## Variables of the training table
colnames(x = feature_new_train12)
## [1] "MAXO3P" "AVGO3P" "MAXO3P_MAXRHP"
## [4] "MAXO3P_MEDIANO2P" "MEDIANWSPP_MEDIANRHP" "MAXWDRP_MEDIANNOxP"
## [7] "MAXTMPP_MAXRHP" "MAXNOXP" "MAXNO2P"
## [10] "STDO3P" "STDTMPP" "STDWSPP"
## [13] "STDRHP" "WEEKDAYC" "SEASONC"
## [16] "TMPpoint" "RHpoint" "WSPpoint"
## [19] "MAXO3C"
## Sample sizes: number of rows in the training table
dim(feature_new_train12)[1L]
## [1] 2316
## Number of rows in the test table
dim(feature_new_test12)[1L]
## [1] 409
## Run the shared helper script, echoing each expression as it is evaluated.
## The echo below shows it attaching nnet and randomForest and defining
## modelErrors, train_testErrors, error_distri and lm_nnet_rf_error.
source(file = "~/PED/nnetAnalysis/function.R", echo = TRUE)
##
## > library(nnet)
##
## > library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
##
## > modelErrors <- function(predicted, actual) {
## + sal <- vector(mode = "numeric", length = 3)
## + names(sal) <- c("MAE", "RMSE", "RELE")
## + me .... [TRUNCATED]
##
## > train_testErrors <- function(model, inputsTrain, targetsTrain,
## + inputsTest, targetsTest) {
## + trainPredict <- predict(model, newdata = as.d .... [TRUNCATED]
##
## > error_distri <- function(model, inputsTrain, targetsTrain,
## + inputsTest, targetsTest) {
## + trainPredict <- predict(model, newdata = as.data. .... [TRUNCATED]
##
## > lm_nnet_rf_error <- function(feature_new_train, feature_new_test,
## + dataset) {
## + inputsTrain <- feature_new_train[, -c(ncol(feature_new_tra .... [TRUNCATED]
###train models and calculate the errors
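##lm_nnet_rf_error(feature_new_test12,feature_new_train12,12)##
## NOTE(review): the echoed signature is lm_nnet_rf_error(feature_new_train, feature_new_test, dataset), but this call passes the test table first -- confirm the argument order that produced the saved results.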
## Reload the saved error tables for dataset 12 (presumably providing MAE,
## RMSE, error_lm, MAE_rf and RMSE_rf, which are used below -- confirm).
## paste() joins its pieces with a single space by default, so the file
## names contain spaces (e.g. "dataset_ 12 MAE.RData"); sep is spelled out
## to make that explicit. Paths are relative to the working directory.
load(file = paste("dataset_", 12, "MAE.RData", sep = " "))
load(file = paste("dataset_", 12, "RMSE.RData", sep = " "))
load(file = paste("lm_size_", "dataset_", 12, ".RData", sep = " "))
load(file = paste("dataset_", 12, "MAE.rf.RData", sep = " "))
load(file = paste("dataset_", 12, "RMSE.rf.RData", sep = " "))
Showing the results
## Print the error_lm summary (train/test MAE, RMSE and RELE, see output)
print(error_lm)
## $train
## MAE RMSE RELE
## 0.06737 0.08958 0.21313
##
## $test
## MAE RMSE RELE
## 0.07433 0.09708 0.23070
ANN
###ANN
### decay = 1e-4: train/test MAE, rows ordered by size
MAE4 <- MAE[MAE[, "decay"] == 1e-4, c("size", "trainMAE", "testMAE")]
MAE4[order(MAE4[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.032526 0.1051
## MAE 13 0.010278 0.1321
## MAE 15 0.021433 0.1254
## MAE 17 0.006646 0.1252
## MAE 19 0.011659 0.1351
## MAE 21 0.026674 0.1196
## MAE 23 0.008527 0.1324
## MAE 25 0.016380 0.1409
### decay = 1e-4: train/test RMSE, rows ordered by size
RMSE4 <- RMSE[RMSE[, "decay"] == 1e-4, c("size", "trainRMSE", "testRMSE")]
RMSE4[order(RMSE4[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.044370 0.1401
## RMSE 13 0.014336 0.1720
## RMSE 15 0.028156 0.1681
## RMSE 17 0.009313 0.1636
## RMSE 19 0.015892 0.1781
## RMSE 21 0.036245 0.1569
## RMSE 23 0.011862 0.1723
## RMSE 25 0.021750 0.1813
### decay = 1e-3: train/test MAE, rows ordered by size
MAE3 <- MAE[MAE[, "decay"] == 1e-3, c("size", "trainMAE", "testMAE")]
MAE3[order(MAE3[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.03868 0.09164
## MAE 13 0.04435 0.08567
## MAE 15 0.04100 0.08923
## MAE 17 0.03807 0.09033
## MAE 19 0.03827 0.09073
## MAE 21 0.04078 0.08832
## MAE 23 0.04151 0.08928
## MAE 25 0.04159 0.08923
### decay = 1e-3: train/test RMSE, rows ordered by size
RMSE3 <- RMSE[RMSE[, "decay"] == 1e-3, c("size", "trainRMSE", "testRMSE")]
RMSE3[order(RMSE3[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.05126 0.1227
## RMSE 13 0.05877 0.1133
## RMSE 15 0.05444 0.1187
## RMSE 17 0.05025 0.1190
## RMSE 19 0.05100 0.1209
## RMSE 21 0.05326 0.1174
## RMSE 23 0.05429 0.1187
## RMSE 25 0.05484 0.1195
### decay = 1e-2: train/test MAE, rows ordered by size
MAE2 <- MAE[MAE[, "decay"] == 1e-2, c("size", "trainMAE", "testMAE")]
MAE2[order(MAE2[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.06766 0.07432
## MAE 13 0.06766 0.07432
## MAE 15 0.06767 0.07432
## MAE 17 0.06766 0.07432
## MAE 19 0.06766 0.07432
## MAE 21 0.06766 0.07432
## MAE 23 0.06765 0.07431
## MAE 25 0.06767 0.07432
### decay = 1e-2: train/test RMSE, rows ordered by size
RMSE2 <- RMSE[RMSE[, "decay"] == 1e-2, c("size", "trainRMSE", "testRMSE")]
RMSE2[order(RMSE2[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.08956 0.09684
## RMSE 13 0.08956 0.09684
## RMSE 15 0.08957 0.09684
## RMSE 17 0.08956 0.09684
## RMSE 19 0.08956 0.09684
## RMSE 21 0.08956 0.09684
## RMSE 23 0.08955 0.09683
## RMSE 25 0.08957 0.09684
### decay = 1e-1: train/test MAE, rows ordered by size
MAE1 <- MAE[MAE[, "decay"] == 1e-1, c("size", "trainMAE", "testMAE")]
MAE1[order(MAE1[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.07233 0.07697
## MAE 13 0.07228 0.07696
## MAE 15 0.07230 0.07696
## MAE 17 0.07234 0.07697
## MAE 19 0.07229 0.07696
## MAE 21 0.07231 0.07696
## MAE 23 0.07227 0.07696
## MAE 25 0.07229 0.07696
### decay = 1e-1: train/test RMSE, rows ordered by size
RMSE1 <- RMSE[RMSE[, "decay"] == 1e-1, c("size", "trainRMSE", "testRMSE")]
RMSE1[order(RMSE1[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.09468 0.09954
## RMSE 13 0.09466 0.09953
## RMSE 15 0.09467 0.09953
## RMSE 17 0.09468 0.09953
## RMSE 19 0.09467 0.09954
## RMSE 21 0.09466 0.09953
## RMSE 23 0.09467 0.09954
## RMSE 25 0.09467 0.09953
### decay = 1: train/test MAE, rows ordered by size
MAE0 <- MAE[MAE[, "decay"] == 1, c("size", "trainMAE", "testMAE")]
MAE0[order(MAE0[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.09558 0.09618
## MAE 13 0.09558 0.09617
## MAE 15 0.09559 0.09618
## MAE 17 0.09558 0.09617
## MAE 19 0.09558 0.09617
## MAE 21 0.09559 0.09618
## MAE 23 0.09558 0.09617
## MAE 25 0.09559 0.09618
### decay = 1: train/test RMSE, rows ordered by size
RMSE0 <- RMSE[RMSE[, "decay"] == 1, c("size", "trainRMSE", "testRMSE")]
RMSE0[order(RMSE0[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.1203 0.1207
## RMSE 13 0.1202 0.1206
## RMSE 15 0.1203 0.1207
## RMSE 17 0.1203 0.1206
## RMSE 19 0.1202 0.1206
## RMSE 21 0.1203 0.1207
## RMSE 23 0.1202 0.1206
## RMSE 25 0.1203 0.1207
randomForest
## ntree = 500: train/test errors for each mtry
MAE500 <- MAE_rf[MAE_rf[, "ntree"] == 500, c("mtry", "trainMAE", "testMAE")]
RMSE500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 500,
  c("mtry", "trainRMSE", "testRMSE")
]
print(MAE500)
## mtry trainMAE testMAE
## MAE 3 0.03064 0.07394
## MAE 5 0.02999 0.07416
## MAE 7 0.02947 0.07403
## MAE 9 0.02967 0.07398
## MAE 11 0.02924 0.07401
## MAE 13 0.02929 0.07423
## MAE 15 0.02908 0.07406
## MAE 17 0.02874 0.07389
## MAE 19 0.02900 0.07400
## RMSE table for ntree = 500
print(RMSE500)
## mtry trainRMSE testRMSE
## RMSE 3 0.04119 0.09736
## RMSE 5 0.04058 0.09771
## RMSE 7 0.03981 0.09771
## RMSE 9 0.04004 0.09760
## RMSE 11 0.03920 0.09763
## RMSE 13 0.03961 0.09776
## RMSE 15 0.03947 0.09776
## RMSE 17 0.03907 0.09742
## RMSE 19 0.03980 0.09761
## ntree = 1000: train/test errors for each mtry
MAE1000 <- MAE_rf[MAE_rf[, "ntree"] == 1000, c("mtry", "trainMAE", "testMAE")]
RMSE1000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 1000,
  c("mtry", "trainRMSE", "testRMSE")
]
print(MAE1000)
## mtry trainMAE testMAE
## MAE 3 0.03084 0.07418
## MAE 5 0.02989 0.07398
## MAE 7 0.02975 0.07415
## MAE 9 0.02936 0.07417
## MAE 11 0.02930 0.07405
## MAE 13 0.02907 0.07405
## MAE 15 0.02916 0.07395
## MAE 17 0.02904 0.07380
## MAE 19 0.02914 0.07386
## RMSE table for ntree = 1000
print(RMSE1000)
## mtry trainRMSE testRMSE
## RMSE 3 0.04169 0.09748
## RMSE 5 0.04037 0.09755
## RMSE 7 0.04019 0.09771
## RMSE 9 0.03966 0.09777
## RMSE 11 0.03958 0.09770
## RMSE 13 0.03939 0.09760
## RMSE 15 0.03952 0.09748
## RMSE 17 0.03929 0.09744
## RMSE 19 0.03941 0.09747
## ntree = 1500: train/test errors for each mtry
MAE1500 <- MAE_rf[MAE_rf[, "ntree"] == 1500, c("mtry", "trainMAE", "testMAE")]
RMSE1500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 1500,
  c("mtry", "trainRMSE", "testRMSE")
]
print(MAE1500)
## mtry trainMAE testMAE
## MAE 3 0.03059 0.07414
## MAE 5 0.03003 0.07399
## MAE 7 0.02947 0.07411
## MAE 9 0.02935 0.07409
## MAE 11 0.02940 0.07401
## MAE 13 0.02917 0.07386
## MAE 15 0.02919 0.07383
## MAE 17 0.02910 0.07389
## MAE 19 0.02889 0.07389
## RMSE table for ntree = 1500
print(RMSE1500)
## mtry trainRMSE testRMSE
## RMSE 3 0.04137 0.09751
## RMSE 5 0.04064 0.09746
## RMSE 7 0.03977 0.09766
## RMSE 9 0.03978 0.09767
## RMSE 11 0.03984 0.09757
## RMSE 13 0.03961 0.09749
## RMSE 15 0.03974 0.09744
## RMSE 17 0.03960 0.09752
## RMSE 19 0.03929 0.09756
## ntree = 2000: train/test errors for each mtry
MAE2000 <- MAE_rf[MAE_rf[, "ntree"] == 2000, c("mtry", "trainMAE", "testMAE")]
RMSE2000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2000,
  c("mtry", "trainRMSE", "testRMSE")
]
print(MAE2000)
## mtry trainMAE testMAE
## MAE 3 0.03077 0.07398
## MAE 5 0.02999 0.07409
## MAE 7 0.02948 0.07404
## MAE 9 0.02932 0.07392
## MAE 11 0.02923 0.07402
## MAE 13 0.02920 0.07385
## MAE 15 0.02889 0.07390
## MAE 17 0.02900 0.07392
## MAE 19 0.02896 0.07380
## RMSE table for ntree = 2000
print(RMSE2000)
## mtry trainRMSE testRMSE
## RMSE 3 0.04151 0.09737
## RMSE 5 0.04063 0.09756
## RMSE 7 0.03980 0.09753
## RMSE 9 0.03961 0.09749
## RMSE 11 0.03970 0.09762
## RMSE 13 0.03960 0.09744
## RMSE 15 0.03939 0.09749
## RMSE 17 0.03930 0.09745
## RMSE 19 0.03928 0.09738
## ntree = 2500: train/test errors for each mtry
MAE2500 <- MAE_rf[MAE_rf[, "ntree"] == 2500, c("mtry", "trainMAE", "testMAE")]
RMSE2500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2500,
  c("mtry", "trainRMSE", "testRMSE")
]
print(MAE2500)
## mtry trainMAE testMAE
## MAE 3 0.03076 0.07410
## MAE 5 0.02977 0.07402
## MAE 7 0.02961 0.07405
## MAE 9 0.02948 0.07408
## MAE 11 0.02926 0.07394
## MAE 13 0.02911 0.07398
## MAE 15 0.02905 0.07392
## MAE 17 0.02902 0.07379
## MAE 19 0.02904 0.07384
## RMSE table for ntree = 2500
print(RMSE2500)
## mtry trainRMSE testRMSE
## RMSE 3 0.04157 0.09752
## RMSE 5 0.04036 0.09754
## RMSE 7 0.04000 0.09754
## RMSE 9 0.03994 0.09761
## RMSE 11 0.03967 0.09753
## RMSE 13 0.03935 0.09757
## RMSE 15 0.03945 0.09751
## RMSE 17 0.03955 0.09739
## RMSE 19 0.03946 0.09746
Error distribution
####################### linear regression #########################
## Load the saved linear-regression errors for dataset 12. paste() separates
## its pieces with spaces, so the file name is
## "lmFit_error_dis_dataset 12 .RData" (relative to the working directory).
load(paste("lmFit_error_dis_dataset", 12, ".RData"))

## Per the original comments, error_dis[[2]] holds the training-set errors
## and error_dis[[1]] the test-set errors.

## Error distribution of the training set
hist(error_dis[[2]], col = "red",
     main = "linear regression error distribution of training set",
     xlab = "error")
## O3 errors of the training set.
## NOTE(review): the title says "~ expected O3", but plot() is given only
## error_dis[[2]]; if that is a plain numeric vector the x axis is the
## observation index, not the expected O3 -- confirm what was intended.
plot(error_dis[[2]], col = "red", ylab = "errors",
     main = "linear regression error_O3 ~ expected O3(training set)")
abline(h = mean(error_dis[[2]]))  # horizontal reference line at the mean error

## Error distribution of the test set
hist(error_dis[[1]], col = "red",
     main = "linear regression error distribution of testing set",
     xlab = "error")
## O3 errors of the test set (same caveat about the x axis as above)
plot(error_dis[[1]], col = "red", ylab = "errors",
     main = "linear regression error_O3 ~ expected O3(testing set)")
abline(h = mean(error_dis[[1]]))  # horizontal reference line at the mean error
############################ ANN #######################
## Load the saved ANN errors for dataset 12, size 19, decay 1e-2. paste()
## separates its pieces with spaces, so the file name contains spaces and is
## resolved relative to the working directory.
## NOTE(review): presumably this file also defines error_dis, replacing any
## object of that name loaded earlier -- confirm.
load(paste("netFit_error_dis_dataset", 12, "_size_", 19,
           "_decay_", 1e-2, ".RData"))

## Per the original comments, error_dis[[2]] holds the training-set errors
## and error_dis[[1]] the test-set errors.

## Errors of the training set
hist(error_dis[[2]], col = "red",
     main = "ANN error distribution of training set",
     xlab = "error")
## NOTE(review): the title says "~ expected O3", but plot() is given only
## error_dis[[2]]; if that is a plain numeric vector the x axis is the
## observation index, not the expected O3 -- confirm what was intended.
plot(error_dis[[2]], col = "red", ylab = "errors",
     main = "ANN error_O3 ~ expected O3(training set)")
abline(h = mean(error_dis[[2]]))  # horizontal reference line at the mean error

## Error distribution of the test set
hist(error_dis[[1]], col = "red",
     main = "ANN error distribution of testing set",
     xlab = "error")
plot(error_dis[[1]], col = "red", ylab = "errors",
     main = "ANN error_O3 ~ expected O3(testing set)")
abline(h = mean(error_dis[[1]]))  # horizontal reference line at the mean error
Importance factors
## Saved random forest for dataset 12, mtry = 15, ntree = 500.
## The file name is built with paste()'s default single-space separator.
load(file = paste(
  "rfFit_dataset_", 12, "_mtry_", 15, "_ntree_", 500, ".RData",
  sep = " "
))
## Importance table stored on the fitted object
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 5.117e-03 1.3939
## AVGO3P 3.384e-03 0.9851
## MAXO3P_MAXRHP 7.507e-04 0.2839
## MAXO3P_MEDIANO2P 2.263e-04 0.1492
## MEDIANWSPP_MEDIANRHP 2.157e-04 0.1597
## MAXWDRP_MEDIANNOxP 5.718e-04 0.1945
## MAXTMPP_MAXRHP 1.832e-04 0.1696
## MAXNOXP 3.522e-04 0.1530
## MAXNO2P 3.668e-04 0.1899
## STDO3P 1.025e-03 0.5282
## STDTMPP 2.116e-04 0.1979
## STDWSPP 1.729e-04 0.1816
## STDRHP 1.200e-04 0.1986
## WEEKDAYC 7.198e-05 0.1378
## SEASONC 4.867e-05 0.0581
## TMPpoint 3.982e-04 0.2911
## RHpoint 3.954e-04 0.2975
## WSPpoint 3.017e-04 0.2269
## Saved random forest for dataset 12, mtry = 15, ntree = 1500.
## The file name is built with paste()'s default single-space separator.
load(file = paste(
  "rfFit_dataset_", 12, "_mtry_", 15, "_ntree_", 1500, ".RData",
  sep = " "
))
## Importance table stored on the fitted object
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 5.191e-03 1.35213
## AVGO3P 3.317e-03 0.99835
## MAXO3P_MAXRHP 7.488e-04 0.23662
## MAXO3P_MEDIANO2P 2.200e-04 0.15343
## MEDIANWSPP_MEDIANRHP 1.879e-04 0.15507
## MAXWDRP_MEDIANNOxP 5.575e-04 0.19659
## MAXTMPP_MAXRHP 1.676e-04 0.18237
## MAXNOXP 3.083e-04 0.14891
## MAXNO2P 3.787e-04 0.19301
## STDO3P 1.393e-03 0.56127
## STDTMPP 1.550e-04 0.20073
## STDWSPP 1.393e-04 0.17687
## STDRHP 1.305e-04 0.19654
## WEEKDAYC 1.761e-05 0.13631
## SEASONC 5.949e-05 0.05453
## TMPpoint 3.927e-04 0.29759
## RHpoint 3.478e-04 0.29993
## WSPpoint 3.614e-04 0.23225
## Saved random forest for dataset 12, mtry = 15, ntree = 2500.
## The file name is built with paste()'s default single-space separator.
load(file = paste(
  "rfFit_dataset_", 12, "_mtry_", 15, "_ntree_", 2500, ".RData",
  sep = " "
))
## Importance table stored on the fitted object
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 5.292e-03 1.35186
## AVGO3P 3.218e-03 1.01371
## MAXO3P_MAXRHP 6.991e-04 0.23921
## MAXO3P_MEDIANO2P 2.005e-04 0.14942
## MEDIANWSPP_MEDIANRHP 2.081e-04 0.15541
## MAXWDRP_MEDIANNOxP 5.280e-04 0.19629
## MAXTMPP_MAXRHP 1.780e-04 0.17368
## MAXNOXP 2.810e-04 0.14792
## MAXNO2P 3.863e-04 0.19627
## STDO3P 1.273e-03 0.55893
## STDTMPP 1.672e-04 0.20345
## STDWSPP 1.055e-04 0.17719
## STDRHP 8.328e-05 0.20102
## WEEKDAYC 8.598e-07 0.13340
## SEASONC 6.459e-05 0.05547
## TMPpoint 4.064e-04 0.29983
## RHpoint 3.593e-04 0.29313
## WSPpoint 3.316e-04 0.22890
## Saved random forest for dataset 12, mtry = 19, ntree = 1500.
## The file name is built with paste()'s default single-space separator.
load(file = paste(
  "rfFit_dataset_", 12, "_mtry_", 19, "_ntree_", 1500, ".RData",
  sep = " "
))
## Importance table stored on the fitted object
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 5.369e-03 1.42030
## AVGO3P 3.344e-03 0.97444
## MAXO3P_MAXRHP 6.509e-04 0.22339
## MAXO3P_MEDIANO2P 2.087e-04 0.15116
## MEDIANWSPP_MEDIANRHP 1.885e-04 0.14930
## MAXWDRP_MEDIANNOxP 5.799e-04 0.18988
## MAXTMPP_MAXRHP 1.822e-04 0.17349
## MAXNOXP 2.719e-04 0.14747
## MAXNO2P 4.057e-04 0.19356
## STDO3P 1.110e-03 0.51789
## STDTMPP 1.419e-04 0.20065
## STDWSPP 7.355e-05 0.17765
## STDRHP 9.899e-05 0.20020
## WEEKDAYC 5.104e-06 0.14453
## SEASONC 7.789e-05 0.05872
## TMPpoint 3.890e-04 0.31145
## RHpoint 3.785e-04 0.29986
## WSPpoint 3.543e-04 0.23254