Working on feature set 14
# Restore the objects saved for dataset 14. The calls below use
# feature_new_train14 and feature_new_test14, presumably provided by this file.
load("~/PED/prepareDataDay/feature_new_norm14.RData")
## Variable (column) names of the training set; the recorded output lists 22.
colnames(feature_new_train14)
## [1] "MAXO3P" "AVGO3P"
## [3] "MAXO3P_MAXRHP" "MAXO3P_MEDIANO2P"
## [5] "MEDIANWSPP_MEDIANRHP" "MAXWDRP_MEDIANNOxP"
## [7] "MAXTMPP_MAXRHP" "MAXNOXP"
## [9] "MAXNO2P" "STDO3P"
## [11] "STDTMPP" "STDWSPP"
## [13] "STDRHP" "WEEKDAYC"
## [15] "SEASONC" "TMPpoint"
## [17] "RHpoint" "WSPpoint"
## [19] "MAXTMPP_MAXNO2P" "MEDIANTMPP_MEDIANNO2P"
## [21] "RHpoint_MEDIANNO2P" "MAXO3C"
## Number of samples (rows) in the training and testing sets.
nrow(feature_new_train14)
## [1] 2316
nrow(feature_new_test14)
## [1] 409
# Run the shared helper script; echo = TRUE prints each expression as it is
# evaluated. Per the echoed output it attaches nnet and randomForest and
# defines modelErrors, train_testErrors, error_distri, lm_nnet_rf_error and
# denorm.
source('~/PED/nnetAnalysis/function.R', echo=TRUE)
##
## > library(nnet)
##
## > library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
##
## > modelErrors <- function(predicted, actual) {
## + sal <- vector(mode = "numeric", length = 3)
## + names(sal) <- c("MAE", "RMSE", "RELE")
## + me .... [TRUNCATED]
##
## > train_testErrors <- function(model, inputsTrain, targetsTrain,
## + inputsTest, targetsTest) {
## + trainPredict <- predict(model, newdata = as.d .... [TRUNCATED]
##
## > error_distri <- function(model, inputsTrain, targetsTrain,
## + inputsTest, targetsTest) {
## + trainPredict <- predict(model, newdata = as.data. .... [TRUNCATED]
##
## > lm_nnet_rf_error <- function(feature_new_train, feature_new_test,
## + dataset) {
## + inputsTrain <- feature_new_train[, -c(ncol(feature_new_tra .... [TRUNCATED]
##
## > denorm <- function(norm, orig) {
## + ((norm - 0.1) * (max(orig) - min(orig))/0.8) + min(orig)
## + }
### Train the models and calculate the errors.
## The training call is left commented out; the result files loaded below
## were presumably written by an earlier run of it.
## lm_nnet_rf_error(feature_new_train14, feature_new_test14, 14)
# paste() joins its pieces with a single space, so these file names contain
# embedded spaces (e.g. "dataset_ 14 MAE.RData").
load(paste("dataset_", 14, "MAE.RData", sep = " "))
load(paste("dataset_", 14, "RMSE.RData", sep = " "))
load(paste("lm_size_", "dataset_", 14, ".RData", sep = " "))
load(paste("dataset_", 14, "MAE.rf.RData", sep = " "))
load(paste("dataset_", 14, "RMSE.rf.RData", sep = " "))
Showing the results
# Print the linear-model error summary: MAE, RMSE and RELE for the train and
# test sets. error_lm is presumably restored by one of the load() calls above.
error_lm
## $train
## MAE RMSE RELE
## 0.07208 0.09352 0.22690
##
## $test
## MAE RMSE RELE
## 0.07649 0.10204 0.22812
ANN
###ANN
### Decay 1e-4: keep the size and train/test MAE columns, ordered by size.
MAE4 <- MAE[MAE[, "decay"] == 1e-4, c("size", "trainMAE", "testMAE")]
MAE4[order(MAE4[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.05972 0.07908
## MAE 13 0.05227 0.08723
## MAE 15 0.05630 0.08026
## MAE 17 0.05023 0.08822
## MAE 19 0.05391 0.08077
## MAE 21 0.05809 0.07963
## MAE 23 0.05210 0.08697
## MAE 25 0.05422 0.08684
# Decay 1e-4: keep the size and train/test RMSE columns, ordered by size.
RMSE4 <- RMSE[RMSE[, "decay"] == 1e-4, c("size", "trainRMSE", "testRMSE")]
RMSE4[order(RMSE4[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.07777 0.1049
## RMSE 13 0.06721 0.1207
## RMSE 15 0.07370 0.1067
## RMSE 17 0.06492 0.1154
## RMSE 19 0.06968 0.1071
## RMSE 21 0.07474 0.1061
## RMSE 23 0.06742 0.1155
## RMSE 25 0.07080 0.1133
### Decay 1e-3: keep the size and train/test MAE columns, ordered by size.
MAE3 <- MAE[MAE[, "decay"] == 1e-3, c("size", "trainMAE", "testMAE")]
MAE3[order(MAE3[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.05813 0.07835
## MAE 13 0.06177 0.07745
## MAE 15 0.05538 0.07941
## MAE 17 0.05867 0.07878
## MAE 19 0.05363 0.07840
## MAE 21 0.05605 0.08078
## MAE 23 0.06035 0.07392
## MAE 25 0.05436 0.08088
# Decay 1e-3: keep the size and train/test RMSE columns, ordered by size.
RMSE3 <- RMSE[RMSE[, "decay"] == 1e-3, c("size", "trainRMSE", "testRMSE")]
RMSE3[order(RMSE3[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.07521 0.10323
## RMSE 13 0.08019 0.10081
## RMSE 15 0.07194 0.10815
## RMSE 17 0.07635 0.10302
## RMSE 19 0.06963 0.10279
## RMSE 21 0.07339 0.10717
## RMSE 23 0.07784 0.09895
## RMSE 25 0.07038 0.10537
### Decay 1e-2: keep the size and train/test MAE columns, ordered by size.
MAE2 <- MAE[MAE[, "decay"] == 1e-2, c("size", "trainMAE", "testMAE")]
MAE2[order(MAE2[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.06837 0.07378
## MAE 13 0.06802 0.07280
## MAE 15 0.06792 0.07299
## MAE 17 0.06837 0.07379
## MAE 19 0.06802 0.07284
## MAE 21 0.06837 0.07378
## MAE 23 0.06837 0.07378
## MAE 25 0.06803 0.07285
# Decay 1e-2: keep the size and train/test RMSE columns, ordered by size.
RMSE2 <- RMSE[RMSE[, "decay"] == 1e-2, c("size", "trainRMSE", "testRMSE")]
RMSE2[order(RMSE2[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.08948 0.09839
## RMSE 13 0.08894 0.09760
## RMSE 15 0.08875 0.09770
## RMSE 17 0.08948 0.09839
## RMSE 19 0.08900 0.09775
## RMSE 21 0.08948 0.09839
## RMSE 23 0.08948 0.09839
## RMSE 25 0.08901 0.09775
### Decay 1e-1: keep the size and train/test MAE columns, ordered by size.
MAE1 <- MAE[MAE[, "decay"] == 1e-1, c("size", "trainMAE", "testMAE")]
MAE1[order(MAE1[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.07285 0.0771
## MAE 13 0.07285 0.0771
## MAE 15 0.07285 0.0771
## MAE 17 0.07284 0.0771
## MAE 19 0.07285 0.0771
## MAE 21 0.07285 0.0771
## MAE 23 0.07285 0.0771
## MAE 25 0.07285 0.0771
# Decay 1e-1: keep the size and train/test RMSE columns, ordered by size.
RMSE1 <- RMSE[RMSE[, "decay"] == 1e-1, c("size", "trainRMSE", "testRMSE")]
RMSE1[order(RMSE1[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.09448 0.1021
## RMSE 13 0.09449 0.1021
## RMSE 15 0.09449 0.1021
## RMSE 17 0.09448 0.1021
## RMSE 19 0.09449 0.1021
## RMSE 21 0.09449 0.1021
## RMSE 23 0.09449 0.1021
## RMSE 25 0.09449 0.1021
### Decay 1: keep the size and train/test MAE columns, ordered by size.
MAE0 <- MAE[MAE[, "decay"] == 1, c("size", "trainMAE", "testMAE")]
MAE0[order(MAE0[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.07979 0.08325
## MAE 13 0.07986 0.08332
## MAE 15 0.07976 0.08322
## MAE 17 0.07981 0.08327
## MAE 19 0.07990 0.08336
## MAE 21 0.07977 0.08323
## MAE 23 0.07984 0.08329
## MAE 25 0.07975 0.08320
# Decay 1: keep the size and train/test RMSE columns, ordered by size.
RMSE0 <- RMSE[RMSE[, "decay"] == 1, c("size", "trainRMSE", "testRMSE")]
RMSE0[order(RMSE0[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.1017 0.1080
## RMSE 13 0.1018 0.1080
## RMSE 15 0.1017 0.1079
## RMSE 17 0.1018 0.1080
## RMSE 19 0.1019 0.1081
## RMSE 21 0.1017 0.1080
## RMSE 23 0.1018 0.1080
## RMSE 25 0.1017 0.1079
randomForest
## Random forest with ntree = 500: train/test errors for each mtry value.
MAE500 <- MAE_rf[MAE_rf[, "ntree"] == 500, c("mtry", "trainMAE", "testMAE")]
RMSE500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 500,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE500
## mtry trainMAE testMAE
## MAE 2 0.03153 0.07618
## MAE 3 0.03045 0.07592
## MAE 4 0.02987 0.07583
## MAE 5 0.02940 0.07523
## MAE 6 0.02922 0.07575
## MAE 7 0.02902 0.07506
RMSE500
## mtry trainRMSE testRMSE
## RMSE 2 0.04209 0.10068
## RMSE 3 0.04067 0.10036
## RMSE 4 0.04001 0.10051
## RMSE 5 0.03947 0.09976
## RMSE 6 0.03909 0.10037
## RMSE 7 0.03913 0.09994
## Random forest with ntree = 2000: train/test errors for each mtry value.
MAE2000 <- MAE_rf[MAE_rf[, "ntree"] == 2000, c("mtry", "trainMAE", "testMAE")]
RMSE2000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2000,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE2000
## mtry trainMAE testMAE
## MAE 2 0.03156 0.07608
## MAE 3 0.03043 0.07599
## MAE 4 0.02980 0.07550
## MAE 5 0.02943 0.07539
## MAE 6 0.02914 0.07530
## MAE 7 0.02898 0.07506
RMSE2000
## mtry trainRMSE testRMSE
## RMSE 2 0.04210 0.10064
## RMSE 3 0.04074 0.10057
## RMSE 4 0.03994 0.10005
## RMSE 5 0.03947 0.10010
## RMSE 6 0.03903 0.09999
## RMSE 7 0.03893 0.09971
Error distribution
# Load the saved linear-model artefacts for dataset 14. paste() separates its
# pieces with a space, so the file names contain embedded spaces.
load(paste("lmFit_error_dis_dataset", 14, ".RData", sep = " "))
load(paste("lmFit_trainPredict_dataset", 14, ".RData", sep = " "))
load(paste("lmFit_testPredict_dataset", 14, ".RData", sep = " "))
load(paste("dataset_", 14, "_sepdata.RData", sep = " "))
load("~/PED/prepareDataDay/PP_new7.RData")
# MAXO3C column of PP_new7; denorm() takes its min and max from this vector.
orig <- PP_new7[, "MAXO3C"]
####################### linear regression #########################
# Histogram bin edges: 0 to 0.5 in steps of 0.025.
breaks <- seq(0, 0.5, by = 0.025)
# Map targets and predictions back through denorm() using the range of orig.
trainTargets_orig <- denorm(targetsTrain, orig)
testTargets_orig <- denorm(targetsTest, orig)
trainPredict_orig <- denorm(trainPredict, orig)
testPredict_orig <- denorm(testPredict, orig)
### training set
hist(
  trainTargets_orig,
  col = "red",
  main = "ozone distribution of training set for dataset 14",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone (PPM)"
)
hist(
  trainPredict_orig,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 14(linear regression)",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone(PPM)"
)
### testing set
hist(
  testTargets_orig,
  col = "blue",
  main = "ozone distribution of testing set for dataset 14",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 100),
  xlab = "daily maximum ozone (PPM)"
)
# This histogram uses wider bin edges that start at -0.1.
hist(
  testPredict_orig,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 14(linear regression)",
  xlab = "daily maximum ozone (PPM)",
  breaks = seq(-0.1, 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 100)
)
## Absolute error against the true value, training set.
trainError <- abs(trainTargets_orig - trainPredict_orig)
plot(trainTargets_orig, trainError, type = "p")
## Absolute error against the true value, testing set.
testError <- abs(testTargets_orig - testPredict_orig)
plot(testTargets_orig, testError, type = "p")
###### randomForest ###########################
## mtry is 2, ntree is 500
# Load the saved random-forest predictions; this replaces the trainPredict and
# testPredict objects that were loaded for the linear model.
load(paste("rfFit_trainPredict_dataset", 14, "_mtry_", 2, "_ntree_", 500, ".RData"))
load(paste("rfFit_testPredict_dataset", 14, "_mtry_", 2, "_ntree_", 500, ".RData"))
# These predictions are plotted without denorm(), on the [0, 1] axis set by
# xlim (presumably the normalised scale), so the bin edges must cover that
# whole range. hist() stops with "some 'x' not counted" when any value falls
# outside the breaks, which is what happened with edges ending at 0.5.
breaks_norm <- seq(0, 1, by = 0.025)
hist(
  trainPredict,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 14(RF)",
  breaks = breaks_norm,
  xlim = c(0, 1),
  ylim = c(0, 500)
)
hist(
  testPredict,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 14(RF)",
  breaks = breaks_norm,
  xlim = c(0, 1),
  ylim = c(0, 100)
)
Importance factors
# Variable importance of the saved forest with mtry = 2, ntree = 500.
# rfFit is presumably the object restored by this load() call.
load(
  paste("rfFit_dataset_", 14, "_mtry_", 2, "_ntree_", 500, ".RData", sep = " ")
)
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 2.632e-03 3.3492
## AVGO3P 2.010e-03 3.1924
## MAXO3P_MAXRHP 2.015e-03 2.6779
## MAXO3P_MEDIANO2P 1.088e-03 1.6263
## MEDIANWSPP_MEDIANRHP 3.554e-04 0.9757
## MAXWDRP_MEDIANNOxP 6.903e-04 1.2401
## MAXTMPP_MAXRHP 4.673e-04 1.1210
## MAXNOXP 6.731e-04 1.0891
## MAXNO2P 9.389e-04 1.3369
## STDO3P 1.933e-03 2.9389
## STDTMPP 2.722e-04 1.1775
## STDWSPP 2.032e-04 0.9448
## STDRHP 3.348e-04 1.1781
## WEEKDAYC 8.194e-05 0.5826
## SEASONC 1.740e-04 0.3968
## TMPpoint 3.548e-04 1.2170
## RHpoint 4.590e-04 1.2009
## WSPpoint 3.512e-04 1.1281
## MAXTMPP_MAXNO2P 1.136e-03 1.3477
## MEDIANTMPP_MEDIANNO2P 9.935e-04 1.3176
## RHpoint_MEDIANNO2P 9.211e-04 1.3944
# Variable importance of the saved forest with mtry = 7, ntree = 500.
# rfFit is presumably the object restored by this load() call.
load(
  paste("rfFit_dataset_", 14, "_mtry_", 7, "_ntree_", 500, ".RData", sep = " ")
)
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 0.0046674 5.2980
## AVGO3P 0.0030817 4.0206
## MAXO3P_MAXRHP 0.0021039 2.3329
## MAXO3P_MEDIANO2P 0.0006264 1.0042
## MEDIANWSPP_MEDIANRHP 0.0003861 0.9231
## MAXWDRP_MEDIANNOxP 0.0006221 1.0525
## MAXTMPP_MAXRHP 0.0005792 1.1441
## MAXNOXP 0.0005350 0.8761
## MAXNO2P 0.0007329 0.9743
## STDO3P 0.0024737 3.5111
## STDTMPP 0.0003202 1.2549
## STDWSPP 0.0001728 0.9208
## STDRHP 0.0002787 1.1904
## WEEKDAYC 0.0001210 0.5801
## SEASONC 0.0001146 0.2899
## TMPpoint 0.0005167 1.3542
## RHpoint 0.0006185 1.3072
## WSPpoint 0.0004359 1.2877
## MAXTMPP_MAXNO2P 0.0007886 0.9371
## MEDIANTMPP_MEDIANNO2P 0.0005901 0.9279
## RHpoint_MEDIANNO2P 0.0006277 1.0748
# Variable importance of the saved forest with mtry = 2, ntree = 2000.
# rfFit is presumably the object restored by this load() call.
load(
  paste("rfFit_dataset_", 14, "_mtry_", 2, "_ntree_", 2000, ".RData", sep = " ")
)
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 2.644e-03 3.4191
## AVGO3P 2.182e-03 3.2360
## MAXO3P_MAXRHP 1.833e-03 2.6320
## MAXO3P_MEDIANO2P 1.078e-03 1.6330
## MEDIANWSPP_MEDIANRHP 3.440e-04 0.9735
## MAXWDRP_MEDIANNOxP 6.934e-04 1.2227
## MAXTMPP_MAXRHP 4.994e-04 1.1453
## MAXNOXP 6.961e-04 1.0978
## MAXNO2P 9.070e-04 1.2900
## STDO3P 1.868e-03 2.9571
## STDTMPP 2.562e-04 1.1553
## STDWSPP 1.952e-04 0.9547
## STDRHP 2.851e-04 1.1674
## WEEKDAYC 9.436e-05 0.5829
## SEASONC 1.622e-04 0.3872
## TMPpoint 4.442e-04 1.2215
## RHpoint 4.970e-04 1.2277
## WSPpoint 2.972e-04 1.0868
## MAXTMPP_MAXNO2P 1.050e-03 1.3670
## MEDIANTMPP_MEDIANNO2P 9.290e-04 1.3216
## RHpoint_MEDIANNO2P 8.419e-04 1.3479
# Variable importance of the saved forest with mtry = 7, ntree = 2000.
# rfFit is presumably the object restored by this load() call.
load(
  paste("rfFit_dataset_", 14, "_mtry_", 7, "_ntree_", 2000, ".RData", sep = " ")
)
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 4.718e-03 5.3113
## AVGO3P 3.145e-03 4.1956
## MAXO3P_MAXRHP 2.018e-03 2.2169
## MAXO3P_MEDIANO2P 6.006e-04 0.9662
## MEDIANWSPP_MEDIANRHP 3.737e-04 0.9359
## MAXWDRP_MEDIANNOxP 6.350e-04 1.0547
## MAXTMPP_MAXRHP 5.611e-04 1.1288
## MAXNOXP 4.955e-04 0.8837
## MAXNO2P 7.480e-04 0.9576
## STDO3P 2.578e-03 3.4891
## STDTMPP 3.079e-04 1.2461
## STDWSPP 1.815e-04 0.9245
## STDRHP 3.244e-04 1.2079
## WEEKDAYC 8.922e-05 0.5911
## SEASONC 9.507e-05 0.2926
## TMPpoint 5.369e-04 1.3553
## RHpoint 6.098e-04 1.2961
## WSPpoint 4.531e-04 1.3058
## MAXTMPP_MAXNO2P 9.067e-04 0.9580
## MEDIANTMPP_MEDIANNO2P 6.070e-04 0.9012
## RHpoint_MEDIANNO2P 6.052e-04 1.0516