Working on feature set 15
## Load the saved feature tables for dataset 15. The objects
## feature_new_train15 and feature_new_test15 used below are expected to
## come from this file.
load("~/PED/prepareDataDay/feature_new_norm15.RData")
## Column names of the training table
colnames(feature_new_train15)
## [1] "MAXO3P" "AVGO3P"
## [3] "MAXO3P_MAXRHP" "MAXO3P_MEDIANO2P"
## [5] "MEDIANWSPP_MEDIANRHP" "MAXWDRP_MEDIANNOxP"
## [7] "MAXTMPP_MAXRHP" "MAXNOXP"
## [9] "MAXNO2P" "STDO3P"
## [11] "STDTMPP" "STDWSPP"
## [13] "STDRHP" "WEEKDAYC"
## [15] "SEASONC" "TMPpoint"
## [17] "RHpoint" "WSPpoint"
## [19] "MAXTMPP_MAXNO2P" "MEDIANTMPP_MEDIANNO2P"
## [21] "RHpoint_MEDIANNO2P" "MAXO3C"
## Number of samples (rows) in the training table
nrow(feature_new_train15)
## [1] 2136
## Number of samples (rows) in the testing table
nrow(feature_new_test15)
## [1] 377
## Source the shared helpers (modelErrors, train_testErrors, error_distri,
## lm_nnet_rf_error, denorm); the file also attaches nnet and randomForest.
## echo = TRUE prints each sourced expression as it is evaluated.
source("~/PED/nnetAnalysis/function.R", echo = TRUE)
##
## > library(nnet)
##
## > library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
##
## > modelErrors <- function(predicted, actual) {
## + sal <- vector(mode = "numeric", length = 3)
## + names(sal) <- c("MAE", "RMSE", "RELE")
## + me .... [TRUNCATED]
##
## > train_testErrors <- function(model, inputsTrain, targetsTrain,
## + inputsTest, targetsTest) {
## + trainPredict <- predict(model, newdata = as.d .... [TRUNCATED]
##
## > error_distri <- function(model, inputsTrain, targetsTrain,
## + inputsTest, targetsTest) {
## + trainPredict <- predict(model, newdata = as.data. .... [TRUNCATED]
##
## > lm_nnet_rf_error <- function(feature_new_train, feature_new_test,
## + dataset) {
## + inputsTrain <- feature_new_train[, -c(ncol(feature_new_tra .... [TRUNCATED]
##
## > denorm <- function(norm, orig) {
## + ((norm - 0.1) * (max(orig) - min(orig))/0.8) + min(orig)
## + }
### Train the models and calculate the errors.
### The training call is left commented out; the result files loaded below
### are presumably what it saved on an earlier run.
## lm_nnet_rf_error(feature_new_train15, feature_new_test15, 15)
## NOTE: paste() separates its arguments with a single space by default, so
## these file names contain spaces (e.g. "dataset_ 15 MAE.RData"). Presumably
## that matches how the files were written -- confirm before using paste0().
## The paths are relative to the current working directory.
load(paste("dataset_", 15, "MAE.RData"))
load(paste("dataset_", 15, "RMSE.RData"))
load(paste("lm_size_", "dataset_", 15, ".RData"))
load(paste("dataset_", 15, "MAE.rf.RData"))
load(paste("dataset_", 15, "RMSE.rf.RData"))
Showing the results
## Print the train/test error summary (MAE, RMSE, RELE) stored in error_lm
## (presumably the linear-regression fit -- confirm against function.R).
error_lm
## $train
## MAE RMSE RELE
## 0.1128 0.1422 0.2574
##
## $test
## MAE RMSE RELE
## 0.1183 0.1477 0.2583
ANN
### ANN
### decay = 1e-4: train/test MAE for each size, sorted by size
MAE4 <- MAE[MAE[, "decay"] == 1e-4, c("size", "trainMAE", "testMAE")]
MAE4[order(MAE4[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.09228 0.1183
## MAE 13 0.07833 0.1340
## MAE 15 0.08741 0.1276
## MAE 17 0.07422 0.1373
## MAE 19 0.08184 0.1308
## MAE 21 0.09030 0.1211
## MAE 23 0.07470 0.1381
## MAE 25 0.08445 0.1319
### decay = 1e-4: train/test RMSE for each size, sorted by size
RMSE4 <- RMSE[RMSE[, "decay"] == 1e-4, c("size", "trainRMSE", "testRMSE")]
RMSE4[order(RMSE4[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.11715 0.1531
## RMSE 13 0.10065 0.1764
## RMSE 15 0.11093 0.1619
## RMSE 17 0.09646 0.1799
## RMSE 19 0.10394 0.1699
## RMSE 21 0.11471 0.1536
## RMSE 23 0.09518 0.1787
## RMSE 25 0.10747 0.1701
### decay = 1e-3: train/test MAE for each size, sorted by size
MAE3 <- MAE[MAE[, "decay"] == 1e-3, c("size", "trainMAE", "testMAE")]
MAE3[order(MAE3[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.08751 0.1177
## MAE 13 0.09462 0.1154
## MAE 15 0.08318 0.1248
## MAE 17 0.09069 0.1191
## MAE 19 0.08047 0.1261
## MAE 21 0.08589 0.1242
## MAE 23 0.09171 0.1215
## MAE 25 0.08326 0.1242
### decay = 1e-3: train/test RMSE for each size, sorted by size
RMSE3 <- RMSE[RMSE[, "decay"] == 1e-3, c("size", "trainRMSE", "testRMSE")]
RMSE3[order(RMSE3[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.1115 0.1549
## RMSE 13 0.1203 0.1461
## RMSE 15 0.1057 0.1588
## RMSE 17 0.1150 0.1534
## RMSE 19 0.1021 0.1644
## RMSE 21 0.1093 0.1633
## RMSE 23 0.1166 0.1571
## RMSE 25 0.1054 0.1625
### decay = 1e-2: train/test MAE for each size, sorted by size
MAE2 <- MAE[MAE[, "decay"] == 1e-2, c("size", "trainMAE", "testMAE")]
MAE2[order(MAE2[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.1028 0.1121
## MAE 13 0.1022 0.1120
## MAE 15 0.1025 0.1131
## MAE 17 0.1031 0.1121
## MAE 19 0.1027 0.1118
## MAE 21 0.1027 0.1121
## MAE 23 0.1028 0.1128
## MAE 25 0.1027 0.1126
### decay = 1e-2: train/test RMSE for each size, sorted by size
RMSE2 <- RMSE[RMSE[, "decay"] == 1e-2, c("size", "trainRMSE", "testRMSE")]
RMSE2[order(RMSE2[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.1304 0.1416
## RMSE 13 0.1298 0.1419
## RMSE 15 0.1300 0.1423
## RMSE 17 0.1308 0.1413
## RMSE 19 0.1305 0.1413
## RMSE 21 0.1304 0.1417
## RMSE 23 0.1304 0.1418
## RMSE 25 0.1306 0.1421
### decay = 1e-1: train/test MAE for each size, sorted by size
MAE1 <- MAE[MAE[, "decay"] == 1e-1, c("size", "trainMAE", "testMAE")]
MAE1[order(MAE1[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.1129 0.1174
## MAE 13 0.1129 0.1174
## MAE 15 0.1129 0.1174
## MAE 17 0.1129 0.1174
## MAE 19 0.1129 0.1174
## MAE 21 0.1129 0.1174
## MAE 23 0.1129 0.1174
## MAE 25 0.1129 0.1174
### decay = 1e-1: train/test RMSE for each size, sorted by size
RMSE1 <- RMSE[RMSE[, "decay"] == 1e-1, c("size", "trainRMSE", "testRMSE")]
RMSE1[order(RMSE1[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.1424 0.1468
## RMSE 13 0.1424 0.1468
## RMSE 15 0.1424 0.1468
## RMSE 17 0.1424 0.1468
## RMSE 19 0.1424 0.1468
## RMSE 21 0.1424 0.1468
## RMSE 23 0.1424 0.1468
## RMSE 25 0.1424 0.1468
### decay = 1: train/test MAE for each size, sorted by size
MAE0 <- MAE[MAE[, "decay"] == 1, c("size", "trainMAE", "testMAE")]
MAE0[order(MAE0[, "size"]), ]
## size trainMAE testMAE
## MAE 11 0.1208 0.1251
## MAE 13 0.1208 0.1251
## MAE 15 0.1208 0.1251
## MAE 17 0.1208 0.1251
## MAE 19 0.1209 0.1251
## MAE 21 0.1208 0.1251
## MAE 23 0.1208 0.1251
## MAE 25 0.1207 0.1251
### decay = 1: train/test RMSE for each size, sorted by size
RMSE0 <- RMSE[RMSE[, "decay"] == 1, c("size", "trainRMSE", "testRMSE")]
RMSE0[order(RMSE0[, "size"]), ]
## size trainRMSE testRMSE
## RMSE 11 0.1504 0.1540
## RMSE 13 0.1504 0.1540
## RMSE 15 0.1504 0.1540
## RMSE 17 0.1504 0.1540
## RMSE 19 0.1505 0.1541
## RMSE 21 0.1504 0.1540
## RMSE 23 0.1504 0.1540
## RMSE 25 0.1504 0.1540
randomForest
## ntree = 500: train/test MAE and RMSE for each mtry
MAE500 <- MAE_rf[MAE_rf[, "ntree"] == 500, c("mtry", "trainMAE", "testMAE")]
RMSE500 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 500,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE500
## mtry trainMAE testMAE
## MAE 7 0.04541 0.1148
## MAE 8 0.04535 0.1148
## MAE 9 0.04497 0.1146
## MAE 10 0.04489 0.1147
## MAE 11 0.04482 0.1144
## MAE 12 0.04463 0.1145
## MAE 13 0.04468 0.1144
## MAE 14 0.04452 0.1143
## Print the ntree = 500 RMSE table (rows selected from RMSE_rf above)
RMSE500
## mtry trainRMSE testRMSE
## RMSE 7 0.05885 0.1451
## RMSE 8 0.05867 0.1448
## RMSE 9 0.05817 0.1451
## RMSE 10 0.05826 0.1449
## RMSE 11 0.05801 0.1447
## RMSE 12 0.05796 0.1447
## RMSE 13 0.05792 0.1447
## RMSE 14 0.05767 0.1444
## ntree = 2000: train/test MAE and RMSE for each mtry
MAE2000 <- MAE_rf[MAE_rf[, "ntree"] == 2000, c("mtry", "trainMAE", "testMAE")]
RMSE2000 <- RMSE_rf[
  RMSE_rf[, "ntree"] == 2000,
  c("mtry", "trainRMSE", "testRMSE")
]
MAE2000
## mtry trainMAE testMAE
## MAE 7 0.04546 0.1145
## MAE 8 0.04526 0.1147
## MAE 9 0.04508 0.1143
## MAE 10 0.04487 0.1147
## MAE 11 0.04464 0.1146
## MAE 12 0.04468 0.1144
## MAE 13 0.04440 0.1141
## MAE 14 0.04437 0.1141
## Print the ntree = 2000 RMSE table (rows selected from RMSE_rf above)
RMSE2000
## mtry trainRMSE testRMSE
## RMSE 7 0.05871 0.1446
## RMSE 8 0.05857 0.1450
## RMSE 9 0.05834 0.1445
## RMSE 10 0.05815 0.1448
## RMSE 11 0.05785 0.1448
## RMSE 12 0.05782 0.1447
## RMSE 13 0.05764 0.1443
## RMSE 14 0.05751 0.1443
Error distribution
## Load the saved linear-model results and the train/test split for dataset 15.
## NOTE: paste() separates its arguments with a space, so these file names
## contain spaces (e.g. "lmFit_error_dis_dataset 15 .RData"); presumably this
## matches how the files were saved -- confirm before changing.
## targetsTrain, targetsTest, trainPredict and testPredict used below are
## expected to come from these files.
load(paste("lmFit_error_dis_dataset", 15, ".RData"))
load(paste("lmFit_trainPredict_dataset", 15, ".RData"))
load(paste("lmFit_testPredict_dataset", 15, ".RData"))
load(paste("dataset_", 15, "_sepdata.RData"))
load("~/PED/prepareDataDay/PP_new8.RData")
## MAXO3C column of PP_new8: passed to denorm() as the reference whose
## min/max map normalised values back to the original scale.
orig <- PP_new8[, "MAXO3C"]
####################### linear regression #########################
## training set
breaks <- seq(0, 0.5, by = 0.025)
## Map targets and predictions back to the original scale.
trainTargets_orig <- denorm(targetsTrain, orig)
testTargets_orig <- denorm(targetsTest, orig)
trainPredict_orig <- denorm(trainPredict, orig)
testPredict_orig <- denorm(testPredict, orig)
hist(
  trainTargets_orig,
  col = "red",
  main = "ozone distribution of training set for dataset 15",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone (PPM)"
)
## The prediction histograms use breaks that start at -0.1 instead of 0.
hist(
  trainPredict_orig,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 15(linear regression)",
  breaks = seq(-0.1, 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone(PPM)"
)
### testing set
hist(
  testTargets_orig,
  col = "blue",
  main = "ozone distribution of testing set for dataset 15",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 100),
  xlab = "daily maximum ozone (PPM)"
)
hist(
  testPredict_orig,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 15(linear regression)",
  xlab = "daily maximum ozone (PPM)",
  breaks = seq(-0.1, 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 100)
)
## absolute error against the true value, training set
trainError <- abs(trainTargets_orig - trainPredict_orig)
plot(trainTargets_orig, trainError, type = "p")
## absolute error against the true value, testing set
testError <- abs(testTargets_orig - testPredict_orig)
plot(testTargets_orig, testError, type = "p")
###### randomForest ###########################
## Load the saved predictions of the forest with mtry = 7, ntree = 500.
## These files are expected to redefine trainPredict and testPredict.
load(paste(
  "rfFit_trainPredict_dataset", 15, "_mtry_", 7, "_ntree_", 500, ".RData"
))
load(paste(
  "rfFit_testPredict_dataset", 15, "_mtry_", 7, "_ntree_", 500, ".RData"
))
trainPredict_orig <- denorm(trainPredict, orig)
testPredict_orig <- denorm(testPredict, orig)
## mtry is 7, ntree is 500 (matches the files loaded above)
hist(
  trainPredict_orig,
  col = "red",
  main = "ozone prediction value distribution of training set for dataset 15(Rf)",
  breaks = breaks,
  xlim = range(orig),
  ylim = c(0, 600),
  xlab = "daily maximum ozone(PPM)"
)
hist(
  testPredict_orig,
  col = "blue",
  main = "ozone prediction value distribution of testing set for dataset 15(RF)",
  xlab = "daily maximum ozone (PPM)",
  breaks = seq(-0.1, 0.5, by = 0.025),
  xlim = range(orig),
  ylim = c(0, 100)
)
## absolute error against the true value, training set
trainError <- abs(trainTargets_orig - trainPredict_orig)
plot(trainTargets_orig, trainError, type = "p")
## absolute error against the true value, testing set
testError <- abs(testTargets_orig - testPredict_orig)
plot(testTargets_orig, testError, type = "p")
Variable importance (random forest)
## Importance table of the saved forest with mtry = 7, ntree = 500
load(paste("rfFit_dataset_", 15, "_mtry_", 7, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 8.118e-03 10.5068
## AVGO3P 6.675e-03 7.9920
## MAXO3P_MAXRHP 3.293e-03 4.2115
## MAXO3P_MEDIANO2P 1.135e-03 2.1079
## MEDIANWSPP_MEDIANRHP 7.175e-04 1.9972
## MAXWDRP_MEDIANNOxP 7.756e-04 2.0520
## MAXTMPP_MAXRHP 9.400e-04 2.3018
## MAXNOXP 7.190e-04 1.7583
## MAXNO2P 9.964e-04 1.8498
## STDO3P 3.721e-03 6.7559
## STDTMPP 6.011e-04 2.4958
## STDWSPP 2.953e-04 1.9395
## STDRHP 3.693e-04 2.2699
## WEEKDAYC 8.719e-05 0.9633
## SEASONC 2.147e-04 0.5800
## TMPpoint 9.932e-04 3.0335
## RHpoint 2.123e-03 3.4106
## WSPpoint 1.543e-03 3.1916
## MAXTMPP_MAXNO2P 1.314e-03 1.9687
## MEDIANTMPP_MEDIANNO2P 1.217e-03 1.9592
## RHpoint_MEDIANNO2P 1.407e-03 2.5606
## Importance table of the saved forest with mtry = 14, ntree = 500
load(paste("rfFit_dataset_", 15, "_mtry_", 14, "_ntree_", 500, ".RData"))
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 1.024e-02 14.9357
## AVGO3P 6.468e-03 7.4511
## MAXO3P_MAXRHP 2.510e-03 2.7596
## MAXO3P_MEDIANO2P 1.058e-03 1.6859
## MEDIANWSPP_MEDIANRHP 8.179e-04 2.0343
## MAXWDRP_MEDIANNOxP 4.563e-04 2.0481
## MAXTMPP_MAXRHP 9.222e-04 2.2049
## MAXNOXP 7.032e-04 1.6340
## MAXNO2P 8.688e-04 1.3027
## STDO3P 2.975e-03 5.5053
## STDTMPP 7.009e-04 2.7755
## STDWSPP 2.400e-04 2.0343
## STDRHP 4.839e-04 2.2882
## WEEKDAYC 2.922e-05 0.9949
## SEASONC 1.972e-04 0.5738
## TMPpoint 1.048e-03 3.2820
## RHpoint 2.428e-03 3.8643
## WSPpoint 1.987e-03 3.7236
## MAXTMPP_MAXNO2P 1.128e-03 1.5388
## MEDIANTMPP_MEDIANNO2P 1.139e-03 1.6069
## RHpoint_MEDIANNO2P 9.587e-04 2.2097
## Importance table of the saved forest with mtry = 7, ntree = 2000
load(paste("rfFit_dataset_", 15, "_mtry_", 7, "_ntree_", 2000, ".RData"))
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 8.386e-03 10.0463
## AVGO3P 6.626e-03 8.1436
## MAXO3P_MAXRHP 3.347e-03 4.3028
## MAXO3P_MEDIANO2P 1.147e-03 2.0562
## MEDIANWSPP_MEDIANRHP 6.907e-04 2.0148
## MAXWDRP_MEDIANNOxP 7.170e-04 2.0705
## MAXTMPP_MAXRHP 9.427e-04 2.2880
## MAXNOXP 6.688e-04 1.7298
## MAXNO2P 1.136e-03 1.8479
## STDO3P 3.692e-03 6.9403
## STDTMPP 6.138e-04 2.5092
## STDWSPP 1.823e-04 1.9876
## STDRHP 4.194e-04 2.2575
## WEEKDAYC -9.313e-08 0.9541
## SEASONC 1.845e-04 0.6005
## TMPpoint 9.339e-04 3.1017
## RHpoint 2.084e-03 3.4605
## WSPpoint 1.560e-03 3.2678
## MAXTMPP_MAXNO2P 1.359e-03 1.9696
## MEDIANTMPP_MEDIANNO2P 1.281e-03 1.9518
## RHpoint_MEDIANNO2P 1.273e-03 2.4835
## Importance table of the saved forest with mtry = 14, ntree = 2000
load(paste("rfFit_dataset_", 15, "_mtry_", 14, "_ntree_", 2000, ".RData"))
rfFit[["importance"]]
## %IncMSE IncNodePurity
## MAXO3P 1.071e-02 14.7284
## AVGO3P 6.711e-03 7.5566
## MAXO3P_MAXRHP 2.303e-03 2.6347
## MAXO3P_MEDIANO2P 9.404e-04 1.6608
## MEDIANWSPP_MEDIANRHP 7.880e-04 2.0721
## MAXWDRP_MEDIANNOxP 5.915e-04 2.0135
## MAXTMPP_MAXRHP 8.964e-04 2.2458
## MAXNOXP 5.941e-04 1.6180
## MAXNO2P 7.873e-04 1.3365
## STDO3P 3.207e-03 5.7512
## STDTMPP 6.131e-04 2.7275
## STDWSPP 1.831e-04 2.0111
## STDRHP 3.949e-04 2.3078
## WEEKDAYC 4.743e-06 0.9846
## SEASONC 1.767e-04 0.5779
## TMPpoint 9.887e-04 3.2428
## RHpoint 2.474e-03 3.9041
## WSPpoint 2.070e-03 3.7111
## MAXTMPP_MAXNO2P 1.131e-03 1.5715
## MEDIANTMPP_MEDIANNO2P 1.026e-03 1.6088
## RHpoint_MEDIANNO2P 9.254e-04 2.1461