Training models corresponding to the UPT 0, 1, 2, ..., 12 datasets
source("~/SUR/UPT1-UPT12/functionForNormilizeAndTraining.R")
# Dataset UPT 0: train models with previous day input variables.
# The response is always the MAXO3C column.
load("~/SUR/dataPrepare/UPT 0 .RData")
previousDayColumns <- c(
  "MONTHC", "DAYC", "WEEKDAYC", "SEASONC",
  "MAXO3P", "MAXNOXP", "MAXNO2P", "MAXSO2P", "MAXCOP", "MAXTMPP", "MINRHP",
  "MAXWSPP",
  "AVGO3P", "AVGNOXP", "AVGNO2P", "AVGSO2P", "AVGCOP", "AVGTMPP", "AVGRHP",
  "AVGWSPP", "AVGWDRP"
)
datasetLabel <- paste("UPT", 0)
Target <- PP[, "MAXO3C"]
Inputs <- PP[, previousDayColumns]
# PreData() and Training() come from the sourced helper file; the
# "<label> TrainingAndTesting.RData" file is read back from the working
# directory between the two calls.
PreData(datasetLabel, Inputs, Target)
load(file = paste(datasetLabel, "TrainingAndTesting.RData"))
Training(datasetLabel, inputsTrain, targetsTrain, inputsTest, targetsTest)

# Datasets UPT 1..12: train models with current day input variables.
# The same column set is used for every dataset, so it is defined once.
currentDayColumns <- c(
  "MONTHC", "DAYC", "WEEKDAYC", "SEASONC", "MAXO3P", "AVGO3P",
  "MAXO3UT", "MAXNOXUT", "MAXNO2UT", "MAXSO2UT", "MAXCOUT", "MAXTMPUT",
  "MINRHUT", "MAXWSPUT",
  "AVGO3UT", "AVGNOXUT", "AVGNO2UT", "AVGSO2UT", "AVGCOUT", "AVGTMPUT",
  "AVGRHUT", "AVGWSPUT", "AVGWDRUT"
)
for (datasetIndex in 1:12) {
  load(paste("~/SUR/dataPrepare/UPT", datasetIndex, ".RData"))
  datasetLabel <- paste("UPT", datasetIndex)
  Target <- UPT[, "MAXO3C"]
  Inputs <- UPT[, currentDayColumns]
  PreData(datasetLabel, Inputs, Target)
  load(file = paste(datasetLabel, "TrainingAndTesting.RData"))
  Training(datasetLabel, inputsTrain, targetsTrain, inputsTest, targetsTest)
}
Comparison among different models corresponding to different datasets
# Combine the error tables of all datasets (UPT 0..12) into one data frame.
# Each file provides an object named modelsErrorsTotal.
datasetIds <- 0:12
errorKinds <- c("MAE", "RMSE", "RELE")
errorTables <- vector("list", length(datasetIds))
for (k in seq_along(datasetIds)) {
  load(paste("~/SUR/UPT1-UPT12/UPT", datasetIds[k], "modelsErrorsTotal.RData"))
  errorTables[[k]] <- modelsErrorsTotal
}
Error <- do.call(rbind, errorTables)
# Tag every row with the dataset it came from and the error measure it holds
# (three rows per dataset, in the order MAE, RMSE, RELE).
Error <- cbind(Error,
  dataset = rep(datasetIds, each = length(errorKinds)),
  errorType = rep(errorKinds, length(datasetIds))
)
# Reshape to long format: the first six columns (one per model) are stacked
# into a single errorValue column, labelled by modelType.
modelColumns <- colnames(Error)[1:6]
R <- reshape(Error,
  times = modelColumns, timevar = "modelType",
  varying = list(modelColumns), v.names = "errorValue",
  direction = "long"
)
# Plot the errors: one lattice chart per error measure, with one line per
# model type across the datasets.
library(lattice)
modelLabels <- levels(as.factor(R$modelType))
for (errorName in c("MAE", "RMSE", "RELE")) {
  errorPlot <- xyplot(errorValue ~ dataset | errorType,
    groups = modelType,
    data = R[R$errorType == errorName, ],
    type = "b", xlab = "Dataset",
    par.settings = list(superpose.symbol = list(cex = 1.2, pch = c(1:6))),
    xlim = c(0:12),
    auto.key = list(
      text = modelLabels, points = TRUE,
      x = 0.05, y = 0.9, space = "right"
    ),
    plot.points = TRUE,
    main = errorName
  )
  # Trellis objects are not auto-printed inside a loop, so draw explicitly.
  print(errorPlot)
}
Delete some input variables which do not contribute significantly to the models.
# Load the UPT 12 data and its saved lmFit object, then print the ANOVA table
# of the fitted final model to see the contribution of each input variable.
load("~/SUR/dataPrepare/UPT 12 .RData")
load("~/SUR/UPT1-UPT12/UPT 12 lmFit.RData")
finalLinearModel <- lmFit[["finalModel"]]
anova(finalLinearModel)
## Analysis of Variance Table
##
## Response: .outcome
## Df Sum Sq Mean Sq F value Pr(>F)
## MONTHC 1 0.23 0.23 13.31 0.00029 ***
## DAYC 1 0.09 0.09 5.00 0.02581 *
## WEEKDAYC 1 0.00 0.00 0.18 0.67296
## SEASONC 1 0.84 0.84 49.41 6.5e-12 ***
## MAXO3P 1 3.75 3.75 219.96 < 2e-16 ***
## AVGO3P 1 0.68 0.68 39.94 5.6e-10 ***
## MAXO3UT 1 3.07 3.07 180.12 < 2e-16 ***
## MAXNOXUT 1 0.03 0.03 1.65 0.19927
## MAXNO2UT 1 0.20 0.20 11.83 0.00063 ***
## MAXSO2UT 1 0.02 0.02 1.35 0.24660
## MAXCOUT 1 0.02 0.02 0.97 0.32483
## MAXTMPUT 1 0.23 0.23 13.72 0.00023 ***
## MINRHUT 1 0.01 0.01 0.67 0.41499
## MAXWSPUT 1 0.16 0.16 9.24 0.00249 **
## AVGO3UT 1 0.08 0.08 4.82 0.02850 *
## AVGNOXUT 1 0.02 0.02 1.40 0.23737
## AVGNO2UT 1 0.00 0.00 0.02 0.89296
## AVGSO2UT 1 0.00 0.00 0.09 0.76791
## AVGCOUT 1 0.00 0.00 0.06 0.80026
## AVGTMPUT 1 0.15 0.15 9.01 0.00281 **
## AVGRHUT 1 0.05 0.05 2.97 0.08521 .
## AVGWSPUT 1 0.00 0.00 0.14 0.71045
## AVGWDRUT 1 0.00 0.00 0.24 0.62451
## Residuals 525 8.96 0.02
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Re-run the UPT 12 experiment on a reduced set of input columns (labelled
# "UPT 12.1") and load the error tables of the full and the reduced run so
# they can be compared.
Target <- UPT[, "MAXO3C"]
# NOTE(review): according to the ANOVA table above, the columns kept here are
# mostly the ones WITHOUT a significant contribution, while MONTHC, SEASONC,
# MAXO3P, AVGO3P, MAXO3UT, MAXNO2UT, MAXTMPUT and MAXWSPUT (all p < 0.01) are
# dropped. That looks like the inverse of the stated goal -- confirm the
# intended selection before relying on the UPT 12.1 results.
Inputs <- UPT[, c("DAYC", "WEEKDAYC", "MAXNOXUT", "MAXSO2UT", "MAXCOUT", "MINRHUT",
  "AVGO3UT", "AVGNOXUT", "AVGNO2UT", "AVGSO2UT", "AVGCOUT", "AVGTMPUT", "AVGRHUT",
  "AVGWSPUT", "AVGWDRUT")]
PreData(paste("UPT", 12.1), Inputs, Target)
load(file = paste("UPT", 12.1, "TrainingAndTesting.RData"))
# Train on the reduced inputs, as is done for UPT 0..12. Without this call the
# training/testing split loaded above is never used. Presumably Training() is
# what writes the "UPT 12.1 modelsErrorsTotal.RData" file read below --
# TODO confirm against the helper's implementation.
Training(paste("UPT", 12.1), inputsTrain, targetsTrain, inputsTest, targetsTest)
# Error table of the full-input run.
load("~/SUR/UPT1-UPT12/UPT 12 modelsErrorsTotal.RData")
Error12 <- modelsErrorsTotal
# Error table of the reduced-input run.
load("~/SUR/UPT1-UPT12/UPT 12.1 modelsErrorsTotal.RData")
Error121 <- modelsErrorsTotal
Error12
## lmFit svmFit rfFit nnetFit linearFit greedyFit
## MAE 0.08325 0.0893 0.09524 0.07564 0.08286 0.08378
## RMSE 0.10512 0.1116 0.11578 0.10123 0.10353 0.10430
## RELE 0.64811 0.5608 0.64184 0.44904 0.57233 0.59072
Error121
## lmFit svmFit rfFit nnetFit linearFit greedyFit
## MAE 0.1127 0.1162 0.1101 0.1101 0.1040 0.1065
## RMSE 0.1349 0.1434 0.1368 0.1358 0.1308 0.1329
## RELE 0.7556 0.9016 0.8233 0.7705 0.7490 0.7949