Training models corresponding to the UPT 0, 1, 2, ..., 12 datasets

source("~/SUR/UPT1-UPT12/functionForNormilizeAndTraining.R")
# Baseline run ("UPT 0"): models are trained on previous-day input variables.
# The loaded .RData file is expected to provide the table `PP`.
load("~/SUR/dataPrepare/UPT 0 .RData")
calendarColumns <- c("MONTHC", "DAYC", "WEEKDAYC", "SEASONC")
previousDayMaxColumns <- c("MAXO3P", "MAXNOXP", "MAXNO2P", "MAXSO2P", "MAXCOP",
    "MAXTMPP", "MINRHP", "MAXWSPP")
previousDayAvgColumns <- c("AVGO3P", "AVGNOXP", "AVGNO2P", "AVGSO2P", "AVGCOP",
    "AVGTMPP", "AVGRHP", "AVGWSPP", "AVGWDRP")
# The response is MAXO3C; the predictors keep the same column order as before.
Target <- PP[, "MAXO3C"]
Inputs <- PP[, c(calendarColumns, previousDayMaxColumns, previousDayAvgColumns)]
# PreData() and Training() come from the sourced helper file. The load() in
# between brings inputsTrain/targetsTrain/inputsTest/targetsTest into scope.
baselineTag <- paste("UPT", 0)
PreData(baselineTag, Inputs, Target)
load(file = paste(baselineTag, "TrainingAndTesting.RData"))
Training(baselineTag, inputsTrain, targetsTrain, inputsTest, targetsTest)
# Runs "UPT 1" to "UPT 12": models are trained on current-day input variables
# (the *UT columns) plus the calendar fields and previous-day ozone.
# The column list does not depend on the dataset, so it is built once.
currentDayColumns <- c(
    "MONTHC", "DAYC", "WEEKDAYC", "SEASONC",
    "MAXO3P", "AVGO3P",
    "MAXO3UT", "MAXNOXUT", "MAXNO2UT", "MAXSO2UT", "MAXCOUT", "MAXTMPUT",
    "MINRHUT", "MAXWSPUT",
    "AVGO3UT", "AVGNOXUT", "AVGNO2UT", "AVGSO2UT", "AVGCOUT", "AVGTMPUT",
    "AVGRHUT", "AVGWSPUT", "AVGWDRUT"
)
for (i in seq_len(12)) {
    # Each per-dataset file is expected to provide the table `UPT`.
    load(paste("~/SUR/dataPrepare/UPT", i, ".RData"))
    datasetTag <- paste("UPT", i)
    Target <- UPT[, "MAXO3C"]
    Inputs <- UPT[, currentDayColumns]
    PreData(datasetTag, Inputs, Target)
    # Brings inputsTrain/targetsTrain/inputsTest/targetsTest into scope.
    load(file = paste(datasetTag, "TrainingAndTesting.RData"))
    Training(datasetTag, inputsTrain, targetsTrain, inputsTest, targetsTest)
}

Comparison among different models corresponding to different datasets


# Combine the error tables of all 13 datasets (UPT 0 to UPT 12) into one
# table. Each file provides `modelsErrorsTotal`; the tables are collected in
# a list and row-bound once, in dataset order.
errorTables <- vector("list", 13)
for (i in 0:12) {
    load(paste("~/SUR/UPT1-UPT12/UPT", i, "modelsErrorsTotal.RData"))
    errorTables[[i + 1]] <- modelsErrorsTotal
}
Error <- do.call(rbind, errorTables)
# Label each row with its dataset number and error measure; every dataset
# contributes three rows, in the order MAE, RMSE, RELE.
errorTypeNames <- c("MAE", "RMSE", "RELE")
Error <- cbind(Error, dataset = rep(0:12, each = 3),
    errorType = rep(errorTypeNames, 13))

# Reshape to long format: the first six columns (one per model) are stacked
# into a single `errorValue` column, with the source column name recorded in
# `modelType`.
modelColumns <- colnames(Error)[1:6]
R <- reshape(Error,
    direction = "long",
    varying = list(modelColumns),
    v.names = "errorValue",
    timevar = "modelType",
    times = modelColumns)


# Plot the errors
library(lattice)
# MAE of every model type across the datasets, one line per model.
maeRows <- R[R$errorType == "MAE", ]
modelLabels <- levels(as.factor(R$modelType))
xyplot(errorValue ~ dataset | errorType,
    data = maeRows,
    groups = modelType,
    type = "b",
    xlab = "Dataset",
    xlim = 0:12,
    par.settings = list(superpose.symbol = list(cex = 1.2, pch = 1:6)),
    auto.key = list(text = modelLabels, points = TRUE, x = 0.05, y = 0.9,
        space = "right"),
    plot.points = TRUE,
    main = "MAE")

plot of chunk UPT0-UPT12.1


# RMSE of every model type across the datasets, one line per model.
rmseRows <- R[R$errorType == "RMSE", ]
modelLabels <- levels(as.factor(R$modelType))
xyplot(errorValue ~ dataset | errorType,
    data = rmseRows,
    groups = modelType,
    type = "b",
    xlab = "Dataset",
    xlim = 0:12,
    par.settings = list(superpose.symbol = list(cex = 1.2, pch = 1:6)),
    auto.key = list(text = modelLabels, points = TRUE, x = 0.05, y = 0.9,
        space = "right"),
    plot.points = TRUE,
    main = "RMSE")

plot of chunk UPT0-UPT12.1


# RELE of every model type across the datasets, one line per model.
releRows <- R[R$errorType == "RELE", ]
modelLabels <- levels(as.factor(R$modelType))
xyplot(errorValue ~ dataset | errorType,
    data = releRows,
    groups = modelType,
    type = "b",
    xlab = "Dataset",
    xlim = 0:12,
    par.settings = list(superpose.symbol = list(cex = 1.2, pch = 1:6)),
    auto.key = list(text = modelLabels, points = TRUE, x = 0.05, y = 0.9,
        space = "right"),
    plot.points = TRUE,
    main = "RELE")

plot of chunk UPT0-UPT12.1

Delete the input variables that do not contribute significantly to the models.


# Reload the UPT 12 data and its fitted linear model, then print the ANOVA
# table of the final model to see each predictor's contribution.
load("~/SUR/dataPrepare/UPT 12 .RData")
load("~/SUR/UPT1-UPT12/UPT 12 lmFit.RData")
fittedLm <- lmFit$finalModel
anova(fittedLm)
## Analysis of Variance Table
## 
## Response: .outcome
##            Df Sum Sq Mean Sq F value  Pr(>F)    
## MONTHC      1   0.23    0.23   13.31 0.00029 ***
## DAYC        1   0.09    0.09    5.00 0.02581 *  
## WEEKDAYC    1   0.00    0.00    0.18 0.67296    
## SEASONC     1   0.84    0.84   49.41 6.5e-12 ***
## MAXO3P      1   3.75    3.75  219.96 < 2e-16 ***
## AVGO3P      1   0.68    0.68   39.94 5.6e-10 ***
## MAXO3UT     1   3.07    3.07  180.12 < 2e-16 ***
## MAXNOXUT    1   0.03    0.03    1.65 0.19927    
## MAXNO2UT    1   0.20    0.20   11.83 0.00063 ***
## MAXSO2UT    1   0.02    0.02    1.35 0.24660    
## MAXCOUT     1   0.02    0.02    0.97 0.32483    
## MAXTMPUT    1   0.23    0.23   13.72 0.00023 ***
## MINRHUT     1   0.01    0.01    0.67 0.41499    
## MAXWSPUT    1   0.16    0.16    9.24 0.00249 ** 
## AVGO3UT     1   0.08    0.08    4.82 0.02850 *  
## AVGNOXUT    1   0.02    0.02    1.40 0.23737    
## AVGNO2UT    1   0.00    0.00    0.02 0.89296    
## AVGSO2UT    1   0.00    0.00    0.09 0.76791    
## AVGCOUT     1   0.00    0.00    0.06 0.80026    
## AVGTMPUT    1   0.15    0.15    9.01 0.00281 ** 
## AVGRHUT     1   0.05    0.05    2.97 0.08521 .  
## AVGWSPUT    1   0.00    0.00    0.14 0.71045    
## AVGWDRUT    1   0.00    0.00    0.24 0.62451    
## Residuals 525   8.96    0.02                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Reduced dataset "UPT 12.1": keep only the predictors whose ANOVA p-value
# above is below 0.05, i.e. delete the inputs without a significant
# contribution.
#
# NOTE(review): the previous selection was inverted. It kept mostly the
# non-significant columns and dropped the most significant ones (MONTHC,
# SEASONC, MAXO3P, AVGO3P, MAXO3UT, MAXNO2UT, MAXTMPUT, MAXWSPUT). The
# Error121 table printed further down was produced with that inverted
# selection and must be regenerated after this fix.
significantColumns <- c("MONTHC", "DAYC", "SEASONC", "MAXO3P", "AVGO3P",
    "MAXO3UT", "MAXNO2UT", "MAXTMPUT", "MAXWSPUT", "AVGO3UT", "AVGTMPUT")
Target <- UPT[, "MAXO3C"]
Inputs <- UPT[, significantColumns]
PreData(paste("UPT", 12.1), Inputs, Target)
load(file = paste("UPT", 12.1, "TrainingAndTesting.RData"))
# Train on the reduced split, mirroring the PreData/load/Training sequence
# used for UPT 0-12. This call was missing, so the split loaded above was
# never used. Presumably Training() is what writes the
# "UPT 12.1 modelsErrorsTotal.RData" file read below -- confirm against the
# sourced helper file.
Training(paste("UPT", 12.1), inputsTrain, targetsTrain, inputsTest, targetsTest)
# Load both error tables (full UPT 12 inputs vs. reduced UPT 12.1 inputs)
# for a side-by-side comparison.
load("~/SUR/UPT1-UPT12/UPT 12 modelsErrorsTotal.RData")
Error12 <- modelsErrorsTotal
load("~/SUR/UPT1-UPT12/UPT 12.1 modelsErrorsTotal.RData")
Error121 <- modelsErrorsTotal
Error12
##        lmFit svmFit   rfFit nnetFit linearFit greedyFit
## MAE  0.08325 0.0893 0.09524 0.07564   0.08286   0.08378
## RMSE 0.10512 0.1116 0.11578 0.10123   0.10353   0.10430
## RELE 0.64811 0.5608 0.64184 0.44904   0.57233   0.59072
# Show the error table read from the "UPT 12.1" results file.
print(Error121)
##       lmFit svmFit  rfFit nnetFit linearFit greedyFit
## MAE  0.1127 0.1162 0.1101  0.1101    0.1040    0.1065
## RMSE 0.1349 0.1434 0.1368  0.1358    0.1308    0.1329
## RELE 0.7556 0.9016 0.8233  0.7705    0.7490    0.7949