PhD Report3-Prepare databases for training and testing by using package RSNNS

1. Database “mn” first. The rows are shuffled and then split at random (the code below uses “splitForTrainingAndTest” from RSNNS rather than caret's “createDataPartition”): 0.85 of the data is the training set and 0.15 is the testing set.

library(caret)
library(zoo)
library(PerformanceAnalytics)
library(RSNNS)
load("/home/gong/prepareData/mn.RData")
# Shuffle the rows of `mn`; splitForTrainingAndTest() (used further down)
# takes the test set from the end of the data, so the order must be random.
# sample.int() also copes with a zero-row table, where `1:nrow(mn)` would
# expand to c(1, 0) and select a bogus row.
mnNew <- mn[sample.int(nrow(mn)), ]
# predictor columns and the `O3` target column
mnValues <- mnNew[, c("MONTH", "DAY", "WEEKDAY", "HORA", "SEASON", "NOx", "NO2", 
    "RH", "TMP", "WDR", "WSP")]
mnTarget <- mnNew[, "O3"]
# the correlations among different variables
# Correlation matrix with p-values.
#
# Args:
#   X:   numeric matrix or data frame; correlations are taken between columns
#        using pairwise complete observations.
#   dfr: degrees of freedom of the significance test (default nrow(X) - 2).
#
# Returns a square matrix holding the correlations below the diagonal, the
# p-value of each correlation above the diagonal, and NA on the diagonal.
cor.prob <- function(X, dfr = nrow(X) - 2) {
    R <- cor(X, use = "pairwise.complete.obs")
    above <- row(R) < col(R)
    r2 <- R[above]^2
    Fstat <- r2 * dfr/(1 - r2)
    # Take the upper tail directly: `1 - pf(...)` rounds every p-value below
    # about 1e-16 to exactly 0.
    R[above] <- pf(Fstat, 1, dfr, lower.tail = FALSE)
    R[row(R) == col(R)] <- NA
    R
}
# Flatten the upper triangle of a square matrix into a long table.
#
# Args:
#   m: square matrix with identical row and column names, laid out the way
#      cor.prob() returns it (correlations below the diagonal, p-values above).
#
# Returns a data frame with one row per variable pair: the names `i` and `j`,
# the value from the lower triangle as `cor` and the value from the upper
# triangle as `p`.
flattenSquareMatrix <- function(m) {
    # is.matrix() rather than `class(m) != "matrix"`: since R 4.0 a matrix has
    # class c("matrix", "array"), which turned the old test into a length-two
    # condition (an error inside `if` from R 4.2 on).
    if (!is.matrix(m) || nrow(m) != ncol(m)) 
        stop("Must be a square matrix.")
    if (!identical(rownames(m), colnames(m))) 
        stop("Row and column names must be equal.")
    ut <- upper.tri(m)
    # t(m)[ut] reads the lower-triangle entry that mirrors each upper one.
    data.frame(i = rownames(m)[row(m)[ut]], j = rownames(m)[col(m)[ut]], cor = t(m)[ut], 
        p = m[ut])
}
# Correlation matrix of the mn predictors, then the same matrix with p-values,
# printed as one row per pair of variables.
mnCorr <- cor(x = mnValues)
mnCor.prob <- cor.prob(X = mnValues)
flattenSquareMatrix(m = mnCor.prob)
##          i       j        cor         p
## 1    MONTH     DAY  4.539e-02 1.704e-02
## 2    MONTH WEEKDAY -2.807e-02 1.402e-01
## 3      DAY WEEKDAY -4.757e-04 9.801e-01
## 4    MONTH    HORA  9.155e-05 9.962e-01
## 5      DAY    HORA  1.457e-02 4.441e-01
## 6  WEEKDAY    HORA  3.079e-02 1.058e-01
## 7    MONTH  SEASON  9.715e-01 0.000e+00
## 8      DAY  SEASON  5.540e-02 3.584e-03
## 9  WEEKDAY  SEASON -2.855e-02 1.337e-01
## 10    HORA  SEASON  3.060e-03 8.723e-01
## 11   MONTH     NOx  1.311e-01 4.643e-12
## 12     DAY     NOx  1.625e-02 3.933e-01
## 13 WEEKDAY     NOx -1.726e-01 0.000e+00
## 14    HORA     NOx -8.742e-02 4.199e-06
## 15  SEASON     NOx  1.314e-01 4.076e-12
## 16   MONTH     NO2  1.269e-01 2.213e-11
## 17     DAY     NO2  1.363e-02 4.740e-01
## 18 WEEKDAY     NO2 -1.656e-01 0.000e+00
## 19    HORA     NO2 -6.411e-02 7.485e-04
## 20  SEASON     NO2  1.295e-01 8.534e-12
## 21     NOx     NO2  9.580e-01 0.000e+00
## 22   MONTH      RH  3.321e-01 0.000e+00
## 23     DAY      RH -4.834e-03 7.995e-01
## 24 WEEKDAY      RH -1.482e-02 4.361e-01
## 25    HORA      RH -2.205e-01 0.000e+00
## 26  SEASON      RH  3.457e-01 0.000e+00
## 27     NOx      RH  1.618e-01 0.000e+00
## 28     NO2      RH  1.348e-01 1.133e-12
## 29   MONTH     TMP -1.544e-01 3.331e-16
## 30     DAY     TMP  2.146e-02 2.595e-01
## 31 WEEKDAY     TMP  1.761e-02 3.548e-01
## 32    HORA     TMP  1.969e-01 0.000e+00
## 33  SEASON     TMP -1.633e-01 0.000e+00
## 34     NOx     TMP -2.797e-01 0.000e+00
## 35     NO2     TMP -2.518e-01 0.000e+00
## 36      RH     TMP -4.259e-01 0.000e+00
## 37   MONTH     WDR  1.097e-01 7.493e-09
## 38     DAY     WDR  6.933e-03 7.157e-01
## 39 WEEKDAY     WDR -1.273e-02 5.036e-01
## 40    HORA     WDR  2.625e-02 1.679e-01
## 41  SEASON     WDR  1.077e-01 1.412e-08
## 42     NOx     WDR  3.202e-02 9.251e-02
## 43     NO2     WDR  1.922e-02 3.126e-01
## 44      RH     WDR  1.233e-01 7.978e-11
## 45     TMP     WDR -1.913e-01 0.000e+00
## 46   MONTH     WSP -1.230e-01 8.783e-11
## 47     DAY     WSP  9.927e-03 6.020e-01
## 48 WEEKDAY     WSP -1.131e-02 5.524e-01
## 49    HORA     WSP -1.266e-01 2.476e-11
## 50  SEASON     WSP -1.334e-01 1.930e-12
## 51     NOx     WSP -6.137e-02 1.251e-03
## 52     NO2     WSP -7.385e-02 1.024e-04
## 53      RH     WSP -1.780e-01 0.000e+00
## 54     TMP     WSP  5.473e-02 4.011e-03
## 55     WDR     WSP -2.458e-02 1.965e-01
# correlation chart of the mn predictors (PerformanceAnalytics)
chart.Correlation(R = mnValues)

plot of chunk Prepare Training And Testing Database

# remove the variables that result in correlations greater than 0.9.
mn.high.corr <- findCorrelation(mnCorr, cutoff = 0.9)
# Guard the empty case: findCorrelation() returns integer(0) when nothing
# exceeds the cutoff, and `mnValues[, -integer(0)]` would select NO columns.
if (length(mn.high.corr) > 0) {
    mnValues2 <- mnValues[, -mn.high.corr, drop = FALSE]
} else {
    mnValues2 <- mnValues
}
# normalize predictors and target to the range [0, 1]
mnValuesNorm <- normalizeData(mnValues2, type = "0_1")
mnTargetNorm <- normalizeData(mnTarget, type = "0_1")
# split the data into Training set and Test set (ratio = share of test rows)
mnSplit <- splitForTrainingAndTest(mnValuesNorm, mnTargetNorm, ratio = 0.15)
mnInputsTrain <- mnSplit$inputsTrain
mnTargetTrain <- mnSplit$targetsTrain
mnInputsTest <- mnSplit$inputsTest
mnTargetTest <- mnSplit$targetsTest
save(mnInputsTrain, mnTargetTrain, mnInputsTest, mnTargetTest, file = "mnTrainAndTest.RData")
# Smoke test of the prepared data: fit a small multilayer perceptron (size 5,
# at most 100 iterations) and predict the held-out test inputs.
model <- mlp(x = mnSplit$inputsTrain, y = mnSplit$targetsTrain, size = 5, 
    learnFuncParams = 0.1, maxit = 100, inputsTest = mnSplit$inputsTest, 
    targetsTest = mnSplit$targetsTest)
predictions <- predict(object = model, newdata = mnSplit$inputsTest)
# Predicted against observed values on the normalized scale; the segment from
# (0, 0) to (1, 1) marks perfect agreement.
plot(x = predictions, y = mnSplit$targetsTest)
lines(x = c(0, 1), y = c(0, 1), col = 2)

plot of chunk Prepare Training And Testing Database

# plot the predicted and observed data on the original (denormalized) scale
prd <- denormalizeData(predictions, getNormParameters(mnTargetNorm))
obs <- denormalizeData(mnSplit$targetsTest, getNormParameters(mnTargetNorm))
plot(prd, obs)
# Identity line across the whole plot: a segment from (0, 0) to (1, 1) is
# only a meaningful reference on the normalized 0-1 scale.
abline(a = 0, b = 1, col = 2)

plot of chunk Prepare Training And Testing Database

2.Database “davg”

load("~/prepareData/davg.RData")
# Shuffle the rows of `davg`; splitForTrainingAndTest() (used further down)
# takes the test set from the end of the data. sample.int() also copes with a
# zero-row table, where `1:nrow(davg)` would expand to c(1, 0).
davgNew <- davg[sample.int(nrow(davg)), ]
# predictor columns and the `O3` target column
davgValues <- davgNew[, c("MONTH", "SEASON", "DAY", "WEEKDAY", "HORA.x", "AVGO3", 
    "AVGNOx", "AVGNO2", "AVGRH", "AVGTMP", "AVGWDR", "AVGWSP")]
davgTarget <- davgNew[, "O3"]
# the correlations among the variables in davgValues, with their p-values
davgCorr <- cor(davgValues)
davgCor.prob <- cor.prob(davgValues)
flattenSquareMatrix(davgCor.prob)
##          i       j       cor         p
## 1    MONTH  SEASON  0.970628 0.000e+00
## 2    MONTH     DAY  0.044687 7.969e-02
## 3   SEASON     DAY  0.059751 1.907e-02
## 4    MONTH WEEKDAY -0.035163 1.680e-01
## 5   SEASON WEEKDAY -0.041317 1.052e-01
## 6      DAY WEEKDAY -0.011941 6.397e-01
## 7    MONTH  HORA.x -0.014544 5.686e-01
## 8   SEASON  HORA.x -0.009202 7.183e-01
## 9      DAY  HORA.x  0.008259 7.461e-01
## 10 WEEKDAY  HORA.x  0.017887 4.832e-01
## 11   MONTH   AVGO3 -0.160295 2.545e-10
## 12  SEASON   AVGO3 -0.174236 5.883e-12
## 13     DAY   AVGO3 -0.046033 7.102e-02
## 14 WEEKDAY   AVGO3 -0.031386 2.185e-01
## 15  HORA.x   AVGO3 -0.085142 8.273e-04
## 16   MONTH  AVGNOx  0.133564 1.448e-07
## 17  SEASON  AVGNOx  0.130936 2.541e-07
## 18     DAY  AVGNOx  0.037961 1.366e-01
## 19 WEEKDAY  AVGNOx  0.117727 3.642e-06
## 20  HORA.x  AVGNOx -0.006366 8.030e-01
## 21   AVGO3  AVGNOx  0.180377 1.011e-12
## 22   MONTH  AVGNO2  0.082661 1.172e-03
## 23  SEASON  AVGNO2  0.074755 3.342e-03
## 24     DAY  AVGNO2  0.004799 8.508e-01
## 25 WEEKDAY  AVGNO2  0.106129 3.021e-05
## 26  HORA.x  AVGNO2  0.006070 8.119e-01
## 27   AVGO3  AVGNO2  0.374029 0.000e+00
## 28  AVGNOx  AVGNO2  0.889677 0.000e+00
## 29   MONTH   AVGRH  0.381264 0.000e+00
## 30  SEASON   AVGRH  0.393875 0.000e+00
## 31     DAY   AVGRH  0.010564 6.788e-01
## 32 WEEKDAY   AVGRH  0.005293 8.356e-01
## 33  HORA.x   AVGRH -0.036925 1.476e-01
## 34   AVGO3   AVGRH -0.381592 0.000e+00
## 35  AVGNOx   AVGRH  0.021640 3.962e-01
## 36  AVGNO2   AVGRH -0.027960 2.730e-01
## 37   MONTH  AVGTMP -0.067295 8.270e-03
## 38  SEASON  AVGTMP -0.082241 1.242e-03
## 39     DAY  AVGTMP -0.003649 8.863e-01
## 40 WEEKDAY  AVGTMP -0.001145 9.642e-01
## 41  HORA.x  AVGTMP -0.106061 3.057e-05
## 42   AVGO3  AVGTMP  0.249080 0.000e+00
## 43  AVGNOx  AVGTMP -0.345257 0.000e+00
## 44  AVGNO2  AVGTMP -0.223678 0.000e+00
## 45   AVGRH  AVGTMP -0.184637 2.871e-13
## 46   MONTH  AVGWDR  0.152569 1.791e-09
## 47  SEASON  AVGWDR  0.155996 7.628e-10
## 48     DAY  AVGWDR -0.002444 9.237e-01
## 49 WEEKDAY  AVGWDR  0.017085 5.030e-01
## 50  HORA.x  AVGWDR  0.005611 8.259e-01
## 51   AVGO3  AVGWDR -0.197672 4.996e-15
## 52  AVGNOx  AVGWDR -0.014351 5.737e-01
## 53  AVGNO2  AVGWDR -0.046319 6.928e-02
## 54   AVGRH  AVGWDR  0.202426 1.110e-15
## 55  AVGTMP  AVGWDR -0.116545 4.562e-06
## 56   MONTH  AVGWSP -0.140616 3.034e-08
## 57  SEASON  AVGWSP -0.156873 6.113e-10
## 58     DAY  AVGWSP -0.011228 6.599e-01
## 59 WEEKDAY  AVGWSP -0.024617 3.345e-01
## 60  HORA.x  AVGWSP -0.098076 1.164e-04
## 61   AVGO3  AVGWSP -0.057147 2.497e-02
## 62  AVGNOx  AVGWSP -0.300702 0.000e+00
## 63  AVGNO2  AVGWSP -0.260574 0.000e+00
## 64   AVGRH  AVGWSP -0.215546 0.000e+00
## 65  AVGTMP  AVGWSP  0.155117 9.514e-10
## 66  AVGWDR  AVGWSP  0.192353 2.720e-14
# correlation chart of the davg predictors (PerformanceAnalytics)
chart.Correlation(R = davgValues)

plot of chunk Prepare Training And Testing Database by using RSNNS.2

# remove the variables that result in correlations greater than 0.9.
davg.high.corr <- findCorrelation(davgCorr, cutoff = 0.9)
# Guard the empty case: findCorrelation() returns integer(0) when nothing
# exceeds the cutoff, and `davgValues[, -integer(0)]` would select NO columns.
if (length(davg.high.corr) > 0) {
    davgValues2 <- davgValues[, -davg.high.corr, drop = FALSE]
} else {
    davgValues2 <- davgValues
}
# normalize predictors and target to the range [0, 1]
davgValuesNorm <- normalizeData(davgValues2, type = "0_1")
davgTargetNorm <- normalizeData(davgTarget, type = "0_1")
# split the data into Training set and Test set (ratio = share of test rows)
davgSplit <- splitForTrainingAndTest(davgValuesNorm, davgTargetNorm, ratio = 0.15)
davgInputsTrain <- davgSplit$inputsTrain
davgTargetTrain <- davgSplit$targetsTrain
davgInputsTest <- davgSplit$inputsTest
davgTargetTest <- davgSplit$targetsTest
save(davgInputsTrain, davgTargetTrain, davgInputsTest, davgTargetTest, file = "davgTrainingAndTesting.RData")

3.Database “dmax”

load("~/prepareData/dmax.RData")
# Shuffle the rows of `dmax`; splitForTrainingAndTest() (used further down)
# takes the test set from the end of the data. sample.int() also copes with a
# zero-row table, where `1:nrow(dmax)` would expand to c(1, 0).
dmaxNew <- dmax[sample.int(nrow(dmax)), ]
# predictor columns and the `O3` target column
dmaxValues <- dmaxNew[, c("MONTH", "SEASON", "DAY", "WEEKDAY", "HORA.y", "MAXO3Y", 
    "MAXNOxY", "MAXNO2Y", "MINRHY", "MAXTMP", "AVGWDR", "AVGWSP")]
dmaxTarget <- dmaxNew[, "O3"]
# the correlations among the variables in dmaxValues, with their p-values
dmaxCorr <- cor(dmaxValues)
dmaxCor.prob <- cor.prob(dmaxValues)
flattenSquareMatrix(dmaxCor.prob)
##          i       j        cor         p
## 1    MONTH  SEASON  0.9706281 0.000e+00
## 2    MONTH     DAY  0.0446865 7.969e-02
## 3   SEASON     DAY  0.0597508 1.907e-02
## 4    MONTH WEEKDAY -0.0351626 1.680e-01
## 5   SEASON WEEKDAY -0.0413171 1.052e-01
## 6      DAY WEEKDAY -0.0119407 6.397e-01
## 7    MONTH  HORA.y -0.0145444 5.686e-01
## 8   SEASON  HORA.y -0.0092019 7.183e-01
## 9      DAY  HORA.y  0.0082587 7.461e-01
## 10 WEEKDAY  HORA.y  0.0178871 4.832e-01
## 11   MONTH  MAXO3Y -0.0187697 4.618e-01
## 12  SEASON  MAXO3Y -0.0228830 3.697e-01
## 13     DAY  MAXO3Y -0.0273923 2.829e-01
## 14 WEEKDAY  MAXO3Y  0.0221262 3.857e-01
## 15  HORA.y  MAXO3Y -0.0454001 7.499e-02
## 16   MONTH MAXNOxY -0.0189540 4.575e-01
## 17  SEASON MAXNOxY -0.0210799 4.086e-01
## 18     DAY MAXNOxY  0.0487606 5.582e-02
## 19 WEEKDAY MAXNOxY  0.0881245 5.381e-04
## 20  HORA.y MAXNOxY -0.0112039 6.605e-01
## 21  MAXO3Y MAXNOxY  0.2705618 0.000e+00
## 22   MONTH MAXNO2Y  0.0339138 1.836e-01
## 23  SEASON MAXNO2Y  0.0312272 2.208e-01
## 24     DAY MAXNO2Y  0.0022279 9.304e-01
## 25 WEEKDAY MAXNO2Y  0.0717918 4.836e-03
## 26  HORA.y MAXNO2Y -0.0173466 4.965e-01
## 27  MAXO3Y MAXNO2Y  0.4381321 0.000e+00
## 28 MAXNOxY MAXNO2Y  0.7569097 0.000e+00
## 29   MONTH  MINRHY  0.3712850 0.000e+00
## 30  SEASON  MINRHY  0.3895492 0.000e+00
## 31     DAY  MINRHY -0.0009413 9.706e-01
## 32 WEEKDAY  MINRHY  0.0012726 9.602e-01
## 33  HORA.y  MINRHY -0.0223454 3.810e-01
## 34  MAXO3Y  MINRHY -0.2358823 0.000e+00
## 35 MAXNOxY  MINRHY -0.1506512 2.864e-09
## 36 MAXNO2Y  MINRHY -0.1348479 1.096e-07
## 37   MONTH  MAXTMP -0.1885280 8.860e-14
## 38  SEASON  MAXTMP -0.2067812 2.220e-16
## 39     DAY  MAXTMP  0.0020559 9.358e-01
## 40 WEEKDAY  MAXTMP  0.0104283 6.827e-01
## 41  HORA.y  MAXTMP -0.0721252 4.642e-03
## 42  MAXO3Y  MAXTMP  0.1309384 2.540e-07
## 43 MAXNOxY  MAXTMP -0.1281484 4.559e-07
## 44 MAXNO2Y  MAXTMP -0.1094946 1.670e-05
## 45  MINRHY  MAXTMP -0.4247955 0.000e+00
## 46   MONTH  AVGWDR  0.1525686 1.791e-09
## 47  SEASON  AVGWDR  0.1559964 7.628e-10
## 48     DAY  AVGWDR -0.0024442 9.237e-01
## 49 WEEKDAY  AVGWDR  0.0170847 5.030e-01
## 50  HORA.y  AVGWDR  0.0056113 8.259e-01
## 51  MAXO3Y  AVGWDR -0.1583804 4.165e-10
## 52 MAXNOxY  AVGWDR -0.0508233 4.621e-02
## 53 MAXNO2Y  AVGWDR -0.0567208 2.607e-02
## 54  MINRHY  AVGWDR  0.2619972 0.000e+00
## 55  MAXTMP  AVGWDR -0.1825909 5.276e-13
## 56   MONTH  AVGWSP -0.1406160 3.034e-08
## 57  SEASON  AVGWSP -0.1568728 6.113e-10
## 58     DAY  AVGWSP -0.0112277 6.599e-01
## 59 WEEKDAY  AVGWSP -0.0246168 3.345e-01
## 60  HORA.y  AVGWSP -0.0980756 1.164e-04
## 61  MAXO3Y  AVGWSP -0.2224792 0.000e+00
## 62 MAXNOxY  AVGWSP -0.1985839 3.775e-15
## 63 MAXNO2Y  AVGWSP -0.1914823 3.564e-14
## 64  MINRHY  AVGWSP -0.2553490 0.000e+00
## 65  MAXTMP  AVGWSP  0.1445151 1.236e-08
## 66  AVGWDR  AVGWSP  0.1923527 2.720e-14
# correlation chart of the dmax predictors (PerformanceAnalytics)
chart.Correlation(R = dmaxValues)

plot of chunk Prepare Training And Testing Database by using RSNNS.3

# remove the variables that result in correlations greater than 0.9.
dmax.high.corr <- findCorrelation(dmaxCorr, cutoff = 0.9)
# Guard the empty case: findCorrelation() returns integer(0) when nothing
# exceeds the cutoff, and `dmaxValues[, -integer(0)]` would select NO columns.
if (length(dmax.high.corr) > 0) {
    dmaxValues2 <- dmaxValues[, -dmax.high.corr, drop = FALSE]
} else {
    dmaxValues2 <- dmaxValues
}
# normalize predictors and target to the range [0, 1]
dmaxValuesNorm <- normalizeData(dmaxValues2, type = "0_1")
dmaxTargetNorm <- normalizeData(dmaxTarget, type = "0_1")
# split the data into Training set and Test set (ratio = share of test rows)
dmaxSplit <- splitForTrainingAndTest(dmaxValuesNorm, dmaxTargetNorm, ratio = 0.15)
dmaxInputsTrain <- dmaxSplit$inputsTrain
dmaxTargetTrain <- dmaxSplit$targetsTrain
dmaxInputsTest <- dmaxSplit$inputsTest
dmaxTargetTest <- dmaxSplit$targetsTest
save(dmaxInputsTrain, dmaxTargetTrain, dmaxInputsTest, dmaxTargetTest, file = "dmaxTrainingAndTesting.RData")

4.Database “P5”

load("~/prepareData/P5.RData")
# Shuffle the rows of `P5`; splitForTrainingAndTest() (used further down)
# takes the test set from the end of the data. sample.int() also copes with a
# zero-row table, where `1:nrow(P5)` would expand to c(1, 0).
P5New <- P5[sample.int(nrow(P5)), ]
# predictor columns and the `O3` target column
P5Values <- P5New[, c("MONTH", "SEASON", "DAY", "WEEKDAY", "HORA", "NOx", "NO2", 
    "RH", "TMP", "WDR", "WSP")]
P5Target <- P5New[, "O3"]
# the correlations among the variables in P5Values, with their p-values
P5Corr <- cor(P5Values)
P5Cor.prob <- cor.prob(P5Values)
flattenSquareMatrix(P5Cor.prob)
##          i       j        cor         p
## 1    MONTH  SEASON  9.716e-01 0.000e+00
## 2    MONTH     DAY  4.477e-02 1.908e-02
## 3   SEASON     DAY  5.589e-02 3.421e-03
## 4    MONTH WEEKDAY -2.620e-02 1.702e-01
## 5   SEASON WEEKDAY -2.595e-02 1.745e-01
## 6      DAY WEEKDAY -4.931e-06 9.998e-01
## 7    MONTH    HORA  1.459e-02 4.450e-01
## 8   SEASON    HORA  2.137e-02 2.633e-01
## 9      DAY    HORA  4.845e-03 7.998e-01
## 10 WEEKDAY    HORA  1.391e-02 4.667e-01
## 11   MONTH     NOx  6.757e-04 9.718e-01
## 12  SEASON     NOx  1.033e-03 9.569e-01
## 13     DAY     NOx  1.808e-02 3.442e-01
## 14 WEEKDAY     NOx -2.552e-01 0.000e+00
## 15    HORA     NOx -5.325e-02 5.290e-03
## 16   MONTH     NO2 -4.081e-02 3.262e-02
## 17  SEASON     NO2 -4.777e-02 1.238e-02
## 18     DAY     NO2  9.106e-03 6.337e-01
## 19 WEEKDAY     NO2 -1.347e-01 1.442e-12
## 20    HORA     NO2  2.223e-01 0.000e+00
## 21     NOx     NO2  6.623e-01 0.000e+00
## 22   MONTH      RH  3.089e-01 0.000e+00
## 23  SEASON      RH  3.179e-01 0.000e+00
## 24     DAY      RH  8.438e-03 6.588e-01
## 25 WEEKDAY      RH -1.630e-02 3.935e-01
## 26    HORA      RH -3.480e-01 0.000e+00
## 27     NOx      RH -4.807e-02 1.184e-02
## 28     NO2      RH -2.743e-01 0.000e+00
## 29   MONTH     TMP -1.292e-03 9.461e-01
## 30  SEASON     TMP -3.502e-03 8.546e-01
## 31     DAY     TMP  1.687e-02 3.774e-01
## 32 WEEKDAY     TMP  1.967e-02 3.032e-01
## 33    HORA     TMP  5.323e-01 0.000e+00
## 34     NOx     TMP -2.169e-01 0.000e+00
## 35     NO2     TMP  9.345e-02 9.532e-07
## 36      RH     TMP -3.181e-01 0.000e+00
## 37   MONTH     WDR  4.619e-02 1.559e-02
## 38  SEASON     WDR  4.918e-02 1.002e-02
## 39     DAY     WDR  5.857e-04 9.755e-01
## 40 WEEKDAY     WDR -1.587e-02 4.062e-01
## 41    HORA     WDR -3.220e-01 0.000e+00
## 42     NOx     WDR -3.281e-02 8.590e-02
## 43     NO2     WDR -1.935e-01 0.000e+00
## 44      RH     WDR  2.329e-01 0.000e+00
## 45     TMP     WDR -3.337e-01 0.000e+00
## 46   MONTH     WSP -4.140e-02 3.019e-02
## 47  SEASON     WSP -5.040e-02 8.309e-03
## 48     DAY     WSP -1.266e-03 9.472e-01
## 49 WEEKDAY     WSP -1.343e-02 4.821e-01
## 50    HORA     WSP  2.952e-01 0.000e+00
## 51     NOx     WSP -2.135e-01 0.000e+00
## 52     NO2     WSP -4.031e-02 3.482e-02
## 53      RH     WSP -1.569e-01 1.110e-16
## 54     TMP     WSP  2.423e-01 0.000e+00
## 55     WDR     WSP  1.609e-02 3.999e-01
# correlation chart of the P5 predictors (PerformanceAnalytics)
chart.Correlation(R = P5Values)

plot of chunk Prepare Training And Testing Database by using RSNNS.4

# remove the variables that result in correlations greater than 0.9.
P5.high.corr <- findCorrelation(P5Corr, cutoff = 0.9)
# Guard the empty case: findCorrelation() returns integer(0) when nothing
# exceeds the cutoff, and `P5Values[, -integer(0)]` would select NO columns.
if (length(P5.high.corr) > 0) {
    P5Values2 <- P5Values[, -P5.high.corr, drop = FALSE]
} else {
    P5Values2 <- P5Values
}
# normalize predictors and target to the range [0, 1]
P5ValuesNorm <- normalizeData(P5Values2, type = "0_1")
P5TargetNorm <- normalizeData(P5Target, type = "0_1")
# split the data into Training set and Test set (ratio = share of test rows)
P5Split <- splitForTrainingAndTest(P5ValuesNorm, P5TargetNorm, ratio = 0.15)
P5InputsTrain <- P5Split$inputsTrain
P5TargetTrain <- P5Split$targetsTrain
P5InputsTest <- P5Split$inputsTest
P5TargetTest <- P5Split$targetsTest
save(P5InputsTrain, P5TargetTrain, P5InputsTest, P5TargetTest, file = "P5TrainingAndTesting.RData")

5. Build a function to separate a database into training and testing parts

# Load the stored P6 workspace.
# NOTE(review): nothing in this section appears to use it (P6 is re-assigned
# from `p` further down) - confirm whether this load is still needed.
load(file = "~/prepareData/P6.RData")
# Prepare one database for training and testing and save the result.
#
# The rows are shuffled, predictors flagged by findCorrelation() (cutoff 0.9)
# are dropped, predictors and target are scaled to [0, 1], and the data are
# split with ratio = 0.15 into a training and a test set.
#
# Args:
#   P5: table with the columns MONTH, SEASON, DAY, WEEKDAY, HORA, NOx, NO2,
#       RH, TMP, WDR, WSP (predictors) and O3 (target).
#   i:  label of the database; it is inserted into the output file name.
#
# Side effect: saves InputsTrain, TargetTrain, InputsTest and TargetTest to
# "P<i>TrainingAndTesting.RData" in the working directory.
TrainAndTest <- function(P5, i) {
    # sample.int() also copes with a zero-row table, where `1:nrow(P5)` would
    # expand to c(1, 0).
    P5New <- P5[sample.int(nrow(P5)), ]
    P5Values <- P5New[, c("MONTH", "SEASON", "DAY", "WEEKDAY", "HORA", "NOx", 
        "NO2", "RH", "TMP", "WDR", "WSP")]
    P5Target <- P5New[, "O3"]
    # Correlations among the predictors. The p-value table that used to be
    # computed here was neither printed nor returned, so it has been dropped.
    P5Corr <- cor(P5Values)
    # remove the variables that result in correlations greater than 0.9.
    P5.high.corr <- findCorrelation(P5Corr, cutoff = 0.9)
    # `P5Values[, -integer(0)]` would select NO columns, so only drop columns
    # when findCorrelation() actually flagged some.
    if (length(P5.high.corr) > 0) {
        P5Values2 <- P5Values[, -P5.high.corr, drop = FALSE]
    } else {
        P5Values2 <- P5Values
    }
    # normalize data
    P5ValuesNorm <- normalizeData(P5Values2, type = "0_1")
    P5TargetNorm <- normalizeData(P5Target, type = "0_1")
    # split the data into Training set and Test set
    P5Split <- splitForTrainingAndTest(P5ValuesNorm, P5TargetNorm, ratio = 0.15)
    InputsTrain <- P5Split$inputsTrain
    TargetTrain <- P5Split$targetsTrain
    InputsTest <- P5Split$inputsTest
    TargetTest <- P5Split$targetsTest
    # paste0(): paste() separated the pieces with spaces
    # ("P 1 TrainingAndTesting.RData"), unlike "P5TrainingAndTesting.RData".
    save(InputsTrain, TargetTrain, InputsTest, TargetTest, file = paste0("P", 
        i, "TrainingAndTesting.RData"))
}
# Databases 'P1' to 'P4' and 'P6' to 'P10': each "previous <h> hour" workspace
# is expected to provide an object `p`, which is kept under the name P<h> and
# then prepared with TrainAndTest().
for (prevHour in c(1, 2, 3, 4, 6, 7, 8, 9, 10)) {
    load(paste0("~/previous ", prevHour, " hour.RData"))
    assign(paste0("P", prevHour), p)
    TrainAndTest(p, prevHour)
}
# do not leave the loop index behind in the workspace
rm(prevHour)