timeDifCompare

This report aims to explain the relationship between the prediction errors and the interval, in hours, between the time at which the daily maximum ozone occurred and the time at which the input variables used for the predictions were observed (from 1 to 12 o'clock).

# Load timeDifTestData from the HORA1 ... HORA12 folders and rbind them
# together.
# Every file is expected to provide an object called 'timeDifTestData'.
# load() returns the names of the objects it restored, so a file that lacks
# the object is reported immediately instead of silently reusing the data
# left in the workspace by the previous hour.
for (hora in 1:12) {
    rdataPath <- paste0("~/HORA", hora, "/timeDifTestData.RData")
    loadedNames <- load(rdataPath)
    if (!"timeDifTestData" %in% loadedNames) {
        stop("'timeDifTestData' not found in ", rdataPath, call. = FALSE)
    }
    # keep the per-hour copies timeDifHora1 ... timeDifHora12 available
    assign(paste0("timeDifHora", hora), timeDifTestData)
}
# drop the loop helpers so the workspace holds the same objects as before
rm(hora, rdataPath, loadedNames)
# unname() so the list names cannot leak into the row names of the result
timeDifTotal <- do.call(rbind, unname(mget(paste0("timeDifHora", 1:12))))

# Check how often each timeDif value occurs in the combined data.
# NOTE(review): no PerformanceAnalytics function is called in the code
# visible in this report -- confirm whether this library() call is still
# needed.
library(PerformanceAnalytics)
# Frequency table: one count per distinct timeDif value.
table(timeDifTotal$timeDif)
## 
## -11 -10  -9  -8  -7  -6  -5  -4  -3  -2  -1   0   1   2   3   4   5   6 
##   1   1   1   2   4   4   4   4   4   3   4   9  44 124 229 300 331 340 
##   7   8   9  10  11  12  13  14  15  16  17  18  21  22  23 
## 326 320 307 304 298 291 263 203 106  41   7   1   1   1   1
# The values of column 'timeDif' range from -11 to 23, but 19 and 20 do not occur.
# Calculate residuals (real value minus prediction) for one model.
#
# Args:
#   model: name of the column holding the model's predictions, e.g. "lmFit".
#   data:  table with a 'realO3' column and the 'model' column. Defaults to
#          the global timeDifTotal, so existing one-argument calls keep
#          working unchanged.
# Returns:
#   data[, "realO3"] - data[, model], one value per row. The value is now
#   returned visibly (the original returned it invisibly through a stray
#   assignment).
res <- function(model, data = timeDifTotal) {
    stopifnot(is.character(model), length(model) == 1L)
    missingCols <- setdiff(c("realO3", model), colnames(data))
    if (length(missingCols) > 0) {
        stop("column(s) not found: ", paste(missingCols, collapse = ", "),
            call. = FALSE)
    }
    data[, "realO3"] - data[, model]
}
# Extend timeDifTotal (prediction values, real values and timeDif) with one
# residual column per model: the '<model>Fit' predictions give '<model>Res'.
timeDifTotal <- local({
    modelPrefixes <- c("lm", "svm", "rf", "nnet", "linear", "greedy")
    residualCols <- lapply(paste0(modelPrefixes, "Fit"), res)
    names(residualCols) <- paste0(modelPrefixes, "Res")
    do.call(cbind, c(list(timeDifTotal), residualCols))
})
# Build a table with the mean, max and min of one model's residuals for
# every timeDif value.
#
# Args:
#   modelRes:   name of the residual column to summarise, e.g. "lmRes".
#   data:       data frame with a 'timeDif' column and the 'modelRes'
#               column. Defaults to the global timeDifTotal, so existing
#               one-argument calls keep working unchanged.
#   saveResult: if TRUE (the default) the table is also saved, as an object
#               named 'resTotal', to the file '<modelRes> Total.RData'.
# Returns:
#   A data frame with one row per distinct timeDif value found in 'data'
#   (ascending order) and the columns mean, max, min, models, timeDif.
#   Column 4 is now really called 'models'; previously it carried the
#   deparsed expression as its name. Selecting it by position is unaffected.
resMeanMaxMin <- function(modelRes, data = timeDifTotal, saveResult = TRUE) {
    stopifnot(is.character(modelRes), length(modelRes) == 1L)
    missingCols <- setdiff(c("timeDif", modelRes), colnames(data))
    if (length(missingCols) > 0) {
        stop("column(s) not found: ", paste(missingCols, collapse = ", "),
            call. = FALSE)
    }

    residuals <- data[[modelRes]]
    timeDif <- data[["timeDif"]]
    # Take the timeDif levels from the data instead of a hard-coded list, so
    # no level is silently skipped and no empty level yields NaN / -Inf.
    timeDifValues <- sort(unique(timeDif))

    # Apply one statistic to the residuals of every timeDif level. The values
    # stay numeric throughout, so nothing is rounded by a detour via text.
    summariseBy <- function(stat) {
        vapply(timeDifValues, function(value) {
            # which() ignores rows whose timeDif is NA
            stat(residuals[which(timeDif == value)])
        }, numeric(1))
    }

    resTotal <- data.frame(
        mean = summariseBy(mean),
        max = summariseBy(max),
        min = summariseBy(min),
        models = rep(modelRes, length(timeDifValues)),
        timeDif = as.numeric(timeDifValues)
    )
    if (saveResult) {
        # File name kept exactly as before, including the space that paste()
        # inserts between the model name and "Total.RData".
        save(resTotal, file = paste(modelRes, "Total.RData"))
    }
    resTotal
}
# Summarise every model's residuals; each call returns that model's table
# of mean / max / min per timeDif.
lmR <- resMeanMaxMin("lmRes")
svmR <- resMeanMaxMin("svmRes")
rfR <- resMeanMaxMin("rfRes")
nnetR <- resMeanMaxMin("nnetRes")
linearR <- resMeanMaxMin("linearRes")
greedyR <- resMeanMaxMin("greedyRes")
# Stack the six tables and give the fourth column the name 'model'.
resTotal <- do.call(rbind, list(lmR, svmR, rfR, nnetR, linearR, greedyR))
names(resTotal)[4] <- "model"
# Long format: the first three columns are stacked into a single value
# column, and 'types' records which of them each row came from.
en <- reshape(
    resTotal,
    direction = "long",
    idvar = c("model", "timeDif"),
    varying = list(names(resTotal)[1:3]),
    times = names(resTotal)[1:3],
    timevar = "types"
)
names(en)[4] <- "values"
# Plot the mean of the residuals against timeDif, one panel per model.
# Only the rows of 'en' with types == "mean" are drawn.
library(lattice)
# 'groups' is spelled out in full; the previous 'group =' only worked through
# partial argument matching.
xyplot(values ~ timeDif | model, data = en[en$types == "mean", ],
    groups = types, type = "l", auto.key = list(space = "right"))

plot of chunk timeDifCompare2