Get the data from SQL, and prepare the database mn including the daily Max o3, NOx,NO2,RH,TMP,WSP,WDR all of which are from the same hour.
library(DBI)
library(RODBC)
library(RMySQL)
m = dbDriver("MySQL")
con <- dbConnect(m, user = user, password = pĂ ssword, dbname = dbname, host = host)
## Error: object 'dbname' not found
if (file.exists("da.RData")) {
load("da.RData")
} else {
# Looking for days with full datasets.
da <- dbGetQuery(con, "select * from o3_data_ped where fecha in (select fecha from (SELECT FECHA, count(fecha) as NH from o3_data_ped group by fecha) as foo where NH > 23) order by fecha asc")
da <- da[order(da$FECHA, da$HORA), ]
save(da, file = ("da.RData"))
}
if (file.exists("d.RData")) {
load("d.RData")
} else {
# Taking out of the data with only fecha and daily Max O3
d <- dbGetQuery(con, "select FECHA,mo3 from (SELECT FECHA,MAX( PED ) AS mo3, COUNT( id ) AS no3 FROM O3 WHERE PED >0 GROUP BY fecha ASC) as foo where no3 > 23")
save(d, file = ("d.RData"))
}
# select the data that daily Max O3 level is equal to the hourly O3
m <- merge(d, da, by = "FECHA")
mn <- m[which(m$mo3 == m$O3), ]
if (file.exists("mn.RData")) {
load("mn.RData")
} else {
save(mn, file = "mn.RData")
}
Prepare 10 databases, each of which contains the varialbes “FECHA”,“mo3”,“YEAR”,“MONTH”,“DAY”,“WEEKDAY”,“HORA”,“SEASON”,“O3”, and 6 meteroroloical varialbes 1 to 10 n hours delay of the time daily max o3 levels happened repectively.
# select the subset of database mn when the daily max O3 happend after 10
# oclock
fecha10 <- mn[mn$HORA > 10, c("FECHA")]
m.hora10 <- subset(m, FECHA %in% fecha10)
pre <- function(n, m.hora10) {
# take out the meterological varibles from database m
meter.var <- m.hora10[, c("NOx", "NO2", "RH", "TMP", "WDR", "WSP")]
# add n rows at the begining in order to move those meterological data n
# hours backwards
a <- array(0, dim = c(n, 6))
colnames(a) <- c("NOx", "NO2", "RH", "TMP", "WDR", "WSP")
p <- rbind(a, meter.var)
# delete the last n rows meterological data and cbind the FECHA,O3,MONTH
# etc. varibles with them
mp <- cbind(m.hora10[, c("FECHA", "mo3", "YEAR", "MONTH", "DAY", "WEEKDAY",
"HORA", "SEASON", "O3")], p[-c((nrow(p) - n + 1):nrow(p)), ])
return(mp)
}
for (i in 1:10) {
p <- pre(i, m.hora10)
p <- p[which(p$mo3 == p$O3), ]
save(p, file = paste("previous", i, "hour.RData"))
}
build a database “davg” containing the daily max ozone(O3), and the hour, the day, the month, the year, the weekday that daily max o3 happened,the “yesterday” average O3 levels, also the meterological varialbes which are “yesterday” average of NOx,NO2,RH,TMP,WDR,WSP.
if (file.exists("dm.RData")) {
load("dm.RData")
} else {
dm <- dbGetQuery(con, "select FECHA+1,FECHA,YEAR,MONTH,DAY,WEEKDAY,HORA,SEASON,MAX(o3),AVG(o3),AVG(NOx),AVG(NO2),AVG(RH),AVG(TMP),AVG(WDR),AVG(WSP) from o3_data_ped where fecha in (select fecha from (SELECT FECHA, count(fecha) as NH from o3_data_ped group by fecha) as foo where NH > 23) group by FECHA order by fecha asc")
save(dm, file = ("dm.RData"))
}
if (file.exists("davg.RData")) {
load("davg.RData")
} else {
yd <- dm[, c("FECHA+1", "AVG(o3)", "AVG(NOx)", "AVG(NO2)", "AVG(RH)", "AVG(TMP)",
"AVG(WDR)", "AVG(WSP)")]
colnames(yd)[1] <- c("FECHA")
yd$FECHA <- as.Date(as.factor(yd$FECHA), format = "%Y%m%d")
td$FECHA <- as.Date(as.factor(td$FECHA))
davg <- merge(td, yd, by = "FECHA")
HORA <- mn[, c("FECHA", "HORA")]
HORA$FECHA <- as.Date(as.factor(HORA$FECHA))
davg <- merge(HORA, davg, by = "FECHA")
davg <- davg[, -c(7)]
colnames(davg)[colnames(davg) == "AVG(o3)"] <- "AVGO3"
colnames(davg)[colnames(davg) == "AVG(NOx)"] <- "AVGNOx"
colnames(davg)[colnames(davg) == "AVG(NO2)"] <- "AVGNO2"
colnames(davg)[colnames(davg) == "AVG(RH)"] <- "AVGRH"
colnames(davg)[colnames(davg) == "AVG(TMP)"] <- "AVGTMP"
colnames(davg)[colnames(davg) == "AVG(WDR)"] <- "AVGWDR"
colnames(davg)[colnames(davg) == "AVG(WSP)"] <- "AVGWSP"
colnames(davg)[colnames(davg) == "MAX(o3)"] <- "O3"
save(davg, file = "davg.RData")
}
build a database “dmax” containing the daily max ozone(O3), and the hour, the day, the month, the year, the weekday that daily max o3 happened,the “yesterday” MAX O3 levels also the meterological varialbes which are “yesterday” max NOx,max NO2, min RH, max TMP,average WDR and average WSP.
if (file.exists("dmaxy.RData")) {
load("dmaxy.RData")
} else {
dmaxy <- dbGetQuery(con, "select FECHA+1,FECHA,YEAR,MONTH,DAY,WEEKDAY,HORA,SEASON,MAX(o3),MAX(NOx),MAX(NO2),MIN(RH),MAX(TMP),AVG(WDR),AVG(WSP) from o3_data_ped where fecha in (select fecha from (SELECT FECHA, count(fecha) as NH from o3_data_ped group by fecha) as foo where NH > 23) group by FECHA order by fecha asc")
save(dmaxy, file = "dmaxy.RData")
}
if (file.exists("dmax.RData")) {
load("dmax.RData")
} else {
yd <- dmaxy[, c("FECHA+1", "MAX(o3)", "MAX(NOx)", "MAX(NO2)", "MIN(RH)",
"MAX(TMP)", "AVG(WDR)", "AVG(WSP)")]
td <- dmaxy[, c("FECHA", "YEAR", "MONTH", "DAY", "WEEKDAY", "HORA", "SEASON")]
colnames(yd)[1] <- c("FECHA")
yd$FECHA <- as.Date(as.factor(yd$FECHA), format = "%Y%m%d")
td$FECHA <- as.Date(as.factor(td$FECHA))
dmax <- merge(td, yd, by = "FECHA")
HORA <- mn[, c("FECHA", "HORA")]
HORA$FECHA <- as.Date(as.factor(HORA$FECHA))
dmax <- merge(HORA, dmax, by = "FECHA")
dmax <- dmax[, -c(1, 7)]
colnames(dmax)[colnames(dmax) == "MAX(o3).x"] <- "O3"
colnames(dmax)[colnames(dmax) == "MAX(o3).y"] <- "MAXO3Y"
colnames(dmax)[colnames(dmax) == "MAX(NOx)"] <- "MAXNOxY"
colnames(dmax)[colnames(dmax) == "MAX(NO2)"] <- "MAXNO2Y"
colnames(dmax)[colnames(dmax) == "MIN(RH)"] <- "MINRHY"
colnames(dmax)[colnames(dmax) == "AVG(WDR)"] <- "AVGWDRY"
colnames(dmax)[colnames(dmax) == "AVG(WSP)"] <- "AVGWSP"
colnames(dmax)
save(dmax, file = "dmax.RData")
}