# Load the 2016 readings; the path is relative to the working directory.
data2016 <- read.csv("../data/KC2016_04.csv")
head(data2016)

# Data type conversion and pulling out date parts into new columns.
# Timestamps are parsed as UTC so the wall-clock values are kept verbatim and
# none can fall into a local daylight-saving gap. Values that do not match
# the format become NA.
data2016$date <- as.POSIXct(
  as.character(data2016$date),
  format = "%d/%m/%Y %H:%M",
  tz = "UTC"
)

# Month as "<number>_<name>"; the numeric prefix makes labels sort in
# calendar order rather than alphabetically.
data2016$m <- paste0(
  format(data2016$date, format = "%m"), "_",
  format(data2016$date, format = "%B")
)

# Weekday as "<ISO number>_<name>" (1 = Monday), for the same reason.
data2016$d <- paste0(
  format(data2016$date, format = "%u"), "_",
  format(data2016$date, format = "%A")
)

# Time of day as "HH:MM".
data2016$t <- format(data2016$date, format = "%R")

# Go through character first: calling as.numeric() directly on a factor
# returns the internal level codes instead of the recorded values.
# Non-numeric entries become NA (with a coercion warning).
data2016$FSP <- as.numeric(as.character(data2016$FSP))

head(data2016[, 8:13])
## 2016 Pollution By Month
# Box plot of FSP per month, one box (and fill colour) per month label.
# Rows whose timestamp is NA are left out so they do not show up as a
# spurious "NA_NA" month. ggplot2 is referenced by namespace so the plot
# works without relying on the package having been attached beforehand.
p <- ggplot2::ggplot(
  data2016[!is.na(data2016$date), ],
  ggplot2::aes(m, FSP, fill = m)
) +
  ggplot2::geom_boxplot()
# Explicit print() so the plot also renders when the file is source()d.
print(p)

## Prepare Training and Test Sets
# Remove bad rows: drop every row whose timestamp is NA.
data2016 <- data2016[!is.na(data2016$date), ]

# Create dummy vars: one indicator column per month, weekday and time of
# day. The "- 1" removes the intercept so every level gets its own column.
dummy_vars <- cbind(
  model.matrix(~ m - 1, data2016),
  model.matrix(~ d - 1, data2016),
  model.matrix(~ t - 1, data2016)
)
data2016 <- cbind(data2016, dummy_vars)

# Modelling frame: the response plus all indicator columns. Columns are
# selected by name so the result does not depend on how many months,
# weekdays or times happen to be present in the data.
data2016mdl <- data2016[, c("FSP", colnames(dummy_vars)), drop = FALSE]
# Strip the ":00" minutes suffix from the time-of-day column names.
names(data2016mdl) <- gsub(":00", "", names(data2016mdl), fixed = TRUE)

# Split test and train: 90% of the rows for training, the rest for testing.
# The seed is fixed so the split (and everything downstream) is
# reproducible; change or remove it to draw a different split.
set.seed(2016)
n_rows <- nrow(data2016mdl)
train_idx <- sample.int(
  n = n_rows,
  size = floor(0.90 * n_rows),
  replace = FALSE
)
data2016.train <- data2016mdl[train_idx, ]
# setdiff() instead of negative indexing: a negative index built from an
# empty vector would select no rows at all.
data2016.test <- data2016mdl[setdiff(seq_len(n_rows), train_idx), ]

head(data2016.train)
## Build Decision Tree Model and Predict
# Fit a decision tree predicting FSP from every other column of the training
# frame. minsplit/minbucket of 1 let the tree split down to single
# observations; growth is limited only by the complexity parameter cp.
# rpart is referenced by namespace so the fit works without relying on the
# package having been attached beforehand.
dt.mdl <- rpart::rpart(
  FSP ~ .,
  data = data2016.train,
  control = rpart::rpart.control(minsplit = 1, minbucket = 1, cp = 0.0015)
)

# plot() on an rpart fit fails when the tree is only a root node, so draw
# the tree only when at least one split was made.
if (nrow(dt.mdl$frame) > 1) {
  plot(dt.mdl)
  text(dt.mdl, cex = 0.5)
} else {
  message("Decision tree has no splits; nothing to plot.")
}

# Store the predicted FSP for the held-out rows next to the actual values.
data2016.test$predFSP <- predict(dt.mdl, newdata = data2016.test)
## Prediction Accuracy (Delta Actual FSP & Predicted FSP)
# Histogram of the absolute prediction error on the test set, coloured from
# green (small error) to dark red (large error).
bins <- 20
cols <- c("green", "orange", "red", "darkred")
colGradient <- colorRampPalette(cols)
cut.cols <- colGradient(bins)

# Absolute difference between actual and predicted FSP; rows where either
# value is missing cannot be placed on the histogram and are left out.
abs_err <- abs(data2016.test$FSP - data2016.test$predFSP)
abs_err <- abs_err[is.finite(abs_err)]
stopifnot(length(abs_err) > 0)

# Bin the errors themselves (not the raw FSP values) so that bin levels and
# fill colours describe the same quantity.
cuts <- cut(abs_err, bins)
err_df <- data.frame(abs_err = abs_err, err_bin = cuts)

# drop = FALSE keeps empty bins in the fill scale, so the i-th colour always
# belongs to the i-th error bin instead of sliding down to the next
# non-empty one.
p <- ggplot2::ggplot(err_df, ggplot2::aes(abs_err, fill = err_bin)) +
  ggplot2::geom_histogram(binwidth = 1, show.legend = FALSE) +
  ggplot2::scale_fill_manual(values = cut.cols, drop = FALSE) +
  ggplot2::labs(x = "abs(FSP - predFSP)")
# Explicit print() so the plot also renders when the file is source()d.
print(p)

LS0tCnRpdGxlOiAiSEsgUG9sbHV0aW9uIERhdGEgRXhwbG9yYXRpb24iCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgoKYGBge3J9CmRhdGEyMDE2IDwtIHJlYWQuY3N2KCIuLi9kYXRhL0tDMjAxNl8wNC5jc3YiKQpoZWFkKGRhdGEyMDE2KQpgYGAKCgpgYGB7cn0KI0RhdGEgdHlwZSBjb252ZXJzaW9uIGFuZCBwdWxsaW5nIG91dCBkYXRlIHBhcnRzIGludG8gbmV3IGNvbHVtbnMKZGF0YTIwMTYkZGF0ZSA8LSBhcy5QT1NJWGN0KGFzLmNoYXJhY3RlcihkYXRhMjAxNiRkYXRlICksIGZvcm1hdCA9ICIlZC8lbS8lWSAlSDolTSIpIApkYXRhMjAxNiRtIDwtIHBhc3RlKGZvcm1hdChkYXRhMjAxNiRkYXRlLCBmb3JtYXQ9IiVtIiksIl8iLGZvcm1hdChkYXRhMjAxNiRkYXRlLCBmb3JtYXQ9IiVCIiksIHNlcD0iIikKZGF0YTIwMTYkZCA8LSBwYXN0ZShmb3JtYXQoZGF0YTIwMTYkZGF0ZSwgZm9ybWF0PSIldSIpLCJfIixmb3JtYXQoZGF0YTIwMTYkZGF0ZSwgZm9ybWF0PSIlQSIpLCBzZXA9IiIpIApkYXRhMjAxNiR0IDwtIGZvcm1hdChkYXRhMjAxNiRkYXRlLCBmb3JtYXQ9IiVSIikKCmRhdGEyMDE2JEZTUCA8LSBhcy5udW1lcmljKGRhdGEyMDE2JEZTUCkKCmhlYWQoZGF0YTIwMTZbLDg6MTNdKQpgYGAKCiMjIDIwMTYgUG9sbHV0aW9uIEJ5IE1vbnRoCgoKYGBge3J9CnAgPC0gZ2dwbG90KGRhdGEyMDE2LCBhZXMobSwgRlNQLCBmaWxsPW0pKSArIGdlb21fYm94cGxvdCgpIApwCmBgYAoKIyMgUHJlcGFyZSBUcmFpbmluZyBhbmQgVGVzdCBTZXRzCmBgYHtyfQojIHJlbW92ZSBiYWQgcm93cyBhbmQgdW51c2VkIGNvbHMKZGF0YTIwMTYgPC0gZGF0YTIwMTZbIWlzLm5hKGRhdGEyMDE2JGRhdGUpLF0KCiMgQ3JlYXRlIGR1bW15IHZhcnMKZGF0YTIwMTYgPC0gY2JpbmQoZGF0YTIwMTYsIAogICAgICAgICAgICAgICAgICBtb2RlbC5tYXRyaXgofiBtIC0xLCBkYXRhMjAxNiksIAogICAgICAgICAgICAgICAgICBtb2RlbC5tYXRyaXgofiBkIC0xLCBkYXRhMjAxNiksCiAgICAgICAgICAgICAgICAgIG1vZGVsLm1hdHJpeCh+IHQgLTEsIGRhdGEyMDE2KSkKCmRhdGEyMDE2bWRsIDwtIGRhdGEyMDE2WyxjKDEwLDE0OjU2KV0KbmFtZXMoZGF0YTIwMTZtZGwpIDwtIGdzdWIoIjowMCIsIiIsbmFtZXMoZGF0YTIwMTZtZGwpKQoKIyBTcGxpdCB0ZXN0IGFuZCB0cmFpbgpzYW1wbGUgPC0gc2FtcGxlLmludChuID0gbnJvdyhkYXRhMjAxNm1kbCksIHNpemUgPSBmbG9vciguOTAgKiBucm93KGRhdGEyMDE2bWRsKSksIHJlcGxhY2UgPSBGKQpkYXRhMjAxNi50cmFpbiA8LSBkYXRhMjAxNm1kbFtzYW1wbGUsIF0KZGF0YTIwMTYudGVzdCAgPC0gZGF0YTIwMTZtZGxbLXNhbXBsZSwgXQoKaGVhZChkYXRhMjAxNi50cmFpbikKYGBgCgojIyBCdWlsZCBEZWNpc2lvbiBUcmVlIE1vZGVsIGFuZCBQcmVkaWN0CmBgYHtyfQpkdC5tZGwgPC0gcnBhcnQoRlNQIH4gLixkYXRhPWRhdGEyMDE2LnRyYWluLGNvbnRyb2w9cnBhcnQuY29udHJvbChtaW5zcGxp
dD0xLCBtaW5idWNrZXQ9MSwgY3A9MC4wMDE1KSkKcGxvdChkdC5tZGwpCnRleHQoZHQubWRsLGNleD0wLjUpCmRhdGEyMDE2LnRlc3QkcHJlZEZTUDwtcHJlZGljdChkdC5tZGwsZGF0YTIwMTYudGVzdCkgCmBgYAoKIyMgUHJlZGljdGlvbiBBY2N1cmFjeSAoRGVsdGEgQWN0dWFsIEZTUCAmIFByZWRpY3RlZCBGU1ApCmBgYHtyfQpiaW5zIDwtIDIwCmNvbHMgPC0gYygiZ3JlZW4iLCJvcmFuZ2UiLCAicmVkIiwgImRhcmtyZWQiKQpjb2xHcmFkaWVudCA8LSBjb2xvclJhbXBQYWxldHRlKGNvbHMpCmN1dC5jb2xzIDwtIGNvbEdyYWRpZW50KGJpbnMpCmN1dHMgPC0gY3V0KGRhdGEyMDE2LnRlc3QkRlNQLGJpbnMpCm5hbWVzKGN1dHMpIDwtIHNhcHBseShjdXRzLGZ1bmN0aW9uKHQpIGN1dC5jb2xzW3doaWNoKGFzLmNoYXJhY3Rlcih0KSA9PSBsZXZlbHMoY3V0cykpXSkKCnAgPC0gZ2dwbG90KGRhdGEyMDE2LnRlc3QsIGFlcyhhYnMoRlNQLXByZWRGU1ApLGZpbGw9Y3V0KGFicyhGU1AtcHJlZEZTUCksYmlucykpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoPTEsc2hvdy5sZWdlbmQgPSBGQUxTRSkgKyAKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWN1dC5jb2xzLGxhYmVscz1sZXZlbHMoY3V0cykpICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXM9Y3V0LmNvbHMsbGFiZWxzPWxldmVscyhjdXRzKSkgCnAKCmBgYAoKCg==