# Bootstrap: make sure pacman is installed, then let it install (if needed)
# and attach the packages this document uses.
# requireNamespace() only checks availability; pacman is always called through
# `pacman::`, so it does not need to be attached to the search path.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(data.table, zoo, dygraphs)

# Default knitr chunk options: figure size 9 x 5, with warnings, messages and
# printed results suppressed in the rendered output.
# NOTE(review): knitr documents fig.keep as a string such as "high" or "all";
# the logical value is kept from the original -- confirm the intended setting.
knitr::opts_chunk$set(
  fig.width = 9, fig.height = 5,
  warning = FALSE, message = FALSE, results = "hide", fig.keep = TRUE
)
Latest date: 2017/05/31
# Read the three yearly hourly-PM2.5 CSV exports and stack them into a single
# table. skip = 3 tells fread() to start parsing after the first three lines
# of each file (presumably a preamble above the header -- confirm against the
# files).
quality <- rbind(
  fread("Beijing_2015_HourlyPM25_created20160201.csv", skip = 3),
  fread("Beijing_2016_HourlyPM25_created20170201.csv", skip = 3),
  # The file name contains a plain space; "\ " is not a valid escape sequence
  # in an R string and stops the script from parsing.
  fread("Beijing_2017_HourlyPM25_created20170705 (3).csv", skip = 3)
)
There are 3 duplicate entries:
# Count the rows whose "Date (LST)" timestamp already appeared earlier in the
# table. The column name is written with a plain space: "\ " is not a valid
# escape sequence in an R string.
sum(duplicated(quality[["Date (LST)"]]))
Keep only the latest entry for each duplicated timestamp:
# De-duplicate on the "Date (LST)" timestamp; fromLast = TRUE keeps the last
# occurrence of each timestamp rather than the first. The column name uses a
# plain space ("\ " is not a valid escape sequence in an R string).
quality <- quality[!duplicated(quality[["Date (LST)"]], fromLast = TRUE)]

# Blank out every reading whose QC flag is anything other than "Valid" ...
quality[get("QC Name") != "Valid", Value := NA]

# ... then fill each gap with the last preceding reading. na.rm = FALSE keeps
# the vector length unchanged, so NAs before the first valid reading remain.
quality[, Value := na.locf(Value, na.rm = FALSE)]

# Derive the calendar date of each reading. The time zone is given to
# strptime(), where the string is actually parsed, so the result does not
# depend on the time zone of the machine running the script.
quality[, date := as.Date(
  strptime(get("Date (LST)"), "%m/%d/%Y %H:%M", tz = "Asia/Shanghai")
)]
# Summarise the hourly readings per calendar day: median, mean, and the
# 10th / 90th percentiles (used as the lower / upper band in the chart).
# na.rm = TRUE: readings before the first valid observation are still NA after
# the forward fill, and quantile() stops with an error on NA input otherwise.
# as.numeric() keeps the median column double in every group (median() of an
# integer vector can return either integer or double).
daily <- quality[
  ,
  .(
    median = as.numeric(median(Value, na.rm = TRUE)),
    avg = mean(Value, na.rm = TRUE),
    upper = quantile(Value, probs = 0.90, na.rm = TRUE),
    lower = quantile(Value, probs = 0.10, na.rm = TRUE)
  ),
  by = date
]
# Interactive time-series chart of the daily summary.
dygraph(
  daily[, .(date, lower, avg, upper)],
  main = "Beijing Daily PM2.5 Concentration"
) %>%
  dyAxis("x", drawGrid = FALSE) %>%
  dyAxis("y", drawGrid = FALSE) %>%
  # A three-column series is drawn as a line (avg) with a shaded band between
  # the lower and upper columns.
  dySeries(c("lower", "avg", "upper"), label = "PM2.5") %>%
  # Horizontal reference line at 35 (label spelling corrected: "Guideline").
  dyLimit(35, "US EPA 24-hr Guideline", labelLoc = "right", color = "green") %>%
  # dyLimit(25, "WHO 24-hr Guideline", labelLoc = "right", color = "grey") %>%
  dyOptions(colors = RColorBrewer::brewer.pal(3, "Set1")) %>%
  # Initial zoom window: from 2017-01-01 to the last day in the data.
  dyRangeSelector(dateWindow = c(as.Date("2017-01-01"), max(daily$date))) %>%
  dyLegend(width = 200, show = "follow")