library(ggplot2)
library(reshape2)
# Load ECan data (stored locally)
ecan <- read.csv('RangioraWinter2016.csv',stringsAsFactors=FALSE)
names(ecan) <- c('date','time','wind.speed','wind.dir','wind.dir.std','wind.speed.std',
'wind.max','co','temp.ground','temp.2m','temp.6m','pm10',
'pm2.5','pm.course')
ecan$date <- as.POSIXct(ecan$date,format = '%m/%d/%Y %H:%M', tz='Etc/GMT-12')
ecan$datel <- as.POSIXlt(ecan$date)
ecan$time <- NULL # this is included in date
months <- 6:9
month.names <- c('July','August','September','October')
densities <- data.frame(month=NA,y=NA,x=NA)
no.points <- data.frame(month=NA,init=NA,fin=NA)
for (i in 1:length(months)) {
month <- months[i]
month.name <- month.names[i]
month.data <- ecan[ecan$datel$mon==month & !is.na(ecan$pm2.5),'pm2.5']
no.points[i,'init'] <- length(month.data)
hist(month.data,
xlim=c(-10,160),
ylim=c(0,0.16),
breaks=seq(-20,160,3),
main=substitute(PM[2.5] ~ 'in' ~ mon, list(mon=month.name)),
xlab=expression(PM[2.5] ~ (mu*'g'~m^-3)),
prob=TRUE
)
d <- density(month.data)
densities <- rbind(densities,data.frame(month=rep(month.name,length(d$y)),y=d$y,x=d$x))
}




densities <- densities[2:nrow(densities),] # first row is na
densities[,'month'] <- factor(densities$month,levels=c("July","August","September","October"))
ggplot(densities) +
geom_line(aes(x=x,y=y,colour=month)) +
scale_colour_manual("Month",values=c(July="red",August="orange",September="green",October="blue")) +
xlab(expression(PM[2.5*',ECan'] ~ (mu*'g'~m^-3))) +
ylab("Density") +
theme(legend.position = c(0.9, 0.7))

# ggtitle(substitute(PM[2.5*',ECan'] ~ 'Density by Month'))
Ignore the following:
ecan.trunc <- ecan[!is.na(ecan$pm2.5),]
print(which(is.na(ecan.trunc$pm2.5)))
## integer(0)
min.pm2.5 <- 0
max.pm2.5 <- 10
ecan.trunc <- ecan.trunc[ecan.trunc$pm2.5 < max.pm2.5 & ecan.trunc$pm2.5 > min.pm2.5,]
months <- 6:9
month.names <- c('July','August','September','October')
densities <- data.frame(month=NA,y=NA,x=NA)
for (i in 1:length(months)) {
month <- months[i]
month.name <- month.names[i]
month.data <- ecan.trunc[ecan.trunc$datel$mon==month,'pm2.5']
no.points[i,'fin'] <- length(month.data)
hist(month.data,
xlim=c(0,10),
ylim=c(0,0.2),
breaks=seq(0,10,1),
main=substitute(PM[2.5] ~ 'in' ~ mon, list(mon=month.name)),
xlab=expression(PM[2.5] ~ (mu*'g'~m^-3)),
prob=TRUE
)
d <- density(month.data)
densities <- rbind(densities,data.frame(month=rep(month.name,length(d$y)),y=d$y,x=d$x))
}




ggplot(densities) +
geom_line(aes(x=x,y=y,colour=month)) +
scale_colour_manual(values=c(July="red",August="orange",September="green",October="blue")) +
xlab(expression(PM[2.5] ~ (mu*'g'~m^-3))) +
ylab("Density") +
ggtitle(substitute(PM[2.5] ~ 'Density by Month'))

# How much of each month has been cut?
init <- no.points[,'init']
fin <- no.points[,'fin']
no.points[,'cut'] <- (init-fin)/init
no.points
## month init fin cut
## 1 NA 4367 2066 0.5269063
## 2 NA 4444 2462 0.4459946
## 3 NA 4220 3528 0.1639810
## 4 NA 4433 3848 0.1319648