Load required packages

# Attach the plotting and data-manipulation packages used below.
# library() stops with an error if a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
# dplyr is attached last, as in the original order.
library(ggplot2)
library(gplots)
library(dplyr)

Load data into rovData and drop the blank values

# Read the roving-monitor CSV (strings kept as character) and drop every row
# that contains an NA in any column.
rovData <- na.omit(
  read.csv(file = "roving17_JanMar_2014.csv", stringsAsFactors = FALSE)
)


Convert the sampletime column to a number by replacing the colon : with a decimal .

# Replace every ":" with "." (literal match) and parse the result as a number,
# e.g. "7:00" becomes 7. Values that still are not numeric become NA.
rovData$sampletime <- as.numeric(
  gsub(":", ".", rovData$sampletime, fixed = TRUE)
)

Tell R that the Date column holds dates in the format %m/%d/%Y

# Parse the Date column as Date objects using month/day/four-digit-year.
rovData$Date <- as.Date(
  x = rovData$Date,
  format = "%m/%d/%Y"
)

Add months as a column month, and turn into a factor to order correctly

# Build the month factor from the numeric month of each date rather than from
# months(), whose names follow the session locale: in a non-English locale the
# English levels below would match nothing and every value would become NA.
# month.name is base R's constant vector of English month names.
# Dates outside January-March (or NA dates) become NA, as before.
rovData$month <- factor(
  month.name[as.integer(format(rovData$Date, "%m"))],
  levels = c("January", "February", "March")
)

Add the day of the week as a column week_day, and turn into a factor to order correctly

# Build the weekday factor from the numeric day of week rather than from
# weekdays(), whose names follow the session locale: in a non-English locale
# the English levels below would match nothing and every value would become NA.
# as.POSIXlt()$wday is 0 for Sunday through 6 for Saturday, hence the "+ 1"
# when indexing the name vector. Levels run Monday..Sunday for plot ordering.
rovData$week_day <- factor(
  c("Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday")[as.POSIXlt(rovData$Date)$wday + 1],
  levels = c("Monday", "Tuesday", "Wednesday", "Thursday",
             "Friday", "Saturday", "Sunday")
)

Create hourly boxplots averaged across all days (log scale)

# Box plot of log10(Result), one box per distinct sampletime value.
# The values are log10-transformed; the y-axis label text is kept as-is.
boxplot(
  log10(Result) ~ sampletime,
  data = rovData,
  col  = "powderblue",
  main = "PM Concentration by Hour \n Center line marks the median",
  sub  = "Midnight as zero",
  xlab = "Hour of Day",
  ylab = "PM Concentration"
)

plot of chunk unnamed-chunk-7

#boxplot(rovData$Result ~ rovData$sampletime, col="powderblue")

Save chart as image

# Copy the current plot to a 2000 x 1000 BMP device writing bars_hour.bmp,
# then close that device.
dev.copy(device = bmp, filename = "bars_hour.bmp", width = 2000, height = 1000)
dev.off()

Show the means of the hourly concentrations with 95% CI

# Plot the mean of Result for each sampletime value with gplots::plotmeans.
# n.label = FALSE suppresses the per-group sample-size labels; FALSE is spelled
# out because the shorthand F is an ordinary variable that can be reassigned.
plotmeans(
  Result ~ sampletime,
  data    = rovData,
  n.label = FALSE,
  main    = "Mean PM Concentration by Hour \n Blue whiskers show 95% CI",
  xlab    = "Hour of Day",
  ylab    = "PM Concentration"
)

plot of chunk unnamed-chunk-9

Save chart as image

# Copy the current plot to a 2000 x 1000 BMP device writing means_hour.bmp,
# then close that device.
dev.copy(device = bmp, filename = "means_hour.bmp", width = 2000, height = 1000)
dev.off()

Create weekday boxplots for each day (log scale)

# Box plot of log10(Result), one box per level of the week_day factor.
# The values are log10-transformed; the y-axis label text is kept as-is.
boxplot(
  log10(Result) ~ week_day,
  data = rovData,
  col  = "green",
  main = "PM Concentration by Weekday \n Center line marks the median",
  xlab = "Weekday",
  ylab = "PM Concentration"
)

plot of chunk unnamed-chunk-11

#boxplot(rovData$Result ~ rovData$week_day, col="green")

Save chart as image

# Copy the current plot to a 2000 x 1000 BMP device writing bars_day.bmp,
# then close that device.
dev.copy(device = bmp, filename = "bars_day.bmp", width = 2000, height = 1000)
dev.off()

Show the means of the weekday concentrations with 95% CI

# Plot the mean of Result for each week_day level with gplots::plotmeans.
# n.label = FALSE suppresses the per-group sample-size labels; FALSE is spelled
# out because the shorthand F is an ordinary variable that can be reassigned.
plotmeans(
  Result ~ week_day,
  data    = rovData,
  n.label = FALSE,
  main    = "Mean PM Concentration by Weekday \n Blue whiskers show 95% CI",
  xlab    = "Weekday",
  ylab    = "PM Concentration"
)

plot of chunk unnamed-chunk-13

Save chart as image

# Copy the current plot to a 2000 x 1000 BMP device writing means_day.bmp,
# then close that device.
dev.copy(device = bmp, filename = "means_day.bmp", width = 2000, height = 1000)
dev.off()

Plot by hour of day for each weekday (using ggplot)

# Summarize data to get the mean Result for each hour of each weekday.
# `%>%` replaces the `%.%` chain operator, which has been removed from dplyr.
rovData_summary <- rovData %>%
  group_by(week_day, sampletime) %>%
  summarize(meanR = mean(Result))

# One facet per weekday (7 columns); within a facet the hourly means are
# joined by a line. ungroup() drops the grouping left over from summarize().
ggplot(ungroup(rovData_summary),
       aes(factor(sampletime), meanR, group = week_day)) +
  geom_line() +
  geom_point(fill = "powderblue") +
  facet_wrap(~ week_day, ncol = 7) +
  labs(x = "Hour of Day", y = "Mean PM Concentration",
       title = "PM Concentration by Hour of Day")

plot of chunk unnamed-chunk-15

Save chart as image

# Copy the current plot to a 2000 x 1000 BMP device writing
# roving_hour&day.bmp, then close that device.
dev.copy(device = bmp, filename = "roving_hour&day.bmp", width = 2000, height = 1000)
dev.off()

Plot by hour of day for each month (using ggplot)

# Summarize data to get the mean Result for each hour of each month.
# `%>%` replaces the `%.%` chain operator, which has been removed from dplyr.
rovData_summary_month <- rovData %>%
  group_by(month, sampletime) %>%
  summarize(meanR = mean(Result))

# Order rows by calendar month (January, February, March), then by hour.
rovData_summary_month <- arrange(
  rovData_summary_month,
  factor(month, levels = c("January", "February", "March")),
  sampletime
)

# One facet per month; within a facet the hourly means are joined by a line.
ggplot(rovData_summary_month,
       aes(factor(sampletime), meanR, group = month)) +
  geom_line() +
  geom_point(fill = "powderblue") +
  facet_wrap(~ month, ncol = 7) +
  labs(x = "Hour of Day", y = "Mean PM Concentration",
       title = "Months \n PM Concentration by Hour of Day")

plot of chunk unnamed-chunk-17

Save chart as image

# Copy the current plot to a 2000 x 1000 BMP device writing
# roving_hour&month.bmp, then close that device.
dev.copy(device = bmp, filename = "roving_hour&month.bmp", width = 2000, height = 1000)
dev.off()

Test for difference between weekdays using t-test

# Mean of Monday vs. Sunday is significantly different.
# Two-sample t-test on Result for rows with week_day "Monday" against rows with
# week_day "Sunday"; the recorded output below labels it a Welch test.
# NOTE(review): filter() is presumably dplyr::filter (dplyr is loaded last and
# masks stats::filter) - confirm if the package load order changes.
t.test(filter(rovData, week_day=="Monday")$Result, filter(rovData, week_day=="Sunday")$Result)
## 
##  Welch Two Sample t-test
## 
## data:  filter(rovData, week_day == "Monday")$Result and filter(rovData, week_day == "Sunday")$Result
## t = 3.22, df = 583, p-value = 0.001353
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.8087 3.3379
## sample estimates:
## mean of x mean of y 
##     12.76     10.69
# Mean of Friday vs. Sunday is significantly different.
# Two-sample t-test on Result for rows with week_day "Friday" against rows with
# week_day "Sunday"; the recorded output below labels it a Welch test.
t.test(filter(rovData, week_day=="Friday")$Result, filter(rovData, week_day=="Sunday")$Result)
## 
##  Welch Two Sample t-test
## 
## data:  filter(rovData, week_day == "Friday")$Result and filter(rovData, week_day == "Sunday")$Result
## t = 4.298, df = 557.5, p-value = 2.03e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.715 4.602
## sample estimates:
## mean of x mean of y 
##     13.85     10.69

Mean of Tuesday at 7 AM vs. Tuesday at 11 AM is NOT significantly different (only 11 measurements)

# Two-sample t-test on Result for Tuesday rows with sampletime equal to 7
# against Tuesday rows with sampletime equal to 11. sampletime is the numeric
# value produced by replacing ":" with "." earlier in the script, so the
# equality test selects only rows whose converted time is exactly 7 or 11.
t.test(filter(rovData, week_day=="Tuesday", sampletime==7)$Result, filter(rovData, week_day=="Tuesday", sampletime==11)$Result)
## 
##  Welch Two Sample t-test
## 
## data:  filter(rovData, week_day == "Tuesday", sampletime == 7)$Result and filter(rovData, week_day == "Tuesday", sampletime == 11)$Result
## t = 1.907, df = 16.64, p-value = 0.07394
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7279 14.1824
## sample estimates:
## mean of x mean of y 
##     17.27     10.55