Load required packages
require(ggplot2)
require(gplots)
require(dplyr)
Load data into rovData and drop every row that contains a missing (NA) value
# Read the roving-monitor CSV (strings stay character, not factor) and keep
# only the rows that have no NA in any column.
rovData <- na.omit(
  read.csv("roving17_JanMar_2014.csv", stringsAsFactors = FALSE)
)
Convert the sampletime column to a number by replacing the colon : with a decimal .
# Turn the clock-style text into a number by swapping ":" for "." and parsing.
# Note: the minutes end up as decimal digits (e.g. "7:30" -> 7.3), not as a
# fraction of an hour.
rovData$sampletime <- as.numeric(gsub(":", ".", rovData$sampletime))
Tell R that the Date column is a date in the format %m/%d/%Y
# Parse the month/day/four-digit-year text into Date objects.
rovData[["Date"]] <- as.Date(rovData[["Date"]], format = "%m/%d/%Y")
Add months as a column month, and turn into a factor to order correctly
# Build the month factor from the numeric month (POSIXlt `mon` is 0-11)
# instead of months(): months() returns names in the session locale, so on a
# non-English system none of them would match the English levels and the whole
# column would become NA. Dates outside January-March still map to NA, exactly
# as before.
rovData$month <- factor(
  as.POSIXlt(rovData$Date)$mon + 1L,
  levels = 1:3,
  labels = c("January", "February", "March")
)
Add the day of the week as a column week_day, and turn into a factor to order correctly
# Build the weekday factor from the numeric day of week instead of weekdays():
# weekdays() returns names in the session locale, so on a non-English system
# none of them would match the English levels and the column would become NA.
# POSIXlt `wday` is 0 (Sunday) .. 6 (Saturday); the arithmetic below shifts it
# to 1 (Monday) .. 7 (Sunday) so the factor is ordered Monday-first.
rovData$week_day <- factor(
  (as.POSIXlt(rovData$Date)$wday + 6L) %% 7L + 1L,
  levels = 1:7,
  labels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
)
Create hourly boxplots averaged across all days (log scale)
# One box per sampletime value; the plotted quantity is log10(Result).
# Linear-scale variant, left disabled:
# boxplot(rovData$Result ~ rovData$sampletime, col="powderblue")
boxplot(
  log10(Result) ~ sampletime,
  data = rovData,
  col = "powderblue",
  main = "PM Concentration by Hour \n Center line marks the median",
  sub = "Midnight as zero",
  xlab = "Hour of Day",
  ylab = "PM Concentration"
)
Save chart as image
# Copy the current plot onto a 2000 x 1000 BMP device, then close that device
# so the file is written.
dev.copy(device = bmp, filename = "bars_hour.bmp", width = 2000, height = 1000)
dev.off()
Show the means of the hourly concentrations with 95% CI
# Mean Result per sampletime value with confidence-interval bars
# (gplots::plotmeans); the per-group sample-size labels are switched off.
plotmeans(
  rovData$Result ~ rovData$sampletime,
  n.label = FALSE,
  main = "Mean PM Concentration by Hour \n Blue whiskers show 95% CI",
  xlab = "Hour of Day",
  ylab = "PM Concentration"
)
Save chart as image
# Copy the current plot onto a 2000 x 1000 BMP device, then close that device
# so the file is written.
dev.copy(device = bmp, filename = "means_hour.bmp", width = 2000, height = 1000)
dev.off()
Create weekday boxplots for each day (log scale)
# One box per weekday; the plotted quantity is log10(Result).
# Linear-scale variant, left disabled:
# boxplot(rovData$Result ~ rovData$week_day, col="green")
boxplot(
  log10(Result) ~ week_day,
  data = rovData,
  col = "green",
  main = "PM Concentration by Weekday \n Center line marks the median",
  xlab = "Weekday",
  ylab = "PM Concentration"
)
Save chart as image
# Copy the current plot onto a 2000 x 1000 BMP device, then close that device
# so the file is written.
dev.copy(device = bmp, filename = "bars_day.bmp", width = 2000, height = 1000)
dev.off()
Show the means of the weekday concentrations with 95% CI
# Mean Result per weekday with confidence-interval bars (gplots::plotmeans);
# the per-group sample-size labels are switched off.
plotmeans(
  rovData$Result ~ rovData$week_day,
  n.label = FALSE,
  main = "Mean PM Concentration by Weekday \n Blue whiskers show 95% CI",
  xlab = "Weekday",
  ylab = "PM Concentration"
)
Save chart as image
# Copy the current plot onto a 2000 x 1000 BMP device, then close that device
# so the file is written.
dev.copy(device = bmp, filename = "means_day.bmp", width = 2000, height = 1000)
dev.off()
Plot by hour of day for each weekday (using ggplot)
# Mean Result for every weekday / hour combination.
# `%>%` is used for chaining: the older `%.%` operator was deprecated by dplyr
# and later removed, so it is not available in current releases.
rovData_summary <- rovData %>%
  group_by(week_day, sampletime) %>%
  summarize(meanR = mean(Result))

# One panel per weekday (all seven in a single row), hour of day on the x axis.
# The summary is ungrouped first so leftover dplyr grouping does not reach
# ggplot.
ggplot(ungroup(rovData_summary), aes(factor(sampletime), meanR, group = week_day)) +
  geom_line() +
  geom_point(fill = "powderblue") +
  facet_wrap(~ week_day, ncol = 7) +
  labs(x = "Hour of Day", y = "Mean PM Concentration", title = "PM Concentration by Hour of Day")
Save chart as image
# Copy the current plot onto a 2000 x 1000 BMP device, then close that device
# so the file is written.
dev.copy(device = bmp, filename = "roving_hour&day.bmp", width = 2000, height = 1000)
dev.off()
Plot by hour of day for each month (using ggplot)
# Mean Result for every month / hour combination.
# `%>%` is used for chaining: the older `%.%` operator was deprecated by dplyr
# and later removed, so it is not available in current releases.
rovData_summary_month <- rovData %>%
  group_by(month, sampletime) %>%
  summarize(meanR = mean(Result))

# Sort rows in calendar order of month, then by hour.
rovData_summary_month <- arrange(
  rovData_summary_month,
  factor(month, levels = c("January", "February", "March")),
  sampletime
)

# One panel per month, hour of day on the x axis.
ggplot(rovData_summary_month, aes(factor(sampletime), meanR, group = month)) +
  geom_line() +
  geom_point(fill = "powderblue") +
  facet_wrap(~ month, ncol = 7) +
  labs(x = "Hour of Day", y = "Mean PM Concentration", title = "Months \n PM Concentration by Hour of Day")
Save chart as image
# Copy the current plot onto a 2000 x 1000 BMP device, then close that device
# so the file is written.
dev.copy(device = bmp, filename = "roving_hour&month.bmp", width = 2000, height = 1000)
dev.off()
Test for difference between weekdays using t-test
# Two-sample t-test of Result: all Monday rows against all Sunday rows.
# The recorded output below shows a significant difference in means.
t.test(
  filter(rovData, week_day == "Monday")$Result,
  filter(rovData, week_day == "Sunday")$Result
)
##
## Welch Two Sample t-test
##
## data: filter(rovData, week_day == "Monday")$Result and filter(rovData, week_day == "Sunday")$Result
## t = 3.22, df = 583, p-value = 0.001353
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.8087 3.3379
## sample estimates:
## mean of x mean of y
## 12.76 10.69
# Two-sample t-test of Result: all Friday rows against all Sunday rows.
# The recorded output below shows a significant difference in means.
t.test(
  filter(rovData, week_day == "Friday")$Result,
  filter(rovData, week_day == "Sunday")$Result
)
##
## Welch Two Sample t-test
##
## data: filter(rovData, week_day == "Friday")$Result and filter(rovData, week_day == "Sunday")$Result
## t = 4.298, df = 557.5, p-value = 2.03e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.715 4.602
## sample estimates:
## mean of x mean of y
## 13.85 10.69
Mean of Tuesday at 7 AM vs. Tuesday at 11 AM is NOT significantly different (only 11 measurements)
# Two-sample t-test of Result within Tuesday: rows with sampletime 7 against
# rows with sampletime 11.
t.test(
  filter(rovData, week_day == "Tuesday", sampletime == 7)$Result,
  filter(rovData, week_day == "Tuesday", sampletime == 11)$Result
)
##
## Welch Two Sample t-test
##
## data: filter(rovData, week_day == "Tuesday", sampletime == 7)$Result and filter(rovData, week_day == "Tuesday", sampletime == 11)$Result
## t = 1.907, df = 16.64, p-value = 0.07394
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7279 14.1824
## sample estimates:
## mean of x mean of y
## 17.27 10.55