For this analysis, I’ll load FEM data from the Millbrook School site in Raleigh (Wake County), North Carolina. The address is 3801 Spring Forest Rd. The site number is 14 (AQS site ID 37-183-0014), and the coordinates are 35.86, -78.57.
The packages used throughout are ggplot2, plyr, dplyr, ggpubr, and xts (which also attaches zoo), plus flipTime, which is installed from a GitHub remote via devtools.
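The library() calls themselves are not echoed in the output above, so here is a minimal setup sketch; the GitHub location Displayr/flipTime is an assumption on my part, and flipTime supplies the AsDateTime() parser used below.
# Load the packages used in this analysis
library(ggplot2)
library(plyr)
library(dplyr)
library(ggpubr)
library(xts)        # also attaches zoo
# flipTime provides AsDateTime(); assumed GitHub location:
# devtools::install_github("Displayr/flipTime")
library(flipTime)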
# Check for the hourly PM2.5 (parameter 88101) data and download it from EPA AQS if it isn't already present
if (!file.exists("hourly_88101_2018.csv")) {
  download.file("https://aqs.epa.gov/aqsweb/airdata/hourly_88101_2018.zip", destfile = "FEMdata.zip")
  unzip("FEMdata.zip")
  file.remove("FEMdata.zip")
}
# Read in the data, which will take quite a while
hourlyFEM <- read.csv("hourly_88101_2018.csv")
WakeFEM <- filter(hourlyFEM, County.Name == "Wake")
# In this analysis, use the FEM monitor near Millbrook School (site number 14)
MillbrookFEM <- filter(WakeFEM, Site.Num == 14)
MillbrookFEM <- mutate(MillbrookFEM, daytime = AsDateTime(paste(Date.GMT, Time.GMT, sep = " ")))
MillbrookFEMsmall <- filter(MillbrookFEM, daytime > AsDateTime("2018-10-15 00:00:00 UTC"), daytime < AsDateTime("2018-10-22 00:00:01 UTC"))
MillbrookFEMbig <- filter(MillbrookFEM, daytime > AsDateTime("2018-09-29 00:00:00 UTC"), daytime < AsDateTime("2018-11-01 00:00:00 UTC"))
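As a quick sanity check (not part of the original script), the POCs reporting at site 14 can be tabulated before they are split out later:
# Count hourly records by POC (monitor ID) at the Millbrook site
table(MillbrookFEM$POC)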
There is no single link to paste here for downloading the PA data. For this analysis I downloaded the MECME AQ2 PurpleAir sensor data, primary and secondary feeds for both the A and B channels, from 9/28/18 to 12/31/2018. This sensor is located in Millbrook, on East Millbrook Rd between Memory Rd and Old Wake Forest Rd, coordinates 35.85, -78.61. It’s the closest PA sensor to the FEM site. For the script to work, the PA data must be in the working directory with the names given below.
PA.A <- read.csv("MECME AQ2 (35.848679 -78.610198) Primary 09_20_2018 12_31_2018.csv")
PA.B <- read.csv("MECME AQ2 B (35.848679 -78.610198) Primary 09_20_2018 12_31_2018.csv")
PA.A <- mutate(PA.A, daytime = AsDateTime(PA.A$created_at))
PA.B <- mutate(PA.B, daytime = AsDateTime(PA.B$created_at))
PA.Asmall <- filter(PA.A, daytime > AsDateTime("2018-10-15 00:00:00 UTC"), daytime < AsDateTime("2018-10-22 00:00:00 UTC"))
PA.Bsmall <- filter(PA.B, daytime > AsDateTime("2018-10-15 00:00:00 UTC"), daytime < AsDateTime("2018-10-22 00:00:00 UTC"))
PA.Abig <- filter(PA.A, daytime > AsDateTime("2018-09-29 00:00:00 UTC"), daytime < AsDateTime("2018-11-01 00:00:00 UTC"))
PA.Bbig <- filter(PA.B, daytime > AsDateTime("2018-09-29 00:00:00 UTC"), daytime < AsDateTime("2018-11-01 00:00:00 UTC"))
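The FEM data are stamped in GMT and the PurpleAir created_at times should also be UTC, so the two series ought to line up without a timezone shift. A quick range check (again, a sketch rather than part of the original script) confirms the overlap:
# Confirm the PA and FEM series both cover the comparison windows
range(PA.A$daytime)
range(MillbrookFEM$daytime)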
The plots are below.
#First make a simple time plot of the entire timeframe for the FEM and PA channels
ggplot() +
  geom_line(data = MillbrookFEMbig, aes(x = daytime, y = Sample.Measurement, color = "FEM"), alpha = 1/4) +
  geom_line(data = PA.Abig, aes(x = daytime, y = PM2.5_CF_ATM_ug.m3, color = "PA A"), alpha = 1/4) +
  geom_line(data = PA.Bbig, aes(x = daytime, y = PM2.5_CF_ATM_ug.m3, color = "PA B")) +
  theme_bw() +
  coord_cartesian(ylim = c(-1, 100)) +
  labs(title = "PM2.5 Over Time", x = "Day/Time", y = "PM2.5 Concentration (ug/m3)")
#Next look at correlation between the two PA channels by merging into one dataframe
PA.Amerge <- PA.A %>% select(daytime, entry_id, PM2.5_CF_ATM_ug.m3) %>% rename(daytimeA = daytime, pm2.5A = PM2.5_CF_ATM_ug.m3)
PA.Bmerge <- PA.B %>% select(daytime, entry_id, PM2.5_CF_ATM_ug.m3) %>% rename(daytimeB = daytime, pm2.5B = PM2.5_CF_ATM_ug.m3)
mergePAbig <- merge(PA.Amerge, PA.Bmerge, by = "entry_id")
mergePAsmall <- filter(mergePAbig, daytimeA > AsDateTime("2018-10-15 00:00:00 UTC"), daytimeA < AsDateTime("2018-10-22 00:00:00 UTC"))
FullR2 <- round(cor(mergePAbig$pm2.5A, mergePAbig$pm2.5B)^2, 2)
SmallR2 <- round(cor(mergePAsmall$pm2.5A, mergePAsmall$pm2.5B)^2, 2)
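FullR2 and SmallR2 are computed but not printed in the chunk above; echoing them makes the channel-to-channel agreement explicit:
# A-vs-B channel R^2 over the full record and over the one-week subset
FullR2
SmallR2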
#Plot the two PA channels over entire date range - note the huge spread and outliers
ggplot() +
  geom_point(data = mergePAbig, aes(x = pm2.5A, y = pm2.5B), alpha = 0.5) +
  coord_cartesian(xlim = c(-1, 100), ylim = c(-1, 100)) +
  theme_bw()
#Plot the PA channels over just one week, much smaller spread
ggplot(data = mergePAsmall, aes(x = pm2.5A, y = pm2.5B)) +
  geom_point(colour = "black", alpha = 0.2) +
  geom_smooth(method = "lm", colour = "red") +
  coord_cartesian(xlim = c(-1, 40), ylim = c(-1, 40)) +
  theme_bw()
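geom_smooth(method = "lm") draws the fitted line but does not report its coefficients; the same fit can be run explicitly (a small sketch; weekPAfit is just an illustrative name):
# Fit the one-week A-vs-B regression that geom_smooth displays
weekPAfit <- lm(pm2.5B ~ pm2.5A, data = mergePAsmall)
coef(weekPAfit)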
Here I’ll find the hourly averages for the PA sensor and compare them to the FEM measurements. I’ll also compare the FEM and PA hourly data to themselves, since each has two POCs or channels.
#First compare PA correlation between channels
PA.Ahourly <- xts(PA.A$PM2.5_CF_ATM_ug.m3, PA.A$daytime) #make xts objects over entire time series
PA.Bhourly <- xts(PA.B$PM2.5_CF_ATM_ug.m3, PA.B$daytime)
PA.Ahourlyavg <- period.apply(PA.Ahourly, endpoints(PA.Ahourly, "hours"), mean)
PA.Bhourlyavg <- period.apply(PA.Bhourly, endpoints(PA.Bhourly, "hours"), mean)
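# Note: period.apply() with hourly endpoints keeps the timestamp of the last raw record
# in each hour. If exact hour marks are wanted (an optional refinement, not used in the
# original run), xts::align.time() can round the index up to the next hour boundary:
PA.Ahourlyaligned <- align.time(PA.Ahourlyavg, n = 3600)
PA.Bhourlyaligned <- align.time(PA.Bhourlyavg, n = 3600)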
PAcomp <- lm(PA.Bhourlyavg ~ PA.Ahourlyavg)
summary(PAcomp)
##
## Call:
## lm(formula = PA.Bhourlyavg ~ PA.Ahourlyavg)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -22.0865  -0.3154   0.0399   0.3826  17.7167
##
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -0.731077   0.026385  -27.71   <2e-16 ***
## PA.Ahourlyavg  1.019658   0.001417  719.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9778 on 2169 degrees of freedom
## Multiple R-squared:  0.9958, Adjusted R-squared:  0.9958
## F-statistic: 5.178e+05 on 1 and 2169 DF,  p-value: < 2.2e-16
ggplot() +
  geom_point(aes(x = as.numeric(PA.Ahourlyavg), y = as.numeric(PA.Bhourlyavg)), alpha = 0.5) +
  geom_abline(slope = PAcomp$coefficients[2], intercept = PAcomp$coefficients[1], color = "red") +
  theme_bw() +
  labs(x = "PA A hourly average (ug/m3)", y = "PA B hourly average (ug/m3)")
#Next look at correlation between the two FEM POCs
FEM3 <- MillbrookFEM %>% filter(POC == 3) %>% select(Sample.Measurement, daytime) %>% rename(POC3PM2.5 = Sample.Measurement)
FEM5 <- MillbrookFEM %>% filter(POC == 5) %>% select(Sample.Measurement, daytime) %>% rename(POC5PM2.5 = Sample.Measurement)
MergeFEM <- merge(FEM3, FEM5, by = "daytime")
FEMcomp <- lm(MergeFEM$POC5PM2.5 ~ MergeFEM$POC3PM2.5)
summary(FEMcomp)
##
## Call:
## lm(formula = MergeFEM$POC5PM2.5 ~ MergeFEM$POC3PM2.5)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -10.0568  -1.5587  -0.1054   1.4922  21.6154
##
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)        2.423183   0.091831   26.39   <2e-16 ***
## MergeFEM$POC3PM2.5 0.764091   0.009054   84.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.506 on 4500 degrees of freedom
## Multiple R-squared:  0.6128, Adjusted R-squared:  0.6127
## F-statistic:  7123 on 1 and 4500 DF,  p-value: < 2.2e-16
ggplot() +
  geom_point(data = MergeFEM, aes(x = POC3PM2.5, y = POC5PM2.5), alpha = 0.5) +
  geom_abline(slope = FEMcomp$coefficients[2], intercept = FEMcomp$coefficients[1], color = "red") +
  theme_bw()
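For a number directly comparable to the PA channel R² values above, the squared correlation between the two collocated FEM POCs can be computed the same way (a sketch mirroring the FullR2 calculation; FEMR2 is just an illustrative name):
# R^2 between the two collocated FEM POCs over the matched hours
FEMR2 <- round(cor(MergeFEM$POC3PM2.5, MergeFEM$POC5PM2.5)^2, 2)
FEMR2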
#Compare the PA data to the two FEM POCs, only looking at week-long data
#For these comparisons, only the week-long data are used (some PA data is missing)
#Chose PA channel A as the comparison to the FEM, but it shouldn't matter which channel (the two correlate very closely)
PA.AcomparetoFEM <- PA.Ahourlyavg["2018-10-16/2018-10-23"]
PA.BcomparetoFEM <- PA.Bhourlyavg["2018-10-16/2018-10-23"]
FEM3week <- filter(FEM3, daytime > AsDateTime("2018-10-16 00:00:00 UTC"), daytime < AsDateTime("2018-10-24 00:00:01 UTC"))
FEM5week <- filter(FEM5, daytime > AsDateTime("2018-10-16 00:00:00 UTC"), daytime < AsDateTime("2018-10-24 00:00:01 UTC"))
FEM3PA.Acomp <- lm(PA.AcomparetoFEM ~ FEM3week$POC3PM2.5)
FEM5PA.Acomp <- lm(PA.AcomparetoFEM ~ FEM5week$POC5PM2.5)
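To pull the headline numbers (intercept, slope, R²) out of these two fits, a small helper of my own (not part of the original analysis) works on any lm object:
# Hypothetical helper: summarize a fitted comparison as intercept, slope, and R^2
fit_stats <- function(fit) {
  c(intercept = unname(coef(fit)[1]),
    slope     = unname(coef(fit)[2]),
    r.squared = summary(fit)$r.squared)
}
fit_stats(FEM3PA.Acomp)
fit_stats(FEM5PA.Acomp)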
ggplot() +
  geom_point(aes(x = FEM3week$POC3PM2.5, y = as.numeric(PA.AcomparetoFEM)), alpha = 0.5) +
  geom_abline(slope = FEM3PA.Acomp$coefficients[2], intercept = FEM3PA.Acomp$coefficients[1], color = "red") +
  theme_bw() +
  labs(x = "FEM POC 3 PM2.5 (ug/m3)", y = "PA A hourly average (ug/m3)")
ggplot() +
  geom_point(aes(x = FEM5week$POC5PM2.5, y = as.numeric(PA.AcomparetoFEM)), alpha = 0.5) +
  geom_abline(slope = FEM5PA.Acomp$coefficients[2], intercept = FEM5PA.Acomp$coefficients[1], color = "red") +
  theme_bw() +
  labs(x = "FEM POC 5 PM2.5 (ug/m3)", y = "PA A hourly average (ug/m3)")
Questions and observations I have: