- I have made and still make lots of mistakes.
- Some of them are funny.
- Some, not so funny and very frustrating
December 8, 2015
pulls a dataframe from the database, processes it as above, and saves a csv
All of the processing steps are documented so I don't need to re-learn what I did in an Excel spreadsheet.
library(dplyr)
data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv")
names(data)
data_MilleLacs <- data[MPCA_Site_ID == "3051", ]
#> data_MilleLacs <- data[MPCA_Site_ID == "3051", ]
#Error in match(x, table, nomatch = 0L) : object 'MPCA_Site_ID' not found
data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv", stringsAsFactors = FALSE)
data_MilleLacs <- data[data$MPCA_Site_ID == "3051", ]
head(data_MilleLacs, n=3)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 1 north 3051 a3697-24-3 Mille Lacs winter ## 2 north 3051 a7496-02-8 Mille Lacs spring ## 3 north 3051 a5522-43-0 Mille Lacs winter ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date passive_active ## 1 PAS-060 Passive NA ng/m3 2013-12-05 passive ## 2 PAS-083 Passive NA ng/m3 2014-03-04 passive ## 3 PAS-060 Passive NA ng/m3 2013-12-05 passive ## Qstd_Volume Primary_Occurence_Code Year Flags_Text ## 1 NA 2 2013 farther from lake (north) ## 2 NA 2 2014 north: farther from lake ## 3 NA 2 2013 farther from lake (north) ## Study_Year MDL Analyte Qualifier Censored Season_StudyYear ## 1 First NaN 5-Methylchrysene , TRUE winter First ## 2 First NaN 6-Nitrochrysene , TRUE spring First ## 3 First NaN 1-Nitropyrene , TRUE winter First ## Site.Name Long.Site.Name Unified_Date ## 1 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01 ## 2 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2014-03-01 ## 3 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01
%in% not ==data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv", stringsAsFactors = FALSE)
data_year <- data[data$Year == c(2012, 2013, 2014), ]
## Warning in data$Year == c(2012, 2013, 2014): longer object length is not a ## multiple of shorter object length
head(data_year, n=3)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 18 north 3051 a189-64-0 Mille Lacs spring ## 20 north 3051 a5522-43-0 Mille Lacs summer ## 26 north 3051 a63466-71-7 Mille Lacs winter ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date ## 18 PAS-083 Passive NA ng/m3 2014-03-04 ## 20 PAS-014 Passive NA ng/m3 2013-06-06 ## 26 PAS-060 Passive 0.551703 ng/m3 2013-12-05 ## passive_active Qstd_Volume Primary_Occurence_Code Year ## 18 passive NA 2 2014 ## 20 passive NA 2 2013 ## 26 passive NA 2 2013 ## Flags_Text Study_Year MDL Analyte Qualifier ## 18 north: farther from lake First NaN Dibenzo[a,h]pyrene L3,L3 ## 20 farther from lake (North) First NaN 1-Nitropyrene , ## 26 farther from lake (north) First NA Benzo[a]pyrene-d12 , ## Censored Season_StudyYear Site.Name Long.Site.Name ## 18 TRUE spring First Mille Lacs Mille Lacs (MPCA ID 3051)_north ## 20 TRUE summer First Mille Lacs Mille Lacs (MPCA ID 3051)_north ## 26 FALSE winter First Mille Lacs Mille Lacs (MPCA ID 3051)_north ## Unified_Date ## 18 2014-03-01 ## 20 2013-06-01 ## 26 2013-12-01
%in%data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv", stringsAsFactors = FALSE)
data_year <- data[data$Year %in% c(2012, 2013, 2014), ]
head(data_year, n=3)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 1 north 3051 a3697-24-3 Mille Lacs winter ## 2 north 3051 a7496-02-8 Mille Lacs spring ## 3 north 3051 a5522-43-0 Mille Lacs winter ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date passive_active ## 1 PAS-060 Passive NA ng/m3 2013-12-05 passive ## 2 PAS-083 Passive NA ng/m3 2014-03-04 passive ## 3 PAS-060 Passive NA ng/m3 2013-12-05 passive ## Qstd_Volume Primary_Occurence_Code Year Flags_Text ## 1 NA 2 2013 farther from lake (north) ## 2 NA 2 2014 north: farther from lake ## 3 NA 2 2013 farther from lake (north) ## Study_Year MDL Analyte Qualifier Censored Season_StudyYear ## 1 First NaN 5-Methylchrysene , TRUE winter First ## 2 First NaN 6-Nitrochrysene , TRUE spring First ## 3 First NaN 1-Nitropyrene , TRUE winter First ## Site.Name Long.Site.Name Unified_Date ## 1 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01 ## 2 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2014-03-01 ## 3 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01
data_MilleLacs <- data[data$MPCA_Site_ID %in% "3051", ] head(data_Millelacs, n=3L) #Error in head(data_Millelacs, n = 3L) : object 'data_Millelacs' not found
data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv", stringsAsFactors = FALSE)
data_isnotNA <- data[is.na(data$Result), ]
head(data_isnotNA, n=3L)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 1 north 3051 a3697-24-3 Mille Lacs winter ## 2 north 3051 a7496-02-8 Mille Lacs spring ## 3 north 3051 a5522-43-0 Mille Lacs winter ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date passive_active ## 1 PAS-060 Passive NA ng/m3 2013-12-05 passive ## 2 PAS-083 Passive NA ng/m3 2014-03-04 passive ## 3 PAS-060 Passive NA ng/m3 2013-12-05 passive ## Qstd_Volume Primary_Occurence_Code Year Flags_Text ## 1 NA 2 2013 farther from lake (north) ## 2 NA 2 2014 north: farther from lake ## 3 NA 2 2013 farther from lake (north) ## Study_Year MDL Analyte Qualifier Censored Season_StudyYear ## 1 First NaN 5-Methylchrysene , TRUE winter First ## 2 First NaN 6-Nitrochrysene , TRUE spring First ## 3 First NaN 1-Nitropyrene , TRUE winter First ## Site.Name Long.Site.Name Unified_Date ## 1 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01 ## 2 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2014-03-01 ## 3 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01
data_isnotNA <- data[!is.na(data$Result), ] head(data_isnotNA, n=3)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 15 north 3051 a1016-05-3 Mille Lacs fall ## 19 north 3051 a129-00-0 Mille Lacs summer ## 22 north 3051 a91-20-3 Mille Lacs summer ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date ## 15 PAS-037 Passive 0.0116525 ng/m3 2013-09-05 ## 19 PAS-014 Passive 0.0898770 ng/m3 2013-06-06 ## 22 PAS-014 Passive 8.1820500 ng/m3 2013-06-06 ## passive_active Qstd_Volume Primary_Occurence_Code Year ## 15 passive NA 2 2013 ## 19 passive NA 2 2013 ## 22 passive NA 2 2013 ## Flags_Text Study_Year MDL ## 15 farther from lake (North) First 0.00871265 ## 19 farther from lake (North) First 0.02205980 ## 22 farther from lake (North) First 0.09124480 ## Analyte Qualifier Censored Season_StudyYear Site.Name ## 15 Dibenzothiophene sulfone J,J FALSE fall First Mille Lacs ## 19 Pyrene J,J FALSE summer First Mille Lacs ## 22 Naphthalene , FALSE summer First Mille Lacs ## Long.Site.Name Unified_Date ## 15 Mille Lacs (MPCA ID 3051)_north 2013-09-01 ## 19 Mille Lacs (MPCA ID 3051)_north 2013-06-01 ## 22 Mille Lacs (MPCA ID 3051)_north 2013-06-01
data_north <- subset(data, location_if_collocated="north") nrow(data)
## [1] 21364
nrow(data_north)
## [1] 21364
data_north <- subset(data, location_if_collocated=="north") nrow(data)
## [1] 21364
nrow(data_north)
## [1] 4655
data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv")
class(data$Season_StudyYear)
## [1] "factor"
data[data$location_if_collocated == "north", "location_if_collocated"] <- "northeast"
## Warning in `[<-.factor`(`*tmp*`, iseq, value = c("northeast",
## "northeast", : invalid factor level, NA generated
head(data, n=3)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 1 <NA> 3051 a3697-24-3 Mille Lacs winter ## 2 <NA> 3051 a7496-02-8 Mille Lacs spring ## 3 <NA> 3051 a5522-43-0 Mille Lacs winter ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date passive_active ## 1 PAS-060 Passive NA ng/m3 2013-12-05 passive ## 2 PAS-083 Passive NA ng/m3 2014-03-04 passive ## 3 PAS-060 Passive NA ng/m3 2013-12-05 passive ## Qstd_Volume Primary_Occurence_Code Year Flags_Text ## 1 NA 2 2013 farther from lake (north) ## 2 NA 2 2014 north: farther from lake ## 3 NA 2 2013 farther from lake (north) ## Study_Year MDL Analyte Qualifier Censored Season_StudyYear ## 1 First NaN 5-Methylchrysene , TRUE winter First ## 2 First NaN 6-Nitrochrysene , TRUE spring First ## 3 First NaN 1-Nitropyrene , TRUE winter First ## Site.Name Long.Site.Name Unified_Date ## 1 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01 ## 2 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2014-03-01 ## 3 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01
data <- read.csv("X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Data Processing/allPAHconcentrations.csv", stringsAsFactors=FALSE)
data[data$location_if_collocated == "north", "location_if_collocated"] <- "northeast"
head(data, n=3)
## location_if_collocated MPCA_Site_ID CAS Study_Location Season ## 1 northeast 3051 a3697-24-3 Mille Lacs winter ## 2 northeast 3051 a7496-02-8 Mille Lacs spring ## 3 northeast 3051 a5522-43-0 Mille Lacs winter ## MDH_Lab_ID_Code Sampler_Type Result Units Start_Run_Date passive_active ## 1 PAS-060 Passive NA ng/m3 2013-12-05 passive ## 2 PAS-083 Passive NA ng/m3 2014-03-04 passive ## 3 PAS-060 Passive NA ng/m3 2013-12-05 passive ## Qstd_Volume Primary_Occurence_Code Year Flags_Text ## 1 NA 2 2013 farther from lake (north) ## 2 NA 2 2014 north: farther from lake ## 3 NA 2 2013 farther from lake (north) ## Study_Year MDL Analyte Qualifier Censored Season_StudyYear ## 1 First NaN 5-Methylchrysene , TRUE winter First ## 2 First NaN 6-Nitrochrysene , TRUE spring First ## 3 First NaN 1-Nitropyrene , TRUE winter First ## Site.Name Long.Site.Name Unified_Date ## 1 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01 ## 2 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2014-03-01 ## 3 Mille Lacs Mille Lacs (MPCA ID 3051)_north 2013-12-01
-If you open a pdf device, and then run a plot function by parameter, A pdf document is created with each parameter on a separate page.
closest<-read.csv("M:/KME Files/Model Monitor/2008 - Update_112014/Statistics/Data Tables and Processing/Measurements MNRiskS Results Closest Receptors.csv",header=TRUE, stringsAsFactors=F)
closest=na.omit(closest)
pdf("Single Pollutant Modeled vs. Observed Plots_Closest Receptors.pdf")
splitspolls = function(x) {
closest= subset(closest, substance==x)
plot(closest$obs, closest$mod, type="p", xlab="observed ug/m3", ylab="modeled ug/m3", main = x)
abline(0,1)
}
lapply(levels(as.factor(closest$substance)), function(x) splitspolls(x))
dev.off()
closest<-read.csv("M:/KME Files/Model Monitor/2008 - Update_112014/Statistics/Data Tables and Processing/Measurements MNRiskS Results Closest Receptors.csv",header=TRUE, stringsAsFactors=F)
closest=na.omit(closest)
splitspolls = function(x) {
pdf("Single Pollutant Modeled vs. Observed Plots_Closest Receptors.pdf")
closest= subset(closest, substance==x)
plot(closest$obs, closest$mod, type="p", xlab="observed ug/m3", ylab="modeled ug/m3", main = x)
abline(0,1)
}
lapply(levels(as.factor(closest$substance)), function(x) splitspolls(x))
dev.off()
-If you do open MANY graphic devices, remember this command.
graphics.off()
library(RODBC)
library(openair)
db="X:/Programs/Air_Quality_Programs/Air Focus Areas/EPA Community Scale Monitoring of PAHs Project/Data Analysis/Model Monitor Comparison/Statistics/MNRiskS Measured Data Processing 2008.accdb"
bigdb=odbcConnectAccess2007(db)
buffers=sqlFetch(bigdb, "Monitor and Model Data for 1km Buffers", stringsAsFactors = FALSE)
buffers=data.frame(buffers)
Sampler_Types <- unique(buffers$Sampler_Type)
summaries <- unique(buffers$summary)
for(i in Sampler_Types) {
for(j in summaries) {
##ScatterPlot All Data
tiff(paste(i, j, "scatterplotloglog.tiff"), width=10, height=7, units="in", pointsize=12, res=300, type=c("cairo"))
scatterPlot(filter(buffers, Sampler_Type == i, summary == j), x="obs", y="mod", method="scatter", group="substance", data.thresh=0,
type="default", mod.line=TRUE, linear=TRUE, log.x=TRUE, log.y=TRUE, xlab = paste(i, "_", "observed"), ylab="average modeled",
main=paste(i, "Measured PAHs and ", j, " Modeled PAHs within a 1km Buffer"))
dev.off()
}
}
odbcClose(conn) # close the connection to the file
check.names=FALSE100,000library(utils)
alldata <- read.table("alldata.csv", dec=",", stringsAsFactors=F)
##OR YOU CAN gsub()
alldata <- read.csv("alldata.csv", stringsAsFactors=F)
alldata$numbers <- as.numeric(gsub(",", "", alldata$numbers)
alldata <- unique(alldata) alldata=as.data.frame(alldata, stringsAsFactors=FALSE) names(alldata) <- str_replace_all(str_trim(names(alldata)), " ", "_") alldata <- group_by(alldata, MDH_Lab_ID_Code, Sampler_Type, MPCA_Site_ID, Result, Units, CAS, Start_Run_Date, Study_Location, passive_active, Qstd_Volume, Primary_Occurence_Code, location_if_collocated, Year, Season, Flags_Text, Study_Year, MDL, Analyte) %>% summarise(Qualifier=paste(Qualifiers, collapse=",")) alldata$passive_active <- as.character(alldata$passive_active) passives<- filter(alldata, passive_active %in% "passive") passivecalcs<- group_by(passives, CAS, Study_Location, Season) %>% summarise(passive_cal_mean=mean(Result, na.rm=T)) gasactives<- filter(alldata, Sampler_Type %in% "PAHs Air, HiVol XAD") gasactives<- group_by(gasactives, CAS, Study_Location, Season) %>% summarise(gasactive_cal_mean=mean(Result, na.rm=T)) allconcentrations <- mutate(allconcentrations, Unified_Date = ifelse(Sampler_Type %in% "Passive", FirstoftheMonth, Start_Run_Date)) allconcentrations$FirstoftheMonth <- NULL allconcentrations$Unified_Date <- as.POSIXct(strptime(allconcentrations$Unified_Date, format="%Y-%m-%d"), tz="GMT") allconcentrations$Start_Run_Date <- as.POSIXct(strptime(allconcentrations$Start_Run_Date, format="%Y-%m-%d"), tz="GMT")
Sys.info()