The raw data was grouped together by Fips, Date and Parameter Name to show the average Arithmetic.Mean for each Parameter per county for each day
df <- read.csv("daily_SPEC_2018.csv")
df1 <- df %>%
select(State.Code, County.Code, Parameter.Name, Arithmetic.Mean,
Longitude, Latitude, Date.Local, State.Name, County.Name, CBSA.Name,
POC, Event.Type) %>%
mutate(State.Code = sprintf("%02d", State.Code),
County.Code = sprintf("%03d", County.Code),
Fips.Code = paste0(State.Code, County.Code))
ch <- df1 %>%
group_by(Fips.Code, Date.Local, Parameter.Name) %>%
reframe(mean(Arithmetic.Mean)) %>%
arrange(Fips.Code, Date.Local)
The new data was created as instructed by selecting relevant columns and changing the long data to wide data for each Parameter Name.
df2 <- df1 %>%
group_by(Date.Local, Fips.Code, Parameter.Name) %>%
reframe(value = mean(Arithmetic.Mean)) %>%
pivot_wider(names_from = Parameter.Name, values_from = value) %>%
mutate_all(~ ifelse(is.na(.), "NA", as.character(.)))
df3 <- df1 %>%
group_by(Date.Local, Fips.Code, Parameter.Name) %>%
reframe(Dup = n_distinct(POC)) %>%
mutate(Duplicates = ifelse(Dup > 1, "Yes", "No")) %>%
select(-Dup) %>%
filter(Duplicates == "Yes") %>%
distinct(Date.Local, Fips.Code, Duplicates)
df4 <- df2 %>%
merge(df3, by = c("Date.Local", "Fips.Code"), all = TRUE) %>%
mutate(Duplicates = ifelse(is.na(Duplicates), "No", Duplicates))
df5 <- df1 %>%
select(State.Code, County.Code, Fips.Code, Date.Local, Event.Type,
State.Name, County.Name, CBSA.Name) %>%
distinct(.)
df6 <- df5[duplicated(df5[, c("Date.Local", "Fips.Code")]) |
duplicated(df5[, c("Date.Local", "Fips.Code")], fromLast = TRUE), ] %>%
filter(Event.Type == "Included")
df7 <- df5 %>%
filter(!(Fips.Code %in% df6$Fips.Code)) %>%
rbind(df6)
df8 <- df5 %>%
merge(df4, by = c("Date.Local", "Fips.Code")) %>%
arrange(Fips.Code, Date.Local)
colnames(df8)
## [1] "Date.Local"
## [2] "Fips.Code"
## [3] "State.Code"
## [4] "County.Code"
## [5] "Event.Type"
## [6] "State.Name"
## [7] "County.Name"
## [8] "CBSA.Name"
## [9] "Ambient Max Temperature"
## [10] "Ambient Min Temperature"
## [11] "Average Ambient Pressure"
## [12] "Average Ambient Temperature"
## [13] "Sample Flow Rate- CV"
## [14] "Sample Max Baro Pressure"
## [15] "Sample Min Baro Pressure"
## [16] "Sample Volume"
## [17] "Black Carbon PM2.5 at 880 nm"
## [18] "UV Carbon PM2.5 at 370 nm"
## [19] "Sulfate PM2.5 LC"
## [20] "Total Nitrate PM2.5 LC"
## [21] "Aluminum PM2.5 LC"
## [22] "Ammonium Ion PM2.5 LC"
## [23] "Antimony PM2.5 LC"
## [24] "Arsenic PM2.5 LC"
## [25] "Average Ambient Pressure for URG3000N"
## [26] "Average Ambient Temperature for URG3000N"
## [27] "Barium PM2.5 LC"
## [28] "Bromine PM2.5 LC"
## [29] "Cadmium PM2.5 LC"
## [30] "Calcium PM2.5 LC"
## [31] "Cerium PM2.5 LC"
## [32] "Cesium PM2.5 LC"
## [33] "Chloride PM2.5 LC"
## [34] "Chlorine PM2.5 LC"
## [35] "Chromium PM2.5 LC"
## [36] "Cobalt PM2.5 LC"
## [37] "Copper PM2.5 LC"
## [38] "EC CSN_Rev Unadjusted PM2.5 LC TOR"
## [39] "EC CSN_Rev Unadjusted PM2.5 LC TOT"
## [40] "EC PM2.5 LC TOR"
## [41] "EC PM2.5 LC TOT"
## [42] "EC1 CSN_Rev Unadjusted PM2.5 LC"
## [43] "EC1 PM2.5 LC"
## [44] "EC2 CSN_Rev Unadjusted PM2.5 LC"
## [45] "EC2 PM2.5 LC"
## [46] "EC3 CSN_Rev Unadjusted PM2.5 LC"
## [47] "EC3 PM2.5 LC"
## [48] "Indium PM2.5 LC"
## [49] "Iron PM2.5 LC"
## [50] "Lead PM2.5 LC"
## [51] "Magnesium PM2.5 LC"
## [52] "Manganese PM2.5 LC"
## [53] "Nickel PM2.5 LC"
## [54] "OC CSN_Rev Unadjusted PM2.5 LC TOR"
## [55] "OC CSN_Rev Unadjusted PM2.5 LC TOT"
## [56] "OC PM2.5 LC TOR"
## [57] "OC PM2.5 LC TOT"
## [58] "OC1 CSN_Rev Unadjusted PM2.5 LC"
## [59] "OC1 PM2.5 LC"
## [60] "OC2 CSN_Rev Unadjusted PM2.5 LC"
## [61] "OC2 PM2.5 LC"
## [62] "OC3 CSN_Rev Unadjusted PM2.5 LC"
## [63] "OC3 PM2.5 LC"
## [64] "OC4 CSN_Rev Unadjusted PM2.5 LC"
## [65] "OC4 PM2.5 LC"
## [66] "OP CSN_Rev Unadjusted PM2.5 LC TOR"
## [67] "OP CSN_Rev Unadjusted PM2.5 LC TOT"
## [68] "OP PM2.5 LC TOR"
## [69] "OP PM2.5 LC TOT"
## [70] "Phosphorus PM2.5 LC"
## [71] "Potassium Ion PM2.5 LC"
## [72] "Potassium PM2.5 LC"
## [73] "Reconstructed Mass PM2.5 LC"
## [74] "Rubidium PM2.5 LC"
## [75] "Sample Flow Rate CV - Nylon Filter"
## [76] "Sample Flow Rate CV - Quartz Filter"
## [77] "Sample Flow Rate CV - Teflon Filter"
## [78] "Sample Volume - Nylon Filter"
## [79] "Sample Volume - Quartz Filter"
## [80] "Sample Volume - Teflon Filter"
## [81] "Selenium PM2.5 LC"
## [82] "Silicon PM2.5 LC"
## [83] "Silver PM2.5 LC"
## [84] "Sodium Ion Pm2.5 LC"
## [85] "Sodium PM2.5 LC"
## [86] "Soil PM2.5 LC"
## [87] "Strontium PM2.5 LC"
## [88] "Sulfur PM2.5 LC"
## [89] "Tin PM2.5 LC"
## [90] "Titanium PM2.5 LC"
## [91] "Vanadium PM2.5 LC"
## [92] "Zinc PM2.5 LC"
## [93] "Zirconium PM2.5 LC"
## [94] "Mercury PM2.5 LC"
## [95] "Molybdenum PM2.5 LC"
## [96] "Yttrium PM2.5 LC"
## [97] "Uranium PM2.5 LC"
## [98] "EC CSN PM2.5 LC TOT"
## [99] "OC CSN Unadjusted PM2.5 LC TOT"
## [100] "Optical EC PM2.5 LC TOT"
## [101] "Total Carbon PM2.5 LC TOT"
## [102] "Europium PM2.5 LC"
## [103] "Gallium PM2.5 LC"
## [104] "Gold PM2.5 LC"
## [105] "Hafnium PM2.5 LC"
## [106] "Iridium PM2.5 LC"
## [107] "Lanthanum PM2.5 LC"
## [108] "Niobium PM2.5 LC"
## [109] "Non-volatile Nitrate PM2.5 LC"
## [110] "Samarium PM2.5 LC"
## [111] "Scandium PM2.5 LC"
## [112] "Tantalum PM2.5 LC"
## [113] "Terbium PM2.5 LC"
## [114] "Tungsten PM2.5 LC"
## [115] "Duplicates"
A Few Parameter Name were selected to compare the raw and newly created data for Validation.
## NEWLY CREATED DATA
## Sulfate PM2.5 LC
ch1 <- ch %>%
filter(Fips.Code == "01073") %>%
filter(Parameter.Name == "Sulfate PM2.5 LC")
head(ch1, 10)
## # A tibble: 10 × 4
## Fips.Code Date.Local Parameter.Name `mean(Arithmetic.Mean)`
## <chr> <chr> <chr> <dbl>
## 1 01073 2018-01-02 Sulfate PM2.5 LC 0.921
## 2 01073 2018-01-05 Sulfate PM2.5 LC 0.945
## 3 01073 2018-01-08 Sulfate PM2.5 LC 0.732
## 4 01073 2018-01-11 Sulfate PM2.5 LC 0.923
## 5 01073 2018-01-14 Sulfate PM2.5 LC 0.794
## 6 01073 2018-01-20 Sulfate PM2.5 LC 0.791
## 7 01073 2018-01-23 Sulfate PM2.5 LC 0.285
## 8 01073 2018-01-26 Sulfate PM2.5 LC 0.83
## 9 01073 2018-01-29 Sulfate PM2.5 LC 1.02
## 10 01073 2018-02-01 Sulfate PM2.5 LC 0.938
## Total Nitrate PM2.5 LC
ch2 <- ch %>%
filter(Fips.Code == "01073") %>%
filter(Parameter.Name == "Total Nitrate PM2.5 LC")
head(ch2, 10)
## # A tibble: 10 × 4
## Fips.Code Date.Local Parameter.Name `mean(Arithmetic.Mean)`
## <chr> <chr> <chr> <dbl>
## 1 01073 2018-01-02 Total Nitrate PM2.5 LC 2.10
## 2 01073 2018-01-05 Total Nitrate PM2.5 LC 2.38
## 3 01073 2018-01-08 Total Nitrate PM2.5 LC 0.916
## 4 01073 2018-01-11 Total Nitrate PM2.5 LC 0.163
## 5 01073 2018-01-14 Total Nitrate PM2.5 LC 1.36
## 6 01073 2018-01-20 Total Nitrate PM2.5 LC 0.272
## 7 01073 2018-01-23 Total Nitrate PM2.5 LC 0.104
## 8 01073 2018-01-26 Total Nitrate PM2.5 LC 0.722
## 9 01073 2018-01-29 Total Nitrate PM2.5 LC 1.11
## 10 01073 2018-02-01 Total Nitrate PM2.5 LC 0.346
## Average Ambient Pressure
ch3 <- ch %>%
filter(Fips.Code == "01073") %>%
filter(Parameter.Name == "Average Ambient Pressure")
head(ch3, 10)
## # A tibble: 10 × 4
## Fips.Code Date.Local Parameter.Name `mean(Arithmetic.Mean)`
## <chr> <chr> <chr> <dbl>
## 1 01073 2018-01-02 Average Ambient Pressure 756.
## 2 01073 2018-01-05 Average Ambient Pressure 754.
## 3 01073 2018-01-08 Average Ambient Pressure 747.
## 4 01073 2018-01-11 Average Ambient Pressure 742.
## 5 01073 2018-01-14 Average Ambient Pressure 756.
## 6 01073 2018-01-17 Average Ambient Pressure 758.
## 7 01073 2018-01-20 Average Ambient Pressure 749.
## 8 01073 2018-01-23 Average Ambient Pressure 746.
## 9 01073 2018-01-26 Average Ambient Pressure 755.
## 10 01073 2018-01-29 Average Ambient Pressure 749.
## Average Ambient Temperature
ch4 <- ch %>%
filter(Fips.Code == "01073") %>%
filter(Parameter.Name == "Average Ambient Temperature")
head(ch4, 10)
## # A tibble: 10 × 4
## Fips.Code Date.Local Parameter.Name `mean(Arithmetic.Mean)`
## <chr> <chr> <chr> <dbl>
## 1 01073 2018-01-02 Average Ambient Temperature -4.41
## 2 01073 2018-01-05 Average Ambient Temperature -1.52
## 3 01073 2018-01-08 Average Ambient Temperature 6.71
## 4 01073 2018-01-11 Average Ambient Temperature 19.2
## 5 01073 2018-01-14 Average Ambient Temperature -1.25
## 6 01073 2018-01-17 Average Ambient Temperature -6.75
## 7 01073 2018-01-20 Average Ambient Temperature 11.2
## 8 01073 2018-01-23 Average Ambient Temperature 8.98
## 9 01073 2018-01-26 Average Ambient Temperature 10.2
## 10 01073 2018-01-29 Average Ambient Temperature 10.4
## RAW DATA
ch5 <- df8 %>%
filter(Fips.Code == "01073") %>%
select(Date.Local, Fips.Code, 'Sulfate PM2.5 LC',
'Total Nitrate PM2.5 LC', 'Average Ambient Pressure', 'Average Ambient Temperature')
head(ch5, 10)
## Date.Local Fips.Code Sulfate PM2.5 LC Total Nitrate PM2.5 LC
## 1 2018-01-02 01073 0.921333333333333 2.09733333333333
## 2 2018-01-05 01073 0.945333333333333 2.38366666666667
## 3 2018-01-08 01073 0.732 0.916333333333333
## 4 2018-01-11 01073 0.923333333333333 0.162666666666667
## 5 2018-01-14 01073 0.794 1.36233333333333
## 6 2018-01-17 01073 NA NA
## 7 2018-01-20 01073 0.791 0.272333333333333
## 8 2018-01-23 01073 0.285 0.104
## 9 2018-01-26 01073 0.83 0.721666666666667
## 10 2018-01-29 01073 1.01966666666667 1.11333333333333
## Average Ambient Pressure Average Ambient Temperature
## 1 756.181818181818 -4.40909090909091
## 2 753.5 -1.525
## 3 747.090909090909 6.70909090909091
## 4 741.75 19.2
## 5 756.090909090909 -1.25454545454545
## 6 758.5 -6.75
## 7 749.111111111111 11.2333333333333
## 8 745.75 8.975
## 9 754.909090909091 10.2
## 10 749.2 10.38