Aggregating the raw data

The raw data was grouped together by Fips, Date and Parameter Name to show the average Arithmetic.Mean for each Parameter per county for each day

df <- read.csv("daily_SPEC_2018.csv")

df1 <- df %>%
    select(State.Code, County.Code, Parameter.Name, Arithmetic.Mean,
    Longitude, Latitude, Date.Local, State.Name, County.Name, CBSA.Name,
    POC, Event.Type) %>%
    mutate(State.Code = sprintf("%02d", State.Code),
      County.Code = sprintf("%03d", County.Code),
      Fips.Code = paste0(State.Code, County.Code))

ch <- df1 %>%
  group_by(Fips.Code, Date.Local, Parameter.Name) %>%
  reframe(mean(Arithmetic.Mean)) %>%
  arrange(Fips.Code, Date.Local)

Creating the New Data

The new data was created as instructed by selecting relevant columns and changing the long data to wide data for each Parameter Name.

df2 <- df1 %>%
  group_by(Date.Local, Fips.Code, Parameter.Name) %>%
  reframe(value = mean(Arithmetic.Mean)) %>%
  pivot_wider(names_from = Parameter.Name, values_from = value) %>%
  mutate_all(~ ifelse(is.na(.), "NA", as.character(.)))

df3 <- df1 %>%
  group_by(Date.Local, Fips.Code, Parameter.Name) %>%
  reframe(Dup = n_distinct(POC)) %>%
  mutate(Duplicates = ifelse(Dup > 1, "Yes", "No")) %>%
  select(-Dup) %>%
  filter(Duplicates == "Yes") %>%
  distinct(Date.Local, Fips.Code, Duplicates)

df4 <- df2 %>%
  merge(df3, by = c("Date.Local", "Fips.Code"), all = TRUE) %>%
  mutate(Duplicates = ifelse(is.na(Duplicates), "No", Duplicates))

df5 <- df1 %>%
   select(State.Code, County.Code, Fips.Code, Date.Local, Event.Type,
   State.Name, County.Name, CBSA.Name) %>%
  distinct(.)

df6 <- df5[duplicated(df5[, c("Date.Local", "Fips.Code")]) |
  duplicated(df5[, c("Date.Local", "Fips.Code")], fromLast = TRUE), ] %>%
  filter(Event.Type == "Included")

df7 <- df5 %>%
  filter(!(Fips.Code %in% df6$Fips.Code)) %>%
  rbind(df6)

df8 <- df5 %>%
  merge(df4, by = c("Date.Local", "Fips.Code")) %>%
  arrange(Fips.Code, Date.Local)

colnames(df8)
##   [1] "Date.Local"                              
##   [2] "Fips.Code"                               
##   [3] "State.Code"                              
##   [4] "County.Code"                             
##   [5] "Event.Type"                              
##   [6] "State.Name"                              
##   [7] "County.Name"                             
##   [8] "CBSA.Name"                               
##   [9] "Ambient Max Temperature"                 
##  [10] "Ambient Min Temperature"                 
##  [11] "Average Ambient Pressure"                
##  [12] "Average Ambient Temperature"             
##  [13] "Sample Flow Rate- CV"                    
##  [14] "Sample Max Baro Pressure"                
##  [15] "Sample Min Baro Pressure"                
##  [16] "Sample Volume"                           
##  [17] "Black Carbon PM2.5 at 880 nm"            
##  [18] "UV Carbon PM2.5 at 370 nm"               
##  [19] "Sulfate PM2.5 LC"                        
##  [20] "Total Nitrate PM2.5 LC"                  
##  [21] "Aluminum PM2.5 LC"                       
##  [22] "Ammonium Ion PM2.5 LC"                   
##  [23] "Antimony PM2.5 LC"                       
##  [24] "Arsenic PM2.5 LC"                        
##  [25] "Average Ambient Pressure for URG3000N"   
##  [26] "Average Ambient Temperature for URG3000N"
##  [27] "Barium PM2.5 LC"                         
##  [28] "Bromine PM2.5 LC"                        
##  [29] "Cadmium PM2.5 LC"                        
##  [30] "Calcium PM2.5 LC"                        
##  [31] "Cerium PM2.5 LC"                         
##  [32] "Cesium PM2.5 LC"                         
##  [33] "Chloride PM2.5 LC"                       
##  [34] "Chlorine PM2.5 LC"                       
##  [35] "Chromium PM2.5 LC"                       
##  [36] "Cobalt PM2.5 LC"                         
##  [37] "Copper PM2.5 LC"                         
##  [38] "EC CSN_Rev Unadjusted PM2.5 LC TOR"      
##  [39] "EC CSN_Rev Unadjusted PM2.5 LC TOT"      
##  [40] "EC PM2.5 LC TOR"                         
##  [41] "EC PM2.5 LC TOT"                         
##  [42] "EC1 CSN_Rev Unadjusted PM2.5 LC"         
##  [43] "EC1 PM2.5 LC"                            
##  [44] "EC2 CSN_Rev Unadjusted PM2.5 LC"         
##  [45] "EC2 PM2.5 LC"                            
##  [46] "EC3 CSN_Rev Unadjusted PM2.5 LC"         
##  [47] "EC3 PM2.5 LC"                            
##  [48] "Indium PM2.5 LC"                         
##  [49] "Iron PM2.5 LC"                           
##  [50] "Lead PM2.5 LC"                           
##  [51] "Magnesium PM2.5 LC"                      
##  [52] "Manganese PM2.5 LC"                      
##  [53] "Nickel PM2.5 LC"                         
##  [54] "OC CSN_Rev Unadjusted PM2.5 LC TOR"      
##  [55] "OC CSN_Rev Unadjusted PM2.5 LC TOT"      
##  [56] "OC PM2.5 LC TOR"                         
##  [57] "OC PM2.5 LC TOT"                         
##  [58] "OC1 CSN_Rev Unadjusted PM2.5 LC"         
##  [59] "OC1 PM2.5 LC"                            
##  [60] "OC2 CSN_Rev Unadjusted PM2.5 LC"         
##  [61] "OC2 PM2.5 LC"                            
##  [62] "OC3 CSN_Rev Unadjusted PM2.5 LC"         
##  [63] "OC3 PM2.5 LC"                            
##  [64] "OC4 CSN_Rev Unadjusted PM2.5 LC"         
##  [65] "OC4 PM2.5 LC"                            
##  [66] "OP CSN_Rev Unadjusted PM2.5 LC TOR"      
##  [67] "OP CSN_Rev Unadjusted PM2.5 LC TOT"      
##  [68] "OP PM2.5 LC TOR"                         
##  [69] "OP PM2.5 LC TOT"                         
##  [70] "Phosphorus PM2.5 LC"                     
##  [71] "Potassium Ion PM2.5 LC"                  
##  [72] "Potassium PM2.5 LC"                      
##  [73] "Reconstructed Mass PM2.5 LC"             
##  [74] "Rubidium PM2.5 LC"                       
##  [75] "Sample Flow Rate CV - Nylon Filter"      
##  [76] "Sample Flow Rate CV - Quartz Filter"     
##  [77] "Sample Flow Rate CV - Teflon Filter"     
##  [78] "Sample Volume - Nylon Filter"            
##  [79] "Sample Volume - Quartz Filter"           
##  [80] "Sample Volume - Teflon Filter"           
##  [81] "Selenium PM2.5 LC"                       
##  [82] "Silicon PM2.5 LC"                        
##  [83] "Silver PM2.5 LC"                         
##  [84] "Sodium Ion Pm2.5 LC"                     
##  [85] "Sodium PM2.5 LC"                         
##  [86] "Soil PM2.5 LC"                           
##  [87] "Strontium PM2.5 LC"                      
##  [88] "Sulfur PM2.5 LC"                         
##  [89] "Tin PM2.5 LC"                            
##  [90] "Titanium PM2.5 LC"                       
##  [91] "Vanadium PM2.5 LC"                       
##  [92] "Zinc PM2.5 LC"                           
##  [93] "Zirconium PM2.5 LC"                      
##  [94] "Mercury PM2.5 LC"                        
##  [95] "Molybdenum PM2.5 LC"                     
##  [96] "Yttrium PM2.5 LC"                        
##  [97] "Uranium PM2.5 LC"                        
##  [98] "EC CSN PM2.5 LC TOT"                     
##  [99] "OC CSN Unadjusted PM2.5 LC TOT"          
## [100] "Optical EC PM2.5 LC TOT"                 
## [101] "Total Carbon PM2.5 LC TOT"               
## [102] "Europium PM2.5 LC"                       
## [103] "Gallium PM2.5 LC"                        
## [104] "Gold PM2.5 LC"                           
## [105] "Hafnium PM2.5 LC"                        
## [106] "Iridium PM2.5 LC"                        
## [107] "Lanthanum PM2.5 LC"                      
## [108] "Niobium PM2.5 LC"                        
## [109] "Non-volatile Nitrate PM2.5 LC"           
## [110] "Samarium PM2.5 LC"                       
## [111] "Scandium PM2.5 LC"                       
## [112] "Tantalum PM2.5 LC"                       
## [113] "Terbium PM2.5 LC"                        
## [114] "Tungsten PM2.5 LC"                       
## [115] "Duplicates"

Validating the Data

A Few Parameter Name were selected to compare the raw and newly created data for Validation.

## NEWLY CREATED DATA
## Sulfate PM2.5 LC
ch1 <- ch %>%
    filter(Fips.Code == "01073") %>%
    filter(Parameter.Name == "Sulfate PM2.5 LC")
head(ch1, 10)
## # A tibble: 10 × 4
##    Fips.Code Date.Local Parameter.Name   `mean(Arithmetic.Mean)`
##    <chr>     <chr>      <chr>                              <dbl>
##  1 01073     2018-01-02 Sulfate PM2.5 LC                   0.921
##  2 01073     2018-01-05 Sulfate PM2.5 LC                   0.945
##  3 01073     2018-01-08 Sulfate PM2.5 LC                   0.732
##  4 01073     2018-01-11 Sulfate PM2.5 LC                   0.923
##  5 01073     2018-01-14 Sulfate PM2.5 LC                   0.794
##  6 01073     2018-01-20 Sulfate PM2.5 LC                   0.791
##  7 01073     2018-01-23 Sulfate PM2.5 LC                   0.285
##  8 01073     2018-01-26 Sulfate PM2.5 LC                   0.83 
##  9 01073     2018-01-29 Sulfate PM2.5 LC                   1.02 
## 10 01073     2018-02-01 Sulfate PM2.5 LC                   0.938
## Total Nitrate PM2.5 LC
ch2 <- ch %>%
    filter(Fips.Code == "01073") %>%
    filter(Parameter.Name == "Total Nitrate PM2.5 LC")
head(ch2, 10)
## # A tibble: 10 × 4
##    Fips.Code Date.Local Parameter.Name         `mean(Arithmetic.Mean)`
##    <chr>     <chr>      <chr>                                    <dbl>
##  1 01073     2018-01-02 Total Nitrate PM2.5 LC                   2.10 
##  2 01073     2018-01-05 Total Nitrate PM2.5 LC                   2.38 
##  3 01073     2018-01-08 Total Nitrate PM2.5 LC                   0.916
##  4 01073     2018-01-11 Total Nitrate PM2.5 LC                   0.163
##  5 01073     2018-01-14 Total Nitrate PM2.5 LC                   1.36 
##  6 01073     2018-01-20 Total Nitrate PM2.5 LC                   0.272
##  7 01073     2018-01-23 Total Nitrate PM2.5 LC                   0.104
##  8 01073     2018-01-26 Total Nitrate PM2.5 LC                   0.722
##  9 01073     2018-01-29 Total Nitrate PM2.5 LC                   1.11 
## 10 01073     2018-02-01 Total Nitrate PM2.5 LC                   0.346
## Average Ambient Pressure
ch3 <- ch %>%
    filter(Fips.Code == "01073") %>%
    filter(Parameter.Name == "Average Ambient Pressure")
head(ch3, 10)
## # A tibble: 10 × 4
##    Fips.Code Date.Local Parameter.Name           `mean(Arithmetic.Mean)`
##    <chr>     <chr>      <chr>                                      <dbl>
##  1 01073     2018-01-02 Average Ambient Pressure                    756.
##  2 01073     2018-01-05 Average Ambient Pressure                    754.
##  3 01073     2018-01-08 Average Ambient Pressure                    747.
##  4 01073     2018-01-11 Average Ambient Pressure                    742.
##  5 01073     2018-01-14 Average Ambient Pressure                    756.
##  6 01073     2018-01-17 Average Ambient Pressure                    758.
##  7 01073     2018-01-20 Average Ambient Pressure                    749.
##  8 01073     2018-01-23 Average Ambient Pressure                    746.
##  9 01073     2018-01-26 Average Ambient Pressure                    755.
## 10 01073     2018-01-29 Average Ambient Pressure                    749.
## Average Ambient Temperature
ch4 <- ch %>%
    filter(Fips.Code == "01073") %>%
    filter(Parameter.Name == "Average Ambient Temperature")
head(ch4, 10)
## # A tibble: 10 × 4
##    Fips.Code Date.Local Parameter.Name              `mean(Arithmetic.Mean)`
##    <chr>     <chr>      <chr>                                         <dbl>
##  1 01073     2018-01-02 Average Ambient Temperature                   -4.41
##  2 01073     2018-01-05 Average Ambient Temperature                   -1.52
##  3 01073     2018-01-08 Average Ambient Temperature                    6.71
##  4 01073     2018-01-11 Average Ambient Temperature                   19.2 
##  5 01073     2018-01-14 Average Ambient Temperature                   -1.25
##  6 01073     2018-01-17 Average Ambient Temperature                   -6.75
##  7 01073     2018-01-20 Average Ambient Temperature                   11.2 
##  8 01073     2018-01-23 Average Ambient Temperature                    8.98
##  9 01073     2018-01-26 Average Ambient Temperature                   10.2 
## 10 01073     2018-01-29 Average Ambient Temperature                   10.4
## RAW DATA
ch5 <- df8 %>%
    filter(Fips.Code == "01073") %>%
    select(Date.Local, Fips.Code, 'Sulfate PM2.5 LC',
    'Total Nitrate PM2.5 LC', 'Average Ambient Pressure', 'Average Ambient Temperature')
head(ch5, 10)
##    Date.Local Fips.Code  Sulfate PM2.5 LC Total Nitrate PM2.5 LC
## 1  2018-01-02     01073 0.921333333333333       2.09733333333333
## 2  2018-01-05     01073 0.945333333333333       2.38366666666667
## 3  2018-01-08     01073             0.732      0.916333333333333
## 4  2018-01-11     01073 0.923333333333333      0.162666666666667
## 5  2018-01-14     01073             0.794       1.36233333333333
## 6  2018-01-17     01073                NA                     NA
## 7  2018-01-20     01073             0.791      0.272333333333333
## 8  2018-01-23     01073             0.285                  0.104
## 9  2018-01-26     01073              0.83      0.721666666666667
## 10 2018-01-29     01073  1.01966666666667       1.11333333333333
##    Average Ambient Pressure Average Ambient Temperature
## 1          756.181818181818           -4.40909090909091
## 2                     753.5                      -1.525
## 3          747.090909090909            6.70909090909091
## 4                    741.75                        19.2
## 5          756.090909090909           -1.25454545454545
## 6                     758.5                       -6.75
## 7          749.111111111111            11.2333333333333
## 8                    745.75                       8.975
## 9          754.909090909091                        10.2
## 10                    749.2                       10.38