The required packages were loaded for use in the R session (the code below relies on dplyr).
The data were loaded and then subset to the desired year (2012).
# Restore the saved workspace objects, then keep only the 2012 rows.
# NOTE(review): `df1` is presumably provided by this load() call - confirm.
load("County Level Ozone with Population.RData")
df2 <- filter(df1, Year == 2012)
The 2012 data were then processed as follows:
1. The data were subset to observations whose Max.Ozone is at or above
70 ppb (True Conditions).
2. The average ozone level, number of unique counties, county days, and
county population days (county days multiplied by county population) were
calculated.
## True Conditions
# Keep the 2012 rows whose Max.Ozone is at least 70 and collapse them to one
# row per county (GEOID / State.Name / County.Name):
#   count       - number of retained rows for the county
#   ozone.level - mean Max.Ozone over those rows
#   Pop         - mean Population over those rows
#   total.pop   - count * Pop
df3 <- df2 %>%
  filter(Max.Ozone >= 70) %>%
  group_by(GEOID, State.Name, County.Name) %>%
  summarise(
    count = n(),
    ozone.level = mean(Max.Ozone),
    Pop = mean(Population),
    .groups = "drop"
  ) %>%
  mutate(total.pop = count * Pop)
# Average Ozone Level
df3 %>% pull(ozone.level) %>% mean()
## [1] 75.19524
# Unique Counties Affected
df3 %>% pull(GEOID) %>% n_distinct()
## [1] 642
# County Days
df3 %>% pull(count) %>% sum()
## [1] 7314
# County Person Days
df3 %>% pull(total.pop) %>% sum()
## [1] 2563919444
Separate columns were created for each case (A, B, C), and the ozone
value was recalculated for each:
1. Case A: severe drought conditions are reduced to moderate drought
conditions (Max.Ozone lowered by 0.906).
2. Case B: severe drought conditions are reduced to moderate drought
conditions (0.906), and moderate drought conditions are reduced to no
drought (1.693).
3. Case C: severe and moderate drought conditions are both reduced to
no drought (severe = 2.598, moderate = 1.693).
# Build the three counterfactual ozone columns for every 2012 row belonging to
# a county that appears in df3 (i.e. had at least one row with
# Max.Ozone >= 70).
#
# Each Case_* column is Max.Ozone minus a fixed amount that depends on the
# row's USDM.categorical value:
#   Case_A: SevereDrought - 0.906
#   Case_B: SevereDrought - 0.906, ModerateDrought - 1.693
#   Case_C: SevereDrought - 2.598, ModerateDrought - 1.693
# Any other category leaves Max.Ozone unchanged.
#
# All three columns use case_when() with a TRUE fallback so that a row with a
# missing USDM.categorical keeps its observed Max.Ozone in every case. The
# earlier ifelse() for Case_A returned NA for such rows, which silently
# removed them from the Case A threshold filter only.
df4 <- df2 %>%
  filter(GEOID %in% df3$GEOID) %>%
  mutate(
    Case_A = case_when(
      USDM.categorical == "SevereDrought" ~ Max.Ozone - 0.906,
      TRUE ~ Max.Ozone
    ),
    Case_B = case_when(
      USDM.categorical == "SevereDrought" ~ Max.Ozone - 0.906,
      USDM.categorical == "ModerateDrought" ~ Max.Ozone - 1.693,
      TRUE ~ Max.Ozone
    ),
    Case_C = case_when(
      USDM.categorical == "SevereDrought" ~ Max.Ozone - 2.598,
      USDM.categorical == "ModerateDrought" ~ Max.Ozone - 1.693,
      TRUE ~ Max.Ozone
    )
  )
The recalculated data were then processed as follows:
1. The data were subset to observations whose Case_A value is at or above
70 ppb (Case A).
2. The average ozone level, number of unique counties, county days, and
county population days were calculated, along with their percent change
from True Conditions.
## CASE A
# Rows that still reach the 70 threshold under Case A, collapsed to one row
# per county (GEOID / State.Name / County.Name):
#   count       - number of retained rows for the county
#   ozone.level - mean of the Case A ozone over those rows
#   Pop         - mean Population over those rows
#   total.pop   - count * Pop
df8 <- df4 %>%
  filter(Case_A >= 70) %>%
  group_by(GEOID, State.Name, County.Name) %>%
  summarise(
    count = n(),
    # Average the scenario column that defines the subset rather than the
    # observed Max.Ozone, so the reported level reflects Case A.
    ozone.level = mean(Case_A),
    Pop = mean(Population),
    .groups = "drop"
  ) %>%
  mutate(total.pop = count * Pop)
## Case A Average Ozone Level
mean(df8$ozone.level)
## [1] 75.33615  <- stale: was the mean of unadjusted Max.Ozone; re-run to refresh
# Unique Counties Affected
n_distinct(df8$GEOID)
## [1] 637
# Counties % Change from True
((n_distinct(df8$GEOID)/n_distinct(df3$GEOID)) * 100) - 100
## [1] -0.7788162
# County Days
sum(df8$count)
## [1] 7156
# County Days % Change from True
((sum(df8$count)/sum(df3$count)) * 100) - 100
## [1] -2.160241
# County Person Days
sum(df8$total.pop)
## [1] 2522412665
# County Person Days % Change from True
((sum(df8$total.pop)/sum(df3$total.pop)) * 100) - 100
## [1] -1.61888
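The recalculated data were then processed as follows:
1. The data were subset to observations whose Case_B value is at or above
70 ppb (Case B).
2. The average ozone level, number of unique counties, county days, and
county population days were calculated, along with their percent change
from True Conditions.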
## CASE B
# Rows that still reach the 70 threshold under Case B, collapsed to one row
# per county (GEOID / State.Name / County.Name):
#   count       - number of retained rows for the county
#   ozone.level - mean of the Case B ozone over those rows
#   Pop         - mean Population over those rows
#   total.pop   - count * Pop
df9 <- df4 %>%
  filter(Case_B >= 70) %>%
  group_by(GEOID, State.Name, County.Name) %>%
  summarise(
    count = n(),
    # Average the scenario column that defines the subset rather than the
    # observed Max.Ozone, so the reported level reflects Case B.
    ozone.level = mean(Case_B),
    Pop = mean(Population),
    .groups = "drop"
  ) %>%
  mutate(total.pop = count * Pop)
## Case B Average Ozone Level
mean(df9$ozone.level)
## [1] 76.00021  <- stale: was the mean of unadjusted Max.Ozone; re-run to refresh
# Unique Counties Affected
n_distinct(df9$GEOID)
## [1] 622
# Counties % Change from True
((n_distinct(df9$GEOID)/n_distinct(df3$GEOID)) * 100) - 100
## [1] -3.115265
# County Days
sum(df9$count)
## [1] 6405
# County Days % Change from True
((sum(df9$count)/sum(df3$count)) * 100) - 100
## [1] -12.42822
# County Person Days
sum(df9$total.pop)
## [1] 2241680467
# County Person Days % Change from True
((sum(df9$total.pop)/sum(df3$total.pop)) * 100) - 100
## [1] -12.56822
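The recalculated data were then processed as follows:
1. The data were subset to observations whose Case_C value is at or above
70 ppb (Case C).
2. The average ozone level, number of unique counties, county days, and
county population days were calculated, along with their percent change
from True Conditions.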
## CASE C
# Rows that still reach the 70 threshold under Case C, collapsed to one row
# per county (GEOID / State.Name / County.Name):
#   count       - number of retained rows for the county
#   ozone.level - mean of the Case C ozone over those rows
#   Pop         - mean Population over those rows
#   total.pop   - count * Pop
df11 <- df4 %>%
  filter(Case_C >= 70) %>%
  group_by(GEOID, State.Name, County.Name) %>%
  summarise(
    count = n(),
    # Average the scenario column that defines the subset rather than the
    # observed Max.Ozone, so the reported level reflects Case C.
    ozone.level = mean(Case_C),
    Pop = mean(Population),
    .groups = "drop"
  ) %>%
  mutate(total.pop = count * Pop)
## Case C Average Ozone Level
mean(df11$ozone.level)
## [1] 76.27168  <- stale: was the mean of unadjusted Max.Ozone; re-run to refresh
# Unique Counties Affected
n_distinct(df11$GEOID)
## [1] 611
# Counties % Change from True
((n_distinct(df11$GEOID)/n_distinct(df3$GEOID)) * 100) - 100
## [1] -4.82866
# County Days
sum(df11$count)
## [1] 6114
# County Days % Change from True
((sum(df11$count)/sum(df3$count)) * 100) - 100
## [1] -16.40689
# County Person Days
sum(df11$total.pop)
## [1] 2155756665
# County Person Days % Change from True
((sum(df11$total.pop)/sum(df3$total.pop)) * 100) - 100
## [1] -15.91949