Data Processing

Load Data and Packages

# Across the United States, which types of events are most harmful with respect to population health?

# Per-event-type casualty totals: deaths, injuries, and their sum, with NA
# counts ignored. Rows are ordered from the most to the least harmful type.
harmful_events <- summarise(
  group_by(repdata_data_StormData, EVTYPE),
  total_fatalities = sum(FATALITIES, na.rm = TRUE),
  total_injuries = sum(INJURIES, na.rm = TRUE),
  total_harmed = total_fatalities + total_injuries
) %>%
  arrange(desc(total_harmed))

View top 10 harmful event types

# harmful_events is already sorted by total_harmed (descending), so the first
# ten rows are the ten most harmful event types.
harmful_events %>% head(n = 10)
## # A tibble: 10 × 4
##    EVTYPE            total_fatalities total_injuries total_harmed
##    <chr>                        <dbl>          <dbl>        <dbl>
##  1 TORNADO                       5633          91346        96979
##  2 EXCESSIVE HEAT                1903           6525         8428
##  3 TSTM WIND                      504           6957         7461
##  4 FLOOD                          470           6789         7259
##  5 LIGHTNING                      816           5230         6046
##  6 HEAT                           937           2100         3037
##  7 FLASH FLOOD                    978           1777         2755
##  8 ICE STORM                       89           1975         2064
##  9 THUNDERSTORM WIND              133           1488         1621
## 10 WINTER STORM                   206           1321         1527

Plot the top 10 most harmful event types

# Keep the ten event types with the largest combined casualty counts.
# slice_max() replaces the superseded top_n(); ties are still kept by default.
top10 <- harmful_events %>% slice_max(total_harmed, n = 10)

# Horizontal bar chart. reorder() sorts the bars by total_harmed so that, after
# coord_flip(), the most harmful event type is drawn at the top. geom_col()
# takes bar lengths directly from the y aesthetic.
ggplot(top10, aes(x = reorder(EVTYPE, total_harmed), y = total_harmed)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Most Harmful Weather Events in the US (Population Health)",
    x = "Event Type", y = "Total Fatalities + Injuries"
  )

# Across the United States, which types of events have the greatest economic consequences?

Function to convert exponent codes

# Translate damage-exponent codes into numeric multipliers.
#
# Args:
#   e: Vector of exponent codes. It is coerced to character and matched
#      case-insensitively.
#
# Returns:
#   A numeric vector the same length as `e`: 1e2 for "H", 1e3 for "K",
#   1e6 for "M", 1e9 for "B", and 1 for "", "0", or a missing (NA) code.
#   Any other code yields NA because its multiplier is not known.
convert_exp <- function(e) {
  e <- toupper(as.character(e))

  # A missing code carries the same information as a blank one (no exponent
  # was recorded), so treat it like "" instead of letting it fall through to
  # the "unknown code" NA result and wipe out the damage amount downstream.
  e[is.na(e)] <- ""

  codes <- c("", "0", "H", "K", "M", "B")
  multipliers <- c(1, 1, 1e2, 1e3, 1e6, 1e9)

  # match() gives NA for codes outside the table, which indexes to NA_real_.
  multipliers[match(e, codes)]
}

Apply exponent conversion

# Turn the exponent codes into multipliers and compute the dollar damage of
# every record.
#
# prop_dmg_val and crop_dmg_val are NA when their multiplier is NA (see
# convert_exp). total_damage counts such a component as 0: with a plain `+`,
# one NA component would make the whole total NA, and the record's other,
# valid component would then be dropped by later sum(..., na.rm = TRUE) calls.
repdata_data_StormData <- repdata_data_StormData %>%
  mutate(
    prop_dmg_exp = convert_exp(PROPDMGEXP),
    crop_dmg_exp = convert_exp(CROPDMGEXP),
    prop_dmg_val = PROPDMG * prop_dmg_exp,
    crop_dmg_val = CROPDMG * crop_dmg_exp,
    total_damage = coalesce(prop_dmg_val, 0) + coalesce(crop_dmg_val, 0)
  )

Summarize total economic damage by event type

# Per-event-type damage totals (property, crop, and the per-record combined
# figure), ignoring NA values, ordered from the largest combined total down.
econ_damage <- group_by(repdata_data_StormData, EVTYPE) %>%
  summarise(
    property_damage = sum(prop_dmg_val, na.rm = TRUE),
    crop_damage = sum(crop_dmg_val, na.rm = TRUE),
    total_damage = sum(total_damage, na.rm = TRUE)
  ) %>%
  arrange(desc(total_damage))

View top 10 economic damage

# econ_damage is already sorted by total_damage (descending), so the first ten
# rows are the ten costliest event types.
econ_damage %>% head(n = 10)
## # A tibble: 10 × 4
##    EVTYPE            property_damage crop_damage total_damage
##    <chr>                       <dbl>       <dbl>        <dbl>
##  1 FLOOD               144657709800   5661968450 138007444500
##  2 HURRICANE/TYPHOON    69305840000   2607872800  29348167800
##  3 TORNADO              56937160614.   414953270  16570326363
##  4 HURRICANE            11868319010   2741910000  12405268000
##  5 RIVER FLOOD           5118945500   5029459000  10108369000
##  6 HAIL                 15732267456.  3025954470  10020596590
##  7 FLASH FLOOD          16140861772.  1421317100   8715455177
##  8 ICE STORM             3944927860   5022113500   5925150850
##  9 STORM SURGE/TIDE      4641188000       850000   4641493000
## 10 THUNDERSTORM WIND     3483121270    414843050   3813647990

## Plot Top 10 Economically Damaging Events

# Keep the ten event types with the largest combined damage totals.
# slice_max() replaces the superseded top_n(); ties are still kept by default.
top10_econ <- econ_damage %>% slice_max(total_damage, n = 10)

# Horizontal bar chart. reorder() sorts the bars by total_damage so that, after
# coord_flip(), the costliest event type is drawn at the top. geom_col() takes
# bar lengths directly from the y aesthetic.
ggplot(top10_econ, aes(x = reorder(EVTYPE, total_damage), y = total_damage)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events with Greatest Economic Damage in the US",
    x = "Event Type", y = "Total Economic Damage (USD)"
  )