Synopsis

This analysis examines the NOAA Storm Database to determine which types of weather events cause the greatest harm to population health and the greatest economic damage across the United States. The dataset includes information about fatalities, injuries, property damage, and crop damage associated with severe weather events. The analysis aggregates these impacts by event type to identify the most severe categories. Tornadoes are by far the most harmful to population health, accounting for 5,633 fatalities and 91,346 injuries, followed by excessive heat; the results also identify the event types that produce the greatest financial losses. Understanding these patterns supports improved disaster preparedness and resource allocation.

Data Processing

# Load the raw NOAA storm events table, then inspect its size and columns.
storm <- read.csv(file = "repdata_data_StormData.csv")
dim(storm)
## [1] 902297     37
str(storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type and the four impact measures used downstream.
storm_data <- select(storm, EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG)
# Fatalities and injuries summed per event type, with their combined count,
# ordered from the most to the least harmful event type.
health <- storm_data %>%
    group_by(EVTYPE) %>%
    summarise(fatalities = sum(FATALITIES), injuries = sum(INJURIES)) %>%
    mutate(total = fatalities + injuries) %>%
    arrange(desc(total))

# The ten event types with the largest combined count.
top_health <- slice_head(health, n = 10)
top_health
## # A tibble: 10 × 4
##    EVTYPE            fatalities injuries total
##    <chr>                  <dbl>    <dbl> <dbl>
##  1 TORNADO                 5633    91346 96979
##  2 EXCESSIVE HEAT          1903     6525  8428
##  3 TSTM WIND                504     6957  7461
##  4 FLOOD                    470     6789  7259
##  5 LIGHTNING                816     5230  6046
##  6 HEAT                     937     2100  3037
##  7 FLASH FLOOD              978     1777  2755
##  8 ICE STORM                 89     1975  2064
##  9 THUNDERSTORM WIND        133     1488  1621
## 10 WINTER STORM             206     1321  1527
# Estimate the dollar value of property and crop damage per event type.
#
# PROPDMG and CROPDMG hold only the leading figure of each estimate; its
# magnitude is stored separately in PROPDMGEXP / CROPDMGEXP (e.g. "K" for
# thousands). Summing the raw figures would count a 25K loss and a 25M loss
# as the same amount, so every value is scaled to dollars before aggregating.
#
# NOTE(review): the table printed below this chunk was produced before this
# fix (it sums the unscaled figures); re-knit the document to refresh it.

# Map magnitude codes to numeric multipliers (H/K/M/B, case-insensitive).
# Any other code (blank, digits, "+", "-", "?") has no documented meaning, so
# the damage figure is taken at face value.
# NOTE(review): confirm this treatment of the non-letter codes.
damage_multiplier <- function(exp_code) {
    known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
    scale <- known[toupper(exp_code)]
    scale[is.na(scale)] <- 1
    unname(scale)
}

# Read from `storm` rather than `storm_data`, which lacks the EXP columns.
economic <- storm %>%
    select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
    mutate(
        property = PROPDMG * damage_multiplier(PROPDMGEXP),
        crop = CROPDMG * damage_multiplier(CROPDMGEXP)
    ) %>%
    group_by(EVTYPE) %>%
    summarise(
        property = sum(property),
        crop = sum(crop),
        total = property + crop
    ) %>%
    arrange(desc(total))

# The ten event types with the largest combined damage in dollars.
top_economic <- head(economic, 10)
top_economic
## # A tibble: 10 × 4
##    EVTYPE             property    crop    total
##    <chr>                 <dbl>   <dbl>    <dbl>
##  1 TORNADO            3212258. 100019. 3312277.
##  2 FLASH FLOOD        1420125. 179200. 1599325.
##  3 TSTM WIND          1335966. 109203. 1445168.
##  4 HAIL                688693. 579596. 1268290.
##  5 FLOOD               899938. 168038. 1067976.
##  6 THUNDERSTORM WIND   876844.  66791.  943636.
##  7 LIGHTNING           603352.   3581.  606932.
##  8 THUNDERSTORM WINDS  446293.  18685.  464978.
##  9 HIGH WIND           324732.  17283.  342015.
## 10 WINTER STORM        132721.   1979.  134700.

Results

Events Most Harmful to Population Health

# Horizontal bar chart of the ten most harmful event types; bars are ordered
# by combined fatalities and injuries.
top_health %>%
    ggplot(aes(x = reorder(EVTYPE, total), y = total)) +
    geom_col() +
    coord_flip() +
    ggtitle("Top Weather Events Causing Health Impact") +
    xlab("Event Type") +
    ylab("Fatalities + Injuries")

Events With Greatest Economic Consequences

# Horizontal bar chart of the ten costliest event types; bars are ordered by
# combined property and crop damage.
top_economic %>%
    ggplot(aes(x = reorder(EVTYPE, total), y = total)) +
    geom_col() +
    coord_flip() +
    ggtitle("Top Weather Events Causing Economic Damage") +
    xlab("Event Type") +
    ylab("Total Damage")

Fatalities vs Injuries by Event Type

# Reshape the top-ten table to one row per (event type, outcome) pair so the
# two outcomes can be drawn as stacked segments of a single bar.
top_health_long <- top_health %>%
    select(EVTYPE, fatalities, injuries) %>%
    tidyr::pivot_longer(
        cols = -EVTYPE,
        names_to = "type",
        values_to = "count"
    )

# reorder() ranks each event type by the mean of its two counts, which gives
# the same ordering as ranking by their sum.
top_health_long %>%
    ggplot(aes(x = reorder(EVTYPE, count), y = count, fill = type)) +
    geom_col() +
    coord_flip() +
    ggtitle("Fatalities vs Injuries by Event Type") +
    xlab("Event Type") +
    ylab("Number of Cases")