Synopsis

This analysis assesses which types of weather events cause the most harm to people and to property and crops in the United States. The data were collected between 1950 and November 2011.

We found that:

The major events that cause damage are: FLOOD, HURRICANE/TYPHOON, TORNADO, and STORM SURGE

The major events that cause injuries are: TORNADO, TSTM WIND, FLOOD, EXCESSIVE HEAT, and LIGHTNING

The major events that cause fatalities are: TORNADO, EXCESSIVE HEAT, FLASH FLOOD, HEAT, and LIGHTNING

Data Processing

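The data were loaded and only the relevant columns were kept: event type, fatalities, injuries, property damage, and crop damage, together with the exponent codes for the two damage columns. Each damage value was then multiplied by the factor encoded in its exponent code (for example, K = 1,000, M = 1,000,000, and B = 1,000,000,000), so that the full dollar amount is held in a single column.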

Fatalities, injuries, and total damage (property plus crop) were then summed for each event type. Event types with no recorded harm of any kind were removed from the summary.

# Read the raw storm records straight from the bzip2-compressed CSV file.
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")
# dplyr provides the data-manipulation verbs used in the rest of the analysis.
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.4.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep the event type plus the harm columns (FATALITIES through CROPDMGEXP).
storm_data_clean <- storm_data %>%
  select(EVTYPE, FATALITIES:CROPDMGEXP)

# List the distinct property-damage exponent codes present in the data.
print(unique(storm_data_clean[["PROPDMGEXP"]]))
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# List the distinct crop-damage exponent codes present in the data.
print(unique(storm_data_clean[["CROPDMGEXP"]]))
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"
# Map a damage-exponent code to the factor its damage value is multiplied by.
#
# exp_code: character vector of codes as found in PROPDMGEXP / CROPDMGEXP.
# Returns a numeric vector of the same length: B = 1e9, M = 1e6, K = 1e3,
# H = 1e2 (letters in either case), the digits 0-8 and "+" = 10, and 1 for
# every other code (such as "", "?" or "-"), which leaves the value unchanged.
# NOTE(review): digits and "+" get a flat x10 rather than a power of ten --
# confirm this is the intended reading of the codes.
damage_multiplier <- function(exp_code) {
  code <- toupper(exp_code)
  case_when(
    code == "B" ~ 1e9,
    code == "M" ~ 1e6,
    code == "K" ~ 1e3,
    code == "H" ~ 1e2,
    code %in% c(as.character(0:8), "+") ~ 10,
    TRUE ~ 1
  )
}

# Scale both damage columns in one pass. Each code is matched as a whole, so a
# value can only ever pick up one multiplier, and crop damage honours the same
# set of codes as property damage.
storm_data_clean <- storm_data_clean %>%
  mutate(
    PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
    CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP)
  )

# The exponent code columns are dropped here; only the damage values remain.
storm_data_clean <- storm_data_clean %>%
  select(-c(PROPDMGEXP, CROPDMGEXP))

# Total each harm measure per event type; damage is property plus crop damage.
grouped_storm_data <- storm_data_clean %>%
  group_by(EVTYPE)

# Rename the key column to `event` and keep only event types whose combined
# fatalities, injuries and damage are above zero.
storm_data_summary <- grouped_storm_data %>%
  summarize(
    fatalities = sum(FATALITIES),
    injuries = sum(INJURIES),
    damage = sum(PROPDMG) + sum(CROPDMG)
  ) %>%
  rename(event = EVTYPE) %>%
  filter(fatalities + injuries + damage > 0)

Results

Damage

# Rank event types by total damage, keeping only those with any damage.
# The event column becomes a factor whose levels run from least to most
# damage, so anything that follows level order lists events by damage.
damage_data <- storm_data_summary %>%
  filter(damage > 0) %>%
  select(event, damage) %>%
  arrange(desc(damage)) %>%
  mutate(event = factor(event, levels = event[order(damage)]))

print(damage_data)
## # A tibble: 431 × 2
##    event                   damage
##    <fct>                    <dbl>
##  1 FLOOD             150319678257
##  2 HURRICANE/TYPHOON  71913712800
##  3 TORNADO            57352118150
##  4 STORM SURGE        43323541000
##  5 HAIL               18758224587
##  6 FLASH FLOOD        17562132318
##  7 DROUGHT            15018672000
##  8 HURRICANE          14610229010
##  9 RIVER FLOOD        10148404500
## 10 ICE STORM           8967041810
## # ℹ 421 more rows

Plotting

library(ggplot2)

# Horizontal bar chart of the first 15 rows of damage_data, which is sorted by
# descending damage. head() is used instead of indexing rows 1:15 so that a
# table with fewer than 15 rows is not padded with all-NA rows.
g <- ggplot(head(damage_data, 15), aes(x = damage, y = event))
g + geom_col() +
  labs(
    title = "Total Property and Crop Damage",
    x = "Damage in USD",
    y = "Weather Event"
  )

The major events that cause damage are: FLOOD, HURRICANE/TYPHOON, TORNADO, and STORM SURGE

Fatalities and Injuries

# Rank event types by total injuries, keeping only those with any injuries.
# The event column becomes a factor whose levels run from fewest to most
# injuries, so anything that follows level order lists events by injuries.
injury_data <- storm_data_summary %>%
  filter(injuries > 0) %>%
  select(event, injuries) %>%
  arrange(desc(injuries)) %>%
  mutate(event = factor(event, levels = event[order(injuries)]))

print(injury_data)
## # A tibble: 158 × 2
##    event             injuries
##    <fct>                <dbl>
##  1 TORNADO              91346
##  2 TSTM WIND             6957
##  3 FLOOD                 6789
##  4 EXCESSIVE HEAT        6525
##  5 LIGHTNING             5230
##  6 HEAT                  2100
##  7 ICE STORM             1975
##  8 FLASH FLOOD           1777
##  9 THUNDERSTORM WIND     1488
## 10 HAIL                  1361
## # ℹ 148 more rows
# Rank event types by total fatalities, keeping only those with any deaths.
# The event column becomes a factor whose levels run from fewest to most
# fatalities, so anything that follows level order lists events by fatalities.
fatality_data <- storm_data_summary %>%
  filter(fatalities > 0) %>%
  select(event, fatalities) %>%
  arrange(desc(fatalities)) %>%
  mutate(event = factor(event, levels = event[order(fatalities)]))

print(fatality_data)
## # A tibble: 168 × 2
##    event          fatalities
##    <fct>               <dbl>
##  1 TORNADO              5633
##  2 EXCESSIVE HEAT       1903
##  3 FLASH FLOOD           978
##  4 HEAT                  937
##  5 LIGHTNING             816
##  6 TSTM WIND             504
##  7 FLOOD                 470
##  8 RIP CURRENT           368
##  9 HIGH WIND             248
## 10 AVALANCHE             224
## # ℹ 158 more rows

Plotting

# Horizontal bar chart of the first 15 rows of injury_data, which is sorted by
# descending injuries. head() is used instead of indexing rows 1:15 so that a
# table with fewer than 15 rows is not padded with all-NA rows.
g <- ggplot(head(injury_data, 15), aes(x = injuries, y = event))
g + geom_col() +
  labs(
    title = "Injuries",
    x = "Person count",
    y = "Weather Event"
  )

The major events that cause injuries are: TORNADO, TSTM WIND, FLOOD, EXCESSIVE HEAT, and LIGHTNING

# Horizontal bar chart of the first 15 rows of fatality_data, which is sorted
# by descending fatalities. head() is used instead of indexing rows 1:15 so
# that a table with fewer than 15 rows is not padded with all-NA rows.
g <- ggplot(head(fatality_data, 15), aes(x = fatalities, y = event))
g + geom_col() +
  labs(
    title = "Fatalities",
    x = "Person count",
    y = "Weather Event"
  )

The major events that cause fatalities are: TORNADO, EXCESSIVE HEAT, FLASH FLOOD, HEAT, and LIGHTNING