Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Key Findings

Data Processing

Load data

## Read the raw storm data CSV into a data frame.
## NOTE(review): this is an absolute path under the author's home directory;
## it must be adjusted before the report can be run on another machine.
storm_csv_path <- "~/Online Courses/Johns Hopkins University Data Science Foundations using R Specialization/Assignment/Module 5/RepData_PeerAssessment2/repdata_data_StormData.csv"
stormdf <- read.csv(file = storm_csv_path)

Load libraries

library('magrittr')
library('dplyr')
library('ggplot2')
library('patchwork')

Extract useful columns for analysis

## Keep only the record id, location, event type, casualty and damage columns
stormdf2 <- select(
  stormdf,
  REFNUM, COUNTYNAME, STATE, EVTYPE,
  FATALITIES, INJURIES, PROPDMG, CROPDMG
)

See structure of data

## Print each retained column's type and first few values
str(stormdf2)
## 'data.frame':    902297 obs. of  8 variables:
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...

Data Cleaning

## Normalise the three text columns: convert to upper case, then strip
## leading and trailing whitespace
text_cols <- c("COUNTYNAME", "STATE", "EVTYPE")
stormdf2[text_cols] <- lapply(
  stormdf2[text_cols],
  function(values) trimws(toupper(values))
)

Results

Impact of event types on both population health and economy

## Per event type: sum fatalities and injuries, then keep only the combined
## casualty total alongside the event type
pophealthdf <- local({
  sums <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE,
                    data = stormdf2, FUN = sum)
  sums$TOTAL_FATALITIES_AND_INJURIES <- sums$FATALITIES + sums$INJURIES
  sums[, c("EVTYPE", "TOTAL_FATALITIES_AND_INJURIES")]
})
## Per event type: sum crop and property damage, then keep only the combined
## damage total alongside the event type.
## NOTE(review): CROPDMG and PROPDMG are added exactly as stored; no
## magnitude/unit column is carried into stormdf2, so confirm the raw values
## share one unit before interpreting the totals as dollar amounts.
econdf <- local({
  sums <- aggregate(cbind(CROPDMG, PROPDMG) ~ EVTYPE,
                    data = stormdf2, FUN = sum)
  sums$TOTAL_PROP_CROPDMG <- sums$CROPDMG + sums$PROPDMG
  sums[, c("EVTYPE", "TOTAL_PROP_CROPDMG")]
})

Summary of total fatalities and injuries combined across the top 5 event types

## Top 5 event types by combined fatalities and injuries, largest first.
## slice_max() sorts the result in descending order and, with
## with_ties = TRUE, keeps every row tied for fifth place.
top5_pophealthdf <- slice_max(
  pophealthdf,
  order_by = TOTAL_FATALITIES_AND_INJURIES,
  n = 5,
  with_ties = TRUE
)
## Five-number summary (plus mean) of the selected totals
summary(top5_pophealthdf)
##     EVTYPE          TOTAL_FATALITIES_AND_INJURIES
##  Length:5           Min.   : 6046                
##  Class :character   1st Qu.: 7259                
##  Mode  :character   Median : 7461                
##                     Mean   :25235                
##                     3rd Qu.: 8428                
##                     Max.   :96979

Summary of total property and crop damage combined across the top 5 event types

## Top 5 event types by combined property and crop damage, largest first.
## slice_max() sorts the result in descending order and, with
## with_ties = TRUE, keeps every row tied for fifth place.
top5_econdf <- slice_max(
  econdf,
  order_by = TOTAL_PROP_CROPDMG,
  n = 5,
  with_ties = TRUE
)
## Five-number summary (plus mean) of the selected totals
summary(top5_econdf)
##     EVTYPE          TOTAL_PROP_CROPDMG
##  Length:5           Min.   :1067976   
##  Class :character   1st Qu.:1268290   
##  Mode  :character   Median :1445306   
##                     Mean   :1738645   
##                     3rd Qu.:1599375   
##                     Max.   :3312277
## Named vector mapping event type -> fill colour; supplied to
## scale_fill_manual() by the combined health and economy plots
plot1_colour <- c(
  "HAIL"              = "honeydew4",
  "FLOOD"             = "maroon4",
  "FLASH FLOOD"       = "lightsteelblue4",
  "THUNDERSTORM WIND" = "plum4",
  "LIGHTNING"         = "paleturquoise4",
  "TORNADO"           = "bisque4",
  "TSTM WIND"         = "lavenderblush4",
  "EXCESSIVE HEAT"    = "indianred4"
)

## Bar chart of combined fatalities and injuries, one bar per event type in
## top5_pophealthdf, filled using the plot1_colour mapping.
## The x-axis label is written as plain text so it matches the legend title.
pophealthplot <- ggplot(
  top5_pophealthdf,
  aes(x = EVTYPE, y = TOTAL_FATALITIES_AND_INJURIES, fill = EVTYPE)
) +
  geom_col() +
  scale_fill_manual(values = plot1_colour) +
  labs(
    x = "Event Type",
    y = "Total Fatalities and Injuries",
    title = "Total Fatalities and Injuries by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

## Bar chart of combined property and crop damage, one bar per event type in
## top5_econdf, filled using the plot1_colour mapping.
## Axis labels and title are written as plain text (no underscores) so they
## read consistently with the legend.
## NOTE(review): the "($)" unit assumes TOTAL_PROP_CROPDMG is in dollars;
## confirm against how the damage columns were aggregated.
econplot <- ggplot(
  top5_econdf,
  aes(x = EVTYPE, y = TOTAL_PROP_CROPDMG, fill = EVTYPE)
) +
  geom_col() +
  scale_fill_manual(values = plot1_colour) +
  labs(
    x = "Event Type",
    y = "Total Property and Crop Damage ($)",
    title = "Total Property and Crop Damage by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
       

## Stack the health plot above the economy plot (two rows) and display it
combined_pophealth_econ_plot <- wrap_plots(pophealthplot, econplot, nrow = 2)
combined_pophealth_econ_plot

Impact of event types on population health

## Event-type totals, computed separately for each casualty measure
## (one data frame for fatalities, one for injuries)
fatdf <- aggregate(formula = FATALITIES ~ EVTYPE,
                   data = stormdf2,
                   FUN = sum)
injdf <- aggregate(formula = INJURIES ~ EVTYPE,
                   data = stormdf2,
                   FUN = sum)

Summary of total number of fatalities across the top 5 event types

## Top 5 event types by total fatalities, largest first.
## slice_max() sorts the result in descending order and, with
## with_ties = TRUE, keeps every row tied for fifth place.
top5_evtypes_fat <- slice_max(
  fatdf,
  order_by = FATALITIES,
  n = 5,
  with_ties = TRUE
)
## Five-number summary (plus mean) of the selected totals
summary(top5_evtypes_fat)
##     EVTYPE            FATALITIES  
##  Length:5           Min.   : 816  
##  Class :character   1st Qu.: 937  
##  Mode  :character   Median : 978  
##                     Mean   :2053  
##                     3rd Qu.:1903  
##                     Max.   :5633

Summary of total number of injuries across the top 5 event types

## Top 5 event types by total injuries, largest first.
## slice_max() sorts the result in descending order and, with
## with_ties = TRUE, keeps every row tied for fifth place.
top5_evtypes_inj <- slice_max(
  injdf,
  order_by = INJURIES,
  n = 5,
  with_ties = TRUE
)
## Five-number summary (plus mean) of the selected totals
summary(top5_evtypes_inj)
##     EVTYPE             INJURIES    
##  Length:5           Min.   : 5230  
##  Class :character   1st Qu.: 6525  
##  Mode  :character   Median : 6789  
##                     Mean   :23369  
##                     3rd Qu.: 6957  
##                     Max.   :91346
## Named vector mapping event type -> fill colour; supplied to
## scale_fill_manual() by the fatalities and injuries plots
fat_inj_colour <- c(
  "EXCESSIVE HEAT" = "indianred4",
  "FLOOD"          = "maroon4",
  "FLASH FLOOD"    = "lightsteelblue4",
  "HEAT"           = "lightpink4",
  "LIGHTNING"      = "paleturquoise4",
  "TORNADO"        = "bisque4",
  "TSTM WIND"      = "lavenderblush4"
)

## Bar chart of total fatalities, one bar per event type in top5_evtypes_fat,
## filled using the fat_inj_colour mapping.
## Axis labels are written as plain text (no underscores) so they read
## consistently with the legend and title.
fat <- ggplot(
  top5_evtypes_fat,
  aes(x = EVTYPE, y = FATALITIES, fill = EVTYPE)
) +
  geom_col() +
  scale_fill_manual(values = fat_inj_colour) +
  labs(
    x = "Event Type",
    y = "Fatalities Count",
    title = "Total Fatalities by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

## Bar chart of total injuries, one bar per event type in top5_evtypes_inj,
## filled using the fat_inj_colour mapping.
## Axis labels are written as plain text (no underscores) so they read
## consistently with the legend and title.
inj <- ggplot(
  top5_evtypes_inj,
  aes(x = EVTYPE, y = INJURIES, fill = EVTYPE)
) +
  geom_col() +
  scale_fill_manual(values = fat_inj_colour) +
  labs(
    x = "Event Type",
    y = "Injuries Count",
    title = "Total Injuries by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

## Stack the fatalities plot above the injuries plot (two rows) and display it
combined_fat_inj <- wrap_plots(fat, inj, nrow = 2)
combined_fat_inj

Impact of event types on the economy

## Event-type totals, computed separately for each damage measure
## (one data frame for property damage, one for crop damage).
## NOTE(review): PROPDMG and CROPDMG are summed exactly as stored; no
## magnitude/unit column is carried into stormdf2, so confirm the raw values
## share one unit before interpreting the totals as dollar amounts.
property <- aggregate(formula = PROPDMG ~ EVTYPE,
                      data = stormdf2,
                      FUN = sum)
crop <- aggregate(formula = CROPDMG ~ EVTYPE,
                  data = stormdf2,
                  FUN = sum)

Summary of total property damage across the top 5 event types

## Top 5 event types by total property damage, largest first.
## slice_max() sorts the result in descending order and, with
## with_ties = TRUE, keeps every row tied for fifth place.
top5_propdmg <- slice_max(
  property,
  order_by = PROPDMG,
  n = 5,
  with_ties = TRUE
)
## Five-number summary (plus mean) of the selected totals
summary(top5_propdmg)
##     EVTYPE             PROPDMG       
##  Length:5           Min.   : 876844  
##  Class :character   1st Qu.: 899938  
##  Mode  :character   Median :1336104  
##                     Mean   :1549064  
##                     3rd Qu.:1420175  
##                     Max.   :3212258

Summary of total crop damage across the top 5 event types

## Top 5 event types by total crop damage, largest first.
## slice_max() sorts the result in descending order and, with
## with_ties = TRUE, keeps every row tied for fifth place.
top5_cropdmg <- slice_max(
  crop,
  order_by = CROPDMG,
  n = 5,
  with_ties = TRUE
)
## Five-number summary (plus mean) of the selected totals
summary(top5_cropdmg)
##     EVTYPE             CROPDMG      
##  Length:5           Min.   :100019  
##  Class :character   1st Qu.:109203  
##  Mode  :character   Median :168038  
##                     Mean   :227211  
##                     3rd Qu.:179201  
##                     Max.   :579596
## Named vector mapping event type -> fill colour; supplied to
## scale_fill_manual() by the property and crop damage plots
prop_cropdmg_colour <- c(
  "HAIL"              = "honeydew4",
  "FLOOD"             = "maroon4",
  "FLASH FLOOD"       = "lightsteelblue4",
  "THUNDERSTORM WIND" = "plum4",
  "LIGHTNING"         = "paleturquoise4",
  "TORNADO"           = "bisque4",
  "TSTM WIND"         = "lavenderblush4"
)

## Bar chart of total property damage, one bar per event type in
## top5_propdmg, filled using the prop_cropdmg_colour mapping.
## Axis labels are written as plain text (no underscores) so they read
## consistently with the legend and title.
## NOTE(review): the "($)" unit assumes PROPDMG totals are in dollars;
## confirm against how the damage column was aggregated.
propdmg <- ggplot(
  top5_propdmg,
  aes(x = EVTYPE, y = PROPDMG, fill = EVTYPE)
) +
  geom_col() +
  scale_fill_manual(values = prop_cropdmg_colour) +
  labs(
    x = "Event Type",
    y = "Property Damage ($)",
    title = "Total Property Damage by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

## Bar chart of total crop damage, one bar per event type in top5_cropdmg,
## filled using the prop_cropdmg_colour mapping.
## Axis labels are written as plain text (no underscores) so they read
## consistently with the legend and title.
## NOTE(review): the "($)" unit assumes CROPDMG totals are in dollars;
## confirm against how the damage column was aggregated.
cropdmg <- ggplot(
  top5_cropdmg,
  aes(x = EVTYPE, y = CROPDMG, fill = EVTYPE)
) +
  geom_col() +
  scale_fill_manual(values = prop_cropdmg_colour) +
  labs(
    x = "Event Type",
    y = "Crop Damage ($)",
    title = "Total Crop Damage by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

## Stack the property plot above the crop plot (two rows) and display it
combined_prop_cropdmg <- wrap_plots(propdmg, cropdmg, nrow = 2)
combined_prop_cropdmg