Synopsis

This analysis explores the NOAA Storm Database to identify which types of severe weather events are most harmful to population health and which have the greatest economic consequences. The data cover events recorded across the United States from 1950 to 2011. We find that tornadoes are the leading cause of both fatalities and injuries. In terms of economic impact, floods cause the greatest property damage, while drought is the primary cause of crop damage.

Data Processing

First, we load the required libraries and the raw data directly from the compressed bz2 file.

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the raw storm data straight from the bz2-compressed CSV; abort early
# with a clear message when the file is not in the working directory.
if (file.exists("repdata_data_StormData.csv.bz2")) {
    storm_data <- read.csv("repdata_data_StormData.csv.bz2")
} else {
    stop("Data file not found!")
}
# Total fatalities and injuries per event type, deadliest event types first
health_impact <- arrange(
    summarise(
        group_by(storm_data, EVTYPE),
        Total_Fatalities = sum(FATALITIES),
        Total_Injuries = sum(INJURIES)
    ),
    desc(Total_Fatalities)
)

# Keep the ten event types with the highest fatality counts
top_health <- head(health_impact, n = 10)
# Convert damage-exponent codes (as found in PROPDMGEXP / CROPDMGEXP) into
# numeric multipliers.
#
# Args:
#   e: Character (or factor) vector of exponent codes. A single value works
#      too, so the function can still be used with sapply()/vapply().
#
# Returns:
#   Numeric vector the same length as `e`:
#     "h"/"H" -> 100, "k"/"K" -> 1e3, "m"/"M" -> 1e6, "b"/"B" -> 1e9,
#     a numeric string n -> 10^n, anything else (e.g. "", "+", "-", "?",
#     NA) -> 1.
getExp <- function(e) {
    # Work on the labels: as.numeric() on a factor would return the integer
    # level codes rather than the digits written in the data.
    codes <- as.character(e)
    mult <- rep(1, length(codes))

    # Non-numeric codes are expected here and are deliberately mapped to 1,
    # so only this single coercion is silenced; no other warning is hidden.
    power <- suppressWarnings(as.numeric(codes))
    is_power <- !is.na(power)
    mult[is_power] <- 10^power[is_power]

    # Letter codes take precedence over any numeric interpretation.
    mult[codes %in% c("h", "H")] <- 100
    mult[codes %in% c("k", "K")] <- 1000
    mult[codes %in% c("m", "M")] <- 1000000
    mult[codes %in% c("b", "B")] <- 1000000000

    mult
}

# Calculate actual damage costs
# Property-damage multiplier: convert each distinct PROPDMGEXP code once and
# map the result back onto the rows, instead of calling getExp() per row.
# Any warning getExp() raises is therefore emitted once per distinct code.
storm_data$prop_mult <- local({
    codes <- unique(storm_data$PROPDMGEXP)
    # vapply() guarantees exactly one numeric per code; USE.NAMES = FALSE
    # keeps the codes from being attached as element names on the column.
    multipliers <- vapply(codes, getExp, numeric(1), USE.NAMES = FALSE)
    multipliers[match(storm_data$PROPDMGEXP, codes)]
})
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Crop-damage multiplier: convert each distinct CROPDMGEXP code once and map
# the result back onto the rows, instead of calling getExp() per row.
# Any warning getExp() raises is therefore emitted once per distinct code.
storm_data$crop_mult <- local({
    codes <- unique(storm_data$CROPDMGEXP)
    # vapply() guarantees exactly one numeric per code; USE.NAMES = FALSE
    # keeps the codes from being attached as element names on the column.
    multipliers <- vapply(codes, getExp, numeric(1), USE.NAMES = FALSE)
    multipliers[match(storm_data$CROPDMGEXP, codes)]
})
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Per-record damage: reported amount times its multiplier, for property and
# crops separately, plus the combined total of the two.
storm_data <- mutate(
    storm_data,
    Total_Prop_Cost = PROPDMG * prop_mult,
    Total_Crop_Cost = CROPDMG * crop_mult,
    Total_Economic_Cost = Total_Prop_Cost + Total_Crop_Cost
)

# Combined property + crop cost per event type, costliest event types first
econ_impact <- arrange(
    summarise(
        group_by(storm_data, EVTYPE),
        Total_Cost = sum(Total_Economic_Cost)
    ),
    desc(Total_Cost)
)

# Keep the ten costliest event types
top_econ <- head(econ_impact, n = 10)
# Bar chart of fatalities for the event types in top_health; bars are ordered
# from the highest to the lowest fatality count.
ggplot(
    data = top_health,
    mapping = aes(x = reorder(EVTYPE, -Total_Fatalities), y = Total_Fatalities)
) +
    geom_bar(stat = "identity", fill = "red") +
    ggtitle("Top 10 Most Harmful Weather Events (Fatalities)") +
    xlab("Event Type") +
    ylab("Total Fatalities") +
    # Tilt the event-type labels so long names do not overlap
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of total cost for the event types in top_econ; bars are ordered
# from the highest to the lowest cost, and the y axis is scaled to billions.
ggplot(
    data = top_econ,
    mapping = aes(x = reorder(EVTYPE, -Total_Cost), y = Total_Cost / 10^9)
) +
    geom_bar(stat = "identity", fill = "darkgreen") +
    ggtitle("Top 10 Events with Greatest Economic Impact") +
    xlab("Event Type") +
    ylab("Total Cost (Billions of USD)") +
    # Tilt the event-type labels so long names do not overlap
    theme(axis.text.x = element_text(angle = 45, hjust = 1))