Synopsis

This report analyzes the NOAA Storm Database to determine which severe weather events are most harmful to population health and which cause the greatest economic consequences in the United States from 1950 to 2011. Data were read directly from the compressed raw CSV file, property and crop damage figures were converted to dollar amounts using their exponent codes, and the results were aggregated by event type as recorded in the database. The analysis shows that tornadoes cause the most harm to human health (fatalities plus injuries), while floods cause the highest economic damage. The methodology is fully reproducible, and the results are visualized with bar plots.

Data Processing

Loading Data

library(dplyr)
library(ggplot2)
library(tidyr)

# Show what is already present in the working directory
list.files()
## [1] "activity.csv"      "activity.zip"      "storm_cache"      
## [4] "storm.Rmd"         "StormData.csv.bz2"
# Single source of truth for the archive name and its download location, so
# the existence check, the download target and the read below cannot drift
# apart.
storm_file <- "StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Fetch the archive only when it is missing; mode = "wb" keeps the bzip2
# payload intact on platforms that would otherwise translate line endings.
if (!file.exists(storm_file)) {
  download.file(storm_url, storm_file, mode = "wb")
}

# read.csv() decompresses the .bz2 file transparently; keep text columns as
# character vectors rather than factors.
storm <- read.csv(storm_file, stringsAsFactors = FALSE)
# Five-number summaries (plus mean) of the two health-outcome columns
summary(storm[["FATALITIES"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.0000   0.0000   0.0000   0.0168   0.0000 583.0000
summary(storm[["INJURIES"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##    0.0000    0.0000    0.0000    0.1557    0.0000 1700.0000
# Peek at the first 20 distinct event-type labels
head(unique(storm[["EVTYPE"]]), n = 20)
##  [1] "TORNADO"                   "TSTM WIND"                
##  [3] "HAIL"                      "FREEZING RAIN"            
##  [5] "SNOW"                      "ICE STORM/FLASH FLOOD"    
##  [7] "SNOW/ICE"                  "WINTER STORM"             
##  [9] "HURRICANE OPAL/HIGH WINDS" "THUNDERSTORM WINDS"       
## [11] "RECORD COLD"               "HURRICANE ERIN"           
## [13] "HURRICANE OPAL"            "HEAVY RAIN"               
## [15] "LIGHTNING"                 "THUNDERSTORM WIND"        
## [17] "DENSE FOG"                 "RIP CURRENT"              
## [19] "THUNDERSTORM WINS"         "FLASH FLOOD"
# Fatalities and injuries summed per event type, plus their combined total,
# ordered from most to least harmful.
health_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(TOTAL_HARM = FATALITIES + INJURIES) %>%
  arrange(desc(TOTAL_HARM))

# Keep the ten most harmful event types
top_health <- head(health_impact, n = 10)
# Convert damage figures to plain amounts by applying a magnitude code.
#
# Each damage value comes as a number (e.g. PROPDMG) paired with a separate
# code column (e.g. PROPDMGEXP). This helper scales the number by the
# multiplier that the code stands for.
#
# Args:
#   damage: numeric vector of damage amounts.
#   exp:    vector of magnitude codes, matched case-insensitively:
#           "H" = 100, "K" = 1,000, "M" = 1e6, "B" = 1e9.
#
# Returns:
#   Numeric vector `damage * multiplier`. Every code not listed above
#   (empty string, NA, or anything else) uses a multiplier of 1.
#
# NOTE(review): any digit or symbol codes in the raw data therefore fall
# through to multiplier 1 -- confirm that this is the intended treatment.
convert_damage <- function(damage, exp) {
  known_multipliers <- c(H = 100, K = 1000, M = 1e6, B = 1e9)

  # Name-based lookup yields NA for any code that is not a known name
  # (including "" and NA); those become the default multiplier of 1.
  multiplier <- unname(known_multipliers[toupper(exp)])
  multiplier[is.na(multiplier)] <- 1

  damage * multiplier
}

# Property plus crop damage per record (after applying the exponent codes),
# summed per event type and ordered from costliest to cheapest.
economic_impact <- storm %>%
  mutate(
    TOTAL_DAMAGE = convert_damage(PROPDMG, PROPDMGEXP) +
      convert_damage(CROPDMG, CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_ECONOMIC = sum(TOTAL_DAMAGE, na.rm = TRUE)) %>%
  arrange(desc(TOTAL_ECONOMIC))

# Keep the ten costliest event types
top_economic <- head(economic_impact, n = 10)
top_health
## # A tibble: 10 x 4
##    EVTYPE            FATALITIES INJURIES TOTAL_HARM
##    <chr>                  <dbl>    <dbl>      <dbl>
##  1 TORNADO                 5633    91346      96979
##  2 EXCESSIVE HEAT          1903     6525       8428
##  3 TSTM WIND                504     6957       7461
##  4 FLOOD                    470     6789       7259
##  5 LIGHTNING                816     5230       6046
##  6 HEAT                     937     2100       3037
##  7 FLASH FLOOD              978     1777       2755
##  8 ICE STORM                 89     1975       2064
##  9 THUNDERSTORM WIND        133     1488       1621
## 10 WINTER STORM             206     1321       1527
# Bar chart of combined fatalities and injuries; event types are ordered on
# the x axis from the largest total to the smallest.
ggplot(top_health) +
  geom_col(
    aes(x = reorder(EVTYPE, -TOTAL_HARM), y = TOTAL_HARM),
    fill = "firebrick"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top 10 Most Harmful Weather Events to Population Health",
    x = "Event Type",
    y = "Total Harm"
  )

top_economic
## # A tibble: 10 x 2
##    EVTYPE            TOTAL_ECONOMIC
##    <chr>                      <dbl>
##  1 FLOOD              150319678257 
##  2 HURRICANE/TYPHOON   71913712800 
##  3 TORNADO             57352114049.
##  4 STORM SURGE         43323541000 
##  5 HAIL                18758222016.
##  6 FLASH FLOOD         17562129167.
##  7 DROUGHT             15018672000 
##  8 HURRICANE           14610229010 
##  9 RIVER FLOOD         10148404500 
## 10 ICE STORM            8967041360
# Bar chart of total damage, rescaled to billions of dollars for the y axis;
# event types are ordered on the x axis from the largest total to the smallest.
ggplot(top_economic) +
  geom_col(
    aes(x = reorder(EVTYPE, -TOTAL_ECONOMIC), y = TOTAL_ECONOMIC / 1e9),
    fill = "steelblue"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Damage (Billions USD)"
  )