Synopsis

This report analyzes the impact of severe weather events in the United States using data from the NOAA Storm Database. The analysis focuses on identifying (1) the weather events most harmful to population health, measured by fatalities and injuries, and (2) the weather events causing the greatest economic damage.

The dataset includes storm data from 1950 to November 2011, but early records may be incomplete. Data processing consists of filtering to events that recorded casualties or damage, selecting the relevant variables, and converting the damage exponent codes (H, K, M, B) into numeric multipliers; event type labels are used as recorded. The results are presented through summaries and visualizations to assist policymakers in prioritizing disaster preparedness.

Data Processing

Loading Required Libraries

# Load required libraries
library(knitr)
library(ggplot2)
library(dplyr)
library(readr)

Loading the Data

# Location of the bzip2-compressed NOAA storm data export
file_path <- "repdata_data_StormData.csv.bz2"

# Load the compressed CSV directly; character columns stay as plain strings
# rather than being converted to factors
storm_data <- read.csv(
  file = file_path,
  stringsAsFactors = FALSE
)

Inspecting the Dataset

# Show each column's type together with a sample of its values
utils::str(storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Preview the first six records
head(storm_data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Filtering Relevant Data

# Keep only the columns used by the health and economic analyses, then drop
# events that recorded no fatalities, injuries, property damage or crop damage
storm_filtered <- storm_data %>%
  select(
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  ) %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)

Results

Most Harmful Events to Population Health

# Total fatalities and injuries per event type, ordered from the most to the
# fewest fatalities
health_impact <- storm_filtered %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Fatalities))

# Show the ten event types with the most fatalities
head(health_impact, n = 10)
## # A tibble: 10 × 3
##    EVTYPE         Total_Fatalities Total_Injuries
##    <chr>                     <dbl>          <dbl>
##  1 TORNADO                    5633          91346
##  2 EXCESSIVE HEAT             1903           6525
##  3 FLASH FLOOD                 978           1777
##  4 HEAT                        937           2100
##  5 LIGHTNING                   816           5230
##  6 TSTM WIND                   504           6957
##  7 FLOOD                       470           6789
##  8 RIP CURRENT                 368            232
##  9 HIGH WIND                   248           1137
## 10 AVALANCHE                   224            170

Visualization: Top 10 Deadliest Events

# health_impact is already sorted by fatalities, so its first ten rows are the
# deadliest event types. head() returns however many rows exist (up to ten),
# whereas indexing with 1:10 asks for rows that may not be there.
top_health_events <- head(health_impact, 10)

# Bar chart of fatalities per event type, tallest bar first
ggplot(
  top_health_events,
  aes(x = reorder(EVTYPE, -Total_Fatalities), y = Total_Fatalities)
) +
  geom_col(fill = "red") +
  # Slant the event names so long labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top 10 Deadliest Weather Events in the U.S.",
    x = "Event Type",
    y = "Number of Fatalities"
  )

Events with the Greatest Economic Consequences

# Convert damage-exponent codes into numeric multipliers.
#
# Args:
#   exp: Character vector of exponent codes (for example the PROPDMGEXP or
#        CROPDMGEXP column). Matching is case-insensitive. Any length is
#        accepted, including zero.
#
# Returns:
#   An unnamed numeric vector the same length as `exp`: 1e9 for "B", 1e6 for
#   "M", 1e3 for "K", 1e2 for "H", and 1 for every other value (blank, NA, or
#   any unrecognised code).
convert_damage_exp <- function(exp) {
  codes <- c("B", "M", "K", "H")
  multipliers <- c(1e9, 1e6, 1e3, 1e2)

  # Look all codes up in one pass; unrecognised codes yield NA positions
  position <- match(toupper(exp), codes)
  result <- multipliers[position]

  # Default to 1 if unspecified or unrecognised
  result[is.na(position)] <- 1
  result
}

# Replace the exponent codes with numeric multipliers. vapply() guarantees a
# numeric vector even when there are zero rows (sapply() would return an empty
# list there and break the damage multiplication), and USE.NAMES = FALSE
# keeps the codes from being attached as element names.
storm_filtered$PROPDMGEXP <- vapply(
  storm_filtered$PROPDMGEXP, convert_damage_exp, numeric(1),
  USE.NAMES = FALSE
)
storm_filtered$CROPDMGEXP <- vapply(
  storm_filtered$CROPDMGEXP, convert_damage_exp, numeric(1),
  USE.NAMES = FALSE
)

# Scale each damage figure by its multiplier, then add property and crop
# damage to get the combined loss per event
storm_filtered <- mutate(
  storm_filtered,
  Property_Damage = PROPDMG * PROPDMGEXP,
  Crop_Damage = CROPDMG * CROPDMGEXP,
  Total_Economic_Damage = Property_Damage + Crop_Damage
)

# Combined property and crop damage per event type, costliest first
economic_impact <- storm_filtered %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Economic_Damage = sum(Total_Economic_Damage, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Economic_Damage))

# Show the ten event types with the largest total damage
head(economic_impact, n = 10)
## # A tibble: 10 × 2
##    EVTYPE            Total_Economic_Damage
##    <chr>                             <dbl>
##  1 FLOOD                     150319678257 
##  2 HURRICANE/TYPHOON          71913712800 
##  3 TORNADO                    57352114049.
##  4 STORM SURGE                43323541000 
##  5 HAIL                       18758222016.
##  6 FLASH FLOOD                17562129167.
##  7 DROUGHT                    15018672000 
##  8 HURRICANE                  14610229010 
##  9 RIVER FLOOD                10148404500 
## 10 ICE STORM                   8967041360

Visualization: Top 10 Costliest Events

# economic_impact is already sorted by total damage, so its first ten rows are
# the costliest event types. head() returns however many rows exist (up to
# ten), whereas indexing with 1:10 asks for rows that may not be there.
top_economic_events <- head(economic_impact, 10)

# Bar chart of total damage per event type, tallest bar first
ggplot(
  top_economic_events,
  aes(
    x = reorder(EVTYPE, -Total_Economic_Damage),
    y = Total_Economic_Damage
  )
) +
  geom_col(fill = "blue") +
  # Slant the event names so long labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top 10 Weather Events with Highest Economic Impact in the U.S.",
    x = "Event Type",
    y = "Total Economic Damage (USD)"
  )

Conclusion

The analysis of NOAA storm data shows that:

  1. Tornadoes are the most harmful events in terms of fatalities and injuries.
  2. Floods cause the greatest economic damage, followed by hurricanes/typhoons, tornadoes, and storm surges.
  3. Understanding these trends helps prioritize disaster preparedness and allocate resources efficiently.

Further improvements could involve: