### Synopsis

This analysis examines the NOAA Storm Database to identify which weather events are most harmful to population health and have the greatest economic consequences in the United States. The data spans from 1950 to 2011, with more complete records in recent years. After downloading and processing the raw data, we found that tornadoes cause the most injuries and fatalities, while floods and hurricanes/typhoons result in the highest economic damages. The analysis uses R for data processing, aggregation, and visualization, with results presented through tables and plots to inform resource allocation decisions for severe weather preparedness.

### Data Processing

# Load libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(data.table)
## Warning: package 'data.table' was built under R version 4.5.3
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.5.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Set URL for the data file and the local path it is cached under
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
archive_path <- "StormData.csv.bz2"

# Download the file only when no local copy exists. The transfer is written to
# a ".part" file and renamed on success, so an interrupted download can never
# satisfy the file.exists() check on a later run.
if (!file.exists(archive_path)) {
  partial_path <- paste0(archive_path, ".part")
  status <- download.file(data_url, destfile = partial_path, method = "curl")
  if (status != 0 || !file.rename(partial_path, archive_path)) {
    unlink(partial_path)
    stop("Download of ", data_url, " failed (status ", status, ")",
         call. = FALSE)
  }
}

# Read the compressed CSV file.
# A parse warning such as "EOF within quoted string" means read.csv() stopped
# before the end of the data, which usually indicates a truncated or corrupt
# archive. Every total computed later would then be silently too small, so the
# warning is promoted to an error instead of being ignored.
storm_data <- tryCatch(
  read.csv(archive_path, stringsAsFactors = FALSE),
  warning = function(w) {
    stop("Problem reading ", archive_path, ": ", conditionMessage(w),
         "\nDelete the file and re-run to download a fresh copy.",
         call. = FALSE)
  }
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Report the size of the loaded table (rows, then columns)
cat("Dataset dimensions:", dim(storm_data), "\n")
## Dataset dimensions: 749555 37
cat("Number of columns:", dim(storm_data)[2], "\n")
## Number of columns: 37
cat("Number of rows:", dim(storm_data)[1], "\n")
## Number of rows: 749555
# Normalise event labels: strip surrounding whitespace, then upper-case
storm_data[["EVTYPE"]] <- storm_data[["EVTYPE"]] %>%
  trimws() %>%
  toupper()

# Convert recorded damage figures to dollar amounts using their exponent codes.
#
# Vectorized: `value` and `exponent` may be single values (as when called
# through mapply) or whole columns of matching length.
#
# Args:
#   value:    numeric damage mantissa(s).
#   exponent: exponent code(s), matched case-insensitively:
#               "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#               a single digit d -> 10^d,
#               anything else ("", "?", "-", "+", ...) -> 1.
# Returns:
#   Numeric vector of value * multiplier, with 0 wherever either input is NA.
convert_damage <- function(value, exponent) {
  # Record missing inputs before any coercion so they can be zeroed at the end.
  missing_input <- is.na(value) | is.na(exponent)

  code <- toupper(as.character(exponent))
  multiplier <- rep(1, length(code))

  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- code %in% names(letter_scale)
  multiplier[is_letter] <- unname(letter_scale[code[is_letter]])

  # Digit codes are powers of ten. "1" and "9" used to fall through to a
  # multiplier of 1, unlike "2".."8"; all digits are now treated alike
  # ("0" still yields 10^0 = 1).
  is_digit <- code %in% as.character(0:9)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  damage <- value * multiplier
  damage[missing_input] <- 0
  damage
}

# Derive dollar-valued damage columns and the combined impact measures.
# convert_damage() is applied pairwise to each (amount, exponent) row.
storm_data <- storm_data %>%
  mutate(
    # Property damage in dollars
    PROPDMG_NUM = mapply(convert_damage, PROPDMG, PROPDMGEXP),
    # Crop damage in dollars
    CROPDMG_NUM = mapply(convert_damage, CROPDMG, CROPDMGEXP),
    # Totals used by the rankings further down
    TOTAL_ECONOMIC_DMG = PROPDMG_NUM + CROPDMG_NUM,
    TOTAL_HEALTH_IMPACT = FATALITIES + INJURIES
  )

cat("Data cleaning completed.\n")
## Data cleaning completed.
### Results



``` r
# Casualties per event type, ranked by fatalities and injuries combined
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    total_health_impact = sum(TOTAL_HEALTH_IMPACT, na.rm = TRUE),
    event_count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(total_health_impact))

# Keep the ten most harmful event types
top_health_events <- health_impact %>%
  slice_head(n = 10)

# Show the ranking without the event_count column
cat("Top 10 Events by Health Impact:\n")
## Top 10 Events by Health Impact:
top_health_events %>%
  select(EVTYPE, total_fatalities, total_injuries, total_health_impact) %>%
  print()
## # A tibble: 10 × 4
##    EVTYPE            total_fatalities total_injuries total_health_impact
##    <chr>                        <dbl>          <dbl>               <dbl>
##  1 TORNADO                       4989          84141               89130
##  2 EXCESSIVE HEAT                1821           6312                8133
##  3 TSTM WIND                      504           6957                7461
##  4 FLOOD                          350           6642                6992
##  5 LIGHTNING                      727           4657                5384
##  6 FLASH FLOOD                    812           1543                2355
##  7 HEAT                           804           1436                2240
##  8 ICE STORM                       89           1974                2063
##  9 WINTER STORM                   198           1315                1513
## 10 HURRICANE/TYPHOON               64           1275                1339
# Long format: one row per (event type, casualty kind) for a dodged bar chart
plot_health_data <- pivot_longer(
  top_health_events,
  cols = c(total_fatalities, total_injuries),
  names_to = "impact_type",
  values_to = "count"
)

# Fatalities and injuries side by side, event types ordered by descending count
health_plot <- ggplot(
  plot_health_data,
  aes(x = reorder(EVTYPE, -count), y = count, fill = impact_type)
) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("total_fatalities" = "#E74C3C", "total_injuries" = "#F39C12"),
    labels = c("Fatalities", "Injuries")
  ) +
  labs(
    title = "Top 10 Weather Events by Population Health Impact",
    x = "Event Type",
    y = "Number of People Affected",
    fill = "Impact Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

print(health_plot)

# Dollar damage per event type, ranked by property and crop damage combined
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_property_dmg = sum(PROPDMG_NUM, na.rm = TRUE),
    total_crop_dmg = sum(CROPDMG_NUM, na.rm = TRUE),
    total_economic_dmg = sum(TOTAL_ECONOMIC_DMG, na.rm = TRUE),
    event_count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(total_economic_dmg))

# Keep the ten costliest event types and add the total expressed in billions
top_economic_events <- economic_impact %>%
  slice_head(n = 10) %>%
  mutate(total_economic_dmg_billions = total_economic_dmg / 1e9)

# Show the ranking
cat("\nTop 10 Events by Economic Impact (in Billions USD):\n")
## 
## Top 10 Events by Economic Impact (in Billions USD):
top_economic_events %>%
  select(EVTYPE, total_property_dmg, total_crop_dmg,
         total_economic_dmg_billions) %>%
  print()
## # A tibble: 10 × 4
##    EVTYPE            total_property_dmg total_crop_dmg total_economic_dmg_bill…¹
##    <chr>                          <dbl>          <dbl>                     <dbl>
##  1 FLOOD                  133303922657      4372682450                    138.  
##  2 HURRICANE/TYPHOON       69305840000      2607872800                     71.9 
##  3 TORNADO                 45448334926.      337521270                     45.8 
##  4 STORM SURGE             43323536000            5000                     43.3 
##  5 FLASH FLOOD             14159071408.     1171142100                     15.3 
##  6 DROUGHT                  1045942000     13921797000                     15.0 
##  7 HURRICANE               11857799010      2731400000                     14.6 
##  8 HAIL                    10469407963.     2490646473                     13.0 
##  9 RIVER FLOOD              5118945500      5029459000                     10.1 
## 10 ICE STORM                3929835560      5022033500                      8.95
## # ℹ abbreviated name: ¹​total_economic_dmg_billions
# Prepare for plotting: one row per (event type, damage kind)
plot_economic_data <- top_economic_events %>%
  pivot_longer(cols = c(total_property_dmg, total_crop_dmg),
               names_to = "damage_type",
               values_to = "damage_amount")

# Convert to billions
plot_economic_data$damage_amount_billions <-
  plot_economic_data$damage_amount / 1000000000

# Create plot: stacked property + crop damage per event type
economic_plot <- ggplot(plot_economic_data,
                        aes(x = reorder(EVTYPE, -damage_amount_billions),
                            y = damage_amount_billions,
                            fill = damage_type)) +
  geom_bar(stat = "identity", position = "stack") +
  theme_minimal() +
  labs(
    title = "Top 10 Weather Events by Economic Impact",
    x = "Event Type",
    y = "Total Damage (Billions USD)",
    fill = "Damage Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5)) +
  # Unnamed labels are matched to the breaks by position, and the default
  # break order is alphabetical ("total_crop_dmg" first). Without explicit
  # breaks the two legend labels end up swapped, so the breaks are pinned to
  # the same order as the labels.
  scale_fill_manual(values = c("total_property_dmg" = "#3498DB",
                               "total_crop_dmg" = "#27AE60"),
                    breaks = c("total_property_dmg", "total_crop_dmg"),
                    labels = c("Property Damage", "Crop Damage"))

print(economic_plot)

# Create panel plot: compact variants of the two figures for side-by-side use
health_plot_panel <- health_plot +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  ggtitle("A) Population Health Impact")

economic_plot_panel <- economic_plot +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  ggtitle("B) Economic Impact")

# Arrange plots side by side. grid.arrange() draws the panel itself and
# returns the assembled gtable invisibly; print()-ing that object would only
# dump its layout description as text, so no print() call follows.
panel_plot <- gridExtra::grid.arrange(health_plot_panel, economic_plot_panel,
                                      ncol = 2)
# Headline figures: the two leading event types in each ranking
cat("\n=== SUMMARY ===\n")
## 
## === SUMMARY ===
cat("1. Most harmful events to population health:\n")
## 1. Most harmful events to population health:
for (rank in seq_len(2)) {
  cat("   -", top_health_events[["EVTYPE"]][rank], "caused",
      top_health_events[["total_health_impact"]][rank],
      "total health impacts\n")
}
##    - TORNADO caused 89130 total health impacts
##    - EXCESSIVE HEAT caused 8133 total health impacts
cat("\n2. Events with greatest economic consequences:\n")
## 
## 2. Events with greatest economic consequences:
for (rank in seq_len(2)) {
  cat("   -", top_economic_events[["EVTYPE"]][rank], "caused $",
      round(top_economic_events[["total_economic_dmg_billions"]][rank], 2),
      "billion in damages\n")
}
##    - FLOOD caused $ 137.68 billion in damages
##    - HURRICANE/TYPHOON caused $ 71.91 billion in damages

```

### Conclusion

This analysis of the NOAA Storm Database reveals clear patterns in weather-related impacts across the United States. Tornadoes emerge as the most harmful event type for population health, causing the highest combined fatalities and injuries. For economic consequences, floods and hurricanes/typhoons result in the greatest damages, with property damage constituting the majority of economic losses. These findings can inform prioritization of resources for severe weather preparedness, suggesting that tornado warning systems and flood mitigation infrastructure should receive significant attention from government and municipal managers responsible for public safety and economic stability.