Synopsis

This analysis explores the impacts of severe weather events across the United States using data from the NOAA Storm Database. The primary focus is to identify the event types most detrimental to public health and those with the greatest economic consequences. Key metrics include fatalities, injuries, property damage, and crop damage. Data preprocessing involved standardizing damage estimates and filtering for relevant columns. Analysis shows that tornadoes are the leading cause of fatalities and injuries, while floods and hurricanes result in the highest combined property and crop damage. Visualizations highlight the disparities in impact across event types and provide insights for prioritizing mitigation efforts. This project emphasizes the importance of clean and reproducible analysis to support informed decision-making. Findings can guide policymakers and emergency response teams in allocating resources effectively. The report uses R for data processing and visualization, ensuring transparency and replicability.

Data Processing

# Download the compressed NOAA storm data (only once) and load it
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data_file <- "StormData.csv.bz2"
# Re-use an existing local copy so repeated runs do not fetch the archive again
if (!file.exists(data_file)) {
  # mode = "wb" requests a binary transfer; the archive is not a text file
  download.file(url, data_file, mode = "wb")
}
# read.csv() reads the bzip2-compressed file directly from its path
storm_data <- read.csv(data_file)
#storm_data
# List the variables available in the raw data set

names(storm_data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Keep only the seven columns used in the rest of the analysis
storm_data <- storm_data[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]

# Translate damage exponent codes into numeric multipliers.
#
# exp: vector of exponent codes; matching is case-insensitive.
# Returns a numeric vector the same length as `exp`:
#   "K" -> 1,000; "M" -> 1,000,000; "B" -> 1,000,000,000; anything else -> 1.
# Missing codes stay NA.
# NOTE(review): every unrecognised code (including "") becomes a multiplier
# of 1 -- confirm that is the intended treatment for the raw data.
convert_exp <- function(exp) {
  code <- toupper(exp)
  # Position 4 is the fall-through multiplier for codes that are not K/M/B
  slot <- match(code, c("K", "M", "B"), nomatch = 4L)
  multiplier <- c(1000, 1e6, 1e9, 1)[slot]
  # A missing code must yield a missing multiplier, not the fall-through
  multiplier[is.na(code)] <- NA
  multiplier
}
# Scale each raw damage figure by the multiplier of its exponent code
storm_data[["PROPDMG"]] <- with(storm_data, PROPDMG * convert_exp(PROPDMGEXP))
storm_data[["CROPDMG"]] <- with(storm_data, CROPDMG * convert_exp(CROPDMGEXP))

Public Health Impact Analysis

#Aggregate data to identify events with the most significant impacts on public health:

# Total fatalities and injuries for every event type
health_impact <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = storm_data,
  FUN = sum
)

# Rank by fatalities, then by injuries, largest first
health_impact <- health_impact[
  with(health_impact, order(-FATALITIES, -INJURIES)),
]

# Show the ten highest-ranked event types
head(health_impact, n = 10)
##             EVTYPE FATALITIES INJURIES
## 834        TORNADO       5633    91346
## 130 EXCESSIVE HEAT       1903     6525
## 153    FLASH FLOOD        978     1777
## 275           HEAT        937     2100
## 464      LIGHTNING        816     5230
## 856      TSTM WIND        504     6957
## 170          FLOOD        470     6789
## 585    RIP CURRENT        368      232
## 359      HIGH WIND        248     1137
## 19       AVALANCHE        224      170

Economic Impact Analysis

#Aggregate property and crop damage data to identify events with the greatest economic costs:

# Total property plus crop damage for every event type
econ_impact <- setNames(
  aggregate(PROPDMG + CROPDMG ~ EVTYPE, data = storm_data, FUN = sum),
  c("EVTYPE", "TOTAL_DAMAGE")
)

# Rank from the largest total damage downwards
econ_impact <- econ_impact[with(econ_impact, order(-TOTAL_DAMAGE)), ]

# Show the ten costliest event types
head(econ_impact, n = 10)
##                EVTYPE TOTAL_DAMAGE
## 170             FLOOD 150319678257
## 411 HURRICANE/TYPHOON  71913712800
## 834           TORNADO  57352114049
## 670       STORM SURGE  43323541000
## 244              HAIL  18758221521
## 153       FLASH FLOOD  17562129167
## 95            DROUGHT  15018672000
## 402         HURRICANE  14610229010
## 590       RIVER FLOOD  10148404500
## 427         ICE STORM   8967041360

Results

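The two bar charts below summarize the findings: the first shows the ten event types with the most fatalities, and the second shows the ten event types with the greatest combined property and crop damage.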

library(ggplot2)
library(ggplot2)
# Bar chart of fatalities for the first ten rows of health_impact,
# with bars ordered from most to fewest fatalities
ggplot(
  health_impact[seq_len(10), ],
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "red") +
  ggtitle(
    "Capstone Insight: Events Most Harmful to Public Health",
    subtitle = "Tornadoes lead to the highest fatalities across the US"
  ) +
  xlab("Event Type") +
  ylab("Fatalities") +
  # Slant the event-type labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)

# Bar chart of total damage for the first ten rows of econ_impact
# (expects econ_impact to hold property + crop damage per event type, sorted)
ggplot(
  econ_impact[seq_len(10), ],
  aes(x = reorder(EVTYPE, -TOTAL_DAMAGE), y = TOTAL_DAMAGE)
) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    x = "Event Type",
    y = "Total Damage ($)",
    title = "Capstone Analysis: Economic Impact of Weather Events",
    subtitle = "Top 10 Event Types by Property and Crop Damage"
  ) +
  theme(
    # Bold, larger title with a slightly smaller subtitle
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    # Slant the event-type labels so they do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Note that the R code used to generate each plot is shown above it so that the figures can be reproduced.