This analysis explores the NOAA Storm Database to identify the types of severe weather events that cause the greatest harm to population health and result in the largest economic damages in the United States. The results show that tornadoes lead in total health impact with nearly 97,000 combined fatalities and injuries, followed by excessive heat events with about 8,400. Economically, floods cause the highest damages, estimated at around 150 billion US dollars, followed by hurricanes and typhoons at about 72 billion dollars, and tornadoes at about 57 billion dollars. These findings highlight the significant risks posed by these weather events to both human health and the economy.

library(dplyr)
library(ggplot2)

# Read the compressed storm CSV (path is relative to the working directory),
# keeping text columns as character vectors rather than factors.
storm_data <- read.csv(
  file = "repdata-data-StormData.csv.bz2",
  stringsAsFactors = FALSE
)

# Preview the first six records as a sanity check on the import.
head(storm_data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Clean and preprocess event types and damage exponents

# Standardize event-type labels: strip surrounding whitespace, then upper-case.
storm_data <- storm_data %>%
  mutate(EVTYPE = toupper(trimws(EVTYPE)))

# Replace common typos or variants manually if needed (optional)

#' Convert damage exponent codes to numeric multipliers
#'
#' Vectorized: accepts a single code or a whole column of codes. It can still
#' be called one element at a time (e.g. through `sapply()`/`vapply()`).
#'
#' Mapping:
#' * `H`/`h` -> 1e2, `K`/`k` -> 1e3, `M`/`m` -> 1e6, `B`/`b` -> 1e9
#' * all-digit codes `n` -> `10^n`
#' * anything else (blank, `NA`, symbols) -> 1
#'
#' NOTE(review): treating digit codes as powers of ten and `H` as hundreds is
#' an interpretation of the "EXP" column name; the official documentation only
#' describes the K/M/B letters -- confirm before relying on these rows.
#' Returning the bare digit instead would turn a code of "0" into a multiplier
#' of 0 and wipe out the recorded damage, so it is not used here.
#'
#' @param exp Character (or coercible) vector of exponent codes.
#' @return Numeric vector of multipliers, the same length as `exp`.
convert_exp <- function(exp) {
  codes <- as.character(exp)

  letter_multipliers <- c(
    H = 1e2, h = 1e2,
    K = 1e3, k = 1e3,
    M = 1e6, m = 1e6,
    B = 1e9, b = 1e9
  )

  # Start from the default multiplier and overwrite the recognised codes.
  multipliers <- rep(1, length(codes))

  is_letter <- codes %in% names(letter_multipliers)
  multipliers[is_letter] <- unname(letter_multipliers[codes[is_letter]])

  # grepl() yields FALSE for NA, so missing codes keep the default of 1.
  is_digit <- grepl("^[0-9]+$", codes)
  multipliers[is_digit] <- 10^as.numeric(codes[is_digit])

  multipliers
}

# Replace the exponent codes with their numeric multipliers.
# vapply() pins the result to exactly one double per row (sapply() would
# silently change type on unexpected input), and USE.NAMES = FALSE avoids
# attaching the code strings as names to the result.
# Note: the code columns are overwritten in place, so re-running this step
# without reloading the data would re-convert already-numeric values.
storm_data$PROPDMGEXP <- vapply(
  storm_data$PROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
storm_data$CROPDMGEXP <- vapply(
  storm_data$CROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)

# Derive the per-event totals:
#   PROPDMG_TOTAL / CROPDMG_TOTAL - damage value scaled by its multiplier
#   TOTAL_DAMAGE                  - property plus crop damage
#   HEALTH_IMPACT                 - fatalities plus injuries
storm_data <- storm_data %>%
  mutate(
    PROPDMG_TOTAL = PROPDMG * PROPDMGEXP,
    CROPDMG_TOTAL = CROPDMG * CROPDMGEXP,
    TOTAL_DAMAGE = PROPDMG_TOTAL + CROPDMG_TOTAL,
    HEALTH_IMPACT = FATALITIES + INJURIES
  )

# Quick look at the derived columns
storm_data %>%
  select(EVTYPE, PROPDMG_TOTAL, CROPDMG_TOTAL, TOTAL_DAMAGE, HEALTH_IMPACT) %>%
  head()
##    EVTYPE PROPDMG_TOTAL CROPDMG_TOTAL TOTAL_DAMAGE HEALTH_IMPACT
## 1 TORNADO         25000             0        25000            15
## 2 TORNADO          2500             0         2500             0
## 3 TORNADO         25000             0        25000             2
## 4 TORNADO          2500             0         2500             2
## 5 TORNADO          2500             0         2500             2
## 6 TORNADO          2500             0         2500             6

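We converted all event types to uppercase and trimmed any whitespace to standardize the data. Variant labels for the same phenomenon (for example, TSTM WIND and THUNDERSTORM WIND, HEAT and EXCESSIVE HEAT, or HURRICANE and HURRICANE/TYPHOON) were not merged, so they appear as separate rows in the results below.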

We transformed the damage exponents (e.g., 'K', 'M', 'B') into numeric multipliers to calculate actual dollar damages for property and crops.

We then computed the total economic damage by summing property and crop damages.

Additionally, we created a total health impact variable by adding fatalities and injuries.

Results: Most Harmful Events to Population Health

We now analyze which types of events caused the greatest harm in terms of fatalities and injuries.

# Total casualties (fatalities + injuries) per event type, largest first
health_summary <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Health_Impact = sum(HEALTH_IMPACT, na.rm = TRUE)) %>%
  arrange(desc(Total_Health_Impact))

# Ten event types with the highest casualty totals
health_summary %>%
  head(n = 10)
## # A tibble: 10 × 2
##    EVTYPE            Total_Health_Impact
##    <chr>                           <dbl>
##  1 TORNADO                         96979
##  2 EXCESSIVE HEAT                   8428
##  3 TSTM WIND                        7461
##  4 FLOOD                            7259
##  5 LIGHTNING                        6046
##  6 HEAT                             3037
##  7 FLASH FLOOD                      2755
##  8 ICE STORM                        2064
##  9 THUNDERSTORM WIND                1621
## 10 WINTER STORM                     1527
# Horizontal bar chart of the ten event types with the largest casualty totals
top_health_events <- health_summary %>%
  head(n = 10)

ggplot(
  top_health_events,
  aes(x = reorder(EVTYPE, Total_Health_Impact), y = Total_Health_Impact)
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 10 Most Harmful Weather Events by Health Impact",
    x = "Event Type",
    y = "Total Fatalities + Injuries"
  ) +
  theme_minimal()

Results: Events with Greatest Economic Consequences

Next, we identify which events caused the greatest economic damage.

# Total damage (property + crop, in dollars) per event type, largest first
economic_summary <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Economic_Damage = sum(TOTAL_DAMAGE, na.rm = TRUE)) %>%
  arrange(desc(Total_Economic_Damage))

# Ten event types with the highest damage totals
economic_summary %>%
  head(n = 10)
## # A tibble: 10 × 2
##    EVTYPE            Total_Economic_Damage
##    <chr>                             <dbl>
##  1 FLOOD                     150319678257 
##  2 HURRICANE/TYPHOON          71913712800 
##  3 TORNADO                    57352114164 
##  4 STORM SURGE                43323541000 
##  5 HAIL                       18758221385 
##  6 FLASH FLOOD                17562179394.
##  7 DROUGHT                    15018672000 
##  8 HURRICANE                  14610229010 
##  9 RIVER FLOOD                10148404500 
## 10 ICE STORM                   8967041310
# Horizontal bar chart of the ten costliest event types, scaled to billions
top_economic_events <- economic_summary %>%
  head(n = 10)

ggplot(
  top_economic_events,
  aes(
    x = reorder(EVTYPE, Total_Economic_Damage),
    y = Total_Economic_Damage / 1e9
  )
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Economic Damage (Billion USD)",
    x = "Event Type",
    y = "Total Damage (Billion USD)"
  ) +
  theme_minimal()