Synopsis

Severe weather events can have significant impacts on human health and the economy. This analysis explores the NOAA Storm Database to determine (1) which types of weather events cause the most harm to population health (injuries and fatalities), and (2) which types of weather events have the greatest economic consequences (property and crop damage).

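The data are processed by keeping only the relevant columns, converting event types to upper case so that spellings differing only in letter case are grouped together, and converting the damage exponent codes into numeric multipliers. Fatalities, injuries, and property and crop damage are then totalled per event type, and the ten most severe event types in terms of health and economic impact are shown in summary tables and bar plots.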

Data Processing

Load Required Libraries

library(dplyr)
library(ggplot2)
library(readr)

Download the data if not already present, then read it

# Local cache name for the compressed storm dataset, and the URL it is
# fetched from.
file_name <- "StormData.csv.bz2"
file_url <- "https://d396qusza40orc.cloudfront.net/repdata/data/StormData.csv.bz2"

# Only hit the network when no local copy exists; "wb" writes the archive
# in binary mode.
if (!file.exists(file_name)) {
  download.file(url = file_url, destfile = file_name, mode = "wb")
}

# Load the compressed CSV into a tibble.
storm_data <- read_csv(file = file_name)
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl  (1): COUNTYENDN
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Select necessary columns

# Keep only the event type plus the casualty and damage columns, and
# upper-case the event type so that spellings differing only in letter case
# fall into the same group.
storm_data <- storm_data %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(EVTYPE = toupper(EVTYPE))
# Per event type: total fatalities, total injuries (missing values ignored)
# and their sum, sorted from the largest combined count down.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarize(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(Total_Health_Impact = Total_Fatalities + Total_Injuries) %>%
  arrange(desc(Total_Health_Impact))

# The ten event types with the highest combined fatalities and injuries
top_health_events <- health_impact %>%
  head(10)
print(top_health_events)
## # A tibble: 10 × 4
##    EVTYPE            Total_Fatalities Total_Injuries Total_Health_Impact
##    <chr>                        <dbl>          <dbl>               <dbl>
##  1 TORNADO                       5633          91346               96979
##  2 EXCESSIVE HEAT                1903           6525                8428
##  3 TSTM WIND                      504           6957                7461
##  4 FLOOD                          470           6789                7259
##  5 LIGHTNING                      816           5230                6046
##  6 HEAT                           937           2100                3037
##  7 FLASH FLOOD                    978           1777                2755
##  8 ICE STORM                       89           1975                2064
##  9 THUNDERSTORM WIND              133           1488                1621
## 10 WINTER STORM                   206           1321                1527
#' Convert damage-exponent codes to numeric multipliers
#'
#' Vectorized: accepts a vector of codes of any length (including zero) and
#' returns one multiplier per element, so it can be called on a whole column
#' or element-by-element.
#'
#' Mapping (letters are matched case-insensitively):
#' * "H" -> 100, "K" -> 1,000, "M" -> 1,000,000, "B" -> 1,000,000,000
#' * a string made up only of digits `d` -> 10^d
#' * anything else (NA, empty string, "+", "-", "?", other text) -> 1
#'
#' @param exp A vector of exponent codes. Character is expected; factors and
#'   numbers are converted with `as.character()` first so that a factor's
#'   internal level codes are never mistaken for the exponent.
#' @return A numeric vector the same length as `exp`.
convert_exp <- function(exp) {
  codes <- toupper(as.character(exp))

  # Default multiplier for unrecognised or missing codes.
  multiplier <- rep(1, length(codes))

  letter_values <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- codes %in% names(letter_values)
  multiplier[is_letter] <- unname(letter_values[codes[is_letter]])

  # Anchored pattern: only pure digit strings are treated as powers of ten,
  # so mixed text such as "1a" never reaches as.numeric() and yields NA.
  is_digit <- !is.na(codes) & grepl("^[0-9]+$", codes)
  multiplier[is_digit] <- 10^as.numeric(codes[is_digit])

  multiplier
}

# Replace the exponent codes with numeric multipliers. vapply() is used
# instead of sapply() so the result is guaranteed to be a double vector with
# exactly one value per row, and USE.NAMES = FALSE keeps the code strings from
# being stored as element names on the column.
storm_data$PROPDMGEXP <- vapply(
  storm_data$PROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
storm_data$CROPDMGEXP <- vapply(
  storm_data$CROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)

# Actual damage = reported amount x multiplier; the *EXP columns are already
# numeric at this point, so no further coercion is needed.
storm_data <- storm_data %>%
  mutate(
    Property_Damage = PROPDMG * PROPDMGEXP,
    Crop_Damage = CROPDMG * CROPDMGEXP,
    Total_Economic_Damage = Property_Damage + Crop_Damage
  )

# Per event type: total property damage, total crop damage (missing values
# ignored) and their sum, sorted from the most costly down.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarize(
    Total_Property_Damage = sum(Property_Damage, na.rm = TRUE),
    Total_Crop_Damage = sum(Crop_Damage, na.rm = TRUE)
  ) %>%
  mutate(
    Total_Economic_Impact = Total_Property_Damage + Total_Crop_Damage
  ) %>%
  arrange(desc(Total_Economic_Impact))

# The ten event types with the highest combined property and crop damage
top_economic_events <- economic_impact %>%
  head(10)
print(top_economic_events)
## # A tibble: 10 × 4
##    EVTYPE          Total_Property_Damage Total_Crop_Damage Total_Economic_Impact
##    <chr>                           <dbl>             <dbl>                 <dbl>
##  1 FLOOD                   144657709807         5661968450         150319678257 
##  2 HURRICANE/TYPH…          69305840000         2607872800          71913712800 
##  3 TORNADO                  56947380676.         414953270          57362333946.
##  4 STORM SURGE              43323536000               5000          43323541000 
##  5 HAIL                     15735267513.        3025954473          18761221986.
##  6 FLASH FLOOD              16822723978.        1421317100          18244041078.
##  7 DROUGHT                   1046106000        13972566000          15018672000 
##  8 HURRICANE                11868319010         2741910000          14610229010 
##  9 RIVER FLOOD               5118945500         5029459000          10148404500 
## 10 ICE STORM                 3944927860         5022113500           8967041360
# Horizontal bar chart of the ten event types with the largest combined
# fatality and injury counts. Event types are ordered on the axis by the
# negated total, and the axes are then flipped so the labels read
# horizontally.
top_health_events %>%
  ggplot(aes(
    x = reorder(EVTYPE, -Total_Health_Impact),
    y = Total_Health_Impact
  )) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Affecting Population Health",
    x = "Event Type",
    y = "Total Fatalities and Injuries"
  ) +
  theme_minimal()

# Horizontal bar chart of the ten event types with the largest combined
# property and crop damage. Event types are ordered on the axis by the
# negated total, and the axes are then flipped so the labels read
# horizontally.
top_economic_events %>%
  ggplot(aes(
    x = reorder(EVTYPE, -Total_Economic_Impact),
    y = Total_Economic_Impact
  )) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events with Highest Economic Impact",
    x = "Event Type",
    y = "Total Economic Damage (USD)"
  ) +
  theme_minimal()