Synopsis

This analysis investigates severe weather events and their impact on population health and the economy using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The goal is to identify which types of events are most harmful to population health and which have the greatest economic consequences.


Data Processing

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(readr)

# Locate the raw data relative to the current working directory rather than
# calling setwd() with a machine-specific absolute path, so the script runs
# unchanged on any machine when started from the folder holding the data.
storm_file <- "StormData.csv.bz2"
if (!file.exists(storm_file)) {
  stop(
    "Cannot find '", storm_file, "' in ", getwd(),
    "; run the script from the folder that contains the data file.",
    call. = FALSE
  )
}

# Load the dataset (the compressed file is passed straight to read.csv())
storm_data <- read.csv(storm_file)

# View the first few rows to inspect the data
head(storm_data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
# Translate a damage-exponent code into a numeric multiplier.
# Codes are upper-cased first; H, K, M and B map to 1e2, 1e3, 1e6 and 1e9,
# and every other non-missing code falls back to a multiplier of 1.
exponent_multiplier <- function(code) {
  recode(
    toupper(code),
    "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9,
    .default = 1
  )
}

# Lower-case the event labels so differently-cased spellings group together
storm_data[["EVTYPE"]] <- tolower(storm_data[["EVTYPE"]])

# Replace the exponent codes with their numeric multipliers
storm_data[["PROPDMGEXP"]] <- exponent_multiplier(storm_data[["PROPDMGEXP"]])
storm_data[["CROPDMGEXP"]] <- exponent_multiplier(storm_data[["CROPDMGEXP"]])

# Property and crop damage values: reported amount times its multiplier
storm_data[["PROP_DMG_VAL"]] <-
  storm_data[["PROPDMG"]] * storm_data[["PROPDMGEXP"]]
storm_data[["CROP_DMG_VAL"]] <-
  storm_data[["CROPDMG"]] * storm_data[["CROPDMGEXP"]]

# Combined economic damage per record, plus its log10 (+1 avoids log10(0))
storm_data[["TOTAL_ECONOMIC_IMPACT"]] <-
  storm_data[["PROP_DMG_VAL"]] + storm_data[["CROP_DMG_VAL"]]
storm_data[["LOG_ECONOMIC_IMPACT"]] <-
  log10(storm_data[["TOTAL_ECONOMIC_IMPACT"]] + 1)
# Total the combined property and crop damage for each event type and keep
# the ten largest totals (ties kept), ordered from highest to lowest.
economic_impact_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_ECONOMIC_IMPACT = sum(TOTAL_ECONOMIC_IMPACT, na.rm = TRUE)) %>%
  slice_max(TOTAL_ECONOMIC_IMPACT, n = 10)

# Horizontal bar chart of those totals on a log10 scale (+1 guards against
# log10(0)); bars are ordered by the untransformed total.
ggplot(
  economic_impact_by_event,
  aes(
    x = reorder(EVTYPE, TOTAL_ECONOMIC_IMPACT),
    y = log10(TOTAL_ECONOMIC_IMPACT + 1)
  )
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Events Causing Economic Hazards",
    x = "Event Type",
    y = "Log10 of Total Economic Impact"
  ) +
  coord_flip() +
  theme_minimal()

# Total fatalities plus injuries for each event type and keep the ten
# largest totals (ties kept), ordered from highest to lowest.
health_impact_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_HEALTH_IMPACT = sum(FATALITIES + INJURIES, na.rm = TRUE)) %>%
  slice_max(TOTAL_HEALTH_IMPACT, n = 10)

# Horizontal bar chart of the health totals, bars ordered by total
ggplot(
  health_impact_by_event,
  aes(
    x = reorder(EVTYPE, TOTAL_HEALTH_IMPACT),
    y = TOTAL_HEALTH_IMPACT
  )
) +
  geom_col(fill = "red") +
  labs(
    title = "Top 10 Events Causing Health Hazards",
    x = "Event Type",
    y = "Total Health Impact (Fatalities + Injuries)"
  ) +
  coord_flip() +
  theme_minimal()

# Total the crop damage value for each event type and keep the ten largest
# totals (ties kept), ordered from highest to lowest.
crop_damage_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_CROP_DMG = sum(CROP_DMG_VAL, na.rm = TRUE)) %>%
  slice_max(TOTAL_CROP_DMG, n = 10)

# Horizontal bar chart of the crop damage totals, bars ordered by total
ggplot(
  crop_damage_by_event,
  aes(
    x = reorder(EVTYPE, TOTAL_CROP_DMG),
    y = TOTAL_CROP_DMG
  )
) +
  geom_col(fill = "green") +
  labs(
    title = "Top 10 Events Causing Crop Damage",
    x = "Event Type",
    y = "Total Crop Damage"
  ) +
  coord_flip() +
  theme_minimal()

# Total the property damage value for each event type and keep the ten
# largest totals (ties kept), ordered from highest to lowest.
property_damage_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_PROP_DMG = sum(PROP_DMG_VAL, na.rm = TRUE)) %>%
  slice_max(TOTAL_PROP_DMG, n = 10)

# Horizontal bar chart of the property damage totals, bars ordered by total
ggplot(
  property_damage_by_event,
  aes(
    x = reorder(EVTYPE, TOTAL_PROP_DMG),
    y = TOTAL_PROP_DMG
  )
) +
  geom_col(fill = "orange") +
  labs(
    title = "Top 10 Events Causing Property Damage",
    x = "Event Type",
    y = "Total Property Damage"
  ) +
  coord_flip() +
  theme_minimal()

# Build the display tables: event label plus its total, one table per ranking
summary_health <- select(
  health_impact_by_event,
  EVTYPE, TOTAL_HEALTH_IMPACT
)

summary_economic <- select(
  economic_impact_by_event,
  EVTYPE, TOTAL_ECONOMIC_IMPACT
)

summary_crop_damage <- select(
  crop_damage_by_event,
  EVTYPE, TOTAL_CROP_DMG
)

summary_property_damage <- select(
  property_damage_by_event,
  EVTYPE, TOTAL_PROP_DMG
)

# Display results
# Top event types ranked by combined fatalities and injuries
summary_health
## # A tibble: 10 × 2
##    EVTYPE            TOTAL_HEALTH_IMPACT
##    <chr>                           <dbl>
##  1 tornado                         96979
##  2 excessive heat                   8428
##  3 tstm wind                        7461
##  4 flood                            7259
##  5 lightning                        6046
##  6 heat                             3037
##  7 flash flood                      2755
##  8 ice storm                        2064
##  9 thunderstorm wind                1621
## 10 winter storm                     1527
# Top event types ranked by combined property and crop damage
summary_economic
## # A tibble: 10 × 2
##    EVTYPE            TOTAL_ECONOMIC_IMPACT
##    <chr>                             <dbl>
##  1 flood                     150319678257 
##  2 hurricane/typhoon          71913712800 
##  3 tornado                    57352114049.
##  4 storm surge                43323541000 
##  5 hail                       18758222016.
##  6 flash flood                17562129167.
##  7 drought                    15018672000 
##  8 hurricane                  14610229010 
##  9 river flood                10148404500 
## 10 ice storm                   8967041360
# Top event types ranked by total crop damage
summary_crop_damage
## # A tibble: 10 × 2
##    EVTYPE            TOTAL_CROP_DMG
##    <chr>                      <dbl>
##  1 drought              13972566000
##  2 flood                 5661968450
##  3 river flood           5029459000
##  4 ice storm             5022113500
##  5 hail                  3025954473
##  6 hurricane             2741910000
##  7 hurricane/typhoon     2607872800
##  8 flash flood           1421317100
##  9 extreme cold          1312973000
## 10 frost/freeze          1094186000
# Top event types ranked by total property damage
summary_property_damage
## # A tibble: 10 × 2
##    EVTYPE            TOTAL_PROP_DMG
##    <chr>                      <dbl>
##  1 flood              144657709807 
##  2 hurricane/typhoon   69305840000 
##  3 tornado             56937160779.
##  4 storm surge         43323536000 
##  5 flash flood         16140812067.
##  6 hail                15732267543.
##  7 hurricane           11868319010 
##  8 tropical storm       7703890550 
##  9 winter storm         6688497251 
## 10 high wind            5270046295