This analysis investigates severe weather events and their impact on population health and the economy using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The goal is to identify which types of events are most harmful to population health and which have the greatest economic consequences.
# Load necessary libraries
library(dplyr)
library(ggplot2)
library(readr)
# Locate the raw NOAA file without calling setwd(): changing the working
# directory mutates global session state and affects every other relative
# path used in the session. Update `data_dir` to your own location.
data_dir <- "C:/Users/yagini/Documents/githubUps/4"
storm_file <- file.path(data_dir, "StormData.csv.bz2")
# Fail early with a readable message if the file is not where we expect it
if (!file.exists(storm_file)) {
  stop("Storm data file not found: ", storm_file, call. = FALSE)
}
# Load the dataset (the bzip2-compressed CSV is passed straight to read.csv())
storm_data <- read.csv(storm_file)
# View the first few rows to inspect the data
head(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Translate damage-exponent codes into numeric multipliers.
# Codes are upper-cased first; K, M, B and H map to 1e3, 1e6, 1e9 and 1e2,
# and any other code that recode() leaves unmatched gets a multiplier of 1.
exponent_multiplier <- function(code) {
  recode(
    toupper(code),
    "K" = 1e3, "M" = 1e6, "B" = 1e9, "H" = 1e2,
    .default = 1
  )
}
# One pass over the data: lower-case the event labels, replace the exponent
# codes by their multipliers, then derive the damage columns. mutate()
# evaluates its arguments in order, so later columns see the earlier results.
storm_data <- storm_data %>%
  mutate(
    EVTYPE = tolower(EVTYPE),
    PROPDMGEXP = exponent_multiplier(PROPDMGEXP),
    CROPDMGEXP = exponent_multiplier(CROPDMGEXP),
    # Damage values: reported amount times its multiplier
    PROP_DMG_VAL = PROPDMG * PROPDMGEXP,
    CROP_DMG_VAL = CROPDMG * CROPDMGEXP,
    # Combined property + crop damage per record
    TOTAL_ECONOMIC_IMPACT = PROP_DMG_VAL + CROP_DMG_VAL,
    # Add 1 before taking log10 to avoid log(0)
    LOG_ECONOMIC_IMPACT = log10(TOTAL_ECONOMIC_IMPACT + 1)
  )
# Total economic impact (property + crop damage) per event type, reduced to
# the ten largest totals and sorted from largest to smallest.
# slice_max() replaces the superseded top_n(); with_ties = TRUE keeps the
# previous behaviour of returning extra rows when events tie at the cutoff.
economic_impact_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    TOTAL_ECONOMIC_IMPACT = sum(TOTAL_ECONOMIC_IMPACT, na.rm = TRUE)
  ) %>%
  slice_max(TOTAL_ECONOMIC_IMPACT, n = 10, with_ties = TRUE) %>%
  # Make the descending order explicit; only the retained rows are sorted
  arrange(desc(TOTAL_ECONOMIC_IMPACT))
# Horizontal bar chart of the top economic-impact event types.
# Bars are ordered by total impact; bar length is log10(total + 1).
ggplot(
  data = economic_impact_by_event,
  mapping = aes(
    x = reorder(EVTYPE, TOTAL_ECONOMIC_IMPACT),
    y = log10(TOTAL_ECONOMIC_IMPACT + 1)
  )
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  ggtitle("Top 10 Events Causing Economic Hazards") +
  xlab("Event Type") +
  ylab("Log10 of Total Economic Impact") +
  theme_minimal()
# Total health impact (fatalities + injuries) per event type, reduced to the
# ten largest totals and sorted from largest to smallest.
# slice_max() replaces the superseded top_n(); with_ties = TRUE keeps the
# previous behaviour of returning extra rows when events tie at the cutoff.
health_impact_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    TOTAL_HEALTH_IMPACT = sum(FATALITIES + INJURIES, na.rm = TRUE)
  ) %>%
  slice_max(TOTAL_HEALTH_IMPACT, n = 10, with_ties = TRUE) %>%
  # Make the descending order explicit; only the retained rows are sorted
  arrange(desc(TOTAL_HEALTH_IMPACT))
# Horizontal bar chart of the top health-impact event types, with bars
# ordered by the combined fatalities + injuries total.
ggplot(
  data = health_impact_by_event,
  mapping = aes(
    x = reorder(EVTYPE, TOTAL_HEALTH_IMPACT),
    y = TOTAL_HEALTH_IMPACT
  )
) +
  geom_col(fill = "red") +
  coord_flip() +
  ggtitle("Top 10 Events Causing Health Hazards") +
  xlab("Event Type") +
  ylab("Total Health Impact (Fatalities + Injuries)") +
  theme_minimal()
# Total crop damage per event type, reduced to the ten largest totals and
# sorted from largest to smallest.
# slice_max() replaces the superseded top_n(); with_ties = TRUE keeps the
# previous behaviour of returning extra rows when events tie at the cutoff.
crop_damage_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_CROP_DMG = sum(CROP_DMG_VAL, na.rm = TRUE)) %>%
  slice_max(TOTAL_CROP_DMG, n = 10, with_ties = TRUE) %>%
  # Make the descending order explicit; only the retained rows are sorted
  arrange(desc(TOTAL_CROP_DMG))
# Horizontal bar chart of the top crop-damage event types, with bars ordered
# by total crop damage.
ggplot(
  data = crop_damage_by_event,
  mapping = aes(
    x = reorder(EVTYPE, TOTAL_CROP_DMG),
    y = TOTAL_CROP_DMG
  )
) +
  geom_col(fill = "green") +
  coord_flip() +
  ggtitle("Top 10 Events Causing Crop Damage") +
  xlab("Event Type") +
  ylab("Total Crop Damage") +
  theme_minimal()
# Total property damage per event type, reduced to the ten largest totals and
# sorted from largest to smallest.
# slice_max() replaces the superseded top_n(); with_ties = TRUE keeps the
# previous behaviour of returning extra rows when events tie at the cutoff.
property_damage_by_event <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_PROP_DMG = sum(PROP_DMG_VAL, na.rm = TRUE)) %>%
  slice_max(TOTAL_PROP_DMG, n = 10, with_ties = TRUE) %>%
  # Make the descending order explicit; only the retained rows are sorted
  arrange(desc(TOTAL_PROP_DMG))
# Horizontal bar chart of the top property-damage event types, with bars
# ordered by total property damage.
ggplot(
  data = property_damage_by_event,
  mapping = aes(
    x = reorder(EVTYPE, TOTAL_PROP_DMG),
    y = TOTAL_PROP_DMG
  )
) +
  geom_col(fill = "orange") +
  coord_flip() +
  ggtitle("Top 10 Events Causing Property Damage") +
  xlab("Event Type") +
  ylab("Total Property Damage") +
  theme_minimal()
# Reporting tables: event type plus its total, one table per impact measure
summary_health <- select(
  health_impact_by_event, EVTYPE, TOTAL_HEALTH_IMPACT
)
summary_economic <- select(
  economic_impact_by_event, EVTYPE, TOTAL_ECONOMIC_IMPACT
)
summary_crop_damage <- select(
  crop_damage_by_event, EVTYPE, TOTAL_CROP_DMG
)
summary_property_damage <- select(
  property_damage_by_event, EVTYPE, TOTAL_PROP_DMG
)
# Display results
# Health impact table: fatalities + injuries summed per event type
summary_health
## # A tibble: 10 × 2
## EVTYPE TOTAL_HEALTH_IMPACT
## <chr> <dbl>
## 1 tornado 96979
## 2 excessive heat 8428
## 3 tstm wind 7461
## 4 flood 7259
## 5 lightning 6046
## 6 heat 3037
## 7 flash flood 2755
## 8 ice storm 2064
## 9 thunderstorm wind 1621
## 10 winter storm 1527
# Economic impact table: property + crop damage summed per event type
summary_economic
## # A tibble: 10 × 2
## EVTYPE TOTAL_ECONOMIC_IMPACT
## <chr> <dbl>
## 1 flood 150319678257
## 2 hurricane/typhoon 71913712800
## 3 tornado 57352114049.
## 4 storm surge 43323541000
## 5 hail 18758222016.
## 6 flash flood 17562129167.
## 7 drought 15018672000
## 8 hurricane 14610229010
## 9 river flood 10148404500
## 10 ice storm 8967041360
# Crop damage table: crop damage value summed per event type
summary_crop_damage
## # A tibble: 10 × 2
## EVTYPE TOTAL_CROP_DMG
## <chr> <dbl>
## 1 drought 13972566000
## 2 flood 5661968450
## 3 river flood 5029459000
## 4 ice storm 5022113500
## 5 hail 3025954473
## 6 hurricane 2741910000
## 7 hurricane/typhoon 2607872800
## 8 flash flood 1421317100
## 9 extreme cold 1312973000
## 10 frost/freeze 1094186000
# Property damage table: property damage value summed per event type
summary_property_damage
## # A tibble: 10 × 2
## EVTYPE TOTAL_PROP_DMG
## <chr> <dbl>
## 1 flood 144657709807
## 2 hurricane/typhoon 69305840000
## 3 tornado 56937160779.
## 4 storm surge 43323536000
## 5 flash flood 16140812067.
## 6 hail 15732267543.
## 7 hurricane 11868319010
## 8 tropical storm 7703890550
## 9 winter storm 6688497251
## 10 high wind 5270046295