This analysis explores the NOAA Storm Database to identify which types of severe weather events are most harmful to population health and have the greatest economic consequences across the United States.
# Load necessary packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the storm events table. The compressed archive 'storm_data.csv.bz2' is
# expected in the current working directory; read.csv() reads it directly.
storm_data <- read.csv(file = "storm_data.csv.bz2", header = TRUE)
# Show each column's name, type and first few values
str(storm_data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Data preparation
# Derive a Date column from the event start timestamp. BGN_DATE values look
# like "4/18/1950 0:00:00"; the format string consumes the leading
# month/day/year part and the trailing time-of-day text is ignored.
storm_data[["DATE"]] <- as.Date(storm_data[["BGN_DATE"]], format = "%m/%d/%Y")
# Rank event types by human toll: per EVTYPE, add up deaths and injuries
# (missing values skipped), combine them into one score, and keep the ten
# event types with the highest combined score.
events_health <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    # Later summary expressions may reuse the totals computed just above
    total_health_impact = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_health_impact)) %>%
  head(n = 10)
# Bar chart of the ten most harmful event types; bars are ordered by the
# combined fatalities + injuries score via reorder().
library(ggplot2)
ggplot(
  data = events_health,
  mapping = aes(
    x = reorder(EVTYPE, total_health_impact),
    y = total_health_impact
  )
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Event Types by Total Health Impact",
    x = "Event Type",
    y = "Total Health Impact (Fatalities + Injuries)",
    caption = "Data source: NOAA Storm Database"
  )
# Calculate property and crop damage (in US dollars) by event type.
#
# PROPDMG / CROPDMG hold only the mantissa of each damage estimate; the
# magnitude is stored separately in PROPDMGEXP / CROPDMGEXP (e.g. 25 with "K"
# means 25,000). Summing the raw mantissas would weigh a 2.5 K loss the same as
# a 2.5 B loss, so every amount is scaled to dollars before aggregating.

# Translate a vector of magnitude codes into numeric multipliers.
#   exp_code: character (or factor) vector of codes such as "K", "m", "B", "".
#   Returns a numeric vector of the same length.
# Codes are matched case-insensitively: H = 1e2, K = 1e3, M = 1e6, B = 1e9.
# NOTE(review): blank or unrecognised codes (e.g. "", "?", "+", digits) are
# assumed to mean "amount as recorded" and get a multiplier of 1 -- confirm
# this convention against the NOAA documentation.
exp_to_multiplier <- function(exp_code) {
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Name-based lookup yields NA for "" / NA / unknown codes
  multiplier <- unname(known[toupper(trimws(as.character(exp_code)))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

events_economic <- storm_data %>%
  mutate(
    property_damage_usd = PROPDMG * exp_to_multiplier(PROPDMGEXP),
    crop_damage_usd = CROPDMG * exp_to_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarize(
    total_property_damage = sum(property_damage_usd, na.rm = TRUE),
    total_crop_damage = sum(crop_damage_usd, na.rm = TRUE)
  ) %>%
  mutate(total_economic_impact = total_property_damage + total_crop_damage) %>%
  arrange(desc(total_economic_impact)) %>%
  # Keep only the ten costliest event types
  head(10)
# Bar chart of the ten event types with the largest combined property and
# crop damage; bars are ordered by that combined total via reorder().
ggplot(
  data = events_economic,
  mapping = aes(
    x = reorder(EVTYPE, total_economic_impact),
    y = total_economic_impact
  )
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(fill = "green") +
  labs(
    title = "Top 10 Event Types by Total Economic Impact",
    x = "Event Type",
    y = "Total Economic Impact (Property + Crop Damage)",
    caption = "Data source: NOAA Storm Database"
  )
This analysis of the NOAA Storm Database highlights the severe weather events that pose the highest risks to population health and have the greatest economic consequences. Municipalities and government agencies can use this information to prioritize resources and improve preparedness efforts.