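This report analyzes severe weather events in the United States using the NOAA Storm Database. The goal is to determine which types of events are most harmful to population health and which types have the greatest economic consequences.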
The dataset covers events from 1950 to 2011, though earlier years contain fewer recorded events.
library(dplyr)
library(ggplot2)
library(readr)
# Fetch the compressed storm data (only when no local copy exists) and read it
file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
file_name <- "StormData.csv.bz2"

# The archive is written in binary mode; an existing file is reused as-is
if (!file.exists(file_name)) {
  download.file(url = file_url, destfile = file_name, mode = "wb")
}

# read_csv() is pointed straight at the .bz2 file
storm_data <- read_csv(file = file_name)
# Inspect the loaded table: row/column counts first, then per-column types
storm_data %>% dim()
## [1] 902297 37
storm_data %>% str()
## spc_tbl_ [902,297 × 37] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ STATE__ : num [1:902297] 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr [1:902297] "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr [1:902297] "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr [1:902297] "CST" "CST" "CST" "CST" ...
## $ COUNTY : num [1:902297] 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr [1:902297] "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr [1:902297] "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr [1:902297] "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr [1:902297] NA NA NA NA ...
## $ BGN_LOCATI: chr [1:902297] NA NA NA NA ...
## $ END_DATE : chr [1:902297] NA NA NA NA ...
## $ END_TIME : chr [1:902297] NA NA NA NA ...
## $ COUNTY_END: num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi [1:902297] NA NA NA NA NA NA ...
## $ END_RANGE : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr [1:902297] NA NA NA NA ...
## $ END_LOCATI: chr [1:902297] NA NA NA NA ...
## $ LENGTH : num [1:902297] 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num [1:902297] 100 150 123 100 150 177 33 33 100 100 ...
## $ F : num [1:902297] 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num [1:902297] 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num [1:902297] 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num [1:902297] 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr [1:902297] "K" "K" "K" "K" ...
## $ CROPDMG : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr [1:902297] NA NA NA NA ...
## $ WFO : chr [1:902297] NA NA NA NA ...
## $ STATEOFFIC: chr [1:902297] NA NA NA NA ...
## $ ZONENAMES : chr [1:902297] NA NA NA NA ...
## $ LATITUDE : num [1:902297] 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num [1:902297] 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num [1:902297] 3051 0 0 0 0 ...
## $ LONGITUDE_: num [1:902297] 8806 0 0 0 0 ...
## $ REMARKS : chr [1:902297] NA NA NA NA ...
## $ REFNUM : num [1:902297] 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "spec")=
## .. cols(
## .. STATE__ = col_double(),
## .. BGN_DATE = col_character(),
## .. BGN_TIME = col_character(),
## .. TIME_ZONE = col_character(),
## .. COUNTY = col_double(),
## .. COUNTYNAME = col_character(),
## .. STATE = col_character(),
## .. EVTYPE = col_character(),
## .. BGN_RANGE = col_double(),
## .. BGN_AZI = col_character(),
## .. BGN_LOCATI = col_character(),
## .. END_DATE = col_character(),
## .. END_TIME = col_character(),
## .. COUNTY_END = col_double(),
## .. COUNTYENDN = col_logical(),
## .. END_RANGE = col_double(),
## .. END_AZI = col_character(),
## .. END_LOCATI = col_character(),
## .. LENGTH = col_double(),
## .. WIDTH = col_double(),
## .. F = col_double(),
## .. MAG = col_double(),
## .. FATALITIES = col_double(),
## .. INJURIES = col_double(),
## .. PROPDMG = col_double(),
## .. PROPDMGEXP = col_character(),
## .. CROPDMG = col_double(),
## .. CROPDMGEXP = col_character(),
## .. WFO = col_character(),
## .. STATEOFFIC = col_character(),
## .. ZONENAMES = col_character(),
## .. LATITUDE = col_double(),
## .. LONGITUDE = col_double(),
## .. LATITUDE_E = col_double(),
## .. LONGITUDE_ = col_double(),
## .. REMARKS = col_character(),
## .. REFNUM = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Keep only the event type plus the casualty and damage fields used below
storm_data <- select(
  storm_data,
  EVTYPE,
  FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)
# Convert damage exponent codes into numeric multipliers.
#
# Arguments:
#   exp  Exponent code(s): a character vector (or anything coercible to one).
#        May be a single value or a whole column.
#
# Returns a numeric vector the same length as `exp`:
#   * NA, "" or whitespace-only  -> 1 (the damage figure is used as recorded)
#   * "H"/"K"/"M"/"B" (any case) -> 1e2 / 1e3 / 1e6 / 1e9
#   * digits                     -> 10^digit
#   * anything else ("+", "-", "?", ...) -> NA_real_, returned explicitly so
#     no coercion warning is raised; callers summing with na.rm = TRUE will
#     exclude those rows.
#
# NOTE(review): digit codes are interpreted as powers of ten because the field
# is an exponent; confirm against the NOAA storm data documentation.
convert_exp <- function(exp) {
  code <- toupper(trimws(as.character(exp)))

  # Start from "unknown" and fill in every code we recognise
  multiplier <- rep(NA_real_, length(code))

  # Missing or blank exponent: leave the recorded amount unscaled
  multiplier[is.na(code) | code == ""] <- 1

  letter_values <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- code %in% names(letter_values)
  multiplier[is_letter] <- letter_values[code[is_letter]]

  is_digit <- grepl("^[0-9]+$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}
# Replace the exponent codes with numeric multipliers and derive the damage
# amounts. vapply() is used instead of sapply() so each column is guaranteed to
# be a double vector with one value per row, and USE.NAMES = FALSE stops the
# input strings from being attached as names to every element.
storm_data <- storm_data %>%
  mutate(
    PROPDMGEXP = vapply(
      PROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE
    ),
    CROPDMGEXP = vapply(
      CROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE
    ),
    # These use the numeric multipliers computed just above
    PROPERTY_DAMAGE = PROPDMG * PROPDMGEXP,
    CROP_DAMAGE = CROPDMG * CROPDMGEXP
  )
# Casualties per event type; keep the ten types with the largest combined
# count of fatalities and injuries, largest first.
health_impact <- group_by(storm_data, EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Fatalities + Total_Injuries)) %>%
  head(n = 10)
# Horizontal bar chart of combined fatalities and injuries for the top ten
# event types, with bars ordered by that combined total
ggplot(
  health_impact,
  aes(
    x = reorder(EVTYPE, -(Total_Fatalities + Total_Injuries)),
    y = Total_Fatalities + Total_Injuries
  )
) +
  geom_col(fill = "red") +
  coord_flip() +
  ggtitle("Top 10 Most Harmful Weather Events to Population Health") +
  xlab("Event Type") +
  ylab("Total Fatalities & Injuries")
# Property and crop damage per event type, plus their sum; keep the ten types
# with the largest combined damage, largest first.
economic_impact <- group_by(storm_data, EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(PROPERTY_DAMAGE, na.rm = TRUE),
    Total_Crop_Damage = sum(CROP_DAMAGE, na.rm = TRUE)
  ) %>%
  mutate(
    Total_Economic_Damage = Total_Property_Damage + Total_Crop_Damage
  ) %>%
  arrange(desc(Total_Economic_Damage)) %>%
  head(n = 10)
# Horizontal bar chart of total economic damage for the top ten event types,
# with bars ordered by that total
ggplot(
  economic_impact,
  aes(
    x = reorder(EVTYPE, -Total_Economic_Damage),
    y = Total_Economic_Damage
  )
) +
  geom_col(fill = "blue") +
  coord_flip() +
  ggtitle("Top 10 Most Costly Weather Events") +
  xlab("Event Type") +
  ylab("Total Economic Damage (USD)")
All code for this analysis is provided in the document. The results can be fully reproduced using the NOAA Storm Database.