Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
This project is an analysis of the impact of storms and other weather events on public health and the economy in the United States between 1950 and 2011. The health impact of weather events was analyzed by computing the total number of fatalities and injuries caused by each event type. The analysis revealed that tornadoes are the most dangerous weather event for public health, as they cause the largest number of both fatalities and injuries. The economic impact of weather events was analyzed by computing the total damage to property and crops caused by each event type. The analysis revealed that floods have the greatest impact on the economy, causing roughly 150 billion dollars in combined property and crop damage, followed by hurricanes/typhoons and tornadoes, which caused roughly 71 billion dollars and 57 billion dollars respectively.
library(dplyr)
library(ggplot2)
library(gridExtra)
# Download the compressed NOAA storm dataset into ./data (only when it is not
# already there) and load it into StormData.
data_dir <- "./data"
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- file.path(data_dir, "StormData.csv.bz2")
# Skip the download when a local copy exists so re-running the analysis does
# not fetch the archive again. Delete the file to force a fresh download.
if (!file.exists(storm_file)) {
  # mode = "wb": the archive is binary and must not go through text-mode
  # line-ending translation.
  download.file(fileUrl, destfile = storm_file, mode = "wb")
}
# read.csv() reads the bzip2-compressed file directly
StormData <- read.csv(storm_file)
# Aggregate the fatality and injury counts for every event type
by_EVTPE <- StormData %>%
  group_by(EVTYPE)
summary_EVTYPE <- by_EVTPE %>%
  summarise(
    TotalFatalities = sum(FATALITIES, na.rm = TRUE),
    TotalInjuries = sum(INJURIES, na.rm = TRUE)
  )
# Ten event types with the highest fatality totals, largest first
Fatalities <- summary_EVTYPE %>%
  select(EVTYPE, TotalFatalities) %>%
  arrange(desc(TotalFatalities)) %>%
  .[1:10, ]
# Ten event types with the highest injury totals, largest first
Injuries <- summary_EVTYPE %>%
  select(EVTYPE, TotalInjuries) %>%
  arrange(desc(TotalInjuries)) %>%
  .[1:10, ]
# Selecting the necessary columns for economic impact analysis
Economic <- select(StormData, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Translate damage-exponent codes into numeric multipliers.
#
# codes: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length:
#   "H"/"h" -> 1e+02, "K"/"k" -> 1e+03, "M"/"m" -> 1e+06, "B"/"b" -> 1e+09;
#   a purely numeric code is used as the multiplier itself;
#   anything else (blank, symbols, NA) -> 0, so that damage entry contributes
#   nothing to the total.
# Codes are matched exactly rather than by substring, and no coercion
# warnings are raised.
exponent_multiplier <- function(codes) {
  codes <- toupper(as.character(codes))
  letter_values <- c(H = 1e+02, K = 1e+03, M = 1e+06, B = 1e+09)
  multiplier <- unname(letter_values[codes])
  # NOTE(review): numeric codes are kept as literal multipliers, which is what
  # the previous as.numeric() coercion did. Confirm the intended meaning of
  # these codes against the dataset documentation.
  is_digit <- grepl("^[0-9]+$", codes)
  multiplier[is_digit] <- as.numeric(codes[is_digit])
  multiplier[is.na(multiplier)] <- 0
  multiplier
}

# Converting PROPDMGEXP and CROPDMGEXP to numeric multipliers
Economic$PROPDMGEXP <- exponent_multiplier(Economic$PROPDMGEXP)
Economic$CROPDMGEXP <- exponent_multiplier(Economic$CROPDMGEXP)

# Treat missing damage amounts as 0 for computation. Only the numeric damage
# columns are touched, so EVTYPE is never overwritten.
Economic$PROPDMG[is.na(Economic$PROPDMG)] <- 0
Economic$CROPDMG[is.na(Economic$CROPDMG)] <- 0

# Computing the total damage (property + crop) for every record
Economic$TotalDamage <- (Economic$PROPDMG * Economic$PROPDMGEXP) +
  (Economic$CROPDMG * Economic$CROPDMGEXP)
# Sum the per-record damage within each event type
by_Type_Economic <- Economic %>%
  group_by(EVTYPE)
summary_Economic <- by_Type_Economic %>%
  summarise(TotalDamage = sum(TotalDamage, na.rm = TRUE))
# Ten event types with the highest total economic damage, largest first
EconomicDamage <- summary_Economic %>%
  arrange(desc(TotalDamage)) %>%
  .[1:10, ]
# Creating the plot for total fatalities per event type.
# reorder(..., -Total) sorts the bars from largest to smallest total.
Fatalities_Plot <- ggplot(
  Fatalities,
  aes(x = reorder(EVTYPE, -TotalFatalities), y = TotalFatalities, fill = EVTYPE)
) +
  # geom_col() draws bars whose heights are the y values themselves; the
  # legend is hidden because the x axis already names each event type.
  geom_col(show.legend = FALSE) +
  labs(
    x = "Weather Event Type", y = "Total Fatalities",
    title = "Top 10 Weather Event Types Causing Fatalities"
  ) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Creating the plot for total injuries per event type (same layout as above)
Injuries_Plot <- ggplot(
  Injuries,
  aes(x = reorder(EVTYPE, -TotalInjuries), y = TotalInjuries, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Weather Event Type", y = "Total Injuries",
    title = "Top 10 Weather Event Types Causing Injuries"
  ) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Arranging the plots for total fatalities and total injuries on a single panel
grid.arrange(Fatalities_Plot, Injuries_Plot, nrow = 2)
# Creating the plot for total economic damage per event type.
# reorder(..., -TotalDamage) sorts the bars from largest to smallest total.
ggplot(
  EconomicDamage,
  aes(x = reorder(EVTYPE, -TotalDamage), y = TotalDamage, fill = EVTYPE)
) +
  # geom_col() draws bars whose heights are the y values themselves; the
  # legend is hidden because the x axis already names each event type.
  geom_col(show.legend = FALSE) +
  labs(
    x = "Weather Event Type", y = "Total Damage (in Dollars)",
    title = "Top 10 Weather Event Types Causing Economic Damage"
  ) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )