The National Oceanic and Atmospheric Administration (NOAA) studies and provides data on storms and significant weather phenomena that have enough intensity to cause harm to the general public, significant property damage, or economic disruption. The dataset evaluated here is provided by NOAA. The goal is to look at which types of weather events are most harmful to population health and also which types carry the greatest economic consequences.
The data can be downloaded as a bzip2-compressed CSV file.
# Silence all warnings for the rest of the session so they do not clutter the
# rendered output.
# NOTE(review): this setting is global and also hides genuine problems;
# consider scoping it more narrowly.
options(warn = -1)
library(ggplot2)

# Local copy of the compressed dataset; it is downloaded only when missing.
destfile <- "stormdata.csv.bz2"
if (!file.exists(destfile)) {
  URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb" keeps the bzip2 archive byte-exact on every platform. The
  # default download method is used so no external curl binary is required.
  download.file(URL, destfile = destfile, mode = "wb")
}
# read.csv() reads the bzip2-compressed file directly.
raw_data <- read.csv(destfile)
# Keep only the columns that describe the impact on population health
health_data <- raw_data[, c("EVTYPE", "FATALITIES", "INJURIES")]
# Add fatalities and injuries together into one count of health incidents
health_data$TOTALS <- as.integer(health_data$FATALITIES) +
  as.integer(health_data$INJURIES)
health_data <- health_data[, c("EVTYPE", "TOTALS")]
# Sum the incidents per event type. The data frame is aggregated directly:
# no rows are filtered out beforehand, and events without any fatalities or
# injuries simply contribute zero to their event type's total.
health_data <- aggregate(TOTALS ~ EVTYPE, health_data, sum, na.rm = TRUE)
# Top 5 event types by total incidents
sorted_data <- head(health_data[order(-health_data$TOTALS), ], n = 5)
# Separate the columns needed to determine property/crop damage
economic_data <- raw_data[, c("EVTYPE", "PROPDMG", "PROPDMGEXP",
                              "CROPDMG", "CROPDMGEXP")]
economic_data$PROPDMG <- as.numeric(economic_data$PROPDMG)
economic_data$CROPDMG <- as.numeric(economic_data$CROPDMG)

# Translate a vector of damage-exponent codes into dollar multipliers.
# "K", "M" and "B" (matched case-insensitively) mean thousands, millions and
# billions. Any other code, including blank and NA, yields 0, so that damage
# component is ignored without discarding the rest of the row.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 0
  multiplier
}

# Each amount is scaled by its OWN exponent column: property damage by
# PROPDMGEXP and crop damage by CROPDMGEXP.
# NOTE(review): crop damage used to be scaled by PROPDMGEXP, and rows with an
# unrecognised exponent were dropped entirely, so totals and rankings change;
# re-check any narrative written against the previous figures.
economic_data$TOTALPROP_DMG <-
  economic_data$PROPDMG * damage_multiplier(economic_data$PROPDMGEXP)
economic_data$TOTALCROP_DMG <-
  economic_data$CROPDMG * damage_multiplier(economic_data$CROPDMGEXP)
economic_data$TOTAL_DMG <-
  economic_data$TOTALPROP_DMG + economic_data$TOTALCROP_DMG

# We're just looking at total damages, so aggregate on that column only
economic_data <- economic_data[, c("EVTYPE", "TOTAL_DMG")]
economic_data <- aggregate(TOTAL_DMG ~ EVTYPE, economic_data, sum, na.rm = TRUE)
# Top 5 event types by total damage
sorted_econ_data <- head(economic_data[order(-economic_data$TOTAL_DMG), ],
                         n = 5)
# Clear the raw data; it is no longer needed once both summaries exist
rm(raw_data)
When it comes to injuries and fatalities, tornadoes appear to be many times more dangerous to the population than any other event type.
# Bar chart of the top five event types by combined fatalities and injuries.
# Columns are referenced by bare name inside aes() so they are looked up in
# the data frame passed to ggplot() instead of being captured from outside it.
ggplot(sorted_data, aes(x = factor(EVTYPE), y = TOTALS)) +
  geom_bar(stat = "identity") +
  ggtitle("Top 5 Event Types by Fatalities/Injury counts") +
  labs(x = "Events", y = "Incidents") +
  theme(axis.title = element_text(family = "Trebuchet MS", color = "#666666",
                                  face = "bold", size = 12))
Evaluating the economic damage caused by weather events, we see that the top two event types are hurricanes and hurricanes that become typhoons. These event types are many times more costly than the other event types tracked within this dataset.
# Bar chart of the top five event types by total damage. TOTAL_DMG is divided
# by 1e9 so the y axis reads in billions, matching its label.
# Columns are referenced by bare name inside aes() so they are looked up in
# the data frame passed to ggplot() instead of being captured from outside it.
ggplot(sorted_econ_data, aes(x = factor(EVTYPE), y = TOTAL_DMG / 1000000000)) +
  geom_bar(stat = "identity") +
  ggtitle("Top 5 Event Types by Economic Damage") +
  labs(x = "Events", y = "Cost of Damage (in billions)") +
  theme(axis.title = element_text(family = "Trebuchet MS", color = "#666666",
                                  face = "bold", size = 12))