The goal of this assignment is to explore the U.S. National Oceanic and Atmospheric Administration's (NOAA) Storm Database. More specifically, this study identifies which severe weather events have the greatest effect on public health (fatalities, injuries) and the greatest economic consequences (property and crop damage). The assessment concludes that tornadoes have the highest impact on public health and result in the greatest economic consequences.
The environment is cleared and the data file is downloaded if it is not already available in the working directory. The data are then loaded into R. No preprocessing was applied to the data.
# Load the NOAA storm data into `data`, downloading the compressed CSV first
# when it is not already present in the working directory.

# Kept because the report text states that the environment is cleared.
# NOTE(review): this removes every object in the calling environment; consider
# dropping it if the report is ever run inside a session with other work.
rm(list = ls())

# One name is used for the existence check, the download target and the read.
# Previously the check looked for "rep_data_StormData.csv.bz2" while the file
# was saved and read as "StormData.csv.bz2", so the cache check never matched
# the downloaded file and the data was fetched again on every run.
dataFile <- "StormData.csv.bz2"

if (!file.exists(dataFile)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(fileURL, destfile = dataFile, method = "curl")
}

# read.csv() reads the bz2-compressed file directly.
data <- read.csv(dataFile, header = TRUE, stringsAsFactors = FALSE)
# Public-health impact: total fatalities and total injuries per event type,
# each reduced to the ten largest event types and drawn as side-by-side bars.
library(ggplot2)
library(gridExtra)

# --- Fatalities ------------------------------------------------------------
# Sum FATALITIES within each EVTYPE, then keep the ten largest totals.
fatalities <- aggregate(x = data["FATALITIES"], by = data["EVTYPE"], FUN = sum)
TOPfatalities <- fatalities[order(-fatalities$FATALITIES), ][seq_len(10), ]

# reorder() sorts the bars from the largest total to the smallest.
p1 <- ggplot(
  data = TOPfatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_col(fill = "red", color = "black") +
  scale_y_continuous(breaks = seq(0, 6000, by = 1500), limits = c(0, 6000)) +
  labs(
    x = "Event Type",
    y = "Nu. fatalities",
    title = "Top 10 events\nwith higher fatalities"
  ) +
  theme_classic() +
  # Must follow theme_classic(), which would otherwise reset the axis text.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# --- Injuries --------------------------------------------------------------
# Same procedure applied to the INJURIES column.
injuries <- aggregate(x = data["INJURIES"], by = data["EVTYPE"], FUN = sum)
TOPinjuries <- injuries[order(-injuries$INJURIES), ][seq_len(10), ]

p2 <- ggplot(
  data = TOPinjuries,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_col(fill = "orange", color = "black") +
  scale_y_continuous(
    breaks = seq(0, 100000, by = 25000),
    limits = c(0, 100000)
  ) +
  labs(
    x = "Event Type",
    y = "Nu. injuries",
    title = "Top 10 events\nwith higher injuries"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 7))

# Both panels in a single row.
grid.arrange(p1, p2, nrow = 1)
According to the database, tornadoes are the events with the greatest impact on public health, showing the largest numbers of both fatalities and injuries.
# Economic impact: total property damage and total crop damage per event type,
# each reduced to the ten largest event types and drawn as side-by-side bars.
#
# NOTE(review): PROPDMG and CROPDMG are summed here as raw numbers. The NOAA
# storm data presumably carries separate magnitude columns (PROPDMGEXP /
# CROPDMGEXP) for these figures -- confirm against the dataset documentation;
# if so, these totals are not comparable dollar amounts.

# --- Property damage -------------------------------------------------------
properties <- aggregate(x = data["PROPDMG"], by = data["EVTYPE"], FUN = sum)
TOPproperties <- properties[order(-properties$PROPDMG), ][seq_len(10), ]

# p1 and p2 are reused here and overwrite the plots built earlier.
p1 <- ggplot(
  data = TOPproperties,
  aes(x = reorder(EVTYPE, -PROPDMG), y = PROPDMG)
) +
  geom_col(fill = "black", color = "black") +
  scale_y_continuous(
    breaks = seq(0, 4000000, by = 1000000),
    limits = c(0, 4000000)
  ) +
  labs(
    x = "Event Type",
    y = "Property damage",
    title = "Top 10 events\nwith higher property damage"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))

# --- Crop damage -----------------------------------------------------------
crop <- aggregate(x = data["CROPDMG"], by = data["EVTYPE"], FUN = sum)
TOPcrop <- crop[order(-crop$CROPDMG), ][seq_len(10), ]

p2 <- ggplot(
  data = TOPcrop,
  aes(x = reorder(EVTYPE, -CROPDMG), y = CROPDMG)
) +
  geom_col(fill = "green", color = "black") +
  scale_y_continuous(
    breaks = seq(0, 600000, by = 150000),
    limits = c(0, 600000)
  ) +
  labs(
    x = "Event Type",
    y = "Crop damage",
    title = "Top 10 events\nwith higher crop damage"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))

# Both panels in a single row.
grid.arrange(p1, p2, nrow = 1)
# Combined economic impact: row-wise crop + property damage, summed per event
# type, with the ten largest totals drawn as a bar chart.
#
# Adding the two one-column data frames yields a one-column data frame that
# keeps the left operand's name, so the combined total is stored under
# "CROPDMG" even though it holds crop + property damage.
TotalDamage <- aggregate(
  x = data["CROPDMG"] + data["PROPDMG"],
  by = data["EVTYPE"],
  FUN = sum
)
TOPTotalDamage <- TotalDamage[order(-TotalDamage$CROPDMG), ][seq_len(10), ]

# The plot is not assigned, so it is printed directly.
ggplot(
  data = TOPTotalDamage,
  aes(x = reorder(EVTYPE, -CROPDMG), y = CROPDMG)
) +
  geom_col(fill = "darkblue", color = "black") +
  scale_y_continuous(
    breaks = seq(0, 4000000, by = 1000000),
    limits = c(0, 4000000)
  ) +
  labs(
    x = "Event Type",
    y = "Total damage",
    title = "Top 10 events with higher total damage"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))
Overall, tornado events result in the most combined damage to property and crops.