Synopsis

The goal of this assignment is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database. More specifically, this study identifies which severe weather events have the greatest effect on public health (fatalities, injuries) and the greatest economic consequences (property and crop damage). The assessment concludes that tornadoes have the highest impact on public health and result in the greatest economic consequences.

Data Processing

The environment is cleared and the data file is downloaded if it is not already available in the working directory. The data are then loaded into R. No preprocessing was applied to the data.

# Start from an empty workspace so earlier objects cannot leak into the analysis.
rm(list = ls())

# A single name is shared by the existence check, the download target and the
# read below. The check used to test "rep_data_StormData.csv.bz2", a name the
# download never creates, so the archive was fetched again on every run.
data_file <- "StormData.csv.bz2"

if (!file.exists(data_file)) {
    fileURL <- 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
    download.file(fileURL, destfile = data_file, method = 'curl')
}

# The compressed file is passed to read.csv as-is; text columns are kept as
# character vectors rather than factors.
data <- read.csv(data_file, header = TRUE, stringsAsFactors = FALSE)

Results

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Total FATALITIES per EVTYPE, then the ten event types with the largest totals.
fatalities <- aggregate(x = data["FATALITIES"], by = data["EVTYPE"], FUN = sum)
TOPfatalities <- fatalities[order(-fatalities$FATALITIES)[seq_len(10)], ]

library(ggplot2)
library(gridExtra)

# Bar chart of the ten event types in TOPfatalities; reorder() on the negated
# totals places the bars from the highest count to the lowest.
p1 <- ggplot(TOPfatalities, aes(reorder(EVTYPE, -FATALITIES), FATALITIES)) +
    geom_col(fill = "red", colour = "black") +
    scale_y_continuous(breaks = seq(0, 6000, by = 1500), limits = c(0, 6000)) +
    labs(
        title = "Top 10 events\nwith higher fatalities",
        x = "Event Type",
        y = "Nu. fatalities"
    ) +
    theme_classic() +
    # Rotate the event-type labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Total INJURIES per EVTYPE, then the ten event types with the largest totals.
injuries <- aggregate(x = data["INJURIES"], by = data["EVTYPE"], FUN = sum)
TOPinjuries <- injuries[order(-injuries$INJURIES)[seq_len(10)], ]

# Bar chart of the ten event types in TOPinjuries, ordered from the highest
# injury count to the lowest.
p2 <- ggplot(TOPinjuries, aes(reorder(EVTYPE, -INJURIES), INJURIES)) +
    geom_col(fill = "orange", colour = "black") +
    scale_y_continuous(breaks = seq(0, 100000, by = 25000), limits = c(0, 100000)) +
    labs(
        title = "Top 10 events\nwith higher injuries",
        x = "Event Type",
        y = "Nu. injuries"
    ) +
    theme_classic() +
    # Rotated, smaller labels so the event-type names fit under the bars.
    theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 7))

# Show the fatalities and injuries charts side by side in a single row.
grid.arrange(p1, p2, nrow = 1)

According to the database, tornadoes are the events with the greatest impact on public health, showing the largest numbers in both the fatalities and injuries categories.

Across the United States, which types of events have the greatest economic consequences?

# Total PROPDMG per EVTYPE, then the ten event types with the largest totals.
# NOTE(review): PROPDMG is summed exactly as recorded. If the dataset keeps a
# separate magnitude column for these amounts (presumably PROPDMGEXP - confirm
# against the Storm Data documentation), these totals are not dollar values.
properties <- aggregate(x = data["PROPDMG"], by = data["EVTYPE"], FUN = sum)
TOPproperties <- properties[order(-properties$PROPDMG)[seq_len(10)], ]

# Bar chart of the ten event types in TOPproperties, ordered from the largest
# summed PROPDMG to the smallest. Reuses the name p1 from the earlier chart.
p1 <- ggplot(TOPproperties, aes(reorder(EVTYPE, -PROPDMG), PROPDMG)) +
    geom_col(fill = "black", colour = "black") +
    scale_y_continuous(breaks = seq(0, 4000000, by = 1000000), limits = c(0, 4000000)) +
    labs(
        title = "Top 10 events\nwith higher property damage",
        x = "Event Type",
        y = "Property damage"
    ) +
    theme_classic() +
    # Rotated, smaller labels so the event-type names fit under the bars.
    theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))

# Total CROPDMG per EVTYPE, then the ten event types with the largest totals.
# NOTE(review): CROPDMG is summed exactly as recorded. If the dataset keeps a
# separate magnitude column for these amounts (presumably CROPDMGEXP - confirm
# against the Storm Data documentation), these totals are not dollar values.
crop <- aggregate(x = data["CROPDMG"], by = data["EVTYPE"], FUN = sum)
TOPcrop <- crop[order(-crop$CROPDMG)[seq_len(10)], ]

# Bar chart of the ten event types in TOPcrop, ordered from the largest summed
# CROPDMG to the smallest. Reuses the name p2 from the earlier chart.
p2 <- ggplot(TOPcrop, aes(reorder(EVTYPE, -CROPDMG), CROPDMG)) +
    geom_col(fill = "green", colour = "black") +
    scale_y_continuous(breaks = seq(0, 600000, by = 150000), limits = c(0, 600000)) +
    labs(
        title = "Top 10 events\nwith higher crop damage",
        x = "Event Type",
        y = "Crop damage"
    ) +
    theme_classic() +
    # Rotated, smaller labels so the event-type names fit under the bars.
    theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))

# Show the property and crop damage charts side by side in a single row.
grid.arrange(p1, p2, nrow = 1)

# Row-wise sum of CROPDMG and PROPDMG, totalled per EVTYPE. Adding the two
# one-column data frames keeps the first operand's column name, so the combined
# total is stored (and referenced below) under the name CROPDMG.
# NOTE(review): both columns are summed as recorded; if the dataset keeps
# separate magnitude columns for them (presumably PROPDMGEXP / CROPDMGEXP -
# confirm), the totals are not dollar values.
TotalDamage <- aggregate(
    x = data["CROPDMG"] + data["PROPDMG"],
    by = data["EVTYPE"],
    FUN = sum
)
TOPTotalDamage <- TotalDamage[order(-TotalDamage$CROPDMG)[seq_len(10)], ]

# Bar chart of the ten event types with the largest combined totals; left as a
# bare top-level expression so the plot is printed.
ggplot(TOPTotalDamage, aes(reorder(EVTYPE, -CROPDMG), CROPDMG)) +
    geom_col(fill = "darkblue", colour = "black") +
    scale_y_continuous(breaks = seq(0, 4000000, by = 1000000), limits = c(0, 4000000)) +
    labs(
        title = "Top 10 events with higher total damage",
        x = "Event Type",
        y = "Total damage"
    ) +
    theme_classic() +
    # Rotated, smaller labels so the event-type names fit under the bars.
    theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))

Overall, based on the summed PROPDMG and CROPDMG values, tornado events result in the most combined damage to property and crops.