Synopsis

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. This report mainly answers the following two questions: (1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? (2) Across the United States, which types of events have the greatest economic consequences?

1. Loading and preprocessing the data

Reading the CSV file into R

# Load the raw NOAA storm events file from the working directory.
stormData <- read.csv(file = "repdata-data-StormData.csv")

Processing the data: keeping only the columns needed for the analysis

library(tidyverse)

# Keep only the event type, the health-impact counts, and the damage amounts
# with their exponent codes; treat the event type as a categorical variable.
stormDataSubset <- select(
  stormData,
  EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
stormDataSubset[["EVTYPE"]] <- as.factor(stormDataSubset[["EVTYPE"]])

Aggregating the data

# Sum fatalities per event type, rank from highest to lowest, keep the top 10.
totalFatalties <- aggregate(
  FATALITIES ~ EVTYPE,
  data = stormDataSubset,
  FUN = sum
) %>%
  arrange(desc(FATALITIES))
totalFataltiesTop10 <- totalFatalties[seq_len(10), ]

# Same ranking for injuries.
totalInjuries <- aggregate(
  INJURIES ~ EVTYPE,
  data = stormDataSubset,
  FUN = sum
) %>%
  arrange(desc(INJURIES))
totalInjuriesTop10 <- totalInjuries[seq_len(10), ]

# Translate damage exponent codes into numeric multipliers, one per row.
# "K", "M" and "B" (matched case-insensitively) become 1e3, 1e6 and 1e9.
# Any other code, an empty string, or NA maps to 1, so that amount is left
# unscaled.
damageMultiplier <- function(exponentCode) {
  knownMultipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(knownMultipliers[toupper(as.character(exponentCode))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Scale every damage amount by its own row's exponent code. `if` only accepts
# a single TRUE/FALSE, so testing the whole column with `if` either used just
# the first row's code for every row (with a warning) or, on R >= 4.2, stopped
# with an error. The multiplication below is vectorised instead.
stormDataSubset$PROPDMG <- stormDataSubset$PROPDMG *
  damageMultiplier(stormDataSubset$PROPDMGEXP)
totalPropertyDMG <- aggregate(PROPDMG ~ EVTYPE, stormDataSubset, FUN = sum)
totalPropertyDMG <- arrange(totalPropertyDMG, desc(PROPDMG))

stormDataSubset$CROPDMG <- stormDataSubset$CROPDMG *
  damageMultiplier(stormDataSubset$CROPDMGEXP)
totalCorpDMG <- aggregate(CROPDMG ~ EVTYPE, stormDataSubset, FUN = sum)
totalCorpDMG <- arrange(totalCorpDMG, desc(CROPDMG))

# Combine property and crop damage per event type and rank by the total.
totalEconomyDMG <- merge(totalPropertyDMG, totalCorpDMG, by = "EVTYPE")
totalEconomyDMG$totalDMG <- totalEconomyDMG$PROPDMG + totalEconomyDMG$CROPDMG
totalEconomyDMG <- arrange(totalEconomyDMG, desc(totalDMG))
totalEconomyDMGTop10 <- head(totalEconomyDMG, 10)

2. Across the United States, which types of events are most harmful with respect to population health?

library(ggplot2)
library(grid)
library(gridExtra)

# Top-10 event types by total injuries. reorder() sorts the bars by count so
# the flipped chart reads as a ranking rather than in alphabetical order.
injuriesPlot <- ggplot(data = totalInjuriesTop10,
                       aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)) +
        geom_bar(stat = "identity", fill = "blue") +
        coord_flip() +
        xlab("Type of Event") +
        ylab("Total number of Injuries") +
        ggtitle("Types of Event impact on Population Health")

# Top-10 event types by total fatalities, sorted the same way.
fataltiesPlot <- ggplot(data = totalFataltiesTop10,
                        aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)) +
        geom_bar(stat = "identity", fill = "red") +
        coord_flip() +
        xlab("Type of Event") +
        ylab("Total number of Fatalities") +
        ggtitle("Types of Event impact on Population Health")

# Stack the two panels in a single figure.
grid.arrange(injuriesPlot, fataltiesPlot)

3. Across the United States, which types of events have the greatest economic consequences?

library(ggplot2)
library(grid)
library(gridExtra)

# Top-10 event types by combined property and crop damage. reorder() sorts
# the bars by total damage so the flipped chart reads as a ranking rather
# than in alphabetical order.
ggplot(data = totalEconomyDMGTop10,
       aes(x = reorder(EVTYPE, totalDMG), y = totalDMG)) +
        geom_bar(stat = "identity", fill = "green") +
        coord_flip() +
        xlab("Type of Event") +
        ylab("Total dollars of property and crop damage") +
        ggtitle("Types of Event impact on Economy")

4. Results

As the figures above show, both questions can be answered directly: tornadoes are the event type that has caused the most damage to both population health and the economy.