# Set the knitr chunk option `message = FALSE` as the default for all chunks.
knitr::opts_chunk$set(message = FALSE)

SUMMARY

This document analyses the injuries, fatalities and economic damage caused by the event types recorded in the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The events most harmful to public safety are tornadoes, thunderstorm winds, flash floods, excessive heat and lightning. The worst economic damage, on the other hand, is caused by tornadoes, thunderstorm winds, flash floods, hail and lightning.

DATA PROCESSING

Installing and loading the required packages and libraries

# Install whichever required packages are not yet available.
# `requireNamespace()` only checks availability; attaching is done by the
# `library()` calls that follow, so the attach order is unchanged.
# The previous `require(x) || install.packages("x")` form failed when a package
# was missing, because `install.packages()` returns NULL and `FALSE || NULL` is
# an error. It also checked for "reshape" but installed "reshape2".
required_packages <- c(
    "readr", "stringr", "dplyr", "ggplot2", "ggcorrplot",
    "reshape", "data.table", "R.utils", "ggthemes"
)
is_installed <- vapply(
    required_packages, requireNamespace, logical(1),
    quietly = TRUE
)
if (!all(is_installed)) {
    install.packages(required_packages[!is_installed])
}
library(readr)
library(stringr)
library(dplyr)
library(ggplot2)
library(ggcorrplot)
library(reshape)
library(data.table)
library(R.utils)
library(ggthemes)

Code for reading in the dataset and processing the data. First, download the data and unzip it into the local folder.

# Drop any leftover archive or extracted file so each run starts from a fresh download.
if (file.exists("stormdata.bz2")) file.remove("stormdata.bz2")
if (file.exists("stormdata")) file.remove("stormdata")
## [1] TRUE
# Fetch the compressed storm data and decompress it; the read step below
# expects the extracted file at ./stormdata.
download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "./stormdata.bz2"
)
bunzip2("./stormdata.bz2")

## Parse the extracted CSV with read_delim, which is fast and returns a tibble.
## Any column not listed explicitly is read as character via `.default`.
file <- read_delim(
    "./stormdata",
    delim = ",",
    col_types = cols(
        .default = col_character(),
        # Timestamps
        BGN_DATE = col_datetime(format = "%m/%d/%Y %H:%M:%OS"),
        END_DATE = col_datetime(format = "%m/%d/%Y %H:%M:%OS"),
        # Kept as text
        BGN_TIME = col_character(),
        END_TIME = col_character(),
        # Logical flag
        COUNTYENDN = col_logical(),
        # Numeric identifiers and location fields
        STATE__ = col_double(),
        COUNTY = col_double(),
        COUNTY_END = col_double(),
        BGN_RANGE = col_double(),
        END_RANGE = col_double(),
        LATITUDE = col_double(),
        LONGITUDE = col_double(),
        LATITUDE_E = col_double(),
        LONGITUDE_ = col_double(),
        REFNUM = col_double(),
        # Numeric event measurements
        LENGTH = col_double(),
        WIDTH = col_double(),
        F = col_double(),
        MAG = col_double(),
        # Numeric health and damage figures
        FATALITIES = col_double(),
        INJURIES = col_double(),
        PROPDMG = col_double(),
        CROPDMG = col_double()
    )
)

Some values of the EVTYPE column need to be changed because the same condition is recorded under different labels.

# Merge labels that describe the same condition (exact, whole-value matches only):
#   "HEAT"                             -> "EXCESSIVE HEAT"
#   "TSTM WIND" / "THUNDERSTORM WIND"  -> "THUNDERSTORM WINDS"
file$EVTYPE <- sub("^HEAT$", "EXCESSIVE HEAT", file$EVTYPE)
file$EVTYPE <- sub(
    "^(TSTM WIND|THUNDERSTORM WIND)$",
    "THUNDERSTORM WINDS",
    file$EVTYPE
)

Aggregation of the data, disregarding the date and grouping health and economic damages by event type. I keep only the top 10 event types.

# Convert a damage-magnitude code (PROPDMGEXP / CROPDMGEXP) into a multiplier.
# H = hundreds, K = thousands, M = millions, B = billions (case-insensitive).
# Blank, missing or unrecognised codes leave the amount unscaled (multiplier 1).
# NOTE(review): the raw data also contains a few digit/symbol codes; they are
# treated as "unscaled" here -- confirm this is the desired handling.
damage_multiplier <- function(exp_code) {
    scale_by <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
    multiplier <- unname(scale_by[toupper(exp_code)])
    multiplier[is.na(multiplier)] <- 1
    multiplier
}

# Total injuries, fatalities and damages per event type, over all dates.
# PROPDMG / CROPDMG hold only the mantissa of each damage figure, so they are
# scaled by their magnitude code before summing; otherwise an amount in
# thousands and one in billions would count the same.
agg_data <- file %>%
    select(EVTYPE, FATALITIES, INJURIES,
           PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
    mutate(
        PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
        CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP)
    ) %>%
    group_by(EVTYPE) %>%
    summarise(
        INJURIES = sum(INJURIES),
        FATALITIES = sum(FATALITIES),
        PROPERTYDMG = sum(PROPDMG),
        CROPDMG = sum(CROPDMG)
    ) %>%
    mutate(
        health = FATALITIES + INJURIES,
        economic = PROPERTYDMG + CROPDMG
    ) %>%
    arrange(desc(health))
# head() returns at most 10 rows and never pads with NA rows.
top10_pophealth <- head(agg_data, 10)

# Re-rank by economic loss; agg_data is left sorted by `economic`, as before.
agg_data <- arrange(agg_data, desc(economic))
top10_economic <- head(agg_data, 10)

RESULTS

Injuries by event type

# Bar chart of total injuries per event type in `top10_pophealth`, with bars
# ordered from most to fewest injuries.
# Fill is mapped to EVTYPE so every bar gets its own colour whatever the number
# of rows; a fixed-length `rainbow(10)` inside aes() errors unless the data has
# exactly 10 rows. The legend is hidden because the x axis already names each
# event type.
ggplot(
    data = top10_pophealth,
    aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES, fill = EVTYPE)
) +
    geom_col() +
    labs(
        x = "Event type",
        y = "Total injuries",
        title = "Injuries By Event Type"
    ) +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5)
    )

Fatalities by event type

# Bar chart of total fatalities per event type in `top10_pophealth`, with bars
# ordered from most to fewest fatalities.
# Fill is mapped to EVTYPE so every bar gets its own colour whatever the number
# of rows; a fixed-length `rainbow(10)` inside aes() errors unless the data has
# exactly 10 rows. The legend is hidden because the x axis already names each
# event type.
ggplot(
    data = top10_pophealth,
    aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES, fill = EVTYPE)
) +
    geom_col() +
    labs(
        x = "Event type",
        y = "Total fatalities",
        title = "Fatalities By Event Type"
    ) +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5)
    )

Economic damages by event type

# Bar chart of total economic losses per event type in `top10_economic`, with
# bars ordered from largest to smallest loss.
# Fill is mapped to EVTYPE so every bar gets its own colour whatever the number
# of rows; a fixed-length `rainbow(10)` inside aes() errors unless the data has
# exactly 10 rows. The legend is hidden because the x axis already names each
# event type.
ggplot(
    data = top10_economic,
    aes(x = reorder(EVTYPE, -economic), y = economic, fill = EVTYPE)
) +
    geom_col() +
    labs(
        x = "Event type",
        y = "Total economic losses",
        title = "Economic losses By Event Type"
    ) +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5)
    )