Synopsis

This report explores the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm data from 1950 to 2011 to answer the following questions:

  1. Across the United States, which types of events (as indicated in the event type variable) are most harmful with respect to population health?

  2. Across the United States, which types of events have the greatest economic consequences?

This analysis shows that tornadoes are the most harmful to population health, and that floods result in the greatest economic loss.

Data processing

Downloading data and storing into ‘noaa.storm’ variable

# Download the NOAA storm data archive (only if it is not already cached)
# and load it into `noaa.storm`.
#
# The archive is cached in the current working directory, so the report does
# not depend on a machine-specific path and never changes the working
# directory with setwd().
data.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data.dir <- getwd()
data.file <- file.path(data.dir, "StormData.csv.bz2")

if (!file.exists(data.file)) {
  # Explicit binary mode so the bzip2 archive is written unchanged on Windows;
  # the default download method avoids requiring an external curl binary.
  download.file(url = data.url, destfile = data.file, mode = "wb")
}

# read.csv() is read.table() with CSV-appropriate defaults (sep = ",",
# header = TRUE, only double quotes as quote characters, no comment
# character), so apostrophes or '#' inside text fields cannot break parsing.
# The compressed file is read directly.
noaa.storm <- read.csv(data.file, stringsAsFactors = FALSE, na.strings = "NA")

Exploring data

# Dimensions of the raw data (rows, columns).
dim(noaa.storm)
## [1] 902297     37
# Lower-case the column names so later code can refer to them uniformly.
names(noaa.storm) <- tolower(names(noaa.storm))
# Preview the columns used in this analysis. They are selected by name rather
# than by position so the preview stays correct if the column order changes.
analysis.cols <- c("evtype", "fatalities", "injuries",
                   "propdmg", "propdmgexp", "cropdmg", "cropdmgexp")
head(noaa.storm[, analysis.cols])
##    evtype fatalities injuries propdmg propdmgexp cropdmg cropdmgexp
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Analysis 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Fatalities and injuries are summed per event type, combined, and sorted in descending order into the variable ‘ordered.harmful’.
# Per-event-type sums of fatalities and of injuries.
fe <- aggregate(fatalities ~ evtype, data = noaa.storm, FUN = sum)
ie <- aggregate(injuries ~ evtype, data = noaa.storm, FUN = sum)

# Outer join of the two summaries on their shared `evtype` column, so an
# event type present in only one of them is still kept.
harmful <- merge(fe, ie, by = "evtype", all = TRUE)

# Combined casualties per event type; a missing count contributes zero.
casualty.cols <- c("fatalities", "injuries")
harmful$total <- rowSums(harmful[, casualty.cols], na.rm = TRUE)

# Rank event types from most to least harmful.
rank.desc <- order(harmful$total, decreasing = TRUE)
ordered.harmful <- harmful[rank.desc, ]
dim(ordered.harmful)
## [1] 985   4
head(ordered.harmful)
##             evtype fatalities injuries total
## 834        TORNADO       5633    91346 96979
## 130 EXCESSIVE HEAT       1903     6525  8428
## 856      TSTM WIND        504     6957  7461
## 170          FLOOD        470     6789  7259
## 464      LIGHTNING        816     5230  6046
## 275           HEAT        937     2100  3037
The data show that tornadoes were the most harmful type of event.

Analysis 2: Across the United States, which types of events have the greatest economic consequences?

Converting property and crop damage to millions of dollars using their exponent columns, then adding the two together
# Convert damage amounts to millions of dollars using their magnitude codes.
#
# amount   : numeric vector of damage figures.
# exp.code : vector of magnitude codes, one per amount.
# Returns a numeric vector the same length as `amount`.
#
# Codes are matched case-insensitively and identically for property and crop
# damage: B = billions, M = millions, K = thousands, H = hundreds. Any other
# code (blank, digits, "+", "-", "?", NA) is treated as plain dollars.
to.millions <- function(amount, exp.code) {
  factors <- c(B = 1000, M = 1, K = 0.001, H = 1e-04)
  multiplier <- unname(factors[toupper(exp.code)])
  # Codes not found in the lookup table come back as NA: plain dollars.
  multiplier[is.na(multiplier)] <- 1e-06
  amount * multiplier
}

pde <- noaa.storm$propdmgexp
cde <- noaa.storm$cropdmgexp

pd <- to.millions(noaa.storm$propdmg, pde)
cd <- to.millions(noaa.storm$cropdmg, cde)

# Total economic damage per record, summed by event type and ranked from the
# most to the least costly.
econdmg        <- cd + pd
edt            <- aggregate(econdmg ~ noaa.storm$evtype, FUN = sum)
oedt           <- edt[order(edt$econdmg, decreasing = TRUE), ]
# The formula above names the grouping column "noaa.storm$evtype"; rename it.
names(oedt)[1] <- "evtype"
head(oedt)
##                evtype   econdmg
## 170             FLOOD 150319.68
## 411 HURRICANE/TYPHOON  71913.71
## 834           TORNADO  57352.11
## 670       STORM SURGE  43323.54
## 244              HAIL  18758.22
## 153       FLASH FLOOD  17562.13
The data show that floods caused the most economic damage.

Results

Results for question 1

library(dplyr, quietly = TRUE, warn.conflicts = FALSE)
library(ggplot2, quietly = TRUE, warn.conflicts = FALSE)
## Warning: package 'ggplot2' was built under R version 3.4.1
library(tidyr, quietly = TRUE, warn.conflicts = FALSE)

# Reshape the five most harmful event types to long form: one row per
# (event type, casualty type). The value column is named `count` so it cannot
# collide with the existing `total` column, and the result goes into its own
# variable so `ordered.harmful` stays intact and the chunk can be re-run.
top.harmful <- gather(
  ordered.harmful[1:5, c("evtype", "fatalities", "injuries")],
  Type, count, fatalities:injuries
)

# Stacked bars: each bar's full height is fatalities plus injuries.
# geom_bar(stat = "identity") plots the values as given; geom_histogram() is
# meant for binning raw observations.
ggplot(top.harmful, aes(evtype, count, fill = Type)) +
  geom_bar(stat = "identity") +
  ylab("Fatalities and Injuries") +
  xlab("Event Type") +
  ggtitle("Top Five Types of Events Causing fatalities and injuries Across the U.S")

It can be concluded that tornadoes are the most harmful type of event for population health.

Results for question 2

# Bar chart of the five event types with the largest combined property and
# crop damage. geom_bar(stat = "identity") plots the precomputed sums as
# given; geom_histogram() is meant for binning raw observations.
ggplot(oedt[1:5, ], aes(evtype, econdmg)) +
  geom_bar(stat = "identity") +
  ylab("Property and Crop Damages (million dollars)") +
  xlab("Event Type") +
  ggtitle("Top Five Types of Events Causing Economic Damages Across the U.S")

It can be concluded that Floods cause the most economic damage