This report explores the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm data from 1950 to 2011 to answer the following questions:
Across the United States, which types of events (as indicated in the event type variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
This analysis shows that tornadoes are the most harmful to population health, and that floods result in the greatest economic loss.
# Fetch the compressed NOAA storm data set (once) and load it into noaa.storm.
#
# The archive is stored in the current working directory instead of a
# hard-coded, user-specific path, and the working directory is no longer
# changed, so the script runs unmodified on any machine.
data.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data.dir <- getwd()
data.file <- "StormData.csv.bz2"
data.file <- file.path(data.dir, data.file)
if (!file.exists(data.file)) {
  # mode = "wb": the archive is binary. The default download method is used
  # so no external curl executable is required.
  download.file(url = data.url, destfile = data.file, mode = "wb")
}
# read.csv() reads .bz2 files directly and, unlike read.table(), does not
# treat "'" as a quote character or "#" as the start of a comment.
noaa.storm <- read.csv(data.file, header = TRUE, stringsAsFactors = FALSE,
                       na.strings = "NA")
# Size of the raw table as (rows, columns).
dim(noaa.storm)
## [1] 902297 37
# Lower-case every column name so the rest of the script uses one spelling.
colnames(noaa.storm) <- tolower(colnames(noaa.storm))
# Preview the event type, casualty and damage columns (selected by position).
preview.cols <- c(8, 23:28)
head(noaa.storm[, preview.cols], n = 6)
## evtype fatalities injuries propdmg propdmgexp cropdmg cropdmgexp
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
# Population health impact: sum fatalities and injuries per event type,
# combine both sums into one table, and rank event types by combined count.
fe <- aggregate(fatalities ~ evtype, data = noaa.storm, FUN = sum)
ie <- aggregate(injuries ~ evtype, data = noaa.storm, FUN = sum)
# Full outer join on the event type so no event type is lost.
harmful <- merge(fe, ie, by = "evtype", all = TRUE)
casualty.cols <- c("fatalities", "injuries")
harmful$total <- rowSums(harmful[, casualty.cols], na.rm = TRUE)
# Most harmful event types first.
harmful.rank <- order(harmful$total, decreasing = TRUE)
ordered.harmful <- harmful[harmful.rank, ]
dim(ordered.harmful)
## [1] 985 4
head(ordered.harmful)
## evtype fatalities injuries total
## 834 TORNADO 5633 91346 96979
## 130 EXCESSIVE HEAT 1903 6525 8428
## 856 TSTM WIND 504 6957 7461
## 170 FLOOD 470 6789 7259
## 464 LIGHTNING 816 5230 6046
## 275 HEAT 937 2100 3037
# Economic impact: express property and crop damage in millions of dollars,
# add them per record, and rank event types by their summed damage.

# Factor that turns an amount into millions of dollars, keyed by the
# upper-cased exponent code (B = billions, M = millions, K = thousands,
# H = hundreds).
exp.multiplier <- c(B = 1000, M = 1, K = 0.001, H = 1e-04)

# Scale damage amounts to millions of dollars.
#
# amount:   numeric vector of recorded damage figures.
# exp.code: vector of exponent codes, one per amount; matched without regard
#           to case so property and crop damage are treated identically.
# Returns a numeric vector the same length as amount. Any code that is not
# B/M/K/H (blank, NA, digits, symbols) is treated as plain dollars.
damage.in.millions <- function(amount, exp.code) {
  multiplier <- unname(exp.multiplier[toupper(as.character(exp.code))])
  multiplier[is.na(multiplier)] <- 1e-06
  amount * multiplier
}

pde <- noaa.storm$propdmgexp
cde <- noaa.storm$cropdmgexp
pd <- damage.in.millions(noaa.storm$propdmg, pde)
cd <- damage.in.millions(noaa.storm$cropdmg, cde)

# Total economic damage per record, then per event type, largest first.
econdmg <- cd + pd
edt <- aggregate(econdmg ~ noaa.storm$evtype, FUN = sum)
oedt <- edt[order(edt$econdmg, decreasing = TRUE), ]
names(oedt)[1] <- "evtype"
head(oedt)
## evtype econdmg
## 170 FLOOD 150319.68
## 411 HURRICANE/TYPHOON 71913.71
## 834 TORNADO 57352.11
## 670 STORM SURGE 43323.54
## 244 HAIL 18758.22
## 153 FLASH FLOOD 17562.13
library(dplyr, quietly = T, warn.conflicts = F)
library(ggplot2, quietly = T, warn.conflicts = F)
## Warning: package 'ggplot2' was built under R version 3.4.1
library(tidyr, quietly = T, warn.conflicts = F)
# Plot 1: casualties for the five most harmful event types.
# Only the two measures are reshaped to long form, and the value column gets
# its own name ("count") so it cannot clash with the existing "total" column.
# All ten long-form rows are plotted, so each bar stacks fatalities and
# injuries and its height is the combined total.
top.harmful <- gather(
  ordered.harmful[1:5, c("evtype", "fatalities", "injuries")],
  Type, count, fatalities:injuries
)
# geom_bar(stat = "identity") draws the supplied values as bar heights; a
# histogram geom is meant for binning raw observations and only warns here.
ggplot(top.harmful, aes(evtype, count, fill = Type)) +
  geom_bar(stat = "identity") +
  ylab("Fatalities and Injuries") +
  xlab("Event Type") +
  ggtitle("Top Five Types of Events Causing fatalities and injuries Across the U.S")

# Plot 2: total property and crop damage for the five costliest event types.
ggplot(oedt[1:5, ], aes(evtype, econdmg)) +
  geom_bar(stat = "identity") +
  ylab("Property and Crop Damages (million dollars)") +
  xlab("Event Type") +
  ggtitle("Top Five Types of Events Causing Economic Damages Across the U.S")