This analysis is part of Coursera’s Reproducible Research course. Quoting the assignment,
“This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.” The specific questions to be answered are which types of events most affect population health, and which types of events have the greatest economic consequences in the U.S.
The data is available here
Documentation and FAQ are available:
library(pacman)
p_load(dplyr, tidyr, stringr, lubridate, httr, ggvis, ggplot2, shiny, rio, rmarkdown)
# Fetch the NOAA storm data and load it into `StormData`.
# The archive is stored in the working directory under its real ".bz2" name
# so the analysis can be reproduced on any machine, and it is downloaded only
# when a local copy does not exist yet.
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm.file <- "StormData.csv.bz2"
if (!file.exists(storm.file)) {
  # mode = "wb" stops the binary archive from being corrupted on Windows.
  download.file(fileURL, destfile = storm.file, mode = "wb")
}
# read.csv() opens the path through file(), which detects bzip2 compression.
StormData <- read.csv(storm.file)
# Population-health impact: total fatalities and injuries per event type,
# reduced to the ten deadliest event types in long format for plotting.
fatal.by.evtype <- aggregate(FATALITIES ~ EVTYPE, data = StormData, sum)
injure.by.evtype <- aggregate(INJURIES ~ EVTYPE, data = StormData, sum)
# Join on EVTYPE instead of relying on both aggregates sharing a row order.
damage.by.evtype <- merge(fatal.by.evtype, injure.by.evtype, by = "EVTYPE")

# Drop event types that harmed nobody. The test is OR, not AND: an event type
# with fatalities but no recorded injuries (or vice versa) must be kept.
has.harm <- damage.by.evtype$FATALITIES > 0 | damage.by.evtype$INJURIES > 0
damage.by.evtype1 <- damage.by.evtype[has.harm, ]

damage.by.evtype1 <- arrange(damage.by.evtype1, desc(FATALITIES))
# head() never pads with NA rows when fewer than ten event types remain.
damage.by.evtype1 <- head(damage.by.evtype1, 10)

# Long format: one row per (event type, casualty type) for a stacked bar chart.
damage.by.evtype1 <- pivot_longer(damage.by.evtype1,
                                  cols = c(FATALITIES, INJURIES),
                                  names_to = "CType",
                                  values_to = "Value")
# Stacked bar chart of fatalities and injuries for the selected event types.
# Bars run from largest to smallest: reorder() sorts EVTYPE by the (default)
# mean of -Value within each event type.
g <- ggplot(
  data = damage.by.evtype1,
  mapping = aes(x = reorder(EVTYPE, -Value), y = Value, fill = CType)
)
g +
  geom_col() +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    fill = "Type",
    y = "Counts",
    x = "Type of Event",
    title = "Top Ten Events by Fatalities"
  )
As the figure shows, tornadoes cause the most injuries and deaths in the United States.

# Which Event Has the Greatest Economic Impact?

## Organizing the Data

The data records each damage amount together with a magnitude code: B, M, or K for billions, millions, and thousands, respectively. We need to convert those coded amounts into plain numeric values.
# Economic impact: convert the coded property/crop damage figures to dollars,
# total them per event type, and keep the ten costliest event types in long
# format for plotting.

# Select by name rather than by column position so a change in the column
# layout cannot silently pick up the wrong fields.
econdmg <- StormData[, c("EVTYPE", "PROPDMG", "PROPDMGEXP",
                         "CROPDMG", "CROPDMGEXP")]

# Keep every record with any damage. The test is OR, not AND: most records
# report only property damage or only crop damage and must not be discarded.
has.dmg <- econdmg$PROPDMG != 0 | econdmg$CROPDMG != 0
econdmg <- econdmg[has.dmg, ]
# Renumber rows without hard-coding the expected row count.
row.names(econdmg) <- NULL

# Translate a magnitude code into its dollar multiplier:
# K = thousands (1e3), M = millions (1e6), B = billions (1e9), any case.
# Every other code (blank, digits, symbols) leaves the amount unscaled.
exp.multiplier <- function(code) {
  code <- toupper(as.character(code))
  mult <- rep(1, length(code))
  # %in% never yields NA, so missing codes simply keep a multiplier of 1.
  mult[code %in% "K"] <- 1e3
  mult[code %in% "M"] <- 1e6
  mult[code %in% "B"] <- 1e9
  mult
}

econdmg$PROPDMG <- econdmg$PROPDMG * exp.multiplier(econdmg$PROPDMGEXP)
econdmg$CROPDMG <- econdmg$CROPDMG * exp.multiplier(econdmg$CROPDMGEXP)
econdmg <- econdmg[, c("EVTYPE", "PROPDMG", "CROPDMG")]

prop.dmg.evtype <- aggregate(PROPDMG ~ EVTYPE, data = econdmg, sum)
crop.dmg.evtype <- aggregate(CROPDMG ~ EVTYPE, data = econdmg, sum)
# Join on EVTYPE instead of relying on both aggregates sharing a row order.
dmg.evtype <- merge(prop.dmg.evtype, crop.dmg.evtype, by = "EVTYPE")

# Rank by total damage so the selection matches "economic impact", not
# property damage alone.
dmg.evtype <- arrange(dmg.evtype, desc(PROPDMG + CROPDMG))
# head() never pads with NA rows when fewer than ten event types remain.
dmg.evtype <- head(dmg.evtype, 10)

# Long format: one row per (event type, damage type) for a stacked bar chart.
dmg.evtype <- pivot_longer(dmg.evtype,
                           cols = c(PROPDMG, CROPDMG),
                           names_to = "Type",
                           values_to = "Value")
# Stacked bar chart of property and crop damage for the selected event types.
# Bars run from largest to smallest: reorder() sorts EVTYPE by the (default)
# mean of -Value within each event type.
g <- ggplot(
  dmg.evtype,
  aes(x = reorder(EVTYPE, -Value), y = Value, fill = Type)
)
g +
  geom_col() +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    fill = "Type",
    y = "Damage in Dollars",
    x = "Type of Event",
    title = "Top Ten Events by Economic Impact"
  )
As the figure shows, floods caused more economic damage than any other type of event.