In an effort to analyze the effect of severe weather conditions on public health and the economy, this project uses data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
In this project, the impact on population health is measured by injuries and fatalities. Economic impact is measured by crop and property damage. The analysis concludes that tornadoes have been the most harmful with respect to population health, as they cause the most injuries and deaths of any storm type. In terms of economic costs, floods have inflicted the most damage.
# Load Libraries
library(stats)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
require(gridExtra, warn.conflicts = FALSE)
## Loading required package: gridExtra
# Downloading and Reading data file
# The source is a bzip2-compressed CSV. It is cached locally under
# "./proj2data.csv" and only downloaded when that file does not exist yet.
file_URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("./proj2data.csv")) {
  # mode = "wb" requests a binary transfer so the compressed bytes are
  # written unchanged regardless of platform.
  download.file(file_URL, "./proj2data.csv", mode = "wb")
}
# Read data
# The cached file is still bzip2-compressed despite its ".csv" name;
# read.csv is given the path directly, as in the original analysis.
rawData <- read.csv("./proj2data.csv", header = TRUE)
# dplyr data frame
# as_tibble() replaces the deprecated tbl_df() wrapper.
data_Dfr <- as_tibble(rawData)
For the public health data, the relevant variables are selected and the data are subset to events that caused at least one injury or fatality.
# Public health data: keep the event type plus the two casualty counts,
# restricted to events with at least one fatality or injury.
health_Dfr <- data_Dfr %>%
  select(EVTYPE, FATALITIES, INJURIES) %>%
  filter(FATALITIES > 0 | INJURIES > 0)

# Total deaths per event type, largest total first.
fatalities <- health_Dfr %>%
  group_by(EVTYPE) %>%
  summarise(totalDeaths = sum(FATALITIES)) %>%
  arrange(desc(totalDeaths))

# Total injuries per event type, largest total first.
injuries <- health_Dfr %>%
  group_by(EVTYPE) %>%
  summarise(totalInjured = sum(INJURIES)) %>%
  arrange(desc(totalInjured))

# Top 10 weather types for each measure.
fatal_data <- fatalities[seq_len(10), ]
injury_data <- injuries[seq_len(10), ]
Economic damage is measured by property damage and crop damage.
# Select the crop & property damage columns (amounts and their exponent
# codes) and keep only events that report some property or crop damage.
damage_Dfr <- data_Dfr %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
  filter(PROPDMG > 0 | CROPDMG > 0)
## Create damage multiplier
# Lookup table from an exponent code (PROPDMGEXP / CROPDMGEXP) to the factor
# the matching damage amount is multiplied by. Letter codes are
# case-insensitive: H/h = hundred, K/k = thousand, M/m = million,
# B/b = billion. Digits "1".."9" are treated as powers of ten.
# Fix: lowercase "b" previously mapped to 1e10, ten times the value used
# for "B"; both now map to 1e9.
# NOTE(review): "0" maps to 0 although the other digits are powers of ten
# (10^0 would be 1) -- kept as in the original analysis; confirm intent.
dmgExpMultiplier <- c(
  "-" = 0, "+" = 0, "?" = 0, "0" = 0,
  "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4, "5" = 1e5,
  "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9,
  "H" = 1e2, "h" = 1e2,
  "K" = 1e3, "k" = 1e3,
  "M" = 1e6, "m" = 1e6,
  "B" = 1e9, "b" = 1e9
)

# Translate a vector of exponent codes into numeric multipliers.
# Codes not present in the table (including the empty string) give 0, which
# matches the fall-through of the original nested ifelse chain; a missing
# (NA) code stays NA, as it did before.
lookupDmgMultiplier <- function(expCode) {
  codes <- as.character(expCode)
  multiplier <- unname(dmgExpMultiplier[codes])
  multiplier[is.na(multiplier) & !is.na(codes)] <- 0
  multiplier
}

damage_Dfr <- mutate(
  damage_Dfr,
  PDMultiplier = lookupDmgMultiplier(PROPDMGEXP),
  CDMultiplier = lookupDmgMultiplier(CROPDMGEXP)
)
# Calculate damage in dollars: scale each amount by its multiplier, then
# drop the helper columns so only the event type and dollar values remain.
damage_Dfr <- damage_Dfr %>%
  mutate(
    CROPDMG = CROPDMG * CDMultiplier,
    PROPDMG = PROPDMG * PDMultiplier
  ) %>%
  select(EVTYPE, PROPDMG, CROPDMG)

## Calculate total damage
# One row per event type with property, crop and combined damage sums.
damageTotal <- damage_Dfr %>%
  group_by(EVTYPE) %>%
  summarise(
    propDamage = sum(PROPDMG),
    cropDamage = sum(CROPDMG),
    totalDamage = sum(CROPDMG, PROPDMG)
  )

## tidy data
# Long format: one row per (event type, damage type); totalDamage is carried
# along on every row so it can be used for ordering later.
damageTidy <- damageTotal %>%
  gather(key = damageType, value = damage, propDamage, cropDamage)
## crop damage alone
# Ten event types with the largest crop damage.
cropDmg <- damageTidy %>%
  filter(damageType == "cropDamage") %>%
  select(EVTYPE, damage) %>%
  arrange(desc(damage))
cropDmg <- cropDmg[seq_len(10), ]

## prop damage alone
# Ten event types with the largest property damage.
propDmg <- damageTidy %>%
  filter(damageType == "propDamage") %>%
  select(EVTYPE, damage) %>%
  arrange(desc(damage))
propDmg <- propDmg[seq_len(10), ]

## Total damage
# Order the long table by combined damage and keep the first 20 rows
# (each event type contributes one property row and one crop row).
damageTidy <- arrange(damageTidy, desc(totalDamage))
damageTidy <- damageTidy[seq_len(20), ]
# Bar chart of total injuries for the selected event types; bars are ordered
# from the largest total to the smallest and x labels are rotated to fit.
plotInj <- ggplot(
  data = injury_data,
  mapping = aes(x = reorder(EVTYPE, -totalInjured), y = totalInjured)
) +
  geom_bar(stat = "identity") +
  labs(title = "Total Injuries by Storm Type", x = "", y = "Total Injuries") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
plotInj

# Same layout for total deaths.
plotDeath <- ggplot(
  data = fatal_data,
  mapping = aes(x = reorder(EVTYPE, -totalDeaths), y = totalDeaths)
) +
  geom_bar(stat = "identity") +
  labs(title = "Total Deaths by Storm Type", x = "", y = "Total Deaths") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
plotDeath
The results imply that tornadoes are the most harmful in terms of both injuries and fatalities.
# Fill colours keyed by damage type; the same two colours are reused for the
# single-type panels below.
group.colors <- c(propDamage = "#333BFF", cropDamage = "#CC6600")

# Total Economic Damage
# Stacked bars (property + crop) per event type, plotted in billions of
# dollars, with the legend placed inside the panel.
plotEco <- ggplot(
  data = damageTidy,
  mapping = aes(
    x = reorder(EVTYPE, -damage),
    y = damage / 10^9,
    fill = damageType
  )
) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 155)) +
  scale_fill_manual(values = group.colors) +
  labs(title = "Total Economic Damage", x = "", y = "Damage (Billion US $)") +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.position = c(0.5, 0.8)
  )

# Crop Damage
# Uses the same 0-155 y-range as the other panels.
plotCrop <- ggplot(
  data = cropDmg,
  mapping = aes(x = reorder(EVTYPE, -damage), y = damage / 10^9)
) +
  geom_bar(stat = "identity", fill = "#CC6600") +
  scale_y_continuous(limits = c(0, 155)) +
  labs(title = "Crop Damage", x = "", y = "") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Prop Damage
plotProp <- ggplot(
  data = propDmg,
  mapping = aes(x = reorder(EVTYPE, -damage), y = damage / 10^9)
) +
  geom_bar(stat = "identity", fill = "#333BFF") +
  scale_y_continuous(limits = c(0, 155)) +
  labs(title = "Property Damage", x = "", y = "") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Show the three panels side by side.
grid.arrange(plotEco, plotProp, plotCrop, ncol = 3)
The results imply that floods have caused the greatest amount of economic damage of any storm type. For crop damage alone, drought is the most damaging event type.