Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
Analysis of the NOAA storm data leads to the following conclusions:
The weather event type most harmful to population health is: TORNADO.
The weather event type with the greatest economic consequences is: FLOOD.
library(ggplot2)
library(RCurl)
## Loading required package: bitops
# Fetch the compressed NOAA storm data set on the first run only; later runs
# reuse the copy cached under ./Data.
if (!file.exists("./Data/StormData.csv.bz2")) {
  if (!file.exists("./Data")) {
    dir.create("./Data")
  }
  download_status <- download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "./Data/StormData.csv.bz2",
    method = "curl"
  )
  # With method = "curl" a failed transfer is reported through a non-zero
  # return status. A truncated archive left on disk would make the
  # file.exists() guard above skip the download on every later run, so remove
  # the partial file and stop instead of carrying on.
  if (download_status != 0) {
    unlink("./Data/StormData.csv.bz2")
    stop(
      "Download of StormData.csv.bz2 failed (status ", download_status, ")",
      call. = FALSE
    )
  }
}
# Read and uncompress the downloaded data file
StormData <- read.csv("./Data/StormData.csv.bz2")
dim(StormData)
## [1] 902297 37
# Helper for cleaning labels: returns `x` (coerced to character) with any
# leading and trailing whitespace removed. Interior whitespace is kept.
trim <- function(x) {
  without_leading <- sub("^\\s+", "", x)
  sub("\\s+$", "", without_leading)
}
## Prepare health data: casualties per record = fatalities + injuries
StormData$PopulationHealth <- StormData$FATALITIES + StormData$INJURIES

## Normalize values
# Translate a vector of damage exponent codes into numeric multipliers:
# K -> 1e3, M -> 1e6, B -> 1e9. Codes are matched case-insensitively so that
# lowercase codes such as "m" are not silently treated as a multiplier of 1.
# Any other code (including empty or missing values) keeps the default of 1.
damage_multiplier <- function(exp_code) {
  units <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(units[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1 # Default
  multiplier
}
StormData$Param1 <- damage_multiplier(StormData$PROPDMGEXP)
StormData$Param2 <- damage_multiplier(StormData$CROPDMGEXP)

# Determine the combined cost of property and crop damage. The result is kept
# as a free-standing vector, parallel to the rows of StormData.
EconomicConsquences <- StormData$PROPDMG * StormData$Param1 +
  StormData$CROPDMG * StormData$Param2
## Normalize event types
# Start from the upper-cased, whitespace-trimmed raw label, then collapse
# related labels into broader groups. The steps run in sequence: each one
# matches against the labels as already rewritten by the steps before it, so
# the order below matters.
StormData$eventtype <- toupper(trim(StormData$EVTYPE))
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("HEAT", StormData$eventtype),
  "HOT WEATHER EVENT*"
)
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("WIND", StormData$eventtype),
  "WINDY WEATHER EVENT*"
)
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("FLOOD", StormData$eventtype),
  "FLOOD EVENT*"
)
# Three separate keywords all map onto the same winter group
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("WINTER", StormData$eventtype),
  "WINTER EVENT*"
)
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("BLIZZARD", StormData$eventtype),
  "WINTER EVENT*"
)
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("SNOW", StormData$eventtype),
  "WINTER EVENT*"
)
# Fold every label containing "RIP CURRENT" into a single label
StormData$eventtype <- replace(
  StormData$eventtype,
  grepl("RIP CURRENT", StormData$eventtype),
  "RIP CURRENT"
)
# Total the casualties per normalized event type
PopulationHealth.df <- aggregate(
  PopulationHealth ~ eventtype,
  data = StormData,
  FUN = sum
)
# Total the damage cost per normalized event type. Any formula variable that
# is not a column of StormData is looked up in the environment where the
# formula was created.
EconomicConsquences.df <- aggregate(
  EconomicConsquences ~ eventtype,
  data = StormData,
  FUN = sum
)
# Plot the ten event types with the highest combined fatalities and injuries.
plotdata <- tail(
  PopulationHealth.df[order(PopulationHealth.df$PopulationHealth), ],
  10
)
# plotdata holds one row per event type, so the bars are drawn straight from
# the values (stat = "identity"). This replaces stat_summary(fun.y = mean),
# whose `fun.y` argument is deprecated in ggplot2 3.3.0 and which averaged a
# single value per bar anyway. reorder() keeps the bars ranked by value;
# factor(eventtype) re-sorted them alphabetically and discarded the ranking.
plot <- ggplot(
  plotdata,
  aes(x = reorder(eventtype, PopulationHealth), y = PopulationHealth)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("Event Type") +
  ylab("Fatalities and Injuries") +
  ggtitle("Population health")
print(plot)
# Plot the ten event types with the highest total damage cost, in million $.
plotdata <- tail(
  EconomicConsquences.df[order(EconomicConsquences.df$EconomicConsquences), ],
  10
)
# plotdata holds one row per event type, so the bars are drawn straight from
# the values (stat = "identity"). This replaces stat_summary(fun.y = mean),
# whose `fun.y` argument is deprecated in ggplot2 3.3.0 and which averaged a
# single value per bar anyway. reorder() keeps the bars ranked by value;
# factor(eventtype) re-sorted them alphabetically and discarded the ranking.
plot <- ggplot(
  plotdata,
  aes(
    x = reorder(eventtype, EconomicConsquences),
    y = EconomicConsquences / 1000000
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("Event Type") +
  ylab("Million $") +
  ggtitle("Economic consequences")
print(plot)