In this report, we analyze the impact of different weather events on public health and the economy, based on the storm database collected by the U.S. National Oceanic and Atmospheric Administration (NOAA) from 1950 to 2011. We use the estimates of fatalities, injuries, property damage, and crop damage to decide which types of events are most harmful to population health and the economy. From these data, we found that tornadoes and heat are by far the severe weather event types most dangerous to people, while flooding, hurricanes, and storm surges are the most costly event types to the economy.
# NOTE(review): `echo` looks like a leftover knitr chunk option from the
# rendered report; it is kept as a plain assignment so the name still exists.
echo <- TRUE

# Attach the packages used by the rest of the script. library() stops with an
# error when a package is not installed, whereas require() only returns FALSE
# and lets the script fail later at the first call into the missing package.
library(R.utils)  # bunzip2()
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.0 (2015-02-19) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.19.0 (2015-02-27) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
##
## R.utils v2.1.0 (2015-05-27) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(ggplot2)  # plotting
library(plyr)
First, we download the data file, unzip it into our working directory, and read it into memory.
# Fetch, decompress and load the NOAA storm data, skipping every step whose
# result is already present (archive on disk, CSV on disk, data frame in
# memory).
#
# The archive and CSV paths are defined once so that the existence check,
# bunzip2() and read.csv() all refer to the same file. The CSV name was
# previously spelled with a lower-case "s" ("stormData"), which never matches
# the file bunzip2() writes on a case-sensitive file system.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
archive_path <- "./repdata-data-StormData.csv.bz2"
csv_path <- "./repdata-data-StormData.csv"

if (!file.exists(archive_path)) {
  # setInternet2() is only available in Windows builds of older R versions, so
  # it is called only where it exists and is still needed for https downloads.
  if (.Platform$OS.type == "windows" && getRversion() < "3.3.0") {
    utils::setInternet2(TRUE)
  }
  # mode = "wb" keeps the compressed archive byte-exact on every platform.
  download.file(url, archive_path, mode = "wb")
}

if (!file.exists(csv_path)) {
  # remove = FALSE keeps the downloaded archive next to the extracted CSV.
  bunzip2(archive_path, destname = csv_path, remove = FALSE)
}

# Reading the CSV is slow, so reuse the data frame if it is already loaded.
if (!exists("datastorm", inherits = FALSE)) {
  datastorm <- read.csv(csv_path, stringsAsFactors = FALSE, sep = ",")
}
Select the variables we are interested in into a new dataset, and convert them into a machine-readable format.
# Keep only the columns needed for the health and economic analysis.
datastorm <- datastorm[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]

# Decode the damage exponent columns into numeric powers of ten:
# b/B -> 9, m/M -> 6, k/K -> 3, h/H -> 2. Anything that is still not a number
# after the translation becomes NA in as.numeric() (which emits
# "Warning: NAs introduced by coercion") and is then set to 0, i.e. a
# multiplier of 10^0 = 1.
exp_cols <- c("PROPDMGEXP", "CROPDMGEXP")
datastorm[exp_cols] <- lapply(datastorm[exp_cols], function(code) {
  power <- as.numeric(chartr("bBmMkKhH", "99663322", code))
  power[is.na(power)] <- 0
  power
})

# Combined crop and property damage per record, scaled to billions.
datastorm$PROCRODMG <- with(
  datastorm,
  (CROPDMG * 10^CROPDMGEXP + PROPDMG * 10^PROPDMGEXP) / 10^9
)

# Totals per event type for each of the three outcome measures.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = datastorm, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = datastorm, FUN = sum)
ecodmg <- aggregate(PROCRODMG ~ EVTYPE, data = datastorm, FUN = sum)
Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Ten event types with the most fatalities, highest first. Columns are spelled
# out in full instead of relying on partial `$` matching, and head() is used
# instead of [1:10, ] so that fewer than ten event types does not add NA rows.
fatalitiesF <- head(fatalities[order(-fatalities$FATALITIES), ], 10)

# Fix the factor levels to the ranking so the bars keep that order on the
# x axis.
fatalitiesF$position <- factor(fatalitiesF$EVTYPE, levels = fatalitiesF$EVTYPE)

# The totals are already computed, so the bars use stat = "identity".
# ggplot() + geom_bar() replaces qplot(stat = , binwidth = ), whose `stat`
# argument is no longer honoured by current ggplot2 releases.
fatalitiesPlot <- ggplot(fatalitiesF, aes(x = position, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Number of Fatalities") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Severe Weather Type") +
  ggtitle("Total Fatalities by Severe Weather")
fatalitiesPlot
# Ten event types with the most injuries, highest first. Columns are spelled
# out in full instead of relying on partial `$` matching, and head() is used
# instead of [1:10, ] so that fewer than ten event types does not add NA rows.
injuriesF <- head(injuries[order(-injuries$INJURIES), ], 10)

# Fix the factor levels to the ranking so the bars keep that order on the
# x axis.
injuriesF$positionj <- factor(injuriesF$EVTYPE, levels = injuriesF$EVTYPE)

# The totals are already computed, so the bars use stat = "identity".
# ggplot() + geom_bar() replaces qplot(stat = , binwidth = ), whose `stat`
# argument is no longer honoured by current ggplot2 releases.
# The x-axis label previously read "Serve weather Type" (typo).
injuriesPlot <- ggplot(injuriesF, aes(x = positionj, y = INJURIES)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Number of Injuries") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Severe Weather Type") +
  ggtitle("Total Injuries by Severe weather")
injuriesPlot
Across the United States, tornadoes are the most harmful event type with respect to population health.
Q2. Across the United States, which types of events have the greatest economic consequences?
# Ten event types with the largest combined property and crop damage, highest
# first. Columns are spelled out in full instead of relying on partial `$`
# matching, and head() is used instead of [1:10, ] so that fewer than ten
# event types does not add NA rows.
ecodmgF <- head(ecodmg[order(-ecodmg$PROCRODMG), ], 10)

# Fix the factor levels to the ranking so the bars keep that order on the
# x axis.
ecodmgF$positione <- factor(ecodmgF$EVTYPE, levels = ecodmgF$EVTYPE)

# The totals are already computed, so the bars use stat = "identity".
# ggplot() + geom_bar() replaces qplot(stat = , binwidth = ), whose `stat`
# argument is no longer honoured by current ggplot2 releases.
# Label fixes: the y label was missing its closing parenthesis and the x label
# read "Serve weather Type" (typo).
ecodmgPlot <- ggplot(ecodmgF, aes(x = positione, y = PROCRODMG)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("economy damage cost ($billions)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Severe Weather Type") +
  ggtitle("Economy damage by Severe weather")
ecodmgPlot
Across the United States, floods, hurricanes, typhoons, and tornadoes have the greatest economic consequences.