This document identifies the weather events that are most harmful to population health and those that have the greatest economic consequences in the United States.
The source data for this project was downloaded from here: Storm Data
Some additional description of the data used in this project can be found here: Storm Data Documentation
Load required library and dataset.
# Load the plotting library and the storm data set.
library(ggplot2)

# The raw archive is kept in a sibling directory; create it on first use.
data_dir <- "../original_data_set"
data_file <- file.path(data_dir, "StormData.csv.bz2")
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}

data_set_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Download only when the archive is not already on disk, so re-running the
# analysis does not fetch the file again. mode = "wb" requests a binary
# transfer so the compressed archive is written unmodified.
if (!file.exists(data_file)) {
  download.file(data_set_url, destfile = data_file, mode = "wb")
}

# stringsAsFactors = TRUE: the levels() and summary() inspections further down
# expect factor columns, which read.csv() only creates by default in R < 4.0.
stormdata <- read.csv(bzfile(data_file), stringsAsFactors = TRUE)
Observe the column names.
# List the columns available in the storm data set.
colnames(stormdata)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
To assess the population health impact of each weather event type, focus on the EVTYPE, FATALITIES, and INJURIES columns.
# Keep only the event type and the two casualty columns.
health_columns <- c("EVTYPE", "FATALITIES", "INJURIES")
health_impact <- stormdata[health_columns]
Calculate the total population health impact (fatalities plus injuries) for each weather event type.
# Sum fatalities plus injuries within each event type. Naming the inputs
# gives the result its EventType / HealthImpact columns directly.
health_impact_by_event <- aggregate(
  data.frame(HealthImpact = health_impact$FATALITIES + health_impact$INJURIES),
  by = list(EventType = health_impact$EVTYPE),
  FUN = sum
)
To observe the most harmful weather event with respect to population health, visualize the total population health impact for the top 10 weather events.
# Sort event types from largest to smallest health impact ...
health_impact_by_event <- health_impact_by_event[
  order(health_impact_by_event$HealthImpact, decreasing = TRUE),
]
# ... and keep the first ten rows.
health_impact_by_event <- head(health_impact_by_event, n = 10)
# Fig.1: one bar per row of health_impact_by_event, with the event types on
# the x axis ordered by their HealthImpact value.
g <- ggplot(
  health_impact_by_event,
  aes(x = reorder(EventType, HealthImpact), y = HealthImpact)
)
g +
  geom_col() +
  labs(
    title = "Fig.1: Population health impact by weather event type",
    x = "Weather event Type",
    y = "Population health impact"
  ) +
  theme(
    # Centre the title and turn the event names vertical so they do not overlap.
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
To assess the economic consequences of each weather event type, focus on the EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, and CROPDMGEXP columns.
# Keep only the event type and the property/crop damage columns.
damage_columns <- c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
economic_impact <- stormdata[damage_columns]
Observe PROPDMGEXP and CROPDMGEXP.
# List the distinct property-damage exponent codes. factor() makes this work
# whether the column was read as a factor or as plain character
# (levels() alone returns NULL for a character column).
levels(factor(economic_impact$PROPDMGEXP))
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
# List the distinct crop-damage exponent codes. factor() makes this work
# whether the column was read as a factor or as plain character
# (levels() alone returns NULL for a character column).
levels(factor(economic_impact$CROPDMGEXP))
## [1] "" "?" "0" "2" "B" "k" "K" "m" "M"
# Summarise the damage columns. Text columns are converted to factors on a
# temporary copy so the summary reports per-code counts even when the data
# were read as character (which would only show Length/Class/Mode).
summary(as.data.frame(unclass(economic_impact), stringsAsFactors = TRUE))
## EVTYPE PROPDMG PROPDMGEXP
## HAIL :288661 Min. : 0.00 :465934
## TSTM WIND :219940 1st Qu.: 0.00 K :424665
## THUNDERSTORM WIND: 82563 Median : 0.00 M : 11330
## TORNADO : 60652 Mean : 12.06 0 : 216
## FLASH FLOOD : 54277 3rd Qu.: 0.50 B : 40
## FLOOD : 25326 Max. :5000.00 5 : 28
## (Other) :170878 (Other): 84
## CROPDMG CROPDMGEXP
## Min. : 0.000 :618413
## 1st Qu.: 0.000 K :281832
## Median : 0.000 M : 1994
## Mean : 1.527 k : 21
## 3rd Qu.: 0.000 0 : 19
## Max. :990.000 B : 9
## (Other): 9
Based on Section 2.7 (Damage) of the Storm Data Documentation, estimate the property damage and the crop damage as follows:
# Turn damage amounts and their exponent codes into dollar estimates.
#
# amount:      numeric damage magnitudes (PROPDMG or CROPDMG).
# exponent:    exponent codes for those magnitudes (PROPDMGEXP or CROPDMGEXP).
# multipliers: named numeric vector mapping an upper-case code to its factor.
#
# Returns a numeric vector as long as `amount`. Rows whose code is not listed
# in `multipliers` (blank, "?", "+", "-", other digits, ...) are estimated at 0.
estimate_damage <- function(amount, exponent, multipliers) {
  # as.character() so factor and character columns are handled alike;
  # toupper() so "h", "k" and "m" count the same as "H", "K" and "M".
  codes <- toupper(as.character(exponent))
  multiplier <- unname(multipliers[codes])
  recognised <- !is.na(multiplier)

  estimate <- numeric(length(amount))
  estimate[recognised] <- amount[recognised] * multiplier[recognised]
  estimate
}

# Exponent codes that are converted, and the factor each one stands for.
crop_multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
prop_multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9, "5" = 1e5)

# Matching is case-insensitive, so the lowercase "h" and "m" codes present in
# the data contribute to the estimates instead of being left at 0.
economic_impact$CROPDMG_ESTIMATE <- estimate_damage(
  economic_impact$CROPDMG,
  economic_impact$CROPDMGEXP,
  crop_multipliers
)
economic_impact$PROPDMG_ESTIMATE <- estimate_damage(
  economic_impact$PROPDMG,
  economic_impact$PROPDMGEXP,
  prop_multipliers
)
Calculate the total estimated economic damage (property plus crop) for each weather event type.
# Sum the property and crop estimates within each event type. Naming the
# inputs gives the result its EventType / EconimicImpact columns directly.
economic_impact_by_event <- aggregate(
  data.frame(
    EconimicImpact = economic_impact$PROPDMG_ESTIMATE +
      economic_impact$CROPDMG_ESTIMATE
  ),
  by = list(EventType = economic_impact$EVTYPE),
  FUN = sum
)
To observe the weather event which has the greatest economic consequences, visualize the total economic impact for the top 10 weather events.
# Sort event types from largest to smallest estimated damage ...
economic_impact_by_event <- economic_impact_by_event[
  order(economic_impact_by_event$EconimicImpact, decreasing = TRUE),
]
# ... and keep the first ten rows.
economic_impact_by_event <- head(economic_impact_by_event, n = 10)
# Fig.2: one bar per row of economic_impact_by_event, with the event types on
# the x axis ordered by their EconimicImpact value.
g <- ggplot(
  economic_impact_by_event,
  aes(x = reorder(EventType, EconimicImpact), y = EconimicImpact)
)
g +
  geom_col() +
  labs(
    title = "Fig.2: Economic damage estimations by weather event type",
    x = "Weather event type",
    y = "Economic damage estimations"
  ) +
  theme(
    # Centre the title and turn the event names vertical so they do not overlap.
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )