The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events.
This data analysis will address the following questions:
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
Data preparation consists of three steps:
source = "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(source, dest = "data.bz2")
data <- read.csv(bzfile("data.bz2"))
data_dmgs <- sqldf("select *, (case upper(PROPDMGEXP) when 'B' then 1000*1000*1000 when 'M' then 1000*1000 when 'K' then 1000 else 0 end) * propdmg as propdmg_calc,(case upper(CROPDMGexp) when 'B' then 1000*1000*1000 when 'M' then 1000*1000 when 'K' then 1000 else 0 end) * CROPDMG as CROPDMG_calc from data")
## Loading required package: tcltk
require(sqldf)
sumFatInjByEvType <- sqldf("select evtype, sum(fatalities) as sum_fatalites, sum(injuries) as sum_injuries from data group by evtype order by sum(fatalities) desc, sum(injuries) desc")
## Loading required package: tcltk
head(sumFatInjByEvType, n = 10)
## EVTYPE sum_fatalites sum_injuries
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
require(ggplot2)
p1 <- ggplot(sumFatInjByEvType[1:10, ], aes(x = EVTYPE, y = sum_fatalites/1000)) +
geom_bar(stat = "identity", fill = "#1144AA")
p1 <- p1 + labs(title = "Top 10 with highest fatality counts") + xlab("EVENT TYPE") +
ylab("FATALITIES (in thousand)")
p1 + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
require(sqldf)
sumDmgByEvType <- sqldf("select evtype, sum(propdmg_calc)+sum(CROPDMG_calc) as DMG_SUM from data_dmgs group by evtype order by DMG_SUM desc")
head(sumDmgByEvType, n = 10)
## EVTYPE DMG_SUM
## 1 FLOOD 1.503e+11
## 2 HURRICANE/TYPHOON 7.191e+10
## 3 TORNADO 5.735e+10
## 4 STORM SURGE 4.332e+10
## 5 HAIL 1.876e+10
## 6 FLASH FLOOD 1.756e+10
## 7 DROUGHT 1.502e+10
## 8 HURRICANE 1.461e+10
## 9 RIVER FLOOD 1.015e+10
## 10 ICE STORM 8.967e+09
require(ggplot2)
p2 <- ggplot(sumDmgByEvType[1:10, ], aes(x = EVTYPE, y = DMG_SUM/1e+09)) + geom_bar(stat = "identity",
fill = "#1144AA")
p2 <- p2 + labs(title = "Top 10 with highest damages") + xlab("EVENT TYPE") +
ylab("DAMAGES (in bilions of dollars)")
p2 + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())