Reproducible Research - PA 2 - Exploration and analysis of NOAA Storm Database

Synopsis

The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events.

This data analysis will address the following questions:

Data Processing

Data preparation consists of three steps:

  1. Downloading file
  2. Decompressing and loading to data frame
  3. Calculating property and crop damages.
source = "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(source, dest = "data.bz2")
data <- read.csv(bzfile("data.bz2"))

data_dmgs <- sqldf("select *, (case upper(PROPDMGEXP) when 'B' then 1000*1000*1000 when 'M' then 1000*1000 when 'K' then 1000 else 0 end) * propdmg as propdmg_calc,(case upper(CROPDMGexp) when 'B' then 1000*1000*1000  when 'M' then 1000*1000 when 'K' then 1000 else 0 end) * CROPDMG as CROPDMG_calc   from data")
## Loading required package: tcltk

Results


require(sqldf)


sumFatInjByEvType <- sqldf("select evtype, sum(fatalities) as sum_fatalites, sum(injuries) as sum_injuries from data group by evtype order by sum(fatalities) desc, sum(injuries) desc")
## Loading required package: tcltk

head(sumFatInjByEvType, n = 10)
##            EVTYPE sum_fatalites sum_injuries
## 1         TORNADO          5633        91346
## 2  EXCESSIVE HEAT          1903         6525
## 3     FLASH FLOOD           978         1777
## 4            HEAT           937         2100
## 5       LIGHTNING           816         5230
## 6       TSTM WIND           504         6957
## 7           FLOOD           470         6789
## 8     RIP CURRENT           368          232
## 9       HIGH WIND           248         1137
## 10      AVALANCHE           224          170
require(ggplot2)
p1 <- ggplot(sumFatInjByEvType[1:10, ], aes(x = EVTYPE, y = sum_fatalites/1000)) + 
    geom_bar(stat = "identity", fill = "#1144AA")
p1 <- p1 + labs(title = "Top 10  with highest fatality counts") + xlab("EVENT TYPE") + 
    ylab("FATALITIES (in thousand)")
p1 + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_blank(), 
    panel.grid.minor.y = element_blank())

plot of chunk results2



require(sqldf)


sumDmgByEvType <- sqldf("select evtype, sum(propdmg_calc)+sum(CROPDMG_calc) as DMG_SUM from data_dmgs group by evtype order by DMG_SUM  desc")

head(sumDmgByEvType, n = 10)
##               EVTYPE   DMG_SUM
## 1              FLOOD 1.503e+11
## 2  HURRICANE/TYPHOON 7.191e+10
## 3            TORNADO 5.735e+10
## 4        STORM SURGE 4.332e+10
## 5               HAIL 1.876e+10
## 6        FLASH FLOOD 1.756e+10
## 7            DROUGHT 1.502e+10
## 8          HURRICANE 1.461e+10
## 9        RIVER FLOOD 1.015e+10
## 10         ICE STORM 8.967e+09
require(ggplot2)
p2 <- ggplot(sumDmgByEvType[1:10, ], aes(x = EVTYPE, y = DMG_SUM/1e+09)) + geom_bar(stat = "identity", 
    fill = "#1144AA")
p2 <- p2 + labs(title = "Top 10  with highest damages") + xlab("EVENT TYPE") + 
    ylab("DAMAGES (in bilions of dollars)")
p2 + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_blank(), 
    panel.grid.minor.y = element_blank())

plot of chunk results4