Title: Reproducible Project 2

Author: Scott Semel

Date: October 23, 2015

Synopsis

The goal is to explore the NOAA Storm Database and determine which types of events are most harmful with respect to population health.

And which types of events have the greatest economic consequences? An Event is an individual type of storm event.

Thunderstorm Wind, Hail, Tornado and Flood are events. The fatalities, injuries, and damage amounts appearing in

tropical cyclone events are attributed only to wind damage experienced in the coastal counties/parishes listed.

Data Processing

# The csv file can be found here:
# https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
# We are going to need these libraries.

library(data.table)
library(dplyr)
library(knitr)
require(data.table)

First, check what the column headers are. The commented-out line below can be used instead to drop the columns with excessive text for easier reading, but it is not necessary:

# Name of the unpacked storm data CSV, shared by the preview and the full read.
storm_csv <- "repdata_data_StormData.csv"

# Optional alternative to the full read below: skip the long free-text columns.
# dat <- fread(storm_csv, verbose = TRUE,
#              drop = c("REMARKS", "STATEOFFIC", "COUNTYNAME", "ZONENAMES"))

# Read just the first two rows to inspect the column headers.
fread(storm_csv, nrows = 2)
##    STATE__          BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1:       1 4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2:       1 4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
##     EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1: TORNADO         0      NA         NA       NA       NA          0
## 2: TORNADO         0      NA         NA       NA       NA          0
##    COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1:         NA         0      NA         NA     14   100 3   0          0
## 2:         NA         0      NA         NA      2   150 2   0          0
##    INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1:       15    25.0          K       0         NA  NA         NA        NA
## 2:        0     2.5          K       0         NA  NA         NA        NA
##    LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1:     3040      8812       3051       8806      NA      1
## 2:     3042      8755          0          0      NA      2

# Load the complete data set with every column.
dat <- fread(storm_csv)
## 
Read 43.4% of 967216 rows
Read 69.3% of 967216 rows
Read 89.9% of 967216 rows
Read 902297 rows and 37 (of 37) columns from 0.523 GB file in 00:00:05
## Warning in fread("repdata_data_StormData.csv"): Read less rows (902297)
## than were allocated (967216). Run again with verbose=TRUE and please
## report.
# Convert the data.table returned by fread() into a plain data frame.
dat <- as.data.frame(dat)

# Largest death toll and largest injury count recorded for a single event.
max(dat$FATALITIES)
## [1] 583
max(dat$INJURIES)
## [1] 1700

# Row index of each of those record events. which.max() locates the maximum
# directly, so no cut-off value has to be read off the output above and
# typed in by hand.
which.max(dat$FATALITIES)
## [1] 198704
# dat[which.max(dat$FATALITIES), ]   # uncomment to inspect the full record
which.max(dat$INJURIES)
## [1] 157885
# dat[which.max(dat$INJURIES), ]     # uncomment to inspect the full record

The most fatalities in one event was a heat wave in IL in July 1995.

The most injuries in one event was a tornado in TX in 1979.

But we need the sum of all the casualties per event type to see where most of the people were killed or injured historically.

# Group every record by its event type; both casualty tables start from this.
by_type <- group_by(dat, EVTYPE)

# a: the ten event types with the highest total number of fatalities.
a <- by_type %>%
  summarize(TOTALFATAL = sum(FATALITIES)) %>%
  rename(EVENT = EVTYPE) %>%
  arrange(desc(TOTALFATAL))
a <- a[1:10, ]

# b: the ten event types with the highest total number of injuries.
b <- by_type %>%
  summarize(TOTALINJURIES = sum(INJURIES)) %>%
  rename(EVENT = EVTYPE) %>%
  arrange(desc(TOTALINJURIES))
b <- b[1:10, ]

The damage amounts are not stored as plain dollar figures; a separate column holds the exponent (magnitude) of each amount. So we will just keep the

events with billions in damage for the top ten list. Since crops and property are sometimes damaged separately, we will show

separate results for each.

# Distinct magnitude codes that appear in the property damage exponent column.
PROPDMGEXP <- factor(dat$PROPDMGEXP)
levels(PROPDMGEXP)
##  [1] ""  "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"

# Distinct magnitude codes that appear in the crop damage exponent column.
CROPDMGEXP <- factor(dat$CROPDMGEXP)
levels(CROPDMGEXP)
## [1] ""  "?" "0" "2" "B" "k" "K" "m" "M"
# Keep only the records whose property damage exponent is "B" or "b", so the
# PROPDMG values below are all expressed in billions of dollars.
propdat <- dat[dat$PROPDMGEXP %in% c("B", "b"), ]
propdat$PROPDMG
##  [1]   5.00   0.10   2.10   1.60   1.00   5.00   2.50   1.20   3.00   1.70
## [11]   3.00   1.50   5.15   1.00   1.04   2.50   5.42   1.30   4.83   4.00
## [21]   1.00   1.50  10.00  16.93  31.30   4.00   7.35  11.26   5.88   2.09
## [31] 115.00   1.00   4.00   1.50   1.80   1.00   1.50   2.80   1.00   2.00

# c: the ten event types with the highest total property damage.
by_type <- group_by(propdat, EVTYPE)
c <- by_type %>%
  summarize(TOTALPROPCOST = sum(PROPDMG)) %>%
  rename(EVENT = EVTYPE) %>%
  arrange(desc(TOTALPROPCOST))
c <- c[1:10, ]

# Keep only the records whose crop damage exponent is "B" or "b", so the
# CROPDMG values below are all expressed in billions of dollars.
cropdat <- dat[dat$CROPDMGEXP %in% c("B", "b"), ]
cropdat$CROPDMG
## [1] 0.40 5.00 0.50 0.20 5.00 1.51 1.00 0.00 0.00

# d: total crop damage per event type, largest first.
by_type <- group_by(cropdat, EVTYPE)
d <- summarize(by_type, sum(CROPDMG))
names(d) <- c("EVENT", "TOTALCROPCOST")
d <- arrange(d, desc(TOTALCROPCOST))

# Fewer than ten event types carry billion-dollar crop damage. Indexing with
# d[1:10, ] would pad the table with all-NA rows; head() returns at most ten
# rows and never invents missing ones.
d <- head(d, 10)
d
## Source: local data frame [6 x 2]
## 
##               EVENT TOTALCROPCOST
## 1         ICE STORM          5.00
## 2       RIVER FLOOD          5.00
## 3 HURRICANE/TYPHOON          1.51
## 4           DROUGHT          1.50
## 5              HEAT          0.40
## 6            FREEZE          0.20

Results

# Bar chart of the top ten event types by total fatalities.
# names.arg is written out in full rather than relying on partial matching of
# `names`; las = 2 draws the axis labels perpendicular to the axis.
barplot(
  a$TOTALFATAL,
  names.arg = as.character(a$EVENT),
  col = "red",
  las = 2,
  main = "Event Type vs. Number of Deaths",
  ylab = "Total Deaths"
)
box()

plot of chunk unnamed-chunk-5

# Bar chart of the top ten event types by total injuries.
# names.arg is written out in full rather than relying on partial matching of
# `names`; las = 2 draws the axis labels perpendicular to the axis.
barplot(
  b$TOTALINJURIES,
  names.arg = as.character(b$EVENT),
  col = "orange",
  las = 2,
  main = "Event Type vs. Number of Injuries",
  ylab = "Total Injuries"
)
box()

plot of chunk unnamed-chunk-5

# Bar chart of the top ten event types by total property damage (billions).
# names.arg is written out in full rather than relying on partial matching of
# `names`; las = 2 draws the axis labels perpendicular to the axis.
barplot(
  c$TOTALPROPCOST,
  names.arg = as.character(c$EVENT),
  col = "blue",
  las = 2,
  main = "Event Type vs Property Damage",
  ylab = "Billions $"
)
box()

plot of chunk unnamed-chunk-5

#barplot(d$TOTALCROPCOST,names=as.character(d$EVENT),col="green",las=2,main="Event Type vs Crop Damage",ylab="Billions $") 
#box()

Tornadoes cause the most injuries and deaths. Floods cause the most property damage. Ice storms and river floods cause the most crop damage.