Reproducible Research - Peer Assessment II

Impact of Severe Weather Event Types on Public Health and Wealth


Synopsis


Data Processing

## Load the raw storm data set (the first row of the file holds column names)
fdat <- read.csv("repdatA-data-StormData.csv", header = TRUE)

## Keep only the columns used below: casualty counts, damage amounts with
## their exponent codes, and the event type
tdat <- fdat[, c("INJURIES", "FATALITIES", "PROPDMG", "PROPDMGEXP",
                 "CROPDMG", "CROPDMGEXP", "EVTYPE")]

## Lower-case the text columns so later matching ignores letter case
tdat[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")] <-
  lapply(tdat[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")], tolower)

## Drop rows whose event type contains "summary", then keep only rows whose
## crop-damage exponent is 0, k, m or b
## NOTE(review): a row survives only if CROPDMGEXP is one of these codes, so
## rows with any other (e.g. blank) crop exponent are excluded from every
## later total, casualties and property damage included -- confirm intended
tdat <- tdat[!grepl("summary", tdat$EVTYPE), ]
tdat <- subset(tdat, CROPDMGEXP %in% c("0", "k", "m", "b"))
nrow(tdat)
## [1] 283876
## Correct known misspellings and abbreviations in the event names.
## Each name on the left is replaced by the text on the right; the fixes are
## applied one after another, in the order listed.
tdat$EVTYPE <- local({
  typo_fixes <- c(
    "avalance" = "avalanche",
    "erosin"   = "erosion",
    "cstl"     = "coastal",
    "hvy"      = "heavy",
    "wnd"      = "wind",
    "w inds"   = "winds"
  )
  fixed <- tdat$EVTYPE
  for (typo in names(typo_fixes)) {
    fixed <- gsub(typo, typo_fixes[[typo]], fixed)
  }
  fixed
})

## Replace "/" and "\", and other special characters or joining words
## (" and", " &") with a blank space, then squeeze repeated blanks.
## The patterns are applied in the order listed. local() keeps the helper
## variables out of the global workspace.
tdat$EVTYPE <- local({
  separators <- c("/", "\\\\", " and", " &", ",", "\\(", "\\)", ";", "-")
  cleaned <- tdat$EVTYPE
  for (pattern in separators) {
    cleaned <- gsub(pattern, " ", cleaned)
  }
  ## Collapse every run of two or more blanks into one. Replacing only the
  ## pair "  " in a single pass leaves a double blank behind whenever three
  ## or more blanks are adjacent (e.g. "a - b" becomes "a   b"), which keeps
  ## otherwise identical event names in separate groups.
  gsub(" {2,}", " ", cleaned)
})

## Replace some common plurals with singulars to avoid duplicate classification.
## The trailing "s" (or "es" for tornado) is removed only when it is NOT
## followed by another letter, so words that merely contain a plural form are
## left intact: an unanchored "rains" -> "rain" would turn "rainstorm" into
## "raintorm". A plural followed by a digit, a blank or the end of the string
## is still converted. Event names are already lower case at this point, so
## checking for [a-z] is sufficient.
tdat$EVTYPE <- local({
  singulars <- c("wind", "temperature", "fire", "funnel", "flood", "storm",
                 "rain", "current")
  ## Builds "(wind|temperature|...)s(?![a-z])"; the lookahead needs perl = TRUE
  plural_rx <- paste0("(", paste(singulars, collapse = "|"), ")s(?![a-z])")
  cleaned <- gsub(plural_rx, "\\1", tdat$EVTYPE, perl = TRUE)
  ## "tornados" and "tornadoes" both become "tornado"
  gsub("tornado(es|s)(?![a-z])", "tornado", cleaned, perl = TRUE)
})

## Helper that strips whitespace from the start and the end of each string;
## whitespace inside the string is left untouched
# Source: http://stackoverflow.com/questions/2261079/how-to-trim-leading-and-trailing-whitespace-in-r
trim <- function (x) {
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
## Apply trim() to every event name
tdat[["EVTYPE"]] <- trim(tdat[["EVTYPE"]])

Analysis & Results

Impact of event types on Fatalities:

## Total fatalities per event type (fatalities data - fataldat): one row per
## distinct EVTYPE, missing counts ignored when summing
fataldat <- setNames(
  aggregate(tdat$FATALITIES, by = list(tdat$EVTYPE), FUN = sum, na.rm = TRUE),
  c("EVTYPE", "FATALITIES")
)

## Discard the event types whose fatality total is zero
fataldat <- fataldat[!(fataldat$FATALITIES == 0), ]

nrow(fataldat)
## [1] 55
## Sort the data by fatalities in descending order
fataldat <- fataldat[order(-fataldat$FATALITIES), ]

## Select the top ten fatalities (Select fatalities data - sfdat).
## head() returns at most ten rows; indexing with 1:10 would pad the table
## with all-NA rows whenever fewer than ten event types remain.
sfdat <- head(fataldat, 10)
## Upper-case the event names for display
sfdat[, 1] <- toupper(sfdat[, 1])
print(sfdat)
##                EVTYPE FATALITIES
## 110           TORNADO       1064
## 28        FLASH FLOOD        388
## 32              FLOOD        262
## 54               HEAT        219
## 87        RIP CURRENT        211
## 80          LIGHTNING        173
## 22     EXCESSIVE HEAT        171
## 103 THUNDERSTORM WIND        141
## 10    COLD WIND CHILL         94
## 4           AVALANCHE         83
## Dot chart for the selected fatalities data: one dot per event type,
## labelled with the event name
## NOTE(review): the axis label says "since 1996", but no date filter is
## visible in the processing steps of this script -- confirm the wording
maintxt <- "Figure 1: Top 10 causes of Fatality"
xlabtxt <- "Number of fatalities since 1996"
dotchart(x = sfdat[[2]], labels = sfdat[[1]], main = maintxt, xlab = xlabtxt)

plot of chunk unnamed-chunk-8


Impact of event types on Injuries:

## Total injuries per event type (injury data - injdat): one row per distinct
## EVTYPE, missing counts ignored when summing
injdat <- setNames(
  aggregate(tdat$INJURIES, by = list(tdat$EVTYPE), FUN = sum, na.rm = TRUE),
  c("EVTYPE", "INJURIES")
)

## Discard the event types whose injury total is zero
injdat <- injdat[!(injdat$INJURIES == 0), ]
nrow(injdat)
## [1] 50
## Sort the data by injuries in descending order
injdat <- injdat[order(-injdat$INJURIES), ]

## Select the top ten injuries (select injury data - sidat).
## head() returns at most ten rows; indexing with 1:10 would pad the table
## with all-NA rows whenever fewer than ten event types remain.
sidat <- head(injdat, 10)
## Upper-case the event names for display
sidat[, 1] <- toupper(sidat[, 1])
print(sidat)
##                EVTYPE INJURIES
## 110           TORNADO    11960
## 32              FLOOD     6495
## 75          ICE STORM     1616
## 54               HEAT     1554
## 103 THUNDERSTORM WIND     1491
## 80          LIGHTNING     1014
## 73  HURRICANE TYPHOON      909
## 22     EXCESSIVE HEAT      899
## 28        FLASH FLOOD      667
## 132          WILDFIRE      560

Impact of event types on Property damage:

## Property damage in dollars (property damage value - pdmgval).
## Each exponent code is translated into a multiplier through a named lookup
## vector, and the multiplier is applied to the recorded amount.
## Idea Source: https://class.coursera.org/repdata-006/forum/thread?thread_id=131

expref <- setNames(c(1, 1000, 1E6, 1E9), c("0", "k", "m", "b"))
## NOTE(review): exponent codes that are not names of expref give NA here and
## are then skipped by the na.rm sum below -- confirm that is acceptable
pdmgval <- as.numeric(expref[tdat$PROPDMGEXP] * tdat$PROPDMG)

## Total property damage per event type, with readable column names
pdat <- setNames(
  aggregate(pdmgval, by = list(tdat$EVTYPE), FUN = sum, na.rm = TRUE),
  c("EVTYPE", "PROPDMG")
)

## Discard the event types whose property damage total is zero
pdat <- pdat[!(pdat$PROPDMG == 0), ]
nrow(pdat)
## [1] 109
## Sort the data by property damages in descending order
pdat <- pdat[order(-pdat$PROPDMG), ]

## Select the top ten property damages (select property data - spdat).
## head() returns at most ten rows; indexing with 1:10 would pad the table
## with all-NA rows whenever fewer than ten event types remain.
spdat <- head(pdat, 10)
## Upper-case the event names for display
spdat[, 1] <- toupper(spdat[, 1])
print(spdat)
##                EVTYPE   PROPDMG
## 32              FLOOD 1.328e+11
## 73  HURRICANE TYPHOON 2.674e+10
## 110           TORNADO 1.617e+10
## 68          HURRICANE 9.716e+09
## 46               HAIL 7.992e+09
## 28        FLASH FLOOD 7.328e+09
## 88        RIVER FLOOD 5.080e+09
## 98   STORM SURGE TIDE 4.641e+09
## 103 THUNDERSTORM WIND 3.679e+09
## 132          WILDFIRE 3.499e+09
## Dot chart for the selected property damage data: one dot per event type,
## with the dollar totals divided by 1E9 so the axis reads in billions
maintxt <- "Figure 2: Top 10 causes of Property Damage"
xlabtxt <- "Property damage in Billion US $s"
dotchart(x = spdat[[2]] / 1E9, labels = spdat[[1]],
         main = maintxt, xlab = xlabtxt)

plot of chunk unnamed-chunk-11


Impact of event types on Crop damage:

## Crop damage in dollars (crop damage value - cdmgval): the recorded amount
## times the multiplier that the expref lookup gives for its exponent code
cdmgval <- as.numeric(expref[tdat$CROPDMGEXP] * tdat$CROPDMG)

## Total crop damage per event type (crop damage data - cdat), with readable
## column names
cdat <- setNames(
  aggregate(cdmgval, by = list(tdat$EVTYPE), FUN = sum, na.rm = TRUE),
  c("EVTYPE", "CROPDMG")
)

## Discard the event types whose crop damage total is zero
cdat <- cdat[!(cdat$CROPDMG == 0), ]
nrow(cdat)
## [1] 112
## Sort the data by crop damages in descending order
cdat <- cdat[order(-cdat$CROPDMG), ]

## Select the top ten crop damages (select crop damage data - scdat).
## head() returns at most ten rows; indexing with 1:10 would pad the table
## with all-NA rows whenever fewer than ten event types remain.
scdat <- head(cdat, 10)
## Upper-case the event names for display
scdat[, 1] <- toupper(scdat[, 1])
print(scdat)
##               EVTYPE   CROPDMG
## 15           DROUGHT 1.397e+10
## 32             FLOOD 5.662e+09
## 88       RIVER FLOOD 5.029e+09
## 75         ICE STORM 5.022e+09
## 46              HAIL 3.026e+09
## 68         HURRICANE 2.742e+09
## 73 HURRICANE TYPHOON 2.608e+09
## 28       FLASH FLOOD 1.421e+09
## 24      EXTREME COLD 1.313e+09
## 41      FROST FREEZE 1.094e+09
## Dot chart for selected crop damage data: one dot per event type, with the
## dollar totals divided by 1E9 so the axis reads in billions
maintxt <- "Figure 3: Top 10 causes of Crop Damage"
## This figure plots crop damage, so the axis label must say so (the label
## had been copied from the property damage chart)
xlabtxt <- "Crop damage in Billion US $s"
dotchart(scdat[, 2] / 1E9, labels = scdat[, 1], xlab = xlabtxt, main = maintxt)

plot of chunk unnamed-chunk-12