In this report we briefly analyze both the human cost (injuries and fatalities) and the economic cost (property and crop damage) of major weather events recorded by NOAA from January 1950 - December 2017. The following columns are extracted from the raw data set:
| column | description |
|---|---|
| EVTYPE | name of the type of the event, e.g. TORNADO |
| FATALITIES | number of fatalities for the event |
| INJURIES | number of injuries for the event |
| PROPDMG | property damage incurred by the event |
| PROPDMGEXP | property damage multiplier for the event, e.g. thousands or millions |
| CROPDMG | crop damage incurred by the event |
| CROPDMGEXP | crop damage multiplier for the event, e.g. thousands or millions |
We first read in the data from the raw comma-separated text file inside the compressed (bz2) archive. We then narrow the working data set to just the columns of interest.
# Fetch the compressed NOAA storm data once and load it.
# The archive is stored relative to the current working directory so the
# report runs on any machine (no hard-coded setwd()).
destfile <- "file.csv.bz2"
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(destfile)) {
  # mode = "wb" keeps the binary archive intact on Windows.
  # A failed download is reported immediately instead of being swallowed,
  # otherwise read.csv() below would fail with an unrelated message.
  res <- tryCatch(
    download.file(fileURL, destfile = destfile, method = "auto", mode = "wb"),
    error = function(e) {
      unlink(destfile)  # do not leave a partial archive behind
      stop("Could not download ", fileURL, ": ", conditionMessage(e),
           call. = FALSE)
    }
  )
  # download.file() signals some failures through a non-zero status only
  if (!identical(as.integer(res), 0L)) {
    unlink(destfile)
    stop("Download of ", fileURL, " returned status ", res, call. = FALSE)
  }
}
# read.csv() decompresses the bz2 archive transparently
df <- read.csv(destfile, header = TRUE, sep = ",", quote = "\"",
               stringsAsFactors = FALSE)
# keep only the columns used in this analysis
df0 <- subset(df, select = c(EVTYPE, FATALITIES, INJURIES,
                             PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP))
After reading in the data we check the first few rows. There are 902297 events in this dataset.
# preview the first six records of the working data set
head(df0, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25000 1000 NA
## 2 TORNADO 0 0 2500 1000 NA
## 3 TORNADO 0 2 25000 1000 NA
## 4 TORNADO 0 2 2500 1000 NA
## 5 TORNADO 0 2 2500 1000 NA
## 6 TORNADO 0 6 2500 1000 NA
Property and crop damage costs are each described in the data by two columns - a base number and an “EXP” multiplier. To simplify later aggregation and plotting, we pre-calculate a single damage value for each record.
## PROPERTY DAMAGE
# Translate each PROPDMGEXP code into a numeric multiplier:
#   H / K / M / B (any case) -> 1e2 / 1e3 / 1e6 / 1e9
#   numeric codes            -> their numeric value
#   missing or blank         -> 0
#   any other symbol         -> 10
prop_raw <- trimws(as.character(df0$PROPDMGEXP))
prop_missing <- is.na(prop_raw) | prop_raw == ""
prop_code <- toupper(prop_raw)
# Coercion warnings are expected on this one call only: non-numeric codes
# deliberately become NA here and receive their multiplier just below.
prop_mult <- suppressWarnings(as.numeric(prop_code))
# if the EXP is not a number, default the multiplier to 10
prop_mult[is.na(prop_mult)] <- 10
# replace each letter code with its numeric value
prop_letters <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
prop_hit <- match(prop_code, names(prop_letters))
prop_mult[!is.na(prop_hit)] <- prop_letters[prop_hit[!is.na(prop_hit)]]
# if the EXP is missing, zero it
prop_mult[prop_missing] <- 0
# store the multiplier in the EXP column itself (column 5, not PROPDMG)
df0$PROPDMGEXP <- prop_mult
# for each record calculate the single value for property damage
df0$PROPDMG <- as.numeric(df0$PROPDMG) * df0$PROPDMGEXP
## Warning: NAs introduced by coercion
## CROP DAMAGE
# Translate each CROPDMGEXP code into a numeric multiplier:
#   H / K / M / B (any case) -> 1e2 / 1e3 / 1e6 / 1e9
#   numeric codes            -> their numeric value
#   missing or blank         -> 0
#   any other symbol         -> 10
crop_raw <- trimws(as.character(df0$CROPDMGEXP))
crop_missing <- is.na(crop_raw) | crop_raw == ""
crop_code <- toupper(crop_raw)
# Coercion warnings are expected on this one call only: non-numeric codes
# deliberately become NA here and receive their multiplier just below.
crop_mult <- suppressWarnings(as.numeric(crop_code))
# if the EXP is not a number, default the multiplier to 10
crop_mult[is.na(crop_mult)] <- 10
# replace each letter code with its numeric value
crop_letters <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
crop_hit <- match(crop_code, names(crop_letters))
crop_mult[!is.na(crop_hit)] <- crop_letters[crop_hit[!is.na(crop_hit)]]
# if the EXP is missing, zero it
crop_mult[crop_missing] <- 0
# store the multiplier in the EXP column itself (column 7, not CROPDMG)
df0$CROPDMGEXP <- crop_mult
# for each record calculate the single value for crop damage
df0$CROPDMG <- as.numeric(df0$CROPDMG) * df0$CROPDMGEXP
## Warning: NAs introduced by coercion
The columns in which we are most interested are INJURIES, FATALITIES, PROPDMG and CROPDMG, which contain the measurements we want. Here we compute the total number of fatalities.
# grand total of fatalities over every recorded event
x0 <- sum(df[["FATALITIES"]])
The total number of fatalities reported across all events is 15,145.
To show aggregate fatalities, here is a barplot of total fatalities for the ten event types with the most fatalities.
# Sum fatalities per event type and keep the ten largest totals.
aggFatal <- aggregate(FATALITIES ~ EVTYPE, data = df0, FUN = sum)
topFatal <- head(aggFatal[order(-aggFatal$FATALITIES), ], n = 10L)
# Bar chart of those totals; x-axis labels are rotated to stay readable.
g <- ggplot(data = topFatal, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Fatalities by Event type", x = "Event", y = "Fatalities")
print(g)
To show aggregate injuries, here is a barplot of total injuries for the ten event types with the most injuries.
# Sum injuries per event type and keep the ten largest totals.
aggInjury <- aggregate(INJURIES ~ EVTYPE, data = df0, FUN = sum)
topInjury <- head(aggInjury[order(-aggInjury$INJURIES), ], n = 10L)
# Bar chart of those totals; x-axis labels are rotated to stay readable.
g <- ggplot(data = topInjury, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Injuries by Event type", x = "Event", y = "Injuries")
print(g)
To show aggregate crop damage costs, here is a barplot of total crop damage for the ten event types with the highest crop damage.
# Sum crop damage per event type and keep the ten largest totals.
aggCrops <- aggregate(CROPDMG ~ EVTYPE, data = df0, FUN = sum)
topCrops <- head(aggCrops[order(-aggCrops$CROPDMG), ], n = 10L)
# Bar chart of those totals; x-axis labels are rotated to stay readable.
g <- ggplot(data = topCrops, aes(x = EVTYPE, y = CROPDMG)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Crop Damage by Event Type", x = "Event",
       y = "Crop Damage $USD")
print(g)
To show aggregate property damage costs, here is a table of total property damage for the ten event types with the highest property damage.
# Sum property damage per event type and keep the ten largest totals.
aggProps <- aggregate(PROPDMG ~ EVTYPE, data = df0, FUN = sum)
topProps <- head(aggProps[order(-aggProps$PROPDMG), ], n = 10L)
# express the totals in billions of USD, rounded to two decimals
topProps$PROPDMG <- round(topProps$PROPDMG / 1000000000, digits = 2)
# row.names is spelled out in full (no partial argument matching) and uses
# FALSE rather than the reassignable shortcut F
kable(topProps, row.names = FALSE,
      col.names = c("Event", "Damage (B $USD)"),
      caption = "Property Damage by Event", format = "html")
| Event | Damage (B $USD) |
|---|---|
| FLOOD | 144.66 |
| HURRICANE/TYPHOON | 69.31 |
| TORNADO | 56.94 |
| STORM SURGE | 43.32 |
| FLASH FLOOD | 16.14 |
| HAIL | 15.73 |
| HURRICANE | 11.87 |
| TROPICAL STORM | 7.70 |
| WINTER STORM | 6.69 |
| HIGH WIND | 5.27 |