Synopsis

Across the United States, tornadoes, excessive heat, and flash floods are most harmful with respect to population health.

Across the United States, tornadoes, thunderstorm winds, and flash floods have the greatest economic consequences.

Our raw data are taken from National Weather Service Instruction 10-1605. The events in the database start in the year 1950 and end in November 2011. Fatalities, injuries, and property damage (in dollars) are totalled over that time.

References used (author: Maurício Linhares): https://raw.githubusercontent.com/mauricio/reproductible-research-assignment-2/master/replacements.csv and https://raw.githubusercontent.com/mauricio/reproductible-research-assignment-2/master/multipliers.csv

Data Processing

Read the data and check the first five rows.

# Load only the columns the analysis needs. The file has 37 columns; every
# position not listed below is skipped via the "NULL" column class.
#   character: 2 BGN_DATE, 6 COUNTYNAME, 7 STATE, 8 EVTYPE, 12 END_DATE,
#              26 PROPDMGEXP, 28 CROPDMGEXP
#   numeric:   5 COUNTY, 23 FATALITIES, 24 INJURIES, 25 PROPDMG, 27 CROPDMG
storm.data <- read.csv(
  file = "../data/repdata_data_StormData.csv",
  strip.white = TRUE,
  colClasses = replace(
    replace(rep("NULL", 37), c(2, 6, 7, 8, 12, 26, 28), "character"),
    c(5, 23, 24, 25, 27),
    "numeric"
  )
)
# Preview the first five rows to confirm the column selection.
head(storm.data, n = 5)
##             BGN_DATE COUNTY COUNTYNAME STATE  EVTYPE END_DATE FATALITIES
## 1  4/18/1950 0:00:00     97     MOBILE    AL TORNADO                   0
## 2  4/18/1950 0:00:00      3    BALDWIN    AL TORNADO                   0
## 3  2/20/1951 0:00:00     57    FAYETTE    AL TORNADO                   0
## 4   6/8/1951 0:00:00     89    MADISON    AL TORNADO                   0
## 5 11/15/1951 0:00:00     43    CULLMAN    AL TORNADO                   0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1       15    25.0          K       0           
## 2        0     2.5          K       0           
## 3        2    25.0          K       0           
## 4        2     2.5          K       0           
## 5        2     2.5          K       0

Keep only the rows that record fatalities, injuries, or damage

# Keep only events with at least one fatality, one injury, or a non-zero
# property/crop damage figure. Rows where the condition evaluates to NA are
# dropped, and the original row names are retained.
storm.data.DamageOrInjury <- subset(
  storm.data,
  FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
)
# Preview the first five retained rows.
head(storm.data.DamageOrInjury, n = 5)
##             BGN_DATE COUNTY COUNTYNAME STATE  EVTYPE END_DATE FATALITIES
## 1  4/18/1950 0:00:00     97     MOBILE    AL TORNADO                   0
## 2  4/18/1950 0:00:00      3    BALDWIN    AL TORNADO                   0
## 3  2/20/1951 0:00:00     57    FAYETTE    AL TORNADO                   0
## 4   6/8/1951 0:00:00     89    MADISON    AL TORNADO                   0
## 5 11/15/1951 0:00:00     43    CULLMAN    AL TORNADO                   0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1       15    25.0          K       0           
## 2        0     2.5          K       0           
## 3        2    25.0          K       0           
## 4        2     2.5          K       0           
## 5        2     2.5          K       0

Remove leading and trailing whitespace and convert event names to upper case

# Normalise event labels: strip leading/trailing whitespace, then upper-case,
# so that spelling variants differing only in case or padding compare equal.
storm.data.DamageOrInjury[["EVTYPE"]] <- toupper(
  gsub("^\\s+|\\s+$", "", storm.data.DamageOrInjury[["EVTYPE"]])
)

Reference table for classifying events into groups (replacements.csv, which provides the mapping table): https://raw.githubusercontent.com/mauricio/reproductible-research-assignment-2/master/replacements.csv

# Lookup table used to map raw event labels (column `event`) to their cleaned
# category (column `actual`); text columns are kept as character, not factor.
replacements <- read.csv(
  file = "../data/replacements.csv",
  stringsAsFactors = FALSE
)

# Look up the cleaned category for one or more raw event labels.
#
# Reads the global `replacements` table: `event` holds the raw label and
# `actual` the category it maps to.
#
# evtype: a single label or a vector of labels.
# Returns one value from `replacements$actual` per element of `evtype`.
# A label with no entry in `replacements$event` gives NA (rather than a
# zero-length result), and if a label is listed more than once the first
# entry wins, so the result always has the same length as the input and can
# be stored directly as a data-frame column.
eventForClassify <- function( evtype ) {
  # match() does the lookup for the whole vector at once; filtering with
  # `==` would recycle `evtype` against the table and return wrong rows.
  replacements$actual[match(evtype, replacements$event)]
}
# Classify every event label. vapply() guarantees a character vector with one
# category per row: if a lookup ever returns something other than a single
# string, this stops with an error instead of silently creating a list column
# (which is what sapply() would do).
storm.data.DamageOrInjury$CLEANEV <- vapply(
  storm.data.DamageOrInjury$EVTYPE,
  eventForClassify,
  character(1)
)

Convert the damage exponent codes to upper case

# Upper-case both damage-exponent columns in one step so that codes differing
# only in case are treated as the same code.
storm.data.DamageOrInjury[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  storm.data.DamageOrInjury[c("PROPDMGEXP", "CROPDMGEXP")],
  toupper
)

A CSV file that maps each damage exponent code (for either crop or property damage) to a numeric multiplier, so we can compute a total damage figure. Link: https://raw.githubusercontent.com/mauricio/reproductible-research-assignment-2/master/multipliers.csv

# Lookup table of exponent codes and their multipliers: the first column is
# read as character and the second as numeric.
multipliers <- read.csv(
  file = "../data/multipliers.csv",
  colClasses = c("character", "numeric")
)

# Scale a damage figure by the multiplier that belongs to its exponent code.
#
# Reads the global `multipliers` table: `key` holds the exponent code and
# `number` the factor to multiply by.
#
# damage:  numeric damage value(s).
# mapping: exponent code(s), matched against `multipliers$key`.
# Returns `damage` times the matching multiplier, one value per input element.
# A code with no entry in `multipliers$key` gives NA (rather than a
# zero-length result), and if a code is listed more than once the first entry
# wins, so the result can always be stored as a numeric column.
mapDamage <- function(damage, mapping) {
  # match() resolves all codes at once; filtering with `==` would recycle
  # `mapping` against the table and return wrong rows for vector input.
  damage * multipliers$number[match(mapping, multipliers$key)]
}
# Convert each (value, exponent code) pair to a dollar amount, row by row.
storm.data.DamageOrInjury[["property_damage"]] <- mapply(
  FUN = mapDamage,
  damage = storm.data.DamageOrInjury[["PROPDMG"]],
  mapping = storm.data.DamageOrInjury[["PROPDMGEXP"]]
)
storm.data.DamageOrInjury[["crop_damage"]] <- mapply(
  FUN = mapDamage,
  damage = storm.data.DamageOrInjury[["CROPDMG"]],
  mapping = storm.data.DamageOrInjury[["CROPDMGEXP"]]
)

# Total economic damage per event is property damage plus crop damage.
storm.data.DamageOrInjury[["total_damage"]] <-
  storm.data.DamageOrInjury[["property_damage"]] +
  storm.data.DamageOrInjury[["crop_damage"]]

Use the plyr library to aggregate the data by event type

library(plyr)

Plot the top 5 events causing fatalities

# Total fatalities and injuries for each cleaned event category.
pop.harmed <- ddply(
  .data = storm.data.DamageOrInjury,
  .variables = "CLEANEV",
  .fun = summarise,
  total_deaths = sum(FATALITIES),
  total_injuries = sum(INJURIES)
)

# Two rankings of the same summary table, largest first: one by deaths and
# one by injuries.
fatality.measure <- pop.harmed[order(-pop.harmed[["total_deaths"]]), ]
injury.measure <- pop.harmed[order(-pop.harmed[["total_injuries"]]), ]

# plot top 5
# Bar chart of the five categories with the most fatalities; the y axis is
# fixed at 0-6000.
barplot(
  height = fatality.measure$total_deaths[1:5],
  names.arg = fatality.measure$CLEANEV[1:5],
  cex.names = 0.75,
  main = "Fatalities Caused By Weather Events, 1950-2011",
  xlab = "Event",
  ylab = "Fatalities",
  ylim = c(0, 6000)
)

Plot the top 5 events causing injuries

# Bar chart of the five categories with the most injuries; the y axis is
# fixed at 0-95000.
barplot(
  height = injury.measure$total_injuries[1:5],
  names.arg = injury.measure$CLEANEV[1:5],
  cex.names = 0.75,
  main = "Injuries Caused By Weather Events, 1950-2011",
  xlab = "Event",
  ylab = "Injuries",
  ylim = c(0, 95000)
)

Plot the top 5 events causing economic damage

# Total (property + crop) damage for each cleaned event category.
economic.harmed <- ddply(
  .data = storm.data.DamageOrInjury,
  .variables = "CLEANEV",
  .fun = summarise,
  total_damage = sum(total_damage)
)

# Rank categories from most to least costly.
economic.measure <- economic.harmed[order(-economic.harmed[["total_damage"]]), ]
# Express the totals in billions of dollars for plotting.
economic.measure[["total_damage"]] <- economic.measure[["total_damage"]] / 1e9


# Bar chart of the five costliest categories, in billions of dollars; the
# y axis is fixed at 0-150.
barplot(
  height = economic.measure$total_damage[1:5],
  names.arg = economic.measure$CLEANEV[1:5],
  cex.names = 0.75,
  main = "Total Economic Damage Caused By Weather Events, 1950-2011",
  xlab = "Event",
  ylab = "Cost (In Billions)",
  ylim = c(0, 150)
)