Synopsis:

The following analysis has two goals: 1. Find which types of events across the United States are most harmful with respect to population health. 2. Find which types of events across the United States have the greatest economic consequences.

Required Packages:

library(knitr)

First, we need to load the data:

# Read the storm event records from the CSV file (path is relative to the
# current working directory).
stormData <- read.csv(file = "repdata-data-StormData.csv")

Data Processing:

Determine the top ten causes of fatalities and injuries across America, both on average per event and in total.

# Per-event-type averages: mean FATALITIES / INJURIES for each EVTYPE, then
# the ten largest values in decreasing order.
usFatalitiesMean <- tapply(stormData$FATALITIES, stormData$EVTYPE, mean)
usFatalitiesTopTenPerEvent <-
  sort(usFatalitiesMean, decreasing = TRUE)[seq_len(10)]

usInjuriesMean <- tapply(stormData$INJURIES, stormData$EVTYPE, mean)
usInjuriesTopTenPerEvent <-
  sort(usInjuriesMean, decreasing = TRUE)[seq_len(10)]

# Per-event-type totals: summed FATALITIES / INJURIES for each EVTYPE, then
# the ten largest values in decreasing order.
usFatalitiesTotal <- tapply(stormData$FATALITIES, stormData$EVTYPE, sum)
usFatalitiesTopTenTotal <-
  sort(usFatalitiesTotal, decreasing = TRUE)[seq_len(10)]

usInjuriesTotal <- tapply(stormData$INJURIES, stormData$EVTYPE, sum)
usInjuriesTopTenTotal <-
  sort(usInjuriesTotal, decreasing = TRUE)[seq_len(10)]

Next, find the single events that caused the most fatalities and the most injuries.

# Record (row) with the largest FATALITIES value, and its begin date as text.
# which.max() returns the first row when several share the maximum.
greatestFatality <- stormData[which.max(stormData[["FATALITIES"]]), ]
greatestFatalityDate <- as.character(greatestFatality[["BGN_DATE"]])

# Record (row) with the largest INJURIES value, and its begin date as text.
greatestInjuries <- stormData[which.max(stormData[["INJURIES"]]), ]
greatestInjuriesDate <- as.character(greatestInjuries[["BGN_DATE"]])

The analysis above indicates that tornadoes do the most damage to health. Let’s see which states have the most tornado fatalities.

# Total tornado fatalities per state: keep only rows whose EVTYPE is exactly
# "TORNADO", sum FATALITIES within each STATE, then take the ten largest.
usTornadoesByState <- local({
  tornadoEvents <- stormData[stormData$EVTYPE == "TORNADO", ]
  tapply(tornadoEvents$FATALITIES, tornadoEvents$STATE, sum)
})
usTornadoesByStateTopTen <-
  sort(usTornadoesByState, decreasing = TRUE)[seq_len(10)]

For the economic evaluation, we need to convert the cost of each event to like terms (a plain number rather than a value plus a magnitude code).

# Convert damage amounts plus magnitude codes into plain numbers.
#
# Arguments:
#   xDMG  numeric vector of damage amounts.
#   xEXP  magnitude codes, one per amount (or a single code that is recycled).
#         May be character, factor or numeric.
#
# Returns a numeric vector: xDMG multiplied by the factor implied by xEXP.
#   "h"/"H" -> 1e2, "k"/"K" -> 1e3, "m"/"M" -> 1e6, "b"/"B" -> 1e9
#   a whole number n (numeric, or digits stored as text) -> 10^n
#   anything else (empty, NA, "?", "+", ...) -> amount left unchanged
#
# The function is vectorised: every element is converted using its own code.
# A scalar `if` chain would only ever inspect the first code of a column.
# NOTE(review): digit codes are interpreted as powers of ten; confirm this
# against the data set's documentation.
moneyConverter <- function(xDMG, xEXP) {
        # Numeric codes are taken directly as the power of ten.
        if (is.numeric(xEXP)) {
                return(xDMG * 10^xEXP)
        }

        # Normalise to upper-case text so factor and character columns are
        # handled identically and letter case does not matter.
        code <- toupper(as.character(xEXP))

        # Default multiplier of 1 leaves unrecognised codes untouched.
        multiplier <- rep(1, length(code))
        multiplier[code %in% "H"] <- 1e2
        multiplier[code %in% "K"] <- 1e3
        multiplier[code %in% "M"] <- 1e6
        multiplier[code %in% "B"] <- 1e9

        # Digits stored as text, e.g. "5" -> 1e5.
        isDigits <- grepl("^[0-9]+$", code)
        multiplier[isDigits] <- 10^as.numeric(code[isDigits])

        xDMG * multiplier
}

# Total cost per event type: converted property damage plus converted crop
# damage for every record, summed within each EVTYPE.
expensesByEvent <- tapply(
  moneyConverter(stormData$PROPDMG, stormData$PROPDMGEXP) +
    moneyConverter(stormData$CROPDMG, stormData$CROPDMGEXP),
  stormData$EVTYPE,
  sum
)
## Warning in if (xEXP == "B") {: the condition has length > 1 and only the
## first element will be used
# Ten event types with the largest total expenses, in decreasing order.
expensesByEventTopTen <-
  sort(expensesByEvent, decreasing = TRUE)[seq_len(10)]

# Scale the totals down by one million for display.
# NOTE: despite its name, expensesByEventTopTenInBillions holds values in
# millions; the name is kept because the results table refers to it.
Million <- 1e6
expensesByEventTopTenInBillions <- expensesByEventTopTen / Million

Results:

# HTML table of the ten highest per-event fatality averages, with the event
# type names as column headers.
kable(as.list(usFatalitiesTopTenPerEvent),
      col.names = names(usFatalitiesTopTenPerEvent),
      caption = "Events with the Highest Average 
      Number of Fatalities per event",
      format = "html", align = "c", digits = 1)
Events with the Highest Average Number of Fatalities per event
TORNADOES, TSTM WIND, HAIL COLD AND SNOW TROPICAL STORM GORDON RECORD/EXCESSIVE HEAT EXTREME HEAT HEAT WAVE DROUGHT HIGH WIND/SEAS MARINE MISHAP WINTER STORMS Heavy surf and wind
25 14 8 5.7 4.4 4 4 3.5 3.3 3
# HTML table of the ten highest per-event injury averages, with the event
# type names as column headers.
kable(as.list(usInjuriesTopTenPerEvent),
      col.names = names(usInjuriesTopTenPerEvent),
      caption = "Ten Highest Averages of US Injuries
      per Event",
      format = "html", align = "c", digits = 1)
Ten Highest Averages of US Injuries per Event
Heat Wave TROPICAL STORM GORDON WILD FIRES THUNDERSTORMW HIGH WIND AND SEAS SNOW/HIGH WINDS GLAZE/ICE STORM HEAT WAVE DROUGHT WINTER STORM HIGH WINDS HURRICANE/TYPHOON
70 43 37.5 27 20 18 15 15 15 14.5
# HTML table of the ten event types with the highest total fatalities.
# The column headers must come from the fatalities ranking itself; taking
# them from the injuries ranking would label the fatality counts with the
# wrong event types whenever the two rankings differ.
kable(as.list(usFatalitiesTopTenTotal), 
      col.names = names(usFatalitiesTopTenTotal),
      format = "html", digits = 1, caption = "Events with the Highest Total Number 
      of Fatalities", align = 'c')
Events with the Highest Total Number of Fatalities
TORNADO TSTM WIND FLOOD EXCESSIVE HEAT LIGHTNING HEAT ICE STORM FLASH FLOOD THUNDERSTORM WIND HAIL
5633 1903 978 937 816 504 470 368 248 224
# HTML table of the ten event types with the highest total injuries, with the
# event type names as column headers.
kable(as.list(usInjuriesTopTenTotal),
      col.names = names(usInjuriesTopTenTotal),
      caption = "Events with the Highest Total Number
      of Injuries",
      format = "html", align = "c", digits = 1)
Events with the Highest Total Number of Injuries
TORNADO TSTM WIND FLOOD EXCESSIVE HEAT LIGHTNING HEAT ICE STORM FLASH FLOOD THUNDERSTORM WIND HAIL
91346 6957 6789 6525 5230 2100 1975 1777 1488 1361
# One-sentence summary of the deadliest single event: state, event type and
# the date part of the begin date (text before the first space).
# The format string is kept on one line so that no newline or indentation
# ends up inside the message.
sprintf("The Greatest Fatality occurred in %s, which was a/an %s on %s",
        greatestFatality$STATE, greatestFatality$EVTYPE,
        strsplit(greatestFatalityDate, " ")[[1]][1])
## [1] "The Greatest Fatality occurred in IL, which was a/an HEAT on 7/12/1995"
# One-sentence summary of the single event with the most injuries: state,
# event type and the date part of the begin date (text before the first
# space). The format string is kept on one line so that no newline or
# indentation ends up inside the message.
sprintf("The Greatest Injuries occurred in %s, which was a/an %s on %s",
        greatestInjuries$STATE, greatestInjuries$EVTYPE,
        strsplit(greatestInjuriesDate, " ")[[1]][1])
## [1] "The Greatest Injuries occurred in TX, which was a/an TORNADO on 4/10/1979"
# Bar chart of the ten states with the highest total tornado fatalities.
barplot(
  usTornadoesByStateTopTen,
  xlab = "State",
  ylab = "Number of deaths",
  main = "Total number of deaths by Tornadoes per State"
)

Economic Results:

# HTML table of the ten event types with the highest total expenses.
# NOTE: the variable name says "InBillions", but the values were divided by
# one million, which is what the caption states.
kable(as.list(expensesByEventTopTenInBillions),
      col.names = names(expensesByEventTopTenInBillions),
      caption = "Events with Highest Expenses (in millions)",
      format = "html", align = "c", digits = 2)
Events with Highest Expenses (in millions)
TORNADO FLASH FLOOD TSTM WIND FLOOD THUNDERSTORM WIND HAIL LIGHTNING THUNDERSTORM WINDS HIGH WIND WINTER STORM
3212.36 1420.3 1336.07 900.11 876.91 689.27 603.36 446.31 324.75 132.72