Load packages

library(ggplot2)
library(plyr)

Read in data

# Read the storm records straight from the bzip2-compressed CSV archive.
data <- read.csv(file = bzfile(description = "repdata-data-StormData.csv.bz2"))

Inspect the data and convert it to the correct format for later analysis

# Print a compact overview of every column: its type and first few values.
utils::str(object = data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Parse the event start timestamp text into Date values; the time-of-day part
# of the string is matched by the format but not kept by the Date class.
data[["BGN_DATE"]] <- as.Date(
    data[["BGN_DATE"]],
    format = "%m/%d/%Y %H:%M:%S"
)

Question 1: Across the United States, which types of events are most harmful with respect to population health?

Identifying factors related to health: Fatalities & Injuries, then grouping them by event.

# Sum fatalities plus injuries for every event type.
most_harmful_event <- aggregate(
    FATALITIES + INJURIES ~ EVTYPE,
    data = data,
    FUN = sum
)

# Rank event types from most to fewest casualties and keep the first six rows.
casualty_rank <- order(-most_harmful_event[["FATALITIES + INJURIES"]])
top_most_harmful_event <- head(most_harmful_event[casualty_rank, ], n = 6L)

# Give the two columns short, plot-friendly names.
names(top_most_harmful_event) <- c("Event", "Count")

Plot to show the processed data.

# Bar chart of combined fatalities and injuries for the top event types;
# bar heights are taken directly from the Count column.
ggplot(data = top_most_harmful_event, mapping = aes(x = Event, y = Count)) +
    geom_col(fill = "dark blue") +
    labs(title = "Most Harmful Events to Health")

In conclusion, TORNADO, with a combined total of $9.6979 \times 10^{4}$ (96,979) injuries and fatalities, is the event type most harmful to population health.

Question 2: Across the United States, which types of events have the greatest economic consequences?

In this data set, property damage and crop damage are the variables that capture economic consequences.

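Process the data so that the aforementioned variables are expressed on a common numeric scale: each damage figure is multiplied by the factor encoded in its exponent column.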

# List the distinct exponent codes recorded for property damage.
unique(data$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"

# Translate each exponent code into a numeric multiplier (codes and
# multipliers are paired by position):
#   - letter codes are magnitude suffixes: h/H = 1e2, K = 1e3, m/M = 1e6,
#     B = 1e9;
#   - a digit d is read as 10^d;
#   - blank and symbol codes ("", "+", "?", "-") leave the figure unscaled.
# "h" is mapped to 1e2 so that it agrees with "H"; upper- and lower-case
# variants of the same letter must share one multiplier, otherwise records
# coded "h" are under-scaled by a factor of 100.
# The result is converted with as.numeric() before it is used downstream.
tempPROPDMGEXP <- mapvalues(data$PROPDMGEXP,
                         c("K","M","", "B","m","+","0","5","6","?","4","2","3","h","7","H","-","1","8"),
                         c(1e3,1e6, 1, 1e9,1e6,  1,  1,1e5,1e6,  1,1e4,1e2,1e3,1e2,1e7,1e2,  1, 10,1e8))

# Show which exponent codes occur for crop damage.
unique(data[["CROPDMGEXP"]])
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"

# Swap every exponent code for its numeric multiplier; codes and multipliers
# are paired by position.
tempCROPDMGEXP <- mapvalues(
    x = data[["CROPDMGEXP"]],
    from = c("", "M", "K", "m", "B", "?", "0", "k", "2"),
    to = c(1, 1e6, 1e3, 1e6, 1e9, 1, 1, 1e3, 1e2)
)


# Scale each recorded damage figure by its multiplier to obtain the absolute
# property and crop damage per record.
data[["PROPTOTALDMG"]] <- data[["PROPDMG"]] * as.numeric(tempPROPDMGEXP)
data[["CROPTOTALDMG"]] <- data[["CROPDMG"]] * as.numeric(tempCROPDMGEXP)


# Sum property plus crop damage for every event type.
most_damage <- aggregate(
    PROPTOTALDMG + CROPTOTALDMG ~ EVTYPE,
    data = data,
    FUN = sum
)

# Rank event types from largest to smallest total damage and keep the first
# six rows.
damage_rank <- order(-most_damage[["PROPTOTALDMG + CROPTOTALDMG"]])
top_most_damage <- head(most_damage[damage_rank, ], n = 6L)

# Give the two columns short, plot-friendly names.
names(top_most_damage) <- c("Event", "Total_Damage")

Plot to show which events have the greatest consequences to the economy.

# Bar chart of combined property and crop damage for the top event types;
# bar heights are taken directly from the Total_Damage column.
ggplot(data = top_most_damage, mapping = aes(x = Event, y = Total_Damage)) +
    geom_col(fill = "dark blue") +
    labs(title = "Most Damage to the Economy by Event")

In conclusion, FLOOD, with a combined total of $1.5031968 \times 10^{11}$ (roughly 150 billion) in crop and property damage, is the event type most harmful to the economy.