## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Synopsis

This document presents an analysis of the storm data from the NOAA. It attempts to answer two questions: (1) which event type has the greatest impact on population health across the United States, and (2) which event type has the greatest economic impact across the United States.

Data Processing

First let us download the data from the storm data web site

# Remote location of the compressed storm data and the local file it is
# saved to.
storm_url <- 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
storm_file <- 'stormdata.csv.bz2'
# load data set
# Only download when the archive is not already on disk, so re-running the
# analysis does not fetch the file again. mode = "wb" requests a binary
# transfer so the compressed archive is written unmodified.
if (!file.exists(storm_file)) {
  download.file(storm_url, storm_file, mode = "wb")
}
# read data set
# The compressed file is passed straight to read.csv(); text columns are
# kept as character vectors rather than factors.
dataset <- read.csv(storm_file, stringsAsFactors = FALSE)

Data exploration

Summary of events by type

Let us look at number of events by type

# Count the records for each event type and keep the 10 most frequent types.
# The count column is named explicitly and handed to top_n() as `wt`, so the
# ranking no longer depends on top_n() guessing the column to select by.
ds <- dataset %>%
  group_by(EVTYPE) %>%
  summarize(events = n()) %>%
  top_n(n = 10, wt = events)
# Bar chart of the record count per event type, with the x-axis labels
# rotated 90 degrees.
ggplot(ds, aes(x = EVTYPE, y = events)) +
  geom_bar(stat = "identity") +
  labs(y = "Total events") +
  theme(axis.text.x = element_text(color = "#993333", size = 12, angle = 90))

Let us do some data transformation to get economic impact

We need to convert the amounts using the magnitude of the number as indicated in the documentation, i.e. multiply by 1,000 for K, by 1,000,000 for M, and by 1,000,000,000 for B; we will use 1 for all other values. Then let us add the property damage to the crop damage to get the full cost of each event.

# Translate damage exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP column).
# Returns a numeric vector of the same length: "K"/"k" -> 1e3, "M"/"m" -> 1e6,
# "B"/"b" -> 1e9, and 1 for every other value (including empty and NA).
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  # Looking up by upper-cased name yields NA for any code not listed above.
  mult <- unname(known[toupper(exp_code)])
  mult[is.na(mult)] <- 1
  mult
}

# One multiplier column per damage type, both built by the same helper so
# property and crop damage are always scaled by the same rules.
dataset$multiplePROP <- damage_multiplier(dataset$PROPDMGEXP)
dataset$multipleCROP <- damage_multiplier(dataset$CROPDMGEXP)

# Scale the raw amounts by their multipliers. PROPDMG and CROPDMG are
# overwritten in place, so running this section twice on the same `dataset`
# would apply the multipliers twice.
dataset$PROPDMG <- dataset$PROPDMG * dataset$multiplePROP
dataset$CROPDMG <- dataset$CROPDMG * dataset$multipleCROP

# Combined property + crop damage per record.
dataset$fullDamage <- dataset$PROPDMG + dataset$CROPDMG

Results

Fatalities and Injuries

The plot below displays the top 3 event types by combined fatalities and injuries across the United States. We can see that tornadoes account for a large part of the total fatalities and injuries.

# Total fatalities plus injuries per event type, keeping the 3 largest totals.
# The column is named `casualties` because it holds the sum of both measures,
# and it is passed to top_n() as `wt` so the ranking column is explicit.
ds <- dataset %>%
  group_by(EVTYPE) %>%
  summarize(casualties = sum(FATALITIES + INJURIES)) %>%
  top_n(n = 3, wt = casualties) %>%
  arrange(casualties)
# Bar chart with the bars ordered from the largest total to the smallest.
ggplot(ds, aes(x = reorder(EVTYPE, -casualties), y = casualties)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 3 events by fatalities & Injuries",
    y = "Total Fatalities & Injuries",
    x = "Event type"
  ) +
  theme(
    axis.text.x = element_text(color = "#993333", size = 12, angle = 90),
    axis.title.x = element_text(hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

Total damage costs

The plot below displays the top 3 event types by cost of damages across the United States. We can see that flood events account for the largest combined costs to properties and crops across the United States.

# Total combined damage (fullDamage) per event type, keeping the 3 largest
# totals. `wt` is passed to top_n() explicitly so the ranking column does not
# have to be guessed.
ds <- dataset %>%
  group_by(EVTYPE) %>%
  summarize(damage = sum(fullDamage)) %>%
  top_n(n = 3, wt = damage) %>%
  arrange(damage)
# Bar chart with the bars ordered from the largest total to the smallest.
ggplot(ds, aes(x = reorder(EVTYPE, -damage), y = damage)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 3 events by cost of damages",
    y = "Total Damage Costs",
    x = "Event type"
  ) +
  theme(
    axis.text.x = element_text(color = "#993333", size = 12, angle = 90),
    axis.title.x = element_text(hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

Conclusion

  1. Tornadoes have the most impact on public health across the United States
  2. Flood events have the most economic impact across the US
  3. Hail is the most frequently recorded event type