##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
This document presents an analysis of the storm data from the NOAA. It attempts to answer two questions: (1) which event type has the greatest health impact across the United States, and (2) which event type has the greatest economic impact across the United States.
First, let us download the data from the storm data web site and read it into R.
# Fetch the compressed storm data archive (once) and load it into `dataset`.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "stormdata.csv.bz2"

# Skip the download when the archive is already on disk, so re-running the
# analysis does not fetch the same file again.
if (!file.exists(storm_file)) {
  tryCatch(
    # mode = "wb" keeps the bzip2 archive byte-exact; a text-mode transfer
    # can corrupt binary files on Windows.
    download.file(storm_url, storm_file, mode = "wb"),
    error = function(e) {
      # Remove a partial file so the next run retries instead of reading it.
      unlink(storm_file)
      stop(e)
    }
  )
}

# The .bz2 archive is read directly; text columns stay character vectors
# rather than being converted to factors.
dataset <- read.csv(storm_file, stringsAsFactors = FALSE)
Let us look at the number of events by type; the chart below shows the ten most frequent event types.
# Count the records per event type and keep the ten most frequent types.
# The count column gets an explicit name and top_n() an explicit `wt`, so the
# ranking does not depend on which column happens to be last in the summary.
# top_n() keeps ties, so more than ten rows are possible.
ds <- dataset %>%
  group_by(EVTYPE) %>%
  summarize(events = n()) %>%
  top_n(n = 10, wt = events)

# Bar chart of the counts, most frequent event type first (same ordering
# convention as the other charts in this document).
ggplot(ds, aes(x = reorder(EVTYPE, -events), y = events)) +
  geom_bar(stat = "identity") +
  labs(y = "Total events", x = "Event type") +
  theme(
    axis.text.x = element_text(color = "#993333", size = 12, angle = 90)
  )
We need to convert the amounts using the magnitude codes indicated in the documentation, i.e. multiply by 1,000 for K, by 1,000,000 for M and by 1,000,000,000 for B (upper or lower case); we will use 1 for all other values. Then let us add the property damage to the crop damage to obtain the full cost of each event.
# Translate a vector of damage-exponent codes into numeric multipliers:
# K/k -> 1e3, M/m -> 1e6, B/b -> 1e9, anything else (including NA) -> 1.
exponent_multiplier <- function(codes) {
  known_codes <- c("K", "k", "M", "m", "B", "b")
  known_factors <- c(1000, 1000, 1000000, 1000000, 1000000000, 1000000000)
  # match() yields NA for unrecognised codes; those fall back to 1 below.
  factors <- known_factors[match(codes, known_codes)]
  factors[is.na(factors)] <- 1
  factors
}

dataset$multiplePROP <- exponent_multiplier(dataset$PROPDMGEXP)
dataset$multipleCROP <- exponent_multiplier(dataset$CROPDMGEXP)

# PROPDMG and CROPDMG are overwritten in place with the scaled amounts, so
# this chunk must run exactly once per freshly loaded `dataset`; running it
# again would apply the multipliers a second time.
dataset$PROPDMG <- dataset$PROPDMG * dataset$multiplePROP
dataset$CROPDMG <- dataset$CROPDMG * dataset$multipleCROP

# Total cost of each record: property damage plus crop damage.
dataset$fullDamage <- dataset$PROPDMG + dataset$CROPDMG
The chart below displays the top 3 event types by combined fatalities and injuries across the United States; we can see that tornadoes account for a large part of the total fatalities and injuries.
# Total casualties (fatalities plus injuries) per event type, keeping the
# three largest totals. The column is named for what it holds, and top_n()
# is given an explicit `wt` instead of guessing the ranking column.
# na.rm = TRUE stops one incomplete record from turning a whole event type's
# total into NA.
ds <- dataset %>%
  group_by(EVTYPE) %>%
  summarize(casualties = sum(FATALITIES + INJURIES, na.rm = TRUE)) %>%
  top_n(n = 3, wt = casualties) %>%
  arrange(casualties)

# Bar chart of the totals, largest first.
ggplot(ds, aes(x = reorder(EVTYPE, -casualties), y = casualties)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 3 events by fatalities & Injuries",
    y = "Total Fatalities & Injuries",
    x = "Event type"
  ) +
  theme(
    axis.text.x = element_text(color = "#993333", size = 12, angle = 90),
    axis.title.x = element_text(hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
The chart below displays the top 3 event types by cost of damages across the United States; we can see that tornadoes account for the largest costs to property and crops across the United States.
# Total damage cost (the `fullDamage` column) per event type, keeping the
# three largest totals. top_n() is given an explicit `wt` instead of guessing
# the ranking column, and na.rm = TRUE stops one missing amount from turning
# a whole event type's total into NA.
ds <- dataset %>%
  group_by(EVTYPE) %>%
  summarize(damage = sum(fullDamage, na.rm = TRUE)) %>%
  top_n(n = 3, wt = damage) %>%
  arrange(damage)

# Bar chart of the totals, largest first.
ggplot(ds, aes(x = reorder(EVTYPE, -damage), y = damage)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 3 events by cost of damages",
    y = "Total Damage Costs",
    x = "Event type"
  ) +
  theme(
    axis.text.x = element_text(color = "#993333", size = 12, angle = 90),
    axis.title.x = element_text(hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )