library(dplyr)
library(ggplot2)
library(stringr)

Data Analysis

# Load the raw storm data from the compressed CSV.
stormdata <- read.csv('repdata_data_StormData.csv.bz2')

# Columns are accessed explicitly as stormdata$... rather than via attach():
# an attached copy is not updated when stormdata is modified later on, and it
# can silently mask other objects on the search path.
summary(stormdata)

# Structure of the event-type column.
str(stormdata$EVTYPE)

# Structure, distribution and number of non-zero entries for fatalities.
str(stormdata$FATALITIES)
summary(stormdata$FATALITIES)
sum(stormdata$FATALITIES != 0, na.rm = TRUE)

# Same inspection for injuries.
str(stormdata$INJURIES)
summary(stormdata$INJURIES)
sum(stormdata$INJURIES != 0, na.rm = TRUE)

Initial exploration of the data determined the following:

The EVTYPE variable is a factor with 985 levels

There are 2 variables related to population health:

+FATALITIES

+INJURIES

The FATALITIES variable is numeric with a max of 583 and 6,974 non-zero values

The INJURIES variable is numeric with a max of 1,700 and 17,604 non-zero values

Data Processing: Population Health Figures

Summarizing the fatalities and injuries data to identify population health consequences of each event type

# Total fatalities and injuries per event type, computed in one grouped pass.
# Both totals come out of the same summarize() call, so they are aligned on
# EVTYPE by construction instead of relying on a positional cbind() of two
# separately summarised tables followed by a rename by column index.
# as.data.frame() keeps Health.bytype a plain data.frame, which is what the
# cbind() call returned, so downstream indexing and printing are unchanged.
Health.bytype <- stormdata %>%
    group_by(EVTYPE) %>%
    summarize(sumfatal = sum(FATALITIES),
              suminj = sum(INJURIES)) %>%
    as.data.frame()

head(Health.bytype)
##                  EVTYPE sumfatal suminj
## 1    HIGH SURF ADVISORY        0      0
## 2         COASTAL FLOOD        0      0
## 3           FLASH FLOOD        0      0
## 4             LIGHTNING        0      0
## 5             TSTM WIND        0      0
## 6       TSTM WIND (G45)        0      0

Results: Population Health Figures

Plotting Fatalities by Event Type

# Sort event types by total fatalities in ascending order; the last six rows
# are then the six event types with the highest fatality totals.
fatal <- Health.bytype[with(Health.bytype, order(sumfatal)), ]
plotfatal <- tail(fatal, n = 6L)
# Wrap event names at 12 characters; EVTYPE is used for the x-axis labels.
plotfatal[['EVTYPE']] <- str_wrap(plotfatal[['EVTYPE']], width = 12)

ggplot(data = plotfatal, mapping = aes(x = EVTYPE, y = sumfatal)) +
    geom_point() +
    labs(title = 'Top 6 Most Fatal Event Types',
         x = 'Event Type',
         y = 'Number of Fatalities')

Plotting Injuries by Event Type

# Ascending sort on total injuries, then keep the final six rows, i.e. the
# six event types with the highest injury totals.
injuries <- Health.bytype[order(Health.bytype[['suminj']]), ]
plotinj <- tail(injuries, n = 6L)
# Wrap event names at 12 characters; EVTYPE is used for the x-axis labels.
plotinj[['EVTYPE']] <- str_wrap(plotinj[['EVTYPE']], width = 12)

ggplot(data = plotinj, mapping = aes(x = EVTYPE, y = suminj)) +
    geom_point() +
    labs(title = 'Top 6 Most Injuries per Event Type',
         x = 'Event Type',
         y = 'Number of Injuries')

Data Processing: Economic Impact Figures

According to the site’s description of the data, dollar damage amounts are listed together with a magnitude code:

+'B' is Billions

+'M' is Millions

+'K' is Thousands

Creating new variables, propdmg_mod and cropdmg_mod, to extend the dollar amounts to the same scale

# Convert the damage figures to plain dollars.
# PROPDMGEXP / CROPDMGEXP carry the magnitude code for the matching amount:
# 'B' = billions, 'M' = millions, 'K' = thousands. Any other code (including
# an empty or missing one) leaves the amount unscaled.
# Codes are upper-cased before comparison so that lowercase entries such as
# 'm' or 'k' are scaled as well, instead of silently falling through to the
# unscaled branch and understating the damage.
stormdata <- stormdata %>%
    mutate(
        propdmg_mod = PROPDMG * case_when(
            toupper(PROPDMGEXP) == 'B' ~ 1e9,
            toupper(PROPDMGEXP) == 'M' ~ 1e6,
            toupper(PROPDMGEXP) == 'K' ~ 1e3,
            TRUE ~ 1
        ),
        cropdmg_mod = CROPDMG * case_when(
            toupper(CROPDMGEXP) == 'B' ~ 1e9,
            toupper(CROPDMGEXP) == 'M' ~ 1e6,
            toupper(CROPDMGEXP) == 'K' ~ 1e3,
            TRUE ~ 1
        )
    )

Summarizing the property and crop damage data to identify economic consequences of each event type

Property and crop damage figures were combined

# Total crop damage, property damage and their sum per event type, computed
# in one grouped pass. All three columns come out of the same summarize()
# call, so they are aligned on EVTYPE by construction instead of relying on a
# positional cbind() of two separately summarised tables.
# as.data.frame() keeps Damage.bytype a plain data.frame, which is what the
# cbind() call returned, so downstream indexing and printing are unchanged.
Damage.bytype <- stormdata %>%
    group_by(EVTYPE) %>%
    summarize(sumcrop = sum(cropdmg_mod),
              sumprop = sum(propdmg_mod)) %>%
    mutate(sumtotal = sumcrop + sumprop) %>%
    as.data.frame()

head(Damage.bytype)
##                  EVTYPE sumcrop sumprop sumtotal
## 1    HIGH SURF ADVISORY       0  200000   200000
## 2         COASTAL FLOOD       0       0        0
## 3           FLASH FLOOD       0   50000    50000
## 4             LIGHTNING       0       0        0
## 5             TSTM WIND       0 8100000  8100000
## 6       TSTM WIND (G45)       0    8000     8000

Results: Economic Impact Figures

Taking the top 6 most expensive event types and plotting their total cost in millions of dollars

# Ascending sort on combined damage, then keep the final six rows, i.e. the
# six event types with the highest total cost.
cost <- Damage.bytype[with(Damage.bytype, order(sumtotal)), ]
plotcost <- tail(cost, n = 6L)
# Express the totals in millions of dollars for the y-axis.
plotcost[['Millions']] <- plotcost[['sumtotal']] / 1000000
# Wrap event names at 12 characters; EVTYPE is used for the x-axis labels.
plotcost[['EVTYPE']] <- str_wrap(plotcost[['EVTYPE']], width = 12)

ggplot(data = plotcost, mapping = aes(x = EVTYPE, y = Millions)) +
    geom_point() +
    labs(title = 'Top 6 Most Expensive Event Types',
         x = 'Event Type',
         y = 'Cost in Millions')

It appears that tornadoes cause the most fatalities and injuries and are the greatest risk to population health. The final figure combines damage to property and crops and shows that flooding has the greatest economic impact.