Synopsis

This report is written to answer the following questions:
Across the United States, which types of events are most harmful with respect to population health?

Across the United States, which types of events have the greatest economic consequences?

Data from the National Weather Service Storm database was used to answer these questions. Total injuries and fatalities are used to measure effects on population health. Property and crop damage are used to measure the economic consequences. In summary, I have found that tornadoes are responsible for the biggest impacts on population health in terms of injuries and fatalities. Floods cause the most damage in terms of combined property and crop damage.

Data Processing

R version 3.6.0 is used. The data are read using read.csv, and only the columns that are of interest to this project are kept. I also fix some mixed-case issues in the EVTYPE variable so that events are labeled more consistently. We also need to convert the coding of the property and crop damage to single numeric variables using the codes taken from the PDF manual.

Load required libraries

library(dplyr)
library(ggplot2)
library(lattice)
library(tidyr)
# Read the storm data file and retain only the columns used in this analysis.
data <- read.csv("repdata_data_StormData.csv.bz2")

keep <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
anadat <- data[, keep]

# Upper-case the event labels so that mixed-case variants share one spelling.
anadat[["EVTYPE"]] <- toupper(anadat[["EVTYPE"]])

head(anadat)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0
# Translate the PROPDMGEXP magnitude code into a numeric multiplier (PROPMULT)
# and derive the property damage as a plain number (PROPDMGNUM).
#
# A single lookup table replaces the run of one-assignment-per-code lines.
# Codes that are not in the table (for example a blank exponent) receive a
# multiplier of 0 rather than NA, so every row gets a numeric PROPDMGNUM and
# no row is silently discarded by NA handling in later aggregation.
anadat$PROPDMGEXP <- toupper(anadat$PROPDMGEXP)

prop_mult_lookup <- c(
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9,
  "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "?" = 0, "-" = 0, "+" = 0, "0" = 0
)

# Indexing the named vector by code yields NA for any code it does not contain.
anadat$PROPMULT <- unname(prop_mult_lookup[as.character(anadat$PROPDMGEXP)])
anadat$PROPMULT[is.na(anadat$PROPMULT)] <- 0

anadat$PROPDMGNUM <- anadat$PROPDMG * anadat$PROPMULT

# Translate the CROPDMGEXP magnitude code into a numeric multiplier (CROPMULT)
# and derive the crop damage as a plain number (CROPDMGNUM).
#
# A single lookup table replaces the run of one-assignment-per-code lines.
# Codes that are not in the table (for example a blank exponent) receive a
# multiplier of 0 rather than NA, so every row gets a numeric CROPDMGNUM and
# no row is silently discarded by NA handling in later aggregation.
anadat$CROPDMGEXP <- toupper(anadat$CROPDMGEXP)

crop_mult_lookup <- c(
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9,
  "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "?" = 0, "-" = 0, "+" = 0, "0" = 0
)

# Indexing the named vector by code yields NA for any code it does not contain.
anadat$CROPMULT <- unname(crop_mult_lookup[as.character(anadat$CROPDMGEXP)])
anadat$CROPMULT[is.na(anadat$CROPMULT)] <- 0

anadat$CROPDMGNUM <- anadat$CROPDMG * anadat$CROPMULT

Next, get totals of injuries, fatalities, property damage, and crop damage by event type. Sort these in descending order using arrange so that the event types with the largest totals are at the top.

# Sum fatalities per event type, largest total first.
total_fatal <- aggregate(FATALITIES ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(FATALITIES))
head(total_fatal)
##           EVTYPE FATALITIES
## 1        TORNADO       5633
## 2 EXCESSIVE HEAT       1903
## 3    FLASH FLOOD        978
## 4           HEAT        937
## 5      LIGHTNING        816
## 6      TSTM WIND        504
# Sum injuries per event type, largest total first.
total_injury <- aggregate(INJURIES ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(INJURIES))
head(total_injury)
##           EVTYPE INJURIES
## 1        TORNADO    91346
## 2      TSTM WIND     6957
## 3          FLOOD     6789
## 4 EXCESSIVE HEAT     6525
## 5      LIGHTNING     5230
## 6           HEAT     2100
# Sum numeric property damage per event type, largest total first.
total_propdmg <- aggregate(PROPDMGNUM ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(PROPDMGNUM))
head(total_propdmg)
##              EVTYPE   PROPDMGNUM
## 1             FLOOD 144657709800
## 2 HURRICANE/TYPHOON  69305840000
## 3           TORNADO  56947380480
## 4       STORM SURGE  43323536000
## 5       FLASH FLOOD  16822673510
## 6              HAIL  15735267220
# Sum numeric crop damage per event type, largest total first.
total_cropdmg <- aggregate(CROPDMGNUM ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(CROPDMGNUM))
head(total_cropdmg)
##        EVTYPE  CROPDMGNUM
## 1     DROUGHT 13972566000
## 2       FLOOD  5661968450
## 3 RIVER FLOOD  5029459000
## 4   ICE STORM  5022113500
## 5        HAIL  3025954450
## 6   HURRICANE  2741910000

As we might expect, tornadoes, floods, and heat are among the types of events associated with the most injuries, fatalities, and property damage. Floods are also associated with crop damage, but drought causes the most crop damage.

Results

Bar plots are created to illustrate the top 10 events and their corresponding fatalities and injuries.

# Bar chart of the first ten rows of total_fatal, with bars ordered by
# descending fatality count and each bar labelled with its total.
fatal10 <- total_fatal[seq_len(10), ]
f <- ggplot(
  data = fatal10,
  mapping = aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
)
f +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = FATALITIES), vjust = -1) +
  ylim(0, 7000) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Weather Events causing the top 10 most Fatalities: 1950-2011",
    x = "Type of Event",
    y = "Total Fatalities"
  )

# Bar chart of the first ten rows of total_injury, with bars ordered by
# descending injury count and each bar labelled with its total.
injury10 <- total_injury[seq_len(10), ]
i <- ggplot(
  data = injury10,
  mapping = aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
)
i +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = INJURIES), vjust = -1) +
  ylim(0, 100000) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Weather Events causing the top 10 most Injuries: 1950-2011",
    x = "Type of Event",
    y = "Total Injuries"
  )

Economic consequences are broken down into property and crop damage. We will look at the sums grouped by event type.

# Combine the property and crop totals per event type. The join key is
# spelled out so it does not depend on dplyr guessing the shared column.
pc <- inner_join(total_propdmg, total_cropdmg, by = "EVTYPE")

# Rank event types by combined damage and pick the ten costliest.
pc2 <- pc
pc2$total_cost <- pc2$PROPDMGNUM + pc2$CROPDMGNUM
pc2 <- arrange(pc2, desc(total_cost))
# stringsAsFactors = FALSE keeps EVTYPE as character so the join below does
# not have to coerce a factor key.
keep <- data.frame(EVTYPE = head(pc2$EVTYPE, 10), stringsAsFactors = FALSE)
pc10 <- inner_join(pc, keep, by = "EVTYPE")

# Reshape to one row per (event type, damage class) and express in billions.
pc10 <- gather(pc10, key = "class", value = "total", -EVTYPE)
# Round to one decimal place; a `digits` value of 0.1 is treated as 0 by
# round(), which would flatten every total below half a billion to 0.
pc10$total <- round(pc10$total / 1000000000, 1)

# Order the bars by the combined (property + crop) total of each event type.
d <- ggplot(pc10, aes(reorder(EVTYPE, -total, FUN = sum), total, fill = class))
d +
  geom_col(position = "stack") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Weather Events causing the Highest Damage: 1950-2011",
    x = "Type of Event",
    y = "Total Cost in Billions ($)"
  ) +
  # `class` is a character column, so its fill levels are ordered
  # alphabetically (CROPDMGNUM before PROPDMGNUM). Giving explicit breaks ties
  # each label to the right class instead of relying on that ordering.
  scale_fill_discrete(
    breaks = c("PROPDMGNUM", "CROPDMGNUM"),
    labels = c("Property", "Crop"),
    name = "Type of Damage"
  )