This report is written to answer the following questions:
Across the United States, which types of events are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
Data from the National Weather Service Storm database were used to answer these questions. Total injuries and fatalities are used to measure effects on population health. Property and crop damage are used to measure the economic consequences. In summary, I found that tornadoes are responsible for the biggest impacts on population health in terms of injuries and fatalities. Floods cause the most damage in terms of property and crop damage.
R version 3.6.0 is used. The data are read using read.csv, and only the columns that are of interest to this project are kept. I also fix some mixed-case issues in the EVTYPE variable so that events are labeled more consistently. We also need to convert the coded property and crop damage into single numeric variables using the codes taken from the PDF manual.
Load required libraries
library(dplyr)
library(ggplot2)
library(lattice)
library(tidyr)
# Load the raw storm records; the .bz2 path is handed to read.csv() directly.
data <- read.csv(
  file = "repdata_data_StormData.csv.bz2",
  header = TRUE,
  sep = ","
)
# Columns needed for the health and economic analyses.
keep <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
anadat <- data[keep]
# Upper-case the event labels so mixed-case spellings fall into one group.
anadat[["EVTYPE"]] <- toupper(anadat[["EVTYPE"]])
head(anadat)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
# Convert the damage magnitude codes (PROPDMGEXP / CROPDMGEXP) into numeric
# multipliers and compute the damage amounts PROPDMGNUM / CROPDMGNUM.

# Map a vector of magnitude codes to numeric multipliers.
#
# exp_code: character or factor vector of codes; matching is case-insensitive.
# Returns:  numeric vector, same length as exp_code.
#
# Letter codes are H/K/M/B and digit codes 1-8 are powers of ten. Every other
# code ("?", "-", "+", "0", blank, NA, or anything unrecognised) maps to 0.
# Blank codes used to be left unassigned (NA), which made the computed damage
# NA; aggregate()'s formula interface then dropped those rows, so event types
# with only blank codes disappeared from the damage totals altogether.
damage_multiplier <- function(exp_code) {
  known <- c(
    H = 1e2, K = 1e3, M = 1e6, B = 1e9,
    "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8
  )
  # match() yields NA for codes that are not in the table.
  mult <- unname(known[match(toupper(as.character(exp_code)), names(known))])
  mult[is.na(mult)] <- 0
  mult
}

# Property damage: normalise the code column, then scale PROPDMG.
anadat$PROPDMGEXP <- toupper(anadat$PROPDMGEXP)
anadat$PROPMULT <- damage_multiplier(anadat$PROPDMGEXP)
anadat$PROPDMGNUM <- anadat$PROPDMG * anadat$PROPMULT

# Crop damage: same treatment for CROPDMG.
anadat$CROPDMGEXP <- toupper(anadat$CROPDMGEXP)
anadat$CROPMULT <- damage_multiplier(anadat$CROPDMGEXP)
anadat$CROPDMGNUM <- anadat$CROPDMG * anadat$CROPMULT
Next, get totals of injuries, fatalities, property damage, and crop damage by event type. Sort these using arrange so that the largest totals are at the top.
# Sum fatalities per event type, then rank event types from most to fewest.
total_fatal <- aggregate(FATALITIES ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(FATALITIES))
head(total_fatal)
## EVTYPE FATALITIES
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
# Sum injuries per event type, then rank event types from most to fewest.
total_injury <- aggregate(INJURIES ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(INJURIES))
head(total_injury)
## EVTYPE INJURIES
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
# Sum property damage per event type and rank from costliest down.
# Note: the formula interface of aggregate() omits rows whose PROPDMGNUM is NA.
total_propdmg <- aggregate(PROPDMGNUM ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(PROPDMGNUM))
head(total_propdmg)
## EVTYPE PROPDMGNUM
## 1 FLOOD 144657709800
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56947380480
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16822673510
## 6 HAIL 15735267220
# Sum crop damage per event type and rank from costliest down.
# Note: the formula interface of aggregate() omits rows whose CROPDMGNUM is NA.
total_cropdmg <- aggregate(CROPDMGNUM ~ EVTYPE, data = anadat, FUN = sum) %>%
  arrange(desc(CROPDMGNUM))
head(total_cropdmg)
## EVTYPE CROPDMGNUM
## 1 DROUGHT 13972566000
## 2 FLOOD 5661968450
## 3 RIVER FLOOD 5029459000
## 4 ICE STORM 5022113500
## 5 HAIL 3025954450
## 6 HURRICANE 2741910000
As we might expect, tornadoes, floods, and heat are among the types of events associated with the most injuries, fatalities, and property damage. Floods are also associated with crop damage, but drought causes the most crop damage.
Bar plots are created to illustrate the top 10 events and their corresponding fatalities and injuries.
# Bar chart of the ten event types with the highest fatality totals.
fatal10 <- total_fatal[seq_len(10), ]
f <- ggplot(
  data = fatal10,
  mapping = aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
)
f +
  geom_bar(stat = "Identity") +
  # Print each total above its bar.
  geom_text(aes(label = FATALITIES), vjust = -1) +
  ylim(0, 7000) +
  # Rotate the event names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Weather Events causing the top 10 most Fatalities: 1950-2011",
    x = "Type of Event",
    y = "Total Fatalities"
  )
# Bar chart of the ten event types with the highest injury totals.
injury10 <- total_injury[seq_len(10), ]
i <- ggplot(
  data = injury10,
  mapping = aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
)
i +
  geom_bar(stat = "Identity") +
  # Print each total above its bar.
  geom_text(aes(label = INJURIES), vjust = -1) +
  ylim(0, 100000) +
  # Rotate the event names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Weather Events causing the top 10 most Injuries: 1950-2011",
    x = "Type of Event",
    y = "Total Injuries"
  )
Economic consequences are broken down into property and crop damage. We will look at the sums grouped by type.
# Combine property and crop damage per event type and chart the ten event
# types with the largest combined cost, in billions of dollars.

# full_join keeps event types that appear in only one of the two totals (an
# inner join silently discarded them); a missing side counts as zero damage.
# The explicit key also avoids the "Joining, by = ..." message.
pc <- full_join(total_propdmg, total_cropdmg, by = "EVTYPE")
pc$PROPDMGNUM[is.na(pc$PROPDMGNUM)] <- 0
pc$CROPDMGNUM[is.na(pc$CROPDMGNUM)] <- 0

# Rank event types by combined property + crop damage.
pc2 <- pc
pc2$total_cost <- pc2$PROPDMGNUM + pc2$CROPDMGNUM
pc2 <- arrange(pc2, desc(total_cost))

# Keep the (up to) ten costliest event types; head() does not pad with NA rows
# when fewer than ten are available.
keep <- data.frame(EVTYPE = head(pc2$EVTYPE, 10), stringsAsFactors = FALSE)
pc10 <- inner_join(pc, keep, by = "EVTYPE")

# Long format: one row per event type and damage class.
pc10 <- gather(pc10, class, total, -EVTYPE)
# Express in billions with one decimal place. The previous digits = 0.1 was
# treated as 0 by round(), i.e. values were rounded to whole billions.
pc10$total <- round(pc10$total / 1000000000, 1)

# reorder() averages the two classes per event type, which ranks the bars the
# same way as their combined total.
d <- ggplot(pc10, aes(reorder(EVTYPE, -total), total, fill = class))
d +
  geom_bar(stat = "Identity", position = "Stack") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Weather Events causing the Highest Damage: 1950-2011",
    x = "Type of Event",
    y = "Total Cost in Billions ($)"
  ) +
  # Pair each label with its break explicitly. The fill levels sort as
  # CROPDMGNUM, PROPDMGNUM, so unnamed labels in the order
  # ("Property", "Crop") were attached to the wrong classes.
  scale_fill_discrete(
    breaks = c("PROPDMGNUM", "CROPDMGNUM"),
    labels = c("Property", "Crop"),
    name = "Type of Damage"
  )