Across the United States, tornadoes, excessive heat, and flash floods are most harmful with respect to population health.
Across the United States, tornadoes, thunderstorm winds, and flash floods have the greatest economic consequences.
The raw data are taken from National Weather Service Instruction 10-1605. The events in the database start in the year 1950 and end in November 2011. Fatalities, injuries, and property damage (in dollars) are totalled over that time.
Load the data.
# Read the bzip2-compressed storm CSV into a data frame. The first line of
# the file supplies the column names (header = TRUE).
storm.data <- read.csv(
  file = bzfile("repdata_data_StormData.csv.bz2"),
  header = TRUE
)
Remove some columns.
# Keep only the columns used below: the event type plus the fatality,
# injury, and property-damage figures.
reduced.storm.data <- storm.data[
  c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG")
]
Normalize event names.
# Whole-string rewrites of event names, applied in the order listed.
# Order matters: "TSTM WIND" is first rewritten to "THUNDERSTORM WIND",
# which the following rule then rewrites to "THUNDERSTORM WINDS".
event.renames <- c(
  "^HEAT$" = "EXCESSIVE HEAT",
  "^TSTM WIND$" = "THUNDERSTORM WIND",
  "^THUNDERSTORM WIND$" = "THUNDERSTORM WINDS"
)
for (rename.pattern in names(event.renames)) {
  reduced.storm.data$EVTYPE <- gsub(
    rename.pattern,
    event.renames[[rename.pattern]],
    reduced.storm.data$EVTYPE
  )
}
Aggregate data on fatalities and find which events are the top 10 causes of fatalities.
# Sum fatalities per event type (NA values are dropped from each sum) and
# label the two resulting columns.
agg.fatalities.data <- setNames(
  aggregate(
    x = reduced.storm.data$FATALITIES,
    by = list(reduced.storm.data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event.type", "fatality.total")
)

# Sort event types from the largest fatality total to the smallest.
fatalities.sorted <- agg.fatalities.data[
  with(agg.fatalities.data, order(-fatality.total)),
]

# Keep the first ten rows of the sorted table.
top.fatalities <- fatalities.sorted[seq_len(10), ]

# Turn event.type into an ordered factor whose levels follow the sorted row
# order, so the ranking is carried by the factor itself.
top.fatalities$event.type <- ordered(
  top.fatalities$event.type,
  levels = top.fatalities$event.type
)
Aggregate data for injuries.
# Sum injuries per event type (NA values are dropped from each sum) and
# label the two resulting columns.
agg.injuries.data <- setNames(
  aggregate(
    x = reduced.storm.data$INJURIES,
    by = list(reduced.storm.data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event.type", "injury.total")
)

# Sort event types from the largest injury total to the smallest.
injuries.sorted <- agg.injuries.data[
  with(agg.injuries.data, order(-injury.total)),
]

# Keep the first ten rows of the sorted table.
top.injuries <- injuries.sorted[seq_len(10), ]

# Turn event.type into an ordered factor whose levels follow the sorted row
# order, so the ranking is carried by the factor itself.
top.injuries$event.type <- ordered(
  top.injuries$event.type,
  levels = top.injuries$event.type
)
Aggregate data for property damage.
# Sum the PROPDMG column per event type (NA values are dropped from each sum)
# and label the two resulting columns.
# NOTE(review): PROPDMG is summed exactly as stored. If the dataset keeps the
# magnitude of each figure in a separate column, these totals are not
# comparable dollar amounts -- confirm against the data documentation.
agg.prop.dmg.data <- setNames(
  aggregate(
    x = reduced.storm.data$PROPDMG,
    by = list(reduced.storm.data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event.type", "prop.dmg.total")
)

# Sort event types from the largest property-damage total to the smallest.
prop.dmg.sorted <- agg.prop.dmg.data[
  with(agg.prop.dmg.data, order(-prop.dmg.total)),
]

# Keep the first ten rows of the sorted table.
top.prop.dmg <- prop.dmg.sorted[seq_len(10), ]

# Turn event.type into an ordered factor whose levels follow the sorted row
# order, so the ranking is carried by the factor itself.
top.prop.dmg$event.type <- ordered(
  top.prop.dmg$event.type,
  levels = top.prop.dmg$event.type
)
Graph the top 10 causes of fatalities.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.1
# Bar chart of fatality totals for the event types in top.fatalities.
# Bar heights are taken directly from fatality.total; x-axis labels are
# rotated 45 degrees and right-justified.
ggplot(top.fatalities, aes(x = event.type, y = fatality.total)) +
  geom_col() +
  labs(
    x = "Event type",
    y = "Total fatalities",
    title = "Fatalities By Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Graph the top 10 causes of injuries.
# Bar chart of injury totals for the event types in top.injuries.
# Bar heights are taken directly from injury.total; x-axis labels are
# rotated 45 degrees and right-justified.
ggplot(top.injuries, aes(x = event.type, y = injury.total)) +
  geom_col() +
  labs(
    x = "Event type",
    y = "Total injuries",
    title = "Injuries By Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Graph the top 10 causes of property damage.
# Bar chart of property-damage totals for the event types in top.prop.dmg.
# Bar heights are taken directly from prop.dmg.total; x-axis labels are
# rotated 45 degrees and right-justified.
ggplot(top.prop.dmg, aes(x = event.type, y = prop.dmg.total)) +
  geom_col() +
  labs(
    x = "Event type",
    y = "Total property damage",
    title = "Property Damage By Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))