Synopsis

The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database tracks characteristics of major storms and weather events in that country, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. This data analysis try to answer: Across the U.S.,which types of events are most harmful with respect to population health?. Across the U.S., which types of events have the greatest economic consecuences?.

Data Processing

We load our data.

storm.data = read.csv(bzfile("StormData.csv.bz2"), header = TRUE)
#We don't need all the columns.
reduced.storm.data <- storm.data[,c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG")]

#Normalize event names.
reduced.storm.data$EVTYPE <- gsub("^HEAT$", "EXCESSIVE HEAT", reduced.storm.data$EVTYPE)
reduced.storm.data$EVTYPE <- gsub("^TSTM WIND$", "THUNDERSTORM WIND", reduced.storm.data$EVTYPE)
reduced.storm.data$EVTYPE <- gsub("^THUNDERSTORM WIND$", "THUNDERSTORM WINDS", reduced.storm.data$EVTYPE)

#First we aggregate data on fatalities and find which events are the top 10 causes of fatalities.
agg.fatalities.data <- aggregate(reduced.storm.data$FATALITIES, by=list(reduced.storm.data$EVTYPE), FUN=sum, na.rm=TRUE)
colnames(agg.fatalities.data) = c("event.type", "fatality.total")
fatalities.sorted <- agg.fatalities.data[order(-agg.fatalities.data$fatality.total),] 
top.fatalities <- fatalities.sorted[1:10,]
top.fatalities$event.type <- factor(top.fatalities$event.type, levels=top.fatalities$event.type, 
ordered=TRUE)

#We next do the same for injuries.
agg.injuries.data <- aggregate(reduced.storm.data$INJURIES, by=list(reduced.storm.data$EVTYPE), FUN=sum, na.rm=TRUE)
colnames(agg.injuries.data) = c("event.type", "injury.total")
injuries.sorted <- agg.injuries.data[order(-agg.injuries.data$injury.total),] 
top.injuries <- injuries.sorted[1:10,]
top.injuries$event.type <- factor(top.injuries$event.type, levels=top.injuries$event.type, ordered=TRUE)

#Finally we do the same for property damage.
agg.prop.dmg.data <- aggregate(reduced.storm.data$PROPDMG, by=list(reduced.storm.data$EVTYPE), FUN=sum, na.rm=TRUE)
colnames(agg.prop.dmg.data) = c("event.type", "prop.dmg.total")
prop.dmg.sorted <- agg.prop.dmg.data[order(-agg.prop.dmg.data$prop.dmg.total),] 
top.prop.dmg <- prop.dmg.sorted[1:10,]
top.prop.dmg$event.type <- factor(top.prop.dmg$event.type, levels=top.prop.dmg$event.type, ordered=TRUE)

Results

We graph the top 10 causes of fatalities.

library(ggplot2)
ggplot(data=top.fatalities, aes(x=event.type, y=fatality.total)) + 
    geom_bar(stat="identity") + xlab("Event type") + ylab("Total fatalities") + 
    ggtitle("Fatalities By Event Type") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

We do the same for injuries.

ggplot(data=top.injuries, aes(x=event.type, y=injury.total)) + 
    geom_bar(stat="identity") + xlab("Event type") + ylab("Total injuries") + 
    ggtitle("Injuries By Event Type") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Finally we do so for property damage.

ggplot(data=top.prop.dmg, aes(x=event.type, y=prop.dmg.total)) + 
    geom_bar(stat="identity") + xlab("Event type") + 
    ylab("Total property damage") +  ggtitle("Property Damage By Event Type") + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Conclusion

Across the U.S.,which types of events are most harmful with respect to population health?. The Wind events, which include tornadoes and hurricanes, are by the far the most harmful. Though less frequent, excessive heat events have the second highest incidence of fatalities by event type.

Across the U.S., which types of events have the greatest economic consecuences?. The Tornadoes and Thunderstorm winds as by far the most important weather events that have drastic economical effects