Synopsis

Using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which “tracks characteristics of major storms and weather events in the United States” between 1950 and 2011, a comparison of the different weather event types was completed to assess which had the largest public health and economic impacts.
Whether public health impact is defined as cumulative total injuries or as cumulative total deaths across all recorded incidences of an event type, tornadoes were the weather event with the worst public health impact. By contrast, flooding had the highest economic impact, as measured by combined crop and property damage estimates.

Data processing

Data is downloaded from the URL below into a temporary file on your computer, and then read into the R environment as a dataframe called ‘df’. It is a large dataframe, so only the variables of interest are kept for analysis, which are EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP:

# Download the compressed NOAA storm data into a temporary file, load it as
# `df`, and then remove the temporary file and helper variables.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
temp <- tempfile()
# The archive is bzip2-compressed, so request a binary transfer explicitly.
download.file(url, temp, mode = "wb")
df <- read.csv(temp)
unlink(temp)
rm(temp, url)
# Keep only the variables used in the analysis. They are selected by name
# rather than by position so the selection documents itself and does not
# silently pick the wrong columns if the file layout changes.
df <- df[, c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]

Some of the exponents on the property damage and crop damage estimates are given as characters (B = billions etc.), which need to be converted to numeric values for calculating totals. Any blank or ambiguous entries (e.g. “+” and “?”) will be made NA. The exponents are applied to the numbers in PROPDMG and CROPDMG to extract the actual estimates in one numeric variable each (property.costs and crop.costs). These two estimates are added for each event to give a combined “economic.cost” variable, with any ‘NA’ missing values assumed to be zero.

# Convert a column of damage-exponent codes into numeric powers of ten.
#
# Letter codes are matched case-insensitively (H = 2, K = 3, M = 6, B = 9).
# Codes that are already numbers are used as the exponent directly. Anything
# else (blank, "+", "?", ...) becomes NA.
#
# codes:   character or factor vector of exponent codes.
# returns: numeric vector of exponents, the same length as `codes`.
exponent_to_numeric <- function(codes) {
  codes <- as.character(codes)
  letter_exponents <- c(H = 2, K = 3, M = 6, B = 9)

  # Non-numeric codes are meant to end up as NA, so the coercion warning is
  # expected and is silenced for this single call only.
  exponents <- suppressWarnings(as.numeric(codes))

  # Overwrite the NA left by each known letter code with its exponent.
  upper_codes <- toupper(codes)
  is_letter <- upper_codes %in% names(letter_exponents)
  exponents[is_letter] <- letter_exponents[upper_codes[is_letter]]
  exponents
}

# The same conversion is applied to both exponent columns so that crop and
# property damage are always interpreted consistently.
df$CROPDMGEXP <- exponent_to_numeric(df$CROPDMGEXP)
df$PROPDMGEXP <- exponent_to_numeric(df$PROPDMGEXP)

# Scale each damage figure by its power of ten to get an estimate per event.
df <- dplyr::mutate(
  df,
  crop.costs = CROPDMG * (10^CROPDMGEXP),
  property.costs = PROPDMG * (10^PROPDMGEXP)
)

# Estimates whose exponent could not be interpreted are counted as zero.
# NOTE(review): this also zeroes events with a non-zero damage figure but a
# blank exponent -- confirm that this is the intended treatment.
df$property.costs[is.na(df$property.costs)] <- 0
df$crop.costs[is.na(df$crop.costs)] <- 0

# Combined economic cost per event: crop loss plus property damage.
df <- dplyr::mutate(df, economic.cost = crop.costs + property.costs)

Results

To estimate public health impact, we look at the total number of injuries recorded for each event type and give the top 10. The same is repeated for deaths.

# Sum the injuries recorded for every event type.
events <- dplyr::summarise(
  dplyr::group_by(df, EVTYPE),
  total.injuries = sum(INJURIES)
)
# Rank the event types from most to fewest injuries and keep the first ten.
events <- dplyr::arrange(events, dplyr::desc(total.injuries))
events <- events[seq_len(10), ]
print(events)
## Source: local data frame [10 x 2]
## 
##               EVTYPE total.injuries
##               (fctr)          (dbl)
## 1            TORNADO          91346
## 2          TSTM WIND           6957
## 3              FLOOD           6789
## 4     EXCESSIVE HEAT           6525
## 5          LIGHTNING           5230
## 6               HEAT           2100
## 7          ICE STORM           1975
## 8        FLASH FLOOD           1777
## 9  THUNDERSTORM WIND           1488
## 10              HAIL           1361
# Bar chart of the ten event types in `events`, one bar per event type, with
# the injury totals plotted as log10 values.
p1 <- ggplot(events, aes(x = EVTYPE, y = log10(total.injuries), fill = EVTYPE))
p1 +
  geom_bar(stat = "identity", alpha = 0.7) +
  xlab("") +
  ylab("Log10 total injuries count") +
  ggtitle("Worst 10 weather events for injuries") +
  # The fill colour repeats the x-axis labels, so its legend is hidden.
  # "none" replaces the deprecated `guides(fill = FALSE)` form.
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Figure 1 | The total cumulative injuries recorded for different weather event categories, calculated by summing the injuries for each incidence of a given weather event. Only the 10 weather events with the highest cumulative injuries are shown. Totals are plotted on a log10 scale.


# Sum the fatalities recorded for every event type.
events <- dplyr::summarise(
  dplyr::group_by(df, EVTYPE),
  total.deaths = sum(FATALITIES)
)
# Rank the event types from most to fewest deaths and keep the first ten.
events <- dplyr::arrange(events, dplyr::desc(total.deaths))
events <- events[seq_len(10), ]
print(events)
## Source: local data frame [10 x 2]
## 
##            EVTYPE total.deaths
##            (fctr)        (dbl)
## 1         TORNADO         5633
## 2  EXCESSIVE HEAT         1903
## 3     FLASH FLOOD          978
## 4            HEAT          937
## 5       LIGHTNING          816
## 6       TSTM WIND          504
## 7           FLOOD          470
## 8     RIP CURRENT          368
## 9       HIGH WIND          248
## 10      AVALANCHE          224
# Bar chart of the ten event types in `events`, one bar per event type, with
# the death totals plotted as log10 values.
p2 <- ggplot(events, aes(x = EVTYPE, y = log10(total.deaths), fill = EVTYPE))
p2 +
  geom_bar(stat = "identity", alpha = 0.7) +
  xlab("") +
  ylab("Log10 total deaths count") +
  ggtitle("Worst 10 weather events for deaths") +
  # The fill colour repeats the x-axis labels, so its legend is hidden.
  # "none" replaces the deprecated `guides(fill = FALSE)` form.
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Figure 2 | The total cumulative deaths recorded for different weather event categories, calculated by summing the fatalities for each incidence of a given weather event. Only the 10 weather events with the highest cumulative fatalities are shown. Totals are plotted on a log10 scale.


To estimate economic impact, we look at the estimated total combined cost of property damage and crop loss associated with different weather events, again giving the top ten.

# Sum the combined crop and property cost recorded for every event type.
events <- dplyr::summarise(
  dplyr::group_by(df, EVTYPE),
  cumulative.cost = sum(economic.cost)
)
# Rank the event types from highest to lowest cost and keep the first ten.
events <- dplyr::arrange(events, dplyr::desc(cumulative.cost))
events <- events[seq_len(10), ]
print(events)
## Source: local data frame [10 x 2]
## 
##               EVTYPE cumulative.cost
##               (fctr)           (dbl)
## 1              FLOOD    150319678250
## 2  HURRICANE/TYPHOON     71913712800
## 3            TORNADO     57362333884
## 4        STORM SURGE     43323541000
## 5               HAIL     18761221926
## 6        FLASH FLOOD     18243990872
## 7            DROUGHT     15018672000
## 8          HURRICANE     14610229010
## 9        RIVER FLOOD     10148404500
## 10         ICE STORM      8967041360
# Bar chart of the ten event types in `events`, one bar per event type, with
# the cumulative cost plotted on a linear scale.
p3 <- ggplot(events, aes(x = EVTYPE, y = cumulative.cost, fill = EVTYPE))
p3 +
  geom_bar(stat = "identity", alpha = 0.7) +
  xlab("") +
  ylab("Cumulative cost in property damage and crop loss") +
  ggtitle("Ten weather events with highest economic impact") +
  # The fill colour repeats the x-axis labels, so its legend is hidden.
  # "none" replaces the deprecated `guides(fill = FALSE)` form.
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Figure 3 | The total cumulative costs in USD recorded for different weather event categories, calculated by adding the property damage estimates and crop loss estimates for each incidence of each weather event. Only the 10 weather events with the highest costs are shown.