Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

In this paper, the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database is explored and analyzed. Fatalities, injuries, and property damage from major storms and weather events across the United States are estimated and summarized. Based on the analysis, the most devastating event types are presented at the end; these findings could help government or municipal managers prepare for severe weather events and prioritize resources among different types of events.

Data Processing

We load and preprocess the data, which yields a dataset with the four columns we are interested in: the event type, the fatalities, the injuries, and the economic loss.

# loading the data
library(utils)
# Read the bzip2-compressed storm CSV into a data frame.
storm <- read.csv(file = bzfile("repdata_data_StormData.csv.bz2"))

# Total fatalities per raw event type, largest total first.
sumFatal <- aggregate(FATALITIES ~ EVTYPE, data = storm, FUN = sum)
sumFatalOrder <- sumFatal[with(sumFatal, order(FATALITIES, decreasing = TRUE)), ]

# Total injuries per raw event type, largest total first.
sumInjur <- aggregate(INJURIES ~ EVTYPE, data = storm, FUN = sum)
sumInjurOrder <- sumInjur[with(sumInjur, order(INJURIES, decreasing = TRUE)), ]

# calculate and summarize the economic loss 
#' Translate damage-exponent codes into numeric powers of ten
#'
#' @param exp Vector of exponent codes (character, factor, or numeric), such
#'   as the values of `PROPDMGEXP` / `CROPDMGEXP`.
#' @return A numeric vector, the same length as `exp`, holding the power `p`
#'   to be used as `10^p`:
#'   * digits "0" to "8" map to that digit,
#'   * "H"/"h" -> 2, "K"/"k" -> 3, "M"/"m" -> 6, "B"/"b" -> 9,
#'   * blank or any other code -> 0 (a multiplier of 1, i.e. the amount is
#'     taken at face value),
#'   * `NA` stays `NA`.
DamExponent <- function(exp) {
  # Compare labels, not internal representations: as.character() makes factor
  # input behave like character input, and the result is always numeric so it
  # can be used directly as an exponent.
  code <- toupper(as.character(exp))
  letterPower <- c(H = 2, K = 3, M = 6, B = 9)

  # Everything not recognised below (including the blank code) keeps power 0.
  power <- numeric(length(code))

  isDigit <- code %in% as.character(0:8)
  power[isDigit] <- as.numeric(code[isDigit])

  isLetter <- code %in% names(letterPower)
  power[isLetter] <- unname(letterPower[code[isLetter]])

  # Keep missing codes missing rather than silently treating them as 10^0.
  power[is.na(exp)] <- NA_real_
  power
}

# Economic loss per record: property damage plus crop damage, each scaled by
# ten raised to the power derived from its exponent code.
propLoss <- storm$PROPDMG * 10^DamExponent(storm$PROPDMGEXP)
cropLoss <- storm$CROPDMG * 10^DamExponent(storm$CROPDMGEXP)
Dam <- propLoss + cropLoss

# Total economic loss per raw event type, largest total first.
DamFrame <- data.frame(EVTYPE = storm$EVTYPE, totalDam = Dam)
sumDam <- aggregate(totalDam ~ EVTYPE, data = DamFrame, FUN = sum)
sumDamOrder <- sumDam[with(sumDam, order(totalDam, decreasing = TRUE)), ]

# Join the three per-event summaries on the event type.
injurAndDam <- merge(sumInjurOrder, sumDamOrder, by = "EVTYPE")
sumTotal <- merge(sumFatalOrder, injurAndDam, by = "EVTYPE")

# Drop event types that caused no loss at all: no fatalities, no injuries and
# no economic damage.
noLoss <- sumTotal$FATALITIES == 0 & sumTotal$INJURIES == 0 & sumTotal$totalDam == 0
sumTotal <- sumTotal[!noLoss, ]

Now let’s have a look at our rough dataset.

##                   EVTYPE FATALITIES INJURIES totalDam
## 1     HIGH SURF ADVISORY          0        0   200000
## 3            FLASH FLOOD          0        0    50000
## 5              TSTM WIND          0        0  8100000
## 6        TSTM WIND (G45)          0        0     8000
## 9                      ?          0        0     5000
## 14   AGRICULTURAL FREEZE          0        0 28820000

Event type cleaning

Since the event types are recorded inconsistently, we need to tidy this column so that it is consistent with the 48 standard event types given by the National Weather Service Instruction. Here is how we are going to do it.

First, replace every event type containing “TSTM WIND” with “THUNDERSTORM WIND”, since the abbreviation is very unlikely to be matched automatically. Then use approximate string matching to replace each event type in the dataset with the closest standard name. Here amatch (from the stringdist package) with a maximum distance of 14 is used for the approximate matching. Event types that cannot be matched are categorized as “OTHERS”.

library(stringdist)
# The 48 standard event names that raw event types are matched against
# (kept in alphabetical order, grouped here by initial letter).
classifiedEvents <- c(
  "Astronomical Low Tide", "Avalanche",
  "Blizzard",
  "Coastal Flood", "Cold/Wind Chill",
  "Debris Flow", "Dense Fog", "Dense Smoke", "Drought", "Dust Devil",
  "Dust Storm",
  "Excessive Heat", "Extreme Cold/Wind Chill",
  "Flash Flood", "Flood", "Freezing Fog", "Frost/Freeze", "Funnel Cloud",
  "Hail", "Heat", "Heavy Rain", "Heavy Snow", "High Surf", "High Wind",
  "Hurricane (Typhoon)",
  "Ice Storm",
  "Lake-Effect Snow", "Lakeshore Flood", "Lightning",
  "Marine Hail", "Marine High Wind", "Marine Strong Wind",
  "Marine Thunderstorm Wind",
  "Rip Current",
  "Seiche", "Sleet", "Storm Surge/Tide", "Strong Wind",
  "Thunderstorm Wind", "Tornado", "Tropical Depression", "Tropical Storm",
  "Tsunami",
  "Volcanic Ash",
  "Waterspout", "Wildfire", "Winter Storm", "Winter Weather"
)

# Upper-case the raw labels so they can be compared with the upper-cased
# standard names.
sumTotal$EVTYPE <- toupper(sumTotal$EVTYPE)

# Relabel every event type containing "TSTM WIND" as thunderstorm wind.
# The replacement is written in upper case on purpose: it is later compared
# against upper-cased standard names, and a mixed-case label would differ from
# "THUNDERSTORM WIND" in almost every character.
# NOTE(review): this also relabels any marine variant containing "TSTM WIND"
# as plain thunderstorm wind - confirm that is intended.
isTstmWind <- grepl("TSTM WIND", sumTotal$EVTYPE)
sumTotal$EVTYPE[isTstmWind] <- "THUNDERSTORM WIND"

#' Match strings to their closest entry in a set of standard names
#'
#' Thin wrapper around `amatch()` (stringdist package, loaded by this script).
#'
#' @param string Character vector of names to look up.
#' @param stringVector Character vector of candidate standard names.
#' @param maxDist Largest string distance that still counts as a match,
#'   passed on to `amatch()`. Defaults to 14, the value this analysis uses.
#' @return Character vector the same length as `string`: the matched element
#'   of `stringVector`, or `NA` where `amatch()` found no candidate within
#'   `maxDist`.
closestMatch <- function(string, stringVector, maxDist = 14) {
  matchIndex <- amatch(string, stringVector, maxDist = maxDist)
  stringVector[matchIndex]
}

# Standard event name for every row of sumTotal, computed in one vectorized
# call instead of growing a vector inside a loop. Rows with no match within
# the allowed distance are collected under "OTHERS".
stdName <- closestMatch(sumTotal$EVTYPE, toupper(classifiedEvents))
Evtype <- stdName
Evtype[is.na(Evtype)] <- "OTHERS"

# Replace the raw labels with the standardized ones, then rebuild the
# per-event totals that the table and the plots are based on.
sumTotal$EVTYPE <- Evtype

# Fatalities per standardized event type, largest total first.
tidyFatal <- aggregate(FATALITIES ~ EVTYPE, data = sumTotal, FUN = sum)
tidyFatalOrder <- tidyFatal[with(tidyFatal, order(FATALITIES, decreasing = TRUE)), ]

# Injuries per standardized event type, largest total first.
tidyInjur <- aggregate(INJURIES ~ EVTYPE, data = sumTotal, FUN = sum)
tidyInjurOrder <- tidyInjur[with(tidyInjur, order(INJURIES, decreasing = TRUE)), ]

# Economic damage per standardized event type, largest total first.
tidyDam <- aggregate(totalDam ~ EVTYPE, data = sumTotal, FUN = sum)
tidyDamOrder <- tidyDam[with(tidyDam, order(totalDam, decreasing = TRUE)), ]

# One row per standardized event type with all three totals side by side.
tidyInjurAndDam <- merge(tidyInjurOrder, tidyDamOrder, by = "EVTYPE")
tidySumTotal <- merge(tidyFatalOrder, tidyInjurAndDam, by = "EVTYPE")

Results

Then we have a dataset with only 49 observations (48 standard event types plus others)

library(knitr)
# Render the per-event totals as a table, rounding to two digits.
knitr::kable(x = tidySumTotal, digits = 2)
EVTYPE FATALITIES INJURIES totalDam
ASTRONOMICAL LOW TIDE 0 0 9.745000e+06
AVALANCHE 226 194 8.721800e+06
BLIZZARD 101 805 7.713939e+08
COASTAL FLOOD 40 89 5.639656e+08
COLD/WIND CHILL 96 16 2.595000e+06
DEBRIS FLOW 1 0 9.805000e+07
DENSE FOG 19 342 9.774000e+06
DENSE SMOKE 0 0 1.000500e+05
DROUGHT 16 52 1.502575e+10
DUST DEVIL 2 43 7.431300e+05
DUST STORM 22 440 9.799100e+06
EXCESSIVE HEAT 2020 6703 6.492115e+08
EXTREME COLD/WIND CHILL 142 29 2.650300e+07
FLASH FLOOD 1041 1805 6.821729e+13
FLOOD 613 7915 2.103644e+11
FREEZING FOG 12 39 1.355450e+07
FROST/FREEZE 0 0 1.456816e+09
FUNNEL CLOUD 162 234 1.380905e+09
HAIL 15 1362 3.187892e+11
HEAT 1116 2533 4.213605e+08
HEAVY RAIN 114 272 1.562002e+09
HEAVY SNOW 159 1091 1.808255e+10
HIGH SURF 159 249 1.015555e+08
HIGH WIND 304 1520 6.703149e+09
HURRICANE (TYPHOON) 72 1279 7.550117e+10
ICE STORM 96 2022 9.215100e+09
LAKE-EFFECT SNOW 0 0 4.036200e+07
LAKESHORE FLOOD 0 0 7.540000e+06
LIGHTNING 818 5233 1.729630e+11
MARINE HAIL 8 18 7.091700e+07
MARINE HIGH WIND 5 1 1.112970e+08
MARINE STRONG WIND 15 22 2.118330e+06
MARINE THUNDERSTORM WIND 10 27 1.254731e+09
OTHERS 32 11 7.271000e+07
RIP CURRENT 577 529 1.630000e+05
SEICHE 71 185 1.462548e+10
SLEET 5 1 4.584850e+08
STORM SURGE/TIDE 24 43 4.796583e+10
STRONG WIND 111 302 2.515777e+08
THUNDERSTORM WIND 746 9532 2.087639e+13
TORNADO 5633 91364 1.079371e+12
TROPICAL DEPRESSION 0 0 1.737000e+06
TROPICAL STORM 66 383 8.409292e+09
TSUNAMI 33 129 1.441320e+08
VOLCANIC ASH 0 0 5.000000e+05
WATERSPOUT 13 72 6.077720e+07
WILDFIRE 137 1687 9.240155e+09
WINTER STORM 218 1415 6.717604e+09
WINTER WEATHER 75 540 2.542298e+09
library(grid)
library(gridBase)

# Ten event types with the most fatalities (tidyFatalOrder is already sorted
# in decreasing order). head() returns fewer rows instead of NA padding when
# fewer than ten event types exist.
topTenFatal <- head(tidyFatalOrder, 10)
label <- as.vector(topTenFatal$EVTYPE)

# Plot the fatality counts themselves so each bar's height is the number of
# deaths. The default x axis is suppressed because rotated labels are drawn
# with grid below.
bp <- barplot(topTenFatal$FATALITIES, xaxt = "n", col = "blue",
              ylab = "number of death",
              main = "Ten events that cause most population death")

# Overlay grid viewports on the base plot and draw the event names, rotated
# by 20 degrees, at the bar midpoints returned by barplot().
vps <- baseViewports()
pushViewport(vps$inner, vps$figure, vps$plot)
grid.text(label, x = unit(bp, "native"), y = unit(-0.5, "lines"),
          just = "right", rot = 20, gp = gpar(cex = 0.9))
popViewport(3)

Figure 1. This figure shows the ten severe weather event types that caused the most deaths from 1950 to 2011. As can be seen, tornado, excessive heat, and heat are the three leading causes of fatalities.

Figure 2. This figure shows the ten severe weather event types that caused the most injuries across the United States. Tornado is again the leading cause, followed by thunderstorm wind and flood.

library(ggplot2)

# Ten event types with the largest economic damage (tidyDamOrder is already
# sorted in decreasing order). head() returns fewer rows instead of NA padding
# when fewer than ten event types exist.
topTenDam <- head(tidyDamOrder, 10)

# Order the x axis by decreasing damage. Using the damage itself as the sort
# key keeps the ordering correct regardless of the row order of topTenDam.
topTenDam <- transform(topTenDam, EVTYPE = reorder(EVTYPE, -totalDam))

# Bar chart of total damage on a log10 y axis, with the visible range set
# through coord_cartesian() and the x labels rotated by 20 degrees.
ggplot(data = topTenDam, aes(x = EVTYPE, y = totalDam)) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("Economic Damage (USD)") +
  scale_y_log10() +
  coord_cartesian(ylim = c(1e+10, 8e+13)) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1)) +
  ggtitle("Top ten economically devastating events")

Figure 3. Crop damage and property damage are summed here as economic damage. The log-scale plot shows that flash flood and thunderstorm wind have an impact on the economy that is orders of magnitude larger than that of other types of weather events.