Synopsis

Over recent decades, severe weather events have posed an increasing threat to public safety and a costly burden on the economy. Their potential consequences should therefore be studied in order to better mitigate the inherent risk. The U.S. National Oceanic and Atmospheric Administration (NOAA) maintains a catalog that records the characteristics of major storms and weather events in the United States from 1950 to November 2011. These characteristics include when and where the events occurred, as well as estimates of any fatalities, injuries, property damage and crop damage. The aim of the present analysis is to identify the event types that are most harmful to population health and to the economy across the United States.

Data processing

Before carrying out such an analysis, the homogeneity and completeness of the catalog should be assessed to ensure the reliability of the results. Unfortunately, no further details about the catalog are available, so in this study I have to use it as it is.

library(utils)
library(ggplot2)
library(gridExtra)
library(scales)
# Download the compressed storm catalog only when it is not already present,
# so re-running the analysis does not repeat the transfer. mode = "wb" forces
# a binary transfer; a text-mode transfer would corrupt the bz2 archive on
# Windows.
if (!file.exists("StormData.csv.bz2")) {
    download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "StormData.csv.bz2", mode = "wb")
}
# stringsAsFactors = TRUE is set explicitly because the EVTYPE clean-up below
# edits factor levels; since R 4.0 read.csv() would otherwise return the text
# columns as plain character vectors.
StormData <- read.csv("StormData.csv.bz2", stringsAsFactors = TRUE)
# Number of rows (events) and columns (variables) that were read.
dim(StormData)
## [1] 902297     37
# List the variables available in the catalog.
colnames(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Correct the data

Many distinct values of the variable EVTYPE (which records the type of weather event) describe the same kind of event and can be merged into one. For example, for fires:

# Distinct event labels that mention "fire", whatever their letter case.
fire_rows <- grepl("fire", StormData$EVTYPE, ignore.case = TRUE)
unique(StormData$EVTYPE[fire_rows])
##  [1] WILD FIRES        WILDFIRE          WILD/FOREST FIRE 
##  [4] GRASS FIRES       LIGHTNING FIRE    FOREST FIRES     
##  [7] WILDFIRES         WILD/FOREST FIRES BRUSH FIRES      
## [10] BRUSH FIRE        RED FLAG FIRE WX 
## 985 Levels:    HIGH SURF ADVISORY  COASTAL FLOOD ... WND

The column should be updated for the events: wind, heat, fire, flood and hurricane:

# Keywords whose matching event labels are each collapsed into one category.
elements <- c("wind", "heat", "fire", "flood", "hurricane")
# Work on a copy so the catalog as read stays available unchanged.
StormDataCorr <- StormData
# Register "FIRE" as an extra level so that it can be assigned below.
levels(StormDataCorr$EVTYPE) <- c(levels(StormDataCorr$EVTYPE), "FIRE")
# Keywords are applied in the order given: a label already rewritten by an
# earlier keyword is no longer matched by the later ones.
for (keyword in elements) {
    matched <- grepl(keyword, StormDataCorr$EVTYPE, ignore.case = TRUE)
    StormDataCorr$EVTYPE[matched] <- toupper(keyword)
}
# Check: every fire-related label should now be the single value FIRE.
unique(StormDataCorr$EVTYPE[grepl("fire", StormDataCorr$EVTYPE, ignore.case = TRUE)])
## [1] FIRE
## 986 Levels:    HIGH SURF ADVISORY  COASTAL FLOOD ... FIRE

Compute the impact on population health (fatalities and injuries)

# Total one numeric column of `data` per event type.
#
# data:  data frame holding an EVTYPE column (defaults to StormDataCorr).
# field: name of the column to total, e.g. "FATALITIES".
#
# Returns a data frame with columns EVTYPE and x (the per-event total),
# restricted to totals above zero and sorted from largest to smallest.
AggreFatInj <- function(data = StormDataCorr, field) {
    totals <- aggregate(data[,field], by = list(EVTYPE = data$EVTYPE), FUN = sum)
    # which() keeps only strictly positive totals and also discards NA totals.
    positive <- totals[which(totals$x > 0), ]
    positive[order(-positive$x), ]
}

Compute the economic impact (property and crop damage)

Before computing the actual amount of property and crop damage, we should take into account the appropriate exponents (the magnitude codes stored alongside each damage figure). First, I set up a dictionary:

# Lookup table from damage-exponent code (column "exp") to its multiplier
# (column "value"). cbind() on mixed inputs yields a character matrix, so the
# multipliers are stored as text and converted back to numbers when used.
# NOTE(review): the blank code is stored as a single space " "; confirm that
# this is how an empty exponent actually appears in the data.
exponent <- cbind(
    exp = c("H", "h", "K", "k", "M", "m", "B", "b", " ", as.character(0:8)),
    value = c(rep(c(10^2, 10^3, 10^6, 10^9), each = 2), 1, rep(10, 9))
)

and then I compute the actual impact:

# Total a damage column per event type after applying its exponent code.
#
# data:     data frame holding an EVTYPE column (defaults to StormDataCorr).
# field:    name of the numeric damage column, e.g. "PROPDMG".
# fieldExp: name of the column holding the exponent code for `field`,
#           e.g. "PROPDMGEXP".
#
# Uses the `exponent` lookup table defined earlier in this file (column 1:
# code, column 2: multiplier). Rows whose code is not in the table get an NA
# amount and are left out of the totals (na.rm = TRUE).
#
# Returns a data frame with columns EVTYPE and x (the total amount),
# restricted to totals above zero and sorted from largest to smallest.
aggrePropCrop <- function(data = StormDataCorr, field, fieldExp) {
    # Look the codes up in the code column only. Matching against the whole
    # table would also search the multiplier column, and a hit there indexes
    # past the last row of the table.
    codes <- as.character(data[, fieldExp])
    multiplier <- as.numeric(exponent[match(codes, exponent[, 1]), 2])
    # Keep the amounts numeric instead of passing them through a character
    # matrix and parsing them back.
    amount <- as.numeric(data[, field]) * multiplier
    aggreDMG <- aggregate(amount, by = list(EVTYPE = as.character(data$EVTYPE)), FUN = sum, na.rm = TRUE)
    aggreOrd <- aggreDMG[order(-aggreDMG$x), ]
    aggreOrd[which(aggreOrd$x > 0), ]
}

Results

Fatalities:

# Fatality totals per event type; show the ten largest.
aggreFatOrd <- AggreFatInj(data = StormDataCorr, field = "FATALITIES")
head(aggreFatOrd, n = 10)
##           EVTYPE    x
## 527      TORNADO 5633
## 177         HEAT 3138
## 109        FLOOD 1525
## 607         WIND 1451
## 286    LIGHTNING  816
## 379  RIP CURRENT  368
## 14     AVALANCHE  224
## 609 WINTER STORM  206
## 380 RIP CURRENTS  204
## 102 EXTREME COLD  160

Injuries:

# Injury totals per event type; show the ten largest.
aggreInjOrd <- AggreFatInj(data = StormDataCorr, field = "INJURIES")
head(aggreInjOrd, n = 10)
##           EVTYPE     x
## 527      TORNADO 91346
## 607         WIND 11498
## 177         HEAT  9224
## 109        FLOOD  8604
## 286    LIGHTNING  5230
## 254    ICE STORM  1975
## 620         FIRE  1608
## 149         HAIL  1361
## 240    HURRICANE  1328
## 609 WINTER STORM  1321
# Bar chart of the first ten rows of `totals` (columns EVTYPE and x), with the
# bars ordered by decreasing total and a centred title.
health_barplot <- function(totals, title, ylab) {
    ggplot(head(totals, 10), aes(x = reorder(EVTYPE, -x), y = x)) +
        geom_bar(stat = "identity", width = 0.5) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1),
              plot.title = element_text(hjust = 0.5)) +
        labs(title = title, x = "Severe weather elements", y = ylab)
}

FatPlot <- health_barplot(aggreFatOrd, "Total fatalities by severe weather\n events in the US", "Number of fatalities")
InjPlot <- health_barplot(aggreInjOrd, "Total Injuries by severe weather\n events in the US", "Number of Injuries")

# Draw the two charts side by side.
grid.arrange(FatPlot, InjPlot, ncol = 2)

1st Answer:

From the tables and plots it appears clearly that TORNADOES are the most harmful with respect to the population health.

Property damage:

# Property damage totals per event type; show the ten largest.
aggrePropOrd <- aggrePropCrop(data = StormDataCorr, field = "PROPDMG", fieldExp = "PROPDMGEXP")
head(aggrePropOrd, n = 10)
##               EVTYPE            x
## 110            FLOOD 167523218973
## 241        HURRICANE  84656180010
## 528          TORNADO  56937162837
## 448      STORM SURGE  43323536000
## 608             WIND  17742639462
## 150             HAIL  15732269877
## 106             FIRE   8501628500
## 541   TROPICAL STORM   7703890550
## 610     WINTER STORM   6688497260
## 449 STORM SURGE/TIDE   4641188000

Crop damage:

# Crop damage totals per event type; show the ten largest.
aggreCropOrd <- aggrePropCrop(data = StormDataCorr, field = "CROPDMG", fieldExp = "CROPDMGEXP")
head(aggreCropOrd, n = 10)
##           EVTYPE           x
## 64       DROUGHT 13972566000
## 110        FLOOD 12267259100
## 241    HURRICANE  5505292800
## 255    ICE STORM  5022113500
## 150         HAIL  3025954650
## 608         WIND  2159305250
## 102 EXTREME COLD  1292973000
## 134 FROST/FREEZE  1094086000
## 178         HEAT   904469280
## 186   HEAVY RAIN   733399800
# Bar chart of the first ten rows of `totals` (columns EVTYPE and x), with the
# bars ordered by decreasing total, a centred size-12 title and
# comma-separated y-axis labels.
damage_barplot <- function(totals, title, ylab) {
    ggplot(head(totals, 10), aes(x = reorder(EVTYPE, -x), y = x)) +
        geom_bar(stat = "identity", width = 0.5) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1),
              plot.title = element_text(size = 12, hjust = 0.5)) +
        scale_y_continuous(labels = comma) +
        labs(title = title, x = "Severe weather elements", y = ylab)
}

PropPlot <- damage_barplot(aggrePropOrd, "Total amount of property damages\n by severe weather events in the US", "Amount of property damages")
CropPlot <- damage_barplot(aggreCropOrd, "Total amount of crop damages\n by severe weather events in the US", "Amount of crop damages")

# Draw the two charts side by side.
grid.arrange(PropPlot, CropPlot, ncol = 2)

2nd answer

From the tables and plots, it appears clearly that FLOODS cause the greatest property damage and DROUGHTS the greatest crop damage; these two event types therefore have the greatest economic consequences.