Health and Economic Consequences in the U.S. Caused by Storms and Weather Events

Synopsis

The NOAA Storm database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. This analysis addresses two questions: (1) Which types of storm and weather events are most harmful to the population health of the United States? (2) Which types of storm and weather events have the greatest economic consequences?

Data Processing

Loading libraries and the data

library(data.table)
library(ggplot2)

# Download the compressed NOAA storm data only when no local copy exists yet.
storm_archive <- "stormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!file.exists(storm_archive)) {
  download.file(storm_url, destfile = storm_archive, method = "curl")
}

# Read the archive into a data frame, then wrap it in a data.table so the
# grouped and by-reference operations used in this report are available.
StormData <- read.csv(storm_archive)
DT <- data.table(StormData)

Processing the data

Definition of variables that will be used:

  • EVTYPE: Event Type (Tornados, Flood, ….)

  • FATALITIES: Number of Fatalities

  • INJURIES: Number of Injuries

  • PROPDMG: Property Damage

  • PROPDMGEXP: Units for Property Damage (magnitudes - H,K,M,B which means Hundreds, Thousands, Millions and Billions respectively.)

  • CROPDMG: Crop Damage

  • CROPDMGEXP: Units for Crop Damage (magnitudes - H,K,M,B which means Hundreds, Thousands, Millions and Billions respectively.)

# Create Year variable.
# BGN_DATE is parsed as a calendar date: as.Date() matches the "%m/%d/%Y"
# prefix and ignores the trailing time text. Parsing it as a local-time
# POSIXct instead would make the result depend on the time zone of the machine
# running the report (midnight does not exist on some DST-change dates in some
# zones, which yields NA).
DT[, YEAR := as.integer(format(as.Date(BGN_DATE, format = "%m/%d/%Y"), "%Y"))]

# Subset the storm database to the columns used in this analysis
analysis_cols <- c(
  "STATE", "YEAR", "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
DT_subset <- DT[, analysis_cols, with = FALSE]

dim(DT_subset)
## [1] 902297      9
names(DT_subset)
## [1] "STATE"      "YEAR"       "EVTYPE"     "FATALITIES" "INJURIES"  
## [6] "PROPDMG"    "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP"
str(DT_subset)
## Classes 'data.table' and 'data.frame':   902297 obs. of  9 variables:
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ YEAR      : int  1950 1950 1951 1951 1951 1951 1951 1952 1952 1952 ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  - attr(*, ".internal.selfref")=<externalptr>
#Create & Map Property Damage Units
unique(DT_subset$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Lookup table translating each PROPDMGEXP code into a numeric multiplier.
# The two vectors are parallel: entry i of prop_codes pairs with entry i of
# prop_multipliers. Letters map to their magnitude (h/H = 1e2, K = 1e3,
# m/M = 1e6, B = 1e9), a digit d maps to 10^d, blank maps to 1, and the
# symbols "+", "-" and "?" map to 0.
prop_codes <- c(
  "K", "M", "",  "B", "m",
  "+", "0", "5", "6", "?",
  "4", "2", "3", "h", "7",
  "H", "-", "1", "8"
)
prop_multipliers <- c(
  1e3, 1e6, 1,   1e9, 1e6,
  0,   1,   1e5, 1e6, 0,
  1e4, 1e2, 1e3, 1e2, 1e7,
  1e2, 0,   10,  1e8
)
PropertyDamageUnits <- data.table(
  PROPDMGEXP = prop_codes,
  PROPVALUE = prop_multipliers
)

str(PropertyDamageUnits)
## Classes 'data.table' and 'data.frame':   19 obs. of  2 variables:
##  $ PROPDMGEXP: chr  "K" "M" "" "B" ...
##  $ PROPVALUE : num  1e+03 1e+06 1e+00 1e+09 1e+06 0e+00 1e+00 1e+05 1e+06 0e+00 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Look up the PROPVALUE multiplier for each storm record by its PROPDMGEXP code
DT_subset <- PropertyDamageUnits[DT_subset, on = "PROPDMGEXP"]

#Create & Map Crop Damage Units
unique(DT_subset$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"
# Lookup table translating each CROPDMGEXP code into a numeric multiplier.
# The two vectors are parallel: entry i of crop_codes pairs with entry i of
# crop_multipliers. k/K = 1e3, m/M = 1e6, B = 1e9, a digit d maps to 10^d,
# blank maps to 1, and "?" maps to 0.
crop_codes <- c(
  "",  "M", "K",
  "m", "B", "?",
  "0", "k", "2"
)
crop_multipliers <- c(
  1,   1e6, 1e3,
  1e6, 1e9, 0,
  1,   1e3, 1e2
)
CropDamageUnits <- data.table(
  CROPDMGEXP = crop_codes,
  CROPVALUE = crop_multipliers
)

# Look up the CROPVALUE multiplier for each storm record by its CROPDMGEXP code
DT_subset <- CropDamageUnits[DT_subset, on = "CROPDMGEXP"]
str(DT_subset)
## Classes 'data.table' and 'data.frame':   902297 obs. of  11 variables:
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ CROPVALUE : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ PROPVALUE : num  1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ YEAR      : int  1950 1950 1951 1951 1951 1951 1951 1952 1952 1952 ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Scale each damage figure by its unit multiplier and express the result in
# billions, so property and crop damage share one unit.
DT_subset[, PROPDMGTOTAL := (PROPDMG * PROPVALUE) / 1e9]
DT_subset[, CROPDMGTOTAL := (CROPDMG * CROPVALUE) / 1e9]

Results

Fatalities and Injuries by TOP 10 Events

Which type of storm and weather events are most harmful to the population health of the United States?

# Top 10 event types ranked by total fatalities over the whole dataset
fatalities_per_event <- DT_subset[, .(total_fatalities = sum(FATALITIES)), by = EVTYPE]
Fatalities_by_event <- fatalities_per_event[order(-total_fatalities)][1:10]

# Freeze the factor levels in ranked order so the bars are drawn largest first
Fatalities_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]

ggplot(Fatalities_by_event, aes(x = EVTYPE, y = total_fatalities)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Number of fatalities by top 10 Weather Events"
  )

# Yearly fatalities for the 10 event types with the most fatalities overall
fatality_totals <- DT_subset[, .(total = sum(FATALITIES, na.rm = TRUE)), by = EVTYPE]
top10 <- fatality_totals[order(-total)][1:10, EVTYPE]

# One row per (year, event type) for those event types only
ts_top10 <- DT_subset[
  EVTYPE %in% top10,
  .(fatalities = sum(FATALITIES, na.rm = TRUE)),
  by = .(YEAR, EVTYPE)
]

ggplot(ts_top10, aes(x = YEAR, y = fatalities, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  ggtitle("Fatalities over time for Top 10 Event Types (overall)") +
  xlab("Year") +
  ylab("Fatalities") +
  labs(color = "Event") +
  theme_minimal()

# Number of fatalities by top 10 weather events over time since 1993.
# Uses the same yearly series (ts_top10), keeping only the years from 1993 on.
ts_top10lim <- ts_top10[YEAR >= 1993]

ggplot(ts_top10lim, aes(YEAR, fatalities, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(
    title = "Fatalities over time for Top 10 Event Types since 1993",
    x = "Year", y = "Fatalities", color = "Event"
  ) +
  theme_minimal()

# Top 10 event types ranked by total fatalities, counting only 1993 onwards
fatalities_since_1993 <- DT_subset[
  YEAR >= 1993,
  .(total_fatalities = sum(FATALITIES)),
  by = EVTYPE
]
Fatalities_by_event2 <- fatalities_since_1993[order(-total_fatalities)][1:10]

# Freeze the factor levels in ranked order so the bars are drawn largest first
Fatalities_by_event2[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]

ggplot(Fatalities_by_event2, aes(x = EVTYPE, y = total_fatalities)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Number of fatalities by top 10 Weather Events, since 1993"
  )

# Top 10 event types ranked by total injuries over the whole dataset
injuries_per_event <- DT_subset[, .(total_injuries = sum(INJURIES)), by = EVTYPE]
Injuries_by_event <- injuries_per_event[order(-total_injuries)][1:10]

# Freeze the factor levels in ranked order so the bars are drawn largest first
Injuries_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]

ggplot(Injuries_by_event, aes(x = EVTYPE, y = total_injuries)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Injuries",
    title = "Number of injuries by top 10 Weather Events"
  )

# Number of injuries by top 10 weather events over time.
# top10inj holds the 10 event types with the most injuries overall.
top10inj <- DT_subset[, .(total = sum(INJURIES, na.rm = TRUE)),
                      by = EVTYPE][order(-total)][1:10, EVTYPE]

# Yearly injuries for those event types. The filter must use top10inj (ranked
# by injuries), not the fatality-ranked top10 list, otherwise the chart shows
# the wrong set of event types.
inj_top10 <- DT_subset[EVTYPE %in% top10inj,
  .(injuries = sum(INJURIES, na.rm = TRUE)),
  by = .(YEAR, EVTYPE)
]

ggplot(inj_top10, aes(YEAR, injuries, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(
    title = "Injuries over time for Top 10 Event Types (overall)",
    x = "Year", y = "Injuries", color = "Event"
  ) +
  theme_minimal()

# Yearly injury series for the top-10 event types, keeping 1993 onwards only
ing_top10lim <- inj_top10[YEAR >= 1993]

ggplot(ing_top10lim, aes(x = YEAR, y = injuries, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  ggtitle("Injuries over time for Top 10 Event Types (overall) since 1993") +
  xlab("Year") +
  ylab("Injuries") +
  labs(color = "Event") +
  theme_minimal()

# Number of injuries by top 10 weather events, since 1993.
# Only records with YEAR >= 1993 are counted, so the ranking can differ from
# the all-years chart.
Injuries_by_event2 <- DT_subset[YEAR >= 1993, .(total_injuries = sum(INJURIES)),
                                by = EVTYPE][order(-total_injuries)][1:10]

# Freeze the factor levels in ranked order so the bars are drawn largest first
Injuries_by_event2[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]

# The title names the 1993 cut-off so this chart is distinguishable from the
# all-years injuries chart.
ggplot(Injuries_by_event2, aes(x = EVTYPE, y = total_injuries)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab("Event Type") + ylab("Injuries") +
    ggtitle("Number of injuries by top 10 Weather Events, since 1993")

Conclusion: The charts show that over the entire dataset, the event type with largest damage to population health (both in fatalities and injuries) is Tornado.

However, it is important to note that prior to 1993, only Tornado and TSTM Wind were recorded as events in the dataset.

Looking at more recent data (from 1993 onwards), tornadoes are still the event type causing the most injuries; however, excessive heat is the event type causing the most fatalities (although its impact has varied from year to year).

Property and Crop Damage by TOP 10 Events

Which type of storm and weather events have the greatest economic consequences?

# Top 10 event types ranked by total property damage
property_damage_per_event <- DT_subset[, .(PROPDMGTOTAL = sum(PROPDMGTOTAL)), by = EVTYPE]
PropDM_by_event <- property_damage_per_event[order(-PROPDMGTOTAL)][1:10]

# Freeze the factor levels in ranked order so the bars are drawn largest first
PropDM_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]

ggplot(PropDM_by_event, aes(x = EVTYPE, y = PROPDMGTOTAL)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Damages ($b)",
    title = "Total Property Damage by top 10 Weather Events"
  )

# Top 10 event types ranked by total crop damage
crop_damage_per_event <- DT_subset[, .(CROPDMGTOTAL = sum(CROPDMGTOTAL)), by = EVTYPE]
CropDM_by_event <- crop_damage_per_event[order(-CROPDMGTOTAL)][1:10]

# Freeze the factor levels in ranked order so the bars are drawn largest first
CropDM_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]

ggplot(CropDM_by_event, aes(x = EVTYPE, y = CROPDMGTOTAL)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Damages ($b)",
    title = "Total Crop Damage by top 10 Weather Events"
  )

# Combined property and crop damage for the 10 costliest event types
damage_per_event <- DT_subset[, .(DMGTOTAL = sum(PROPDMGTOTAL, CROPDMGTOTAL)), by = EVTYPE]
Damage_by_event <- damage_per_event[order(-DMGTOTAL)][1:10]

# Print the ranked table
Damage_by_event[order(-DMGTOTAL)]
##                EVTYPE   DMGTOTAL
##                <char>      <num>
##  1:             FLOOD 150.319678
##  2: HURRICANE/TYPHOON  71.913713
##  3:           TORNADO  57.362334
##  4:       STORM SURGE  43.323541
##  5:              HAIL  18.761222
##  6:       FLASH FLOOD  18.243991
##  7:           DROUGHT  15.018672
##  8:         HURRICANE  14.610229
##  9:       RIVER FLOOD  10.148404
## 10:         ICE STORM   8.967041
# Fix the factor levels in descending order of total damage so the bars are
# drawn largest first, like the other top-10 charts in this report. With a
# plain character column ggplot would order the event types alphabetically.
Damage_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE[order(-DMGTOTAL)])]

ggplot(Damage_by_event, aes(x = EVTYPE, y = DMGTOTAL)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab("Event Type") + ylab("Damages ($b)") +
    ggtitle("Total Property and Crop Damage by top 10 Weather Events")

Conclusion: The charts show that the event type with the largest economic consequences (including both property and crop damage) is floods, followed by Hurricane/Typhoon, Tornado and Storm Surge. For crop damage specifically, the event type with the largest economic consequence is drought; however, we note that the economic impact of crop damage is smaller than that of property damage.