Overview

The NOAA Storm database, which includes registered severe weather events and their effects in the United States from 1950 to 2011, is explored. The impact of these events on human health (fatalities and injuries caused) and on the economy (property and crop losses caused) is analyzed.

Data Processing

Downloading and reading the original data set.

# Download the compressed NOAA storm data set into ./repdata (once) and load it.
data_dir <- "repdata"
if (!file.exists(data_dir)) {
        dir.create(data_dir)
}

fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Store the file inside the data directory under a plain, non-URL-encoded name
data_file <- file.path(data_dir, "StormData.csv.bz2")

# Skip the download when a local copy already exists
if (!file.exists(data_file)) {
        download.file(fileUrl, destfile = data_file, method = "curl")
}

# read.csv() reads the bz2-compressed file directly
StormData <- read.csv(data_file, stringsAsFactors = FALSE)

Loading the packages to be used.

library(lubridate, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
library(gridExtra, warn.conflicts = FALSE)

The data set includes over 900000 records with information for 37 variables.

# Number of rows and columns in the raw data set
dim(StormData)
## [1] 902297     37
# Names of all variables available in the raw data set
colnames(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

The event date, event type, deaths caused, injuries caused, and the property and crop damage values with their multiplier codes are selected for further analysis.

# Keep only the variables needed for the health and economic analysis
keep_cols <- c("BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
               "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
Storms <- StormData[, keep_cols]

Setting the event date field as date.

# Parse the "month/day/year hour:minute:second" text, then keep only the date
parsed_dates <- strptime(Storms$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
Storms$BGN_DATE <- as.Date(parsed_dates)

Adding property damage and crop damage variables in USD, computed from the corresponding values and multiplier codes.

# Multipliers for the damage exponent codes: K = thousands, M = millions,
# B = billions. Any other code (blank, digits, symbols, ...) leaves the
# value unscaled, as before.
exp_multiplier <- c(K = 10^3, M = 10^6, B = 10^9)

# Scale a damage value vector by its exponent code vector.
# Codes are upper-cased first so that lower-case variants ("k", "m", "b")
# are scaled like their upper-case counterparts instead of being treated
# as multiplier 1.
damage_usd <- function(value, exp_code) {
        multiplier <- unname(exp_multiplier[toupper(exp_code)])
        # Unrecognised or missing codes index to NA: fall back to 1
        multiplier[is.na(multiplier)] <- 1
        value * multiplier
}

Storms <- mutate(Storms,
                 PROPDMGUSD = damage_usd(PROPDMG, PROPDMGEXP),
                 CROPDMGUSD = damage_usd(CROPDMG, CROPDMGEXP))

There are 985 labels for the event type field.

# Count the distinct raw event type labels
n_distinct(Storms$EVTYPE)
## [1] 985

Adding a new event code variable to consolidate event labels from 985 to 30 designated plus other event codes.

# Consolidate the raw EVTYPE labels into 30 event codes plus "Other".
#
# Each entry maps an event code (name) to a case-insensitive regular
# expression (value) matched against EVTYPE. Rules are applied in the order
# listed, so when a label matches several patterns the LAST matching rule
# decides its code. Do not reorder without re-checking the results.
#
# NOTE(review): because of this ordering, "FUNNEL" in the Hurricane rule is
# always overridden by the later Tornado rule, and any label containing
# "WAVE" (which would include e.g. "HEAT WAVE") ends up coded as "Tide"
# rather than "Heat" - confirm whether that is intended.
event_patterns <- c(
        "Avalanche"           = "AVALANCHE|AVALANCE",
        "Blizzard"            = "BLIZZARD",
        "Cold"                = "COLD|COOL|WINTER|FREEZING",
        "Dry Weather"         = "DRY|DROUGHT|DRIEST",
        "Dust"                = "DUST|DUSTSTORM",
        "Fire"                = "FIRE|FIRES",
        "Flood"               = "FLOOD|FLD|FLOODING",
        "Fog"                 = "FOG",
        "Frost"               = "FROST|FREEZE",
        "Hail"                = "HAIL",
        "Heat"                = "HEAT|WARM|TEMPERATURE|HOT",
        "Hurricane"           = "HURRICANE|FUNNEL",
        "Ice"                 = "ICE|ICY|GLAZE",
        "Lightning"           = "LIGHT|LIGNTNING",
        "Rain"                = "RAIN|SHOWERS|PRECIPITATION",
        "Rip Currents"        = "RIP",
        "Landslide"           = "SLIDE",
        "Seiche"              = "SEICHE",
        "Dense Smoke"         = "SMOKE",
        "Snow"                = "SNOW",
        "High Surf"           = "SURF|SWELLS",
        "Tide"                = "TIDE|WAVE",
        "Tornado"             = "TORNADO|FUNNEL|FUNNELS",
        "Thunderstorm"        = "THUNDER|TSTM|TUNDERSTORM",
        "Tropical Depression" = "TROPICAL",
        "Tsunami"             = "TSUNAMI",
        "Volcanic Ash"        = "VOLCANIC",
        "Waterspout"          = "WATERSPOUT|WAYTERSPOUT",
        "Strong Wind"         = "WIND|WND|TYPHON",
        "Wet Weather"         = "WET"
)

# Every record starts as "Other" and is overwritten by each matching rule
Storms <- mutate(Storms, EVENTCODE = "Other")

for (code in names(event_patterns)) {
        # Always match against the working data set (Storms), never the raw one
        matched <- grepl(event_patterns[[code]], Storms$EVTYPE, ignore.case = TRUE)
        Storms$EVENTCODE[matched] <- code
}

Storms$EVTYPE <- as.factor(Storms$EVTYPE)
Storms$EVENTCODE <- as.factor(Storms$EVENTCODE)

Results

Summary of totals for health impact (fatalities and injuries) and economic damage (property and crop damage, in USD).

# Column totals, selected by name so the result does not depend on the
# position of the columns in Storms
colSums(Storms[c("FATALITIES", "INJURIES", "PROPDMGUSD", "CROPDMGUSD")])
##   FATALITIES     INJURIES   PROPDMGUSD   CROPDMGUSD 
##        15145       140528 427279750338  49093756627

Severe weather events cause about 9 times more injuries than deaths and more than 8 times more property damage than crop damage.

Looking at registered events by year.

# Histogram of registered events per year (one bin per year).
# Built with ggplot() because qplot() is deprecated in recent ggplot2 releases.
g_frecuency <- ggplot(data.frame(YEAR = year(Storms$BGN_DATE)), aes(x = YEAR)) +
        geom_histogram(binwidth = 1) +
        xlab("Year") +
        ylab("Counts")

Looking at events by designated code.

# Number of registered events per event code: summing the logical
# !is.na(EVTYPE) within each EVENTCODE counts the records in that group
events <- setNames(
        aggregate(!is.na(EVTYPE) ~ EVENTCODE, data = Storms, FUN = sum),
        c("EVENTCODE", "NUMBER")
)

# Bar chart of the counts, with slanted axis labels so the codes stay readable
g_events <- ggplot(events, aes(x = EVENTCODE, y = NUMBER)) +
        geom_bar(stat = "identity") +
        labs(x = "Event Code", y = "Counts") +
        theme(axis.text.x = element_text(angle = 45,  hjust = 1))

Figure showing registered event distribution by year and designated code.

# Stack the yearly histogram above the per-code bar chart in a single figure
grid.arrange(g_frecuency, g_events, nrow = 2)

There are far more registered events in the 1995-2011 period. Strong winds and hail are the most abundant followed by floods and tornados.

It is worth noting that the decision and method chosen to consolidate the event labels affects the health and economic impact assessment that follows but there is no easy way around it.

Health Impact Assessment

Aggregating fatalities by event type.

# Total fatalities per event code, sorted from most to least deadly, with
# each code's share of all fatalities rounded to three decimals
fatalities <- aggregate(FATALITIES ~ EVENTCODE, data = Storms, FUN = sum) %>%
        arrange(desc(FATALITIES)) %>%
        mutate(FATALITIESPERCENT = round(FATALITIES / sum(FATALITIES), 3))

About 20% of the event codes (6 of 31) account for more than 85% of the fatalities caused.

# Share of all fatalities caused by the six deadliest event codes
with(fatalities, sum(FATALITIES[1:6]) / sum(FATALITIES))
## [1] 0.860482
# Ten deadliest event codes
fatalities[seq_len(10), ]
##       EVENTCODE FATALITIES FATALITIESPERCENT
## 1       Tornado       5633             0.372
## 2          Heat       3007             0.199
## 3         Flood       1552             0.102
## 4   Strong Wind       1451             0.096
## 5     Lightning        817             0.054
## 6  Rip Currents        572             0.038
## 7          Cold        489             0.032
## 8     Avalanche        225             0.015
## 9          Tide        196             0.013
## 10    High Surf        164             0.011

Aggregating injuries by event type.

# Total injuries per event code, sorted from most to least harmful, with
# each code's share of all injuries rounded to three decimals
injuries <- aggregate(INJURIES ~ EVENTCODE, data = Storms, FUN = sum) %>%
        arrange(desc(INJURIES)) %>%
        mutate(INJURIESPERCENT = round(INJURIES / sum(INJURIES), 3))

About 20% of the event codes (6 of 31) account for more than 90% of the injuries caused.

# Share of all injuries caused by the six most harmful event codes
with(injuries, sum(INJURIES[1:6]) / sum(INJURIES))
## [1] 0.9110498
# Ten most harmful event codes by injuries
injuries[seq_len(10), ]
##      EVENTCODE INJURIES INJURIESPERCENT
## 1      Tornado    91367           0.650
## 2  Strong Wind    11498           0.082
## 3         Heat     8850           0.063
## 4        Flood     8681           0.062
## 5    Lightning     5231           0.037
## 6          Ice     2401           0.017
## 7         Cold     2170           0.015
## 8         Fire     1608           0.011
## 9         Hail     1371           0.010
## 10   Hurricane     1326           0.009

Figure showing the health impact by top 10 most harmful severe weather events.

# Horizontal bar chart of the ten deadliest event codes, largest at the top
top_fatalities <- fatalities[1:10, ]
g_fatalities <- ggplot(top_fatalities,
                       aes(x = reorder(EVENTCODE, FATALITIES), y = FATALITIES)) +
        geom_bar(stat = "identity") +
        coord_flip() +
        labs(x = "Weather Event", y = "Fatalities")

# Same layout for the ten event codes causing the most injuries
top_injuries <- injuries[1:10, ]
g_injuries <- ggplot(top_injuries,
                     aes(x = reorder(EVENTCODE, INJURIES), y = INJURIES)) +
        geom_bar(stat = "identity") +
        coord_flip() +
        labs(x = "Weather Event", y = "Injuries")

# Show both charts side by side
grid.arrange(g_fatalities, g_injuries, ncol = 2)

Tornadoes, heat, floods, strong winds and lightning were the most harmful severe weather events in terms of fatalities and injuries caused, accounting for more than 80% of the total in both cases. Tornadoes caused about 8 times more injuries than any other event.

Economic Impact Assessment

Aggregating property damage by event type.

# Total property damage (USD) per event code, sorted from most to least
# damaging, with each code's share of the total rounded to three decimals
prop_damage <- aggregate(PROPDMGUSD ~ EVENTCODE, data = Storms, FUN = sum) %>%
        arrange(desc(PROPDMGUSD)) %>%
        mutate(PROPDMGPERCENT = round(PROPDMGUSD / sum(PROPDMGUSD), 3))

About 10% of the event codes (3 of 31) account for more than 70% of the property damage caused.

# Share of all property damage caused by the three most damaging event codes
with(prop_damage, sum(PROPDMGUSD[1:3]) / sum(PROPDMGUSD))
## [1] 0.7234636
# Ten most damaging event codes by property damage
prop_damage[seq_len(10), ]
##              EVENTCODE   PROPDMGUSD PROPDMGPERCENT
## 1                Flood 167554630579          0.392
## 2            Hurricane  84636105030          0.198
## 3              Tornado  56930626640          0.133
## 4                Other  43928959600          0.103
## 5          Strong Wind  17740081054          0.042
## 6                 Hail  15969143053          0.037
## 7                 Fire   8496628500          0.020
## 8  Tropical Depression   7716127550          0.018
## 9                 Cold   6841331651          0.016
## 10                Tide   4661593200          0.011

Aggregating crop damage by event type.

# Total crop damage (USD) per event code, sorted from most to least
# damaging, with each code's share of the total rounded to three decimals
crop_damage <- aggregate(CROPDMGUSD ~ EVENTCODE, data = Storms, FUN = sum) %>%
        arrange(desc(CROPDMGUSD)) %>%
        mutate(CROPDMGPERCENT = round(CROPDMGUSD / sum(CROPDMGUSD), 3))

About 20% of the event codes (6 of 31) account for more than 80% of the crop damage caused.

# Share of all crop damage caused by the six most damaging event codes
with(crop_damage, sum(CROPDMGUSD[1:6]) / sum(CROPDMGUSD))
## [1] 0.854917
# Ten most damaging event codes by crop damage
crop_damage[seq_len(10), ]
##      EVENTCODE  CROPDMGUSD CROPDMGPERCENT
## 1  Dry Weather 13972581000          0.285
## 2        Flood 12270384210          0.250
## 3    Hurricane  5495292810          0.112
## 4          Ice  5027114300          0.102
## 5         Hail  3046420890          0.062
## 6  Strong Wind  2159295547          0.044
## 7        Frost  1997061000          0.041
## 8         Cold  1385559500          0.028
## 9         Heat   898879280          0.018
## 10        Rain   805005800          0.016

Figure showing economic impact by top 10 most damaging severe weather events.

# Horizontal bar chart of the ten event codes with the highest property
# damage, largest at the top
top_prop <- prop_damage[1:10, ]
g_prop <- ggplot(top_prop,
                 aes(x = reorder(EVENTCODE, PROPDMGUSD), y = PROPDMGUSD)) +
        geom_bar(stat = "identity") +
        coord_flip() +
        labs(x = "Event", y = "Property Damage in USD")

# Same layout for the ten event codes with the highest crop damage
top_crop <- crop_damage[1:10, ]
g_crop <- ggplot(top_crop,
                 aes(x = reorder(EVENTCODE, CROPDMGUSD), y = CROPDMGUSD)) +
        geom_bar(stat = "identity") +
        coord_flip() +
        labs(x = "Event", y = "Crop Damage in USD")

# Show both charts side by side
grid.arrange(g_prop, g_crop, ncol = 2)

Floods, causing nearly twice the losses of any other event, followed by hurricanes and tornadoes, were the 3 main causes of property losses. Dry weather and floods were the top causes of crop losses, each more than doubling any other event, followed by hurricanes and ice.