Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. This analysis shows that tornadoes are the most harmful weather events with respect to public health and that floods have the greatest economic impact.

Data Processing

The data are from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The events in the database start in the year 1950 and end in November 2011. Documentation of the database is available at the National Weather Service Storm Data Documentation.

# Fetch the raw NOAA storm data once and reuse the local copy on later runs.
# The file is looked up relative to the current working directory, so the
# script no longer depends on a machine-specific setwd() path.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "StormData.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive intact on every platform
  download.file(storm_url, storm_file, mode = "wb")
}

# read.csv() reads the bzip2-compressed CSV through a bzfile() connection;
# text columns are kept as character vectors rather than factors.
tempdata <- read.csv(bzfile(storm_file), header = TRUE, stringsAsFactors = FALSE)

dim(tempdata) # there are 902,297 observations and 37 variables
[1] 902297     37
names(tempdata) # list the variable (column) names of the raw data
 [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
 [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
[11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
[16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
[21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
[26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
[31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
[36] "REMARKS"    "REFNUM"    
# For managing data
library(dplyr)
library(tidyr)
# For plotting data
library(ggplot2)
library(gridExtra)

Data Selection

# Keep only the variables used in this analysis: the event type, the two
# public-health counts, and the damage amounts with their exponent codes.
analysis_vars <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
Stormdata <- tempdata[, analysis_vars]
# str(Stormdata)

Data transformation of Storm Events

Some storm events defined as having the same type of disturbance are grouped together. For example, according to NOAA, the only difference between a hurricane and a typhoon is the location where the storm occurs. That is, a hurricane and a typhoon are the same type of disturbance; therefore, they are grouped together in this analysis. Likewise, events that tend to occur together, such as storm, wind, and hail, are grouped. When an event name matches more than one pattern, the last matching rule in the code below determines its category.

# Categorize storm events.
# Each rule maps an event category (the name) to a regular expression that is
# matched case-insensitively against EVTYPE. Rules are applied in the order
# listed and a later match overwrites an earlier one, so the order matters
# (e.g. an EVTYPE matching both "STORM" and "WINTER" ends up in "ICE & SNOW").
event_rules <- c(
  "TORNADO"             = "TORNADO|TORNDAO",
  "HURRICANE & TYPHOON" = "HURRICANE|TYPHOON",
  "STORM & WIND"        = "STORM|WIND|WND|HAIL",
  "DROUGHT & HEAT"      = "HEAT|WARM|HOT|DROUGHT",
  "ICE & SNOW"          = "FROST|ICE|ICY|SNOW|AVALANCHE|AVALANCE|BLIZZARD|FREEZING|SLEET|GLAZE|WINTERY|WINTRY|WINTER",
  "EROSION & LANDSLIDE" = "EROSION|SLIDE|LANDSLUMP",
  # "FIRE" alone already matches "WILD FIRE" and "WILDFIRE"
  "FIRE"                = "FIRE",
  "LIGHTNING"           = "LIGHTNING",
  "FLOOD"               = "FLOOD|FLD|DAM BREAK",
  "HIGH SEAS & SURF"    = "HEAVY SEAS|HIGH SEAS|HIGH WATER|TIDE|TSUNAMI|HIGH WAVE|CURRENT|MARINE|SURF|SWELLS|BLOW-OUT|COASTAL SURGE",
  "HEAVY RAIN"          = "HEAVY RAIN",
  "FREEZE"              = "FREEZE|COLD AND WET CONDITIONS"
)

# Start with every event uncategorized, then apply the rules in order
Stormdata$EVENT <- NA_character_
for (event_category in names(event_rules)) {
  is_match <- grepl(event_rules[[event_category]], Stormdata$EVTYPE,
                    ignore.case = TRUE)
  Stormdata$EVENT[is_match] <- event_category
}

# Subset complete cases. 14,668 events are not categorized and therefore removed as NAs
Stormdata <- Stormdata[!is.na(Stormdata$EVENT), ]
Stormdata$EVENT <- as.factor(Stormdata$EVENT)

Data transformation of Property and Crop Damage

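Convert the magnitude codes stored in PROPDMGEXP and CROPDMGEXP (for example, H for hundreds, K for thousands, M for millions, and B for billions) into numeric multipliers used to calculate damage cost. Codes that are not recognized are given a multiplier of 1.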

# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (digits "1"-"8" or the letters H, K, M, B).
# Returns a numeric vector of the same length. Letters are matched
# case-insensitively so that "k" and "K" (thousands) are treated alike; any
# other non-missing code (blank, "0", "?", "+", "-", ...) gets a multiplier
# of 1, and a missing code stays NA.
exp_to_multiplier <- function(exp_code) {
  multiplier_by_code <- c(
    "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
    "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
  )
  exp_code <- toupper(as.character(exp_code))
  multiplier <- unname(multiplier_by_code[exp_code])
  # Codes absent from the table come back as NA from the lookup; give them
  # the default multiplier, but keep genuinely missing inputs as NA.
  multiplier[is.na(multiplier) & !is.na(exp_code)] <- 1
  multiplier
}

# Property damage
Stormdata$PROPDMGEXP <- exp_to_multiplier(Stormdata$PROPDMGEXP)
# Crop damage
Stormdata$CROPDMGEXP <- exp_to_multiplier(Stormdata$CROPDMGEXP)

Results

Public Health Consequence

The tables and figures below show the storm events that are most harmful to population health, as measured by the number of fatalities and the number of injuries.

# Total fatalities per event category, largest first, limited to the first
# ten rows of the sorted table
fatalities <- aggregate(FATALITIES ~ EVENT, data = Stormdata, FUN = sum)
fatalities <- arrange(fatalities, desc(FATALITIES))
fatalities <- fatalities[seq_len(10), ]
print(fatalities)
                 EVENT FATALITIES
1              TORNADO       5636
2       DROUGHT & HEAT       3178
3                FLOOD       1552
4         STORM & WIND       1529
5           ICE & SNOW        896
6     HIGH SEAS & SURF        837
7            LIGHTNING        817
8  HURRICANE & TYPHOON        133
9           HEAVY RAIN         99
10                FIRE         90
# Total injuries per event category, largest first, limited to the first
# ten rows of the sorted table
injuries <- aggregate(INJURIES ~ EVENT, data = Stormdata, FUN = sum)
injuries <- arrange(injuries, desc(INJURIES))
injuries <- injuries[seq_len(10), ]
print(injuries)
                 EVENT INJURIES
1              TORNADO    91407
2         STORM & WIND    13656
3       DROUGHT & HEAT     9247
4                FLOOD     8683
5           ICE & SNOW     6549
6            LIGHTNING     5232
7                 FIRE     1608
8  HURRICANE & TYPHOON     1331
9     HIGH SEAS & SURF      983
10          HEAVY RAIN      255
# Horizontal bar chart of fatalities per event category; bars are ordered by
# the fatality count and shaded by the same value.
names(fatalities) <- c("EVENT", "FATALITIES")
fig1 <- ggplot(
  fatalities,
  aes(x = reorder(EVENT, FATALITIES), y = FATALITIES, fill = FATALITIES)
) +
  geom_col(color = "white") +
  scale_fill_gradient(name = "Frequency", low = "red", high = "darkred") +
  coord_flip() +
  labs(
    title = "Fig. 1: Storm Events Most Harmful to Public Health by Number of Fatalities (1950-2011)",
    x = "Storm and Weather Event",
    y = "Total number of Fatalities"
  ) +
  theme(plot.title = element_text(face = "bold", size = 11, hjust = 0.6))
grid.arrange(fig1)

# Horizontal bar chart of injuries per event category; bars are ordered by
# the injury count and shaded by the same value.
names(injuries) <- c("EVENT", "INJURIES")
fig2 <- ggplot(
  injuries,
  aes(x = reorder(EVENT, INJURIES), y = INJURIES, fill = INJURIES)
) +
  geom_col(color = "white") +
  scale_fill_gradient(name = "Frequency", low = "blue", high = "red") +
  coord_flip() +
  labs(
    title = "Fig. 2: Storm Events Most Harmful to Public Health by Number of Injuries (1950-2011)",
    x = "Storm and Weather Event",
    y = "Total number of Injuries"
  ) +
  theme(plot.title = element_text(face = "bold", size = 11, hjust = 0.6))

grid.arrange(fig2)

Economic Consequence

The table and figure below show the storm events that are most devastating to the local economy, as measured by the estimated dollar value of property and crop damage.

# Damage cost = reported amount x exponent multiplier, computed separately for
# crops and property; the combined total is expressed in billions of dollars.
Stormdata <- mutate(
  Stormdata,
  CROPDMGCOST = CROPDMG * CROPDMGEXP,
  PROPDMGCOST = PROPDMG * PROPDMGEXP,
  Total.DMG = (PROPDMGCOST + CROPDMGCOST) / 1e9
)
# Total damage (property + crop) per event category, largest first, limited
# to the first ten rows of the sorted table
dmg_cost <- aggregate(Total.DMG ~ EVENT, data = Stormdata, FUN = sum)
dmg_cost <- arrange(dmg_cost, desc(Total.DMG))
dmg_cost <- dmg_cost[seq_len(10), ]
print(dmg_cost)
                 EVENT  Total.DMG
1                FLOOD 180.644240
2         STORM & WIND  91.896817
3  HURRICANE & TYPHOON  90.762453
4              TORNADO  57.418280
5           ICE & SNOW  17.864480
6       DROUGHT & HEAT  15.943477
7                 FIRE   8.899910
8     HIGH SEAS & SURF   4.906345
9           HEAVY RAIN   4.044311
10              FREEZE   1.974246
# Horizontal bar chart of combined property and crop damage per event
# category; bars are ordered by the damage total and shaded by the same value.
names(dmg_cost) <- c("EVENT", "Total.DMG")
fig3 <- ggplot(
  dmg_cost,
  aes(x = reorder(EVENT, Total.DMG), y = Total.DMG, fill = Total.DMG)
) +
  geom_col(color = "white") +
  scale_fill_gradient(name = "Billions (USD)", low = "lightgreen", high = "darkgreen") +
  coord_flip() +
  labs(
    title = "Fig. 3: Storm Events Most Costly to The U.S. Economy (1950-2011)",
    x = "Storm and Weather Event",
    y = "Total Property & Crop Damage Cost (in Billions USD)"
  ) +
  theme(plot.title = element_text(face = "bold", size = 11, hjust = 0.5))
grid.arrange(fig3)

Summary and Conclusion

During the period 1950-2011, the ten leading categories of storm and weather events caused 14,767 fatalities and 138,951 injuries in total. With respect to the public health impact on communities, tornadoes accounted for 5,636 (38%) of these fatalities and 91,407 (66%) of these injuries. With respect to the economic impact on communities, property and crop damage across the ten costliest categories totaled $474.4 billion; floods caused $180.6 billion (38%) of that damage. Therefore, tornadoes have the most devastating public health consequences and floods have the greatest economic consequences.