Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The analysis shows that tornadoes are the most harmful weather event with respect to public health, and that floods have the greatest economic impact.
The data come from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The events in the database start in the year 1950 and end in November 2011. Documentation of the database is available in the National Weather Service Storm Data Documentation.
# Load the NOAA storm data.
# The archive is downloaded only when it is not already present in the working
# directory, so the analysis can be re-run on any machine without editing a
# user-specific path.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "./StormData.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive intact on Windows
  download.file(storm_url, storm_file, mode = "wb")
}
# read.csv() decompresses through the bzfile() connection while reading
tempdata <- read.csv(bzfile(storm_file), header = TRUE, stringsAsFactors = FALSE)
dim(tempdata) # there are 902,297 observations and 37 variables
[1] 902297 37
names(tempdata) # list the variable (column) names of the raw data
[1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
[6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
[11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
[16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
[21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
[26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
[31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
[36] "REMARKS" "REFNUM"
# For managing data
library(dplyr)
library(tidyr)
# For plotting data
library(ggplot2)
library(gridExtra)
# Keep only the variables used in this analysis: the event type, the two
# health outcomes, and the damage amounts with their magnitude codes
relevant_vars <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
Stormdata <- select(tempdata, all_of(relevant_vars))
#str(Stormdata)
Storm events that represent the same type of disturbance are grouped together. For example, according to NOAA, the only difference between a hurricane and a typhoon is the location where the storm occurs. That is, a hurricane and a typhoon are the same type of disturbance, and therefore they are grouped together in this analysis. Likewise, events that tend to occur together, such as storm, wind, and hail, are grouped.
# Categorize storm events
# Each rule maps a regular expression (matched case-insensitively against
# EVTYPE) to a category label. Rules are applied in the order listed and a
# later match overwrites an earlier one, so the order is significant: an
# EVTYPE containing both "WIND" and "FLOOD", for example, ends up as "FLOOD".
event_rules <- c(
  "TORNADO" = "TORNADO|TORNDAO",
  "HURRICANE & TYPHOON" = "HURRICANE|TYPHOON",
  "STORM & WIND" = "STORM|WIND|WND|HAIL",
  "DROUGHT & HEAT" = "HEAT|WARM|HOT|DROUGHT",
  "ICE & SNOW" = "FROST|ICE|ICY|SNOW|AVALANCHE|AVALANCE|BLIZZARD|FREEZING|SLEET|GLAZE|WINTERY|WINTRY|WINTER",
  "EROSION & LANDSLIDE" = "EROSION|SLIDE|LANDSLUMP",
  "FIRE" = "FIRE|WILD FIRE|WILDFIRE",
  "LIGHTNING" = "LIGHTNING",
  "FLOOD" = "FLOOD|FLD|DAM BREAK",
  "HIGH SEAS & SURF" = "HEAVY SEAS|HIGH SEAS|HIGH WATER|TIDE|TSUNAMI|HIGH WAVE|CURRENT|MARINE|SURF|SWELLS|BLOW-OUT|COASTAL SURGE",
  "HEAVY RAIN" = "HEAVY RAIN",
  "FREEZE" = "FREEZE|COLD AND WET CONDITIONS"
)

# Start with every event uncategorized, then apply the rules in order
Stormdata$EVENT <- rep(NA_character_, nrow(Stormdata))
for (category in names(event_rules)) {
  is_match <- grepl(event_rules[[category]], Stormdata$EVTYPE, ignore.case = TRUE)
  Stormdata$EVENT[is_match] <- category
}

# Subset complete cases. 14,668 events are not categorized and therefore removed as NAs
Stormdata <- Stormdata[!is.na(Stormdata$EVENT), ]
Stormdata$EVENT <- factor(Stormdata$EVENT)
Convert the magnitude codes in PROPDMGEXP and CROPDMGEXP into numeric multipliers for calculating damage costs.
# Property damage
# PROPDMGEXP and CROPDMGEXP hold a magnitude code for the damage amount. The
# helper below converts a vector of codes into numeric multipliers:
#   digits "1"-"8" -> 10^digit, H -> 1e2, K -> 1e3, M -> 1e6, B -> 1e9,
#   any other code (including the empty string) -> 1.
# Letter codes are matched case-insensitively, so a lowercase "k" is treated
# as thousands in the same way as "h"/"H" and "m"/"M". Missing codes stay NA.
# The same table is applied to both columns so they are decoded consistently.
damage_multiplier <- function(exp_code) {
  lookup <- c(
    "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
    "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
  )
  # Codes absent from the table index to NA; those fall back to 1
  multiplier <- unname(lookup[toupper(exp_code)])
  multiplier[is.na(multiplier) & !is.na(exp_code)] <- 1
  multiplier
}
Stormdata$PROPDMGEXP <- damage_multiplier(Stormdata$PROPDMGEXP)
#Crop damage
Stormdata$CROPDMGEXP <- damage_multiplier(Stormdata$CROPDMGEXP)
Storm events that are most harmful to population health, as measured by the number of fatalities and the number of injuries.
# Total fatalities per event category, largest first; keep the first ten rows
fatalities <- aggregate(FATALITIES ~ EVENT, data = Stormdata, FUN = sum)
fatalities <- arrange(fatalities, desc(FATALITIES))
fatalities <- fatalities[seq_len(10), ]
print(fatalities)
EVENT FATALITIES
1 TORNADO 5636
2 DROUGHT & HEAT 3178
3 FLOOD 1552
4 STORM & WIND 1529
5 ICE & SNOW 896
6 HIGH SEAS & SURF 837
7 LIGHTNING 817
8 HURRICANE & TYPHOON 133
9 HEAVY RAIN 99
10 FIRE 90
# Total injuries per event category, largest first; keep the first ten rows
injuries <- aggregate(INJURIES ~ EVENT, data = Stormdata, FUN = sum)
injuries <- arrange(injuries, desc(INJURIES))
injuries <- injuries[seq_len(10), ]
print(injuries)
EVENT INJURIES
1 TORNADO 91407
2 STORM & WIND 13656
3 DROUGHT & HEAT 9247
4 FLOOD 8683
5 ICE & SNOW 6549
6 LIGHTNING 5232
7 FIRE 1608
8 HURRICANE & TYPHOON 1331
9 HIGH SEAS & SURF 983
10 HEAVY RAIN 255
# Fig. 1: horizontal bar chart of fatalities per event category.
# Bars are ordered by total and shaded from red (low) to dark red (high).
colnames(fatalities) <- c("EVENT", "FATALITIES")
fig1 <- ggplot(
  fatalities,
  aes(x = reorder(EVENT, FATALITIES), y = FATALITIES, fill = FATALITIES)
) +
  geom_bar(stat = "identity", color = "white") +
  scale_fill_gradient(name = "Frequency", low = "red", high = "darkred") +
  coord_flip() +
  labs(
    title = "Fig. 1: Storm Events Most Harmful to Public Health by Number of Fatalities (1950-2011)",
    x = "Storm and Weather Event",
    y = "Total number of Fatalities"
  ) +
  theme(plot.title = element_text(face = "bold", size = 11, hjust = 0.6))
grid.arrange(fig1)
# Fig. 2: horizontal bar chart of injuries per event category.
# Bars are ordered by total and shaded from blue (low) to red (high).
colnames(injuries) <- c("EVENT", "INJURIES")
fig2 <- ggplot(
  injuries,
  aes(x = reorder(EVENT, INJURIES), y = INJURIES, fill = INJURIES)
) +
  geom_bar(stat = "identity", color = "white") +
  scale_fill_gradient(name = "Frequency", low = "blue", high = "red") +
  coord_flip() +
  labs(
    title = "Fig. 2: Storm Events Most Harmful to Public Health by Number of Injuries (1950-2011)",
    x = "Storm and Weather Event",
    y = "Total number of Injuries"
  ) +
  theme(plot.title = element_text(face = "bold", size = 11, hjust = 0.6))
grid.arrange(fig2)
Storm events that are most devastating to the local economy, as measured by the estimated dollar value of property and crop damage.
# Damage cost = reported amount x magnitude multiplier, for crops and property;
# Total.DMG is their sum expressed in billions of dollars
Stormdata <- mutate(
  Stormdata,
  CROPDMGCOST = CROPDMG * CROPDMGEXP,
  PROPDMGCOST = PROPDMG * PROPDMGEXP,
  Total.DMG = (PROPDMGCOST + CROPDMGCOST) / 10^9
)
# Total damage per event category, largest first; keep the first ten rows
dmg_cost <- aggregate(Total.DMG ~ EVENT, data = Stormdata, FUN = sum)
dmg_cost <- arrange(dmg_cost, desc(Total.DMG))
dmg_cost <- dmg_cost[seq_len(10), ]
print(dmg_cost)
EVENT Total.DMG
1 FLOOD 180.644240
2 STORM & WIND 91.896817
3 HURRICANE & TYPHOON 90.762453
4 TORNADO 57.418280
5 ICE & SNOW 17.864480
6 DROUGHT & HEAT 15.943477
7 FIRE 8.899910
8 HIGH SEAS & SURF 4.906345
9 HEAVY RAIN 4.044311
10 FREEZE 1.974246
# Fig. 3: horizontal bar chart of total property and crop damage per event
# category. Bars are ordered by cost and shaded light green (low) to dark
# green (high).
colnames(dmg_cost) <- c("EVENT", "Total.DMG")
fig3 <- ggplot(
  dmg_cost,
  aes(x = reorder(EVENT, Total.DMG), y = Total.DMG, fill = Total.DMG)
) +
  geom_bar(stat = "identity", color = "white") +
  scale_fill_gradient(name = "Billions (USD)", low = "lightgreen", high = "darkgreen") +
  coord_flip() +
  labs(
    title = "Fig. 3: Storm Events Most Costly to The U.S. Economy (1950-2011)",
    x = "Storm and Weather Event",
    y = "Total Property & Crop Damage Cost (in Billions USD)"
  ) +
  theme(plot.title = element_text(face = "bold", size = 11, hjust = 0.5))
grid.arrange(fig3)
During the period 1950-2011, the ten most harmful storm and weather event categories accounted for 14,767 fatalities and 138,951 injuries. With respect to the public health impact on communities, tornadoes accounted for 5,636 (38%) of these fatalities and 91,407 (66%) of these injuries. With respect to the economic impact on communities, property and crop damage across the ten costliest event categories totaled $474.4 billion; floods caused $180.6 billion (38%) of that damage. Therefore, tornadoes have the most devastating public health consequences and floods have the greatest economic consequences.