Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to answer 2 basic questions: 1) Which types of events are most harmful with respect to population health; and 2) which types of events have the greatest economic consequences.
The following analysis demonstrates that tornadoes are the most harmful weather events with respect to population health, having caused the highest combined total of injuries and fatalities, while floods are responsible for the greatest economic loss, with more than USD 150 billion in total damage.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. The data can be found on https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
# Read the storm dataset.
# The compressed CSV is downloaded only when it is not already present in the
# working directory, so the report can be rebuilt from a clean checkout without
# a manual download step. An existing local copy is never overwritten.
storm_file <- "StormData.csv.bz2"
if (!file.exists(storm_file)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = storm_file,
    mode = "wb"  # binary mode: the archive must not be altered on Windows
  )
}
# read.csv() reads the bzip2-compressed file directly; no manual unpacking needed
storm_data <- read.csv(storm_file, sep = ",", header = TRUE)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plyr)
## Warning: package 'plyr' was built under R version 3.5.3
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Keep only the columns used by the health and economic analysis, giving each
# one a descriptive name as it is selected
tidystorm <- select(
  storm_data,
  Event = EVTYPE,
  Fatalities = FATALITIES,
  Injuries = INJURIES,
  Property.Damage = PROPDMG,
  Property.Damage.Exp = PROPDMGEXP,
  Crop.Damage = CROPDMG,
  Crop.Damage.Exp = CROPDMGEXP
)
# Upper-case every event label so spellings that differ only by case coincide
tidystorm[["Event"]] <- toupper(tidystorm[["Event"]])
# Collapse overlapping / misspelled event labels onto one canonical label each.
# Every rule is a (pattern, replacement) pair: any label that STARTS with the
# pattern is replaced in full by the canonical name. Rules are applied one
# after another in the order listed, because a later rule can act on the
# output of an earlier one.
tidystorm$Event <- local({
  rules <- matrix(c(
    "^(AVALANCHE).*",         "AVALANCHE",
    "^(AVALANCE).*",          "AVALANCHE",
    "^(BLIZZARD).*",          "BLIZZARD",
    "^(GROUND BLIZZARD).*",   "BLIZZARD",
    "^(COASTAL).*",           "COASTAL FLOOD",
    "^( COASTAL).*",          "COASTAL FLOOD",
    "^(COLD/WIND CHILL).*",   "COLD/WIND CHILL",
    "^(DEBRIS FLOW).*",       "DEBRIS FLOW",
    "^(DENSE FOG).*",         "DENSE FOG",
    "^(DENSE SMOKE).*",       "DENSE SMOKE",
    "^(DROUGHT).*",           "DROUGHT",
    "^(DUST DEVIL).*",        "DUST DEVIL",
    "^(DUST STORM).*",        "DUST STORM",
    "^(EXTREME HEAT).*",      "EXCESSIVE HEAT",
    "^(EXCESSIVE HEAT).*",    "EXCESSIVE HEAT",
    "^(EXTREME COLD).*",      "EXTREME COLD/WIND CHILL",
    "^(EXCESSIVE COLD).*",    "EXTREME COLD/WIND CHILL",
    "^(EXTREME WIND).*",      "EXTREME COLD/WIND CHILL",
    "^(FLASH).*",             "FLASH FLOOD",
    "^( FLASH).*",            "FLASH FLOOD",
    "^(LOCAL FLASH).*",       "FLASH FLOOD",
    "^(FLOOD).*",             "FLOOD",
    "^(FROST).*",             "FROST/FREEZE",
    "^(FREEZE).*",            "FROST/FREEZE",
    "^(FUNNEL).*",            "FUNNEL CLOUD",
    "^(HAIL).*",              "HAIL",
    "^(HEAT).*",              "HEAT",
    "^(HEAVY RAIN).*",        "HEAVY RAIN",
    "^(HEAVY SNOW).*",        "HEAVY SNOW",
    "^(HIGH SURF).*",         "HIGH SURF",
    "^(HIGH WIND).*",         "HIGH WIND",
    "^(HURRICANE).*",         "HURRICANE (TYPHOON)",
    "^(TYPHOON).*",           "HURRICANE (TYPHOON)",
    "^(ICE STORM).*",         "ICE STORM",
    "^(ICESTORM).*",          "ICE STORM",
    "^(LAKE EFFECT SNOW).*",  "LAKE-EFFECT SNOW",
    "^(LIGHTN).*",            "LIGHTNING",
    "^( LIGHTN).*",           "LIGHTNING",
    "^(MARINE TSTM WIND).*",  "MARINE THUNDERSTORM WIND",
    "^(RIP CURRENT).*",       "RIP CURRENT",
    "^(SLEET).*",             "SLEET",
    "^(STORM SURGE).*",       "STORM SURGE/TIDE",
    "^(STRONG WINDS).*",      "STRONG WIND",
    "^(STRONG WIND).*",       "STRONG WIND",
    "^(THUNDERSTORM WIND).*", "THUNDERSTORM WIND",
    "^(TSTM).*",              "THUNDERSTORM WIND",
    "^( TSTM).*",             "THUNDERSTORM WIND",
    "^(TORNADO).*",           "TORNADO",
    "^(COLD AIR TORNADO).*",  "TORNADO",
    "^(TROPICAL STORM).*",    "TROPICAL STORM",
    "^(VOLC).*",              "VOLCANIC ASH",
    "^(WATERSPOUT).*",        "WATERSPOUT",
    "^( WATERSPOUT).*",       "WATERSPOUT",
    "^(WATER SPOUT).*",       "WATERSPOUT",
    "^(WILD).*",              "WILDFIRE",
    "^(WINTER STORM).*",      "WINTER STORM",
    "^(WINTER WEATHER).*",    "WINTER WEATHER",
    "^(WINTERY).*",           "WINTER WEATHER",
    "^(WINTER MIX).*",        "WINTER WEATHER"
  ), ncol = 2, byrow = TRUE)

  events <- tidystorm$Event
  for (i in seq_len(nrow(rules))) {
    events <- gsub(rules[i, 1], rules[i, 2], events)
  }
  events
})
# mapvalues() comes from plyr
library(plyr)
# Turn the damage exponent codes into numeric multipliers.
# Codes are upper-cased first so that lower- and upper-case letters share one
# mapping. Assumed meanings:
#   H = hundreds = 100; K = thousands = 1000; M = millions = 1000000;
#   B = billions = 1000000000; (+) = 1; numeric 0-8 = 10;
#   (-), (?) and blank character = 0
tidystorm$Property.Damage.Exp <- as.numeric(mapvalues(
  toupper(tidystorm$Property.Damage.Exp),
  from = c("H", "K", "M", "B", "+", "0", "5", "6", "4", "2", "3", "7", "1", "8", "", "-", "?"),
  to = c(10^2, 10^3, 10^6, 10^9, 1, rep(10, 9), rep(0, 3))
))
tidystorm$Crop.Damage.Exp <- as.numeric(mapvalues(
  toupper(tidystorm$Crop.Damage.Exp),
  from = c("K", "M", "B", "0", "2", "?", ""),
  to = c(10^3, 10^6, 10^9, 10, 10, 0, 0)
))
# Dollar amounts per record: reported figure scaled by its multiplier
tidystorm$TotalPropDamages <- with(tidystorm, Property.Damage * Property.Damage.Exp)
tidystorm$TotalCropDamages <- with(tidystorm, Crop.Damage * Crop.Damage.Exp)
# Combined property + crop damage per record
tidystorm$TotalDamage <- with(tidystorm, TotalPropDamages + TotalCropDamages)
# tidystorm_damages holds just the event type and the three damage totals
tidystorm_damages <- tidystorm[c("Event", "TotalPropDamages", "TotalCropDamages", "TotalDamage")]
# Per-record health table grouped by event type: fatalities, injuries and
# their sum (Total)
tidystorm_event_harm <- group_by(tidystorm, Event)
tidystorm_event_harm <- mutate(tidystorm_event_harm, Total = Fatalities + Injuries)
# Event is not listed here; select() adds it back because it is the grouping
# variable, which is what the message below reports
tidystorm_event_harm <- select(tidystorm_event_harm, Fatalities, Injuries, Total)
## Adding missing grouping variables: `Event`
tidystorm_event_harm
## # A tibble: 902,297 x 4
## # Groups: Event [551]
## Event Fatalities Injuries Total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 0 15 15
## 2 TORNADO 0 0 0
## 3 TORNADO 0 2 2
## 4 TORNADO 0 2 2
## 5 TORNADO 0 2 2
## 6 TORNADO 0 6 6
## 7 TORNADO 0 1 1
## 8 TORNADO 0 0 0
## 9 TORNADO 1 14 15
## 10 TORNADO 0 0 0
## # ... with 902,287 more rows
# For each health measure: sum it per event type, sort the events from the
# largest sum to the smallest, and keep the first five rows.
# aggregate() names the summed column 'x'.

# Fatalities
fdata <- with(tidystorm_event_harm, aggregate(Fatalities, by = list(Event = Event), FUN = sum))
fdata <- arrange(fdata, desc(x))[seq_len(5), ]

# Injuries
idata <- with(tidystorm_event_harm, aggregate(Injuries, by = list(Event = Event), FUN = sum))
idata <- arrange(idata, desc(x))[seq_len(5), ]

# Fatalities and injuries combined
hdata <- with(tidystorm_event_harm, aggregate(Total, by = list(Event = Event), FUN = sum))
hdata <- arrange(hdata, desc(x))[seq_len(5), ]
# Stack the top-5 fatalities, injuries and combined tables into one long
# dataset called 'harm_type', tagging each row with the measure it holds
fdata$Type <- "Fatalities"
idata$Type <- "Injuries"
hdata$Type <- "Injuries & Fatalities"
harm_type <- rbind(fdata, idata, hdata)
# The three measures are drawn side by side rather than stacked:
# "Injuries & Fatalities" is already the sum of the other two, so stacking
# them would make every bar count the same casualties twice.
# Events are ordered by their largest measure so the worst event is on top.
ggplot(harm_type) +
  aes(y = x, x = reorder(Event, x, FUN = max), fill = Type) +
  geom_col(position = "dodge") +
  ggtitle("Top Weather Events Causing Injuries & Fatalities in the US") +
  labs(y = "Total", x = "Weather Event") +
  theme(plot.title = element_text(hjust = 0.5)) +
  coord_flip()
As we can observe from the plot, across the United States, tornadoes are the most harmful weather event with respect to population health, accounting for the highest total fatalities and injuries incurred.
# For each damage measure: sum it per event type, sort the events from the
# largest sum to the smallest, keep the first five rows and print them.
# aggregate() names the summed column 'x'.

# Total (property + crop) damage
ddata <- with(tidystorm_damages, aggregate(TotalDamage, by = list(Event = Event), FUN = sum))
ddata <- arrange(ddata, desc(x))[seq_len(5), ]
ddata
## Event x
## 1 FLOOD 150836231904
## 2 HURRICANE (TYPHOON) 90872527810
## 3 TORNADO 58959397107
## 4 STORM SURGE/TIDE 47965579000
## 5 HAIL 19000567177

# Property damage
pdata <- with(tidystorm_damages, aggregate(TotalPropDamages, by = list(Event = Event), FUN = sum))
pdata <- arrange(pdata, desc(x))[seq_len(5), ]
pdata
## Event x
## 1 FLOOD 144957523954
## 2 HURRICANE (TYPHOON) 85356410010
## 3 TORNADO 58541934147
## 4 STORM SURGE/TIDE 47964724000
## 5 FLASH FLOOD 16732872111

# Crop damage
cdata <- with(tidystorm_damages, aggregate(TotalCropDamages, by = list(Event = Event), FUN = sum))
cdata <- arrange(cdata, desc(x))[seq_len(5), ]
cdata
## Event x
## 1 DROUGHT 13972571780
## 2 FLOOD 5878707950
## 3 HURRICANE (TYPHOON) 5516117800
## 4 RIVER FLOOD 5029459000
## 5 ICE STORM 5022113500
# Stack the top-5 property-damage, crop-damage and total-damage tables into
# one long dataset called 'economic_type', tagging each row with its measure
pdata$Type <- "Property Damage"
cdata$Type <- "Crop Damage"
ddata$Type <- "Total Damage"
economic_type <- rbind(pdata, cdata, ddata)
# The three measures are drawn side by side rather than stacked:
# "Total Damage" is already property + crop damage, so stacking them would
# make every bar count the same dollars twice.
# Events are ordered by their largest measure so the costliest event is on top.
ggplot(economic_type) +
  aes(y = x, x = reorder(Event, x, FUN = max), fill = Type) +
  geom_col(position = "dodge") +
  ggtitle("Economic Consequences due to Weather Events in the US") +
  labs(y = "Total damage ($)", x = "Weather Event") +
  theme(plot.title = element_text(hjust = 0.5)) +
  coord_flip()
As the plot shows, floods have the greatest economic consequences across the United States, with more than USD 150 billion in property and crop damage combined.
Across the United States, tornadoes are the most harmful weather events with respect to population health, while floods are responsible for the greatest economic loss.