The NOAA Storm Database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. This analysis addresses two questions:
- Which types of storm and weather events are most harmful to population health in the United States?
- Which types of storm and weather events have the greatest economic consequences?
library(data.table)
library(ggplot2)
# Fetch the compressed NOAA storm data once; later runs reuse the local copy.
storm_file <- "stormData.csv.bz2"
if (!file.exists(storm_file)) {
  # Use R's default download method instead of forcing method = "curl", so the
  # script does not require an external curl binary. mode = "wb" writes the
  # bzip2 archive as binary so it is not corrupted on Windows.
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = storm_file,
    mode = "wb"
  )
}
# Read the archive directly from the .bz2 file and convert to a data.table
# for the grouped summaries below.
StormData <- read.csv(storm_file)
DT <- data.table(StormData)
Definition of variables that will be used:
EVTYPE: Event Type (e.g. Tornado, Flood, …)
FATALITIES: Number of Fatalities
INJURIES: Number of Injuries
PROPDMG: Property Damage
PROPDMGEXP: Units for Property Damage (magnitudes - H,K,M,B which means Hundreds, Thousands, Millions and Billions respectively.)
CROPDMG: Crop Damage
CROPDMGEXP: Units for Crop Damage (magnitudes - H,K,M,B which means Hundreds, Thousands, Millions and Billions respectively.)
# Create Year variable.
# BGN_DATE is parsed with an explicit tz = "UTC" so the derived year does not
# depend on the local time zone of the machine running the report (a local
# midnight timestamp that falls into a DST gap may otherwise parse as NA).
DT[, YEAR := as.integer(format(
  as.POSIXct(BGN_DATE, format = "%m/%d/%Y %H:%M:%S", tz = "UTC"),
  "%Y"
))]
# Subset the storm database to the columns used in the rest of the analysis
DT_subset <- DT[, c(
  "STATE", "YEAR", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
# Sanity checks on the subset. Lines starting with "##" are the console
# output recorded when the report was rendered.
# Number of rows and columns
dim(DT_subset)
## [1] 902297 9
# Column names
names(DT_subset)
## [1] "STATE" "YEAR" "EVTYPE" "FATALITIES" "INJURIES"
## [6] "PROPDMG" "PROPDMGEXP" "CROPDMG" "CROPDMGEXP"
# Column types together with the first few values of each column
str(DT_subset)
## Classes 'data.table' and 'data.frame': 902297 obs. of 9 variables:
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ YEAR : int 1950 1950 1951 1951 1951 1951 1951 1952 1952 1952 ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## - attr(*, ".internal.selfref")=<externalptr>
# Property damage: translate every PROPDMGEXP code into a numeric multiplier.
# First list the codes that actually occur in the data.
DT_subset[, unique(PROPDMGEXP)]
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Lookup table: one row per code, in the order the codes were listed above.
PropertyDamageUnits <- data.table(
  PROPDMGEXP = c(
    "K", "M", "", "B", "m", "+", "0", "5", "6", "?",
    "4", "2", "3", "h", "7", "H", "-", "1", "8"
  ),
  PROPVALUE = c(
    10^3, 10^6, 1, 10^9, 10^6, 0, 1, 10^5, 10^6, 0,
    10^4, 10^2, 10^3, 10^2, 10^7, 10^2, 0, 10, 10^8
  )
)
str(PropertyDamageUnits)
## Classes 'data.table' and 'data.frame': 19 obs. of 2 variables:
## $ PROPDMGEXP: chr "K" "M" "" "B" ...
## $ PROPVALUE : num 1e+03 1e+06 1e+00 1e+09 1e+06 0e+00 1e+00 1e+05 1e+06 0e+00 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Join the multiplier onto every storm record (all rows of DT_subset are kept).
DT_subset <- PropertyDamageUnits[DT_subset, on = "PROPDMGEXP"]
# Crop damage: translate every CROPDMGEXP code into a numeric multiplier.
# First list the codes that actually occur in the data.
DT_subset[, unique(CROPDMGEXP)]
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Lookup table: one row per code, in the order the codes were listed above.
CropDamageUnits <- data.table(
  CROPDMGEXP = c("", "M", "K", "m", "B", "?", "0", "k", "2"),
  CROPVALUE = c(1, 10^6, 10^3, 10^6, 10^9, 0, 1, 10^3, 10^2)
)
# Join the multiplier onto every storm record (all rows of DT_subset are kept).
DT_subset <- CropDamageUnits[DT_subset, on = "CROPDMGEXP"]
# Confirm that both multiplier columns are now present.
str(DT_subset)
## Classes 'data.table' and 'data.frame': 902297 obs. of 11 variables:
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ CROPVALUE : num 1 1 1 1 1 1 1 1 1 1 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ PROPVALUE : num 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ YEAR : int 1950 1950 1951 1951 1951 1951 1951 1952 1952 1952 ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Convert property and crop damage to a common unit: billions of dollars
# (reported amount x exponent multiplier / 1e9).
# `:=` adds both columns by reference; `$<-` on a data.table copies the whole
# table for each assignment.
DT_subset[, `:=`(
  PROPDMGTOTAL = PROPDMG * PROPVALUE / 1e9,
  CROPDMGTOTAL = CROPDMG * CROPVALUE / 1e9
)]
Which type of storm and weather events are most harmful to the population health of the United States?
# Total fatalities per event type over all years; keep the 10 largest.
Fatalities_by_event <- DT_subset[
  ,
  .(total_fatalities = sum(FATALITIES)),
  by = EVTYPE
][order(-total_fatalities)][1:10]
# Freeze the ranked order as factor levels so the bars are drawn in that order.
Fatalities_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]
ggplot(Fatalities_by_event, aes(x = EVTYPE, y = total_fatalities)) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Number of fatalities by top 10 Weather Events"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Yearly fatalities for the 10 event types with the most fatalities overall.
# Step 1: names of the 10 event types with the highest total fatalities.
top10 <- DT_subset[
  ,
  .(total = sum(FATALITIES, na.rm = TRUE)),
  by = EVTYPE
][order(-total)][1:10, EVTYPE]
# Step 2: fatalities per year and event type, restricted to those 10 types.
ts_top10 <- DT_subset[
  EVTYPE %in% top10,
  .(fatalities = sum(FATALITIES, na.rm = TRUE)),
  by = c("YEAR", "EVTYPE")
]
# Step 3: one coloured line per event type.
ggplot(ts_top10, aes(x = YEAR, y = fatalities, colour = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  ggtitle("Fatalities over time for Top 10 Event Types (overall)") +
  xlab("Year") +
  ylab("Fatalities") +
  labs(colour = "Event") +
  theme_minimal()
# Number of fatalities by top 10 weather events over time since 1993
# Restrict the yearly series to 1993 onwards. (The original chunk repeated
# this assignment a second time after the plot; the duplicate was a no-op and
# has been removed.)
ts_top10lim <- ts_top10[YEAR >= 1993]
ggplot(ts_top10lim, aes(YEAR, fatalities, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(
    title = "Fatalities over time for Top 10 Event Types since 1993",
    x = "Year", y = "Fatalities", color = "Event"
  ) +
  theme_minimal()
# Total fatalities per event type for 1993 onwards; keep the 10 largest.
Fatalities_by_event2 <- DT_subset[
  YEAR >= 1993,
  .(total_fatalities = sum(FATALITIES)),
  by = EVTYPE
][order(-total_fatalities)][1:10]
# Freeze the ranked order as factor levels so the bars are drawn in that order.
Fatalities_by_event2[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]
ggplot(Fatalities_by_event2, aes(x = EVTYPE, y = total_fatalities)) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Number of fatalities by top 10 Weather Events, since 1993"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Total injuries per event type over all years; keep the 10 largest.
Injuries_by_event <- DT_subset[
  ,
  .(total_injuries = sum(INJURIES)),
  by = EVTYPE
][order(-total_injuries)][1:10]
# Freeze the ranked order as factor levels so the bars are drawn in that order.
Injuries_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]
ggplot(Injuries_by_event, aes(x = EVTYPE, y = total_injuries)) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Injuries",
    title = "Number of injuries by top 10 Weather Events"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Number of injuries by top 10 weather events over time
# Names of the 10 event types with the highest total injuries.
top10inj <- DT_subset[, .(total = sum(INJURIES, na.rm = TRUE)),
  by = EVTYPE
][order(-total)][1:10, EVTYPE]
# Injuries per year and event type for those 10 types.
# Fix: filter on top10inj (the injury ranking). The original filtered on
# top10, the fatalities ranking, so the chart showed the wrong event types.
inj_top10 <- DT_subset[EVTYPE %in% top10inj,
  .(injuries = sum(INJURIES, na.rm = TRUE)),
  by = .(YEAR, EVTYPE)
]
ggplot(inj_top10, aes(YEAR, injuries, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(
    title = "Injuries over time for Top 10 Event Types (overall)",
    x = "Year", y = "Injuries", color = "Event"
  ) +
  theme_minimal()
# Number of injuries by top 10 weather events over time since 1993
# Restrict the yearly injury series to 1993 onwards.
# NOTE(review): the variable name `ing_top10lim` looks like a typo for
# `inj_top10lim`; it is kept as-is so the script's global names do not change.
ing_top10lim <- inj_top10[YEAR >= 1993]
ggplot(ing_top10lim, aes(YEAR, injuries, color = EVTYPE)) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(
    # Title fixed: the original read "(overall) since 1993", which is
    # contradictory for a chart limited to 1993 onwards.
    title = "Injuries over time for Top 10 Event Types since 1993",
    x = "Year", y = "Injuries", color = "Event"
  ) +
  theme_minimal()
# Number of injuries by top 10 weather events, since 1993
# Total injuries per event type for 1993 onwards; keep the 10 largest.
Injuries_by_event2 <- DT_subset[YEAR >= 1993,
  .(total_injuries = sum(INJURIES)),
  by = EVTYPE
][order(-total_injuries)][1:10]
# Freeze the ranked order as factor levels so the bars are drawn in that order.
Injuries_by_event2[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]
# Title fixed: the original omitted ", since 1993", which made this chart
# indistinguishable from the all-years injuries chart.
ggplot(Injuries_by_event2, aes(x = EVTYPE, y = total_injuries)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") + ylab("Injuries") +
  ggtitle("Number of injuries by top 10 Weather Events, since 1993")
Conclusion: The charts show that, over the entire dataset, the event type causing the greatest harm to population health (in both fatalities and injuries) is Tornado.
However, it is important to note that prior to 1993, only Tornado and TSTM Wind were recorded as events in the dataset.
Looking at more recent data (from 1993 onwards), tornadoes are still the event type causing the most injuries; however, excessive heat is the event type causing the most fatalities (although its impact has varied from year to year).
Which type of storm and weather events have the greatest economic consequences?
# Total property damage per event type; keep the 10 largest.
PropDM_by_event <- DT_subset[
  ,
  .(PROPDMGTOTAL = sum(PROPDMGTOTAL)),
  by = EVTYPE
][order(-PROPDMGTOTAL)][1:10]
# Freeze the ranked order as factor levels so the bars are drawn in that order.
PropDM_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]
ggplot(PropDM_by_event, aes(x = EVTYPE, y = PROPDMGTOTAL)) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Damages ($b)",
    title = "Total Property Damage by top 10 Weather Events"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Total crop damage per event type; keep the 10 largest.
CropDM_by_event <- DT_subset[
  ,
  .(CROPDMGTOTAL = sum(CROPDMGTOTAL)),
  by = EVTYPE
][order(-CROPDMGTOTAL)][1:10]
# Freeze the ranked order as factor levels so the bars are drawn in that order.
CropDM_by_event[, EVTYPE := factor(EVTYPE, levels = EVTYPE)]
ggplot(CropDM_by_event, aes(x = EVTYPE, y = CROPDMGTOTAL)) +
  geom_col() +
  labs(
    x = "Event Type",
    y = "Damages ($b)",
    title = "Total Crop Damage by top 10 Weather Events"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Combined property and crop damage per event type; keep the 10 largest.
Damage_by_event <- DT_subset[
  ,
  .(DMGTOTAL = sum(PROPDMGTOTAL, CROPDMGTOTAL)),
  by = EVTYPE
][order(-DMGTOTAL)][1:10]
# Print the ranked totals.
Damage_by_event[order(-DMGTOTAL)]
## EVTYPE DMGTOTAL
## <char> <num>
## 1: FLOOD 150.319678
## 2: HURRICANE/TYPHOON 71.913713
## 3: TORNADO 57.362334
## 4: STORM SURGE 43.323541
## 5: HAIL 18.761222
## 6: FLASH FLOOD 18.243991
## 7: DROUGHT 15.018672
## 8: HURRICANE 14.610229
## 9: RIVER FLOOD 10.148404
## 10: ICE STORM 8.967041
# Bar chart of combined property and crop damage for the top 10 event types.
# EVTYPE is a plain character column in Damage_by_event, so ggplot would draw
# the bars in alphabetical order; reorder() ranks them by damage, largest
# first, which matches the other top-10 bar charts in this report.
ggplot(Damage_by_event, aes(x = reorder(EVTYPE, -DMGTOTAL), y = DMGTOTAL)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") + ylab("Damages ($b)") +
  ggtitle("Total Property and Crop Damage by top 10 Weather Events")
Conclusion: The charts show that the event type with the largest economic consequences (property and crop damage combined) is flood, followed by Hurricane/Typhoon, Tornado and Storm Surge. For crop damage specifically, the event type with the largest economic consequence is drought; however, we note that the economic impact of crop damage is smaller than that of property damage.