This report investigates the most destructive types of storms with respect to fatalities, injuries and financial damage to both property and crops. Tornadoes are the most dangerous weather type, by a margin of nearly 3:1 over the next highest type when measured by fatalities and by 13:1 when measured by injuries. Heat (regular and excessive), flooding (flash and regular) and thunderstorm winds make up 5 of the top 10 weather events most injurious to public health. Flooding is the most damaging type of extreme weather for property and crops, by a factor of 2:1 over the next highest type, hurricanes. Hurricanes, tornadoes and flooding (regular floods, storm surges, flash floods and river flooding) make up 7 of the top 10 most damaging weather events.
The data for this analysis comes from the National Weather Service’s National Climatic Data Center Storm Events data.
Documentation for the database can be found here:
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
# Switch to the author's project folder only when it actually exists, so the
# report can also be run from any other machine/checkout (where the working
# directory is assumed to already be the project folder).
project_dir <- "C:\\Users\\cchubb\\Coursera\\DataScience\\5ReproducibleResearch\\Assignment2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Clear out any local variables, start fresh and remove unneeded data from the
# global environment (this also removes project_dir defined above).
# NOTE(review): wiping the caller's environment is discouraged in shared
# scripts; kept here to preserve the original behaviour.
rm(list = ls())
# Fetch the raw storm data archive once and cache it under ./data, so that
# re-running the analysis does not download it again.
data_dir <- "./data"
archive_path <- "./data/StormData.csv.bz2"
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}
if (!file.exists(archive_path)) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb": the archive is binary, so request a binary transfer explicitly
  # instead of relying on platform-specific defaults.
  download.file(fileUrl, destfile = archive_path, mode = "wb")
  if (!file.exists(archive_path)) {
    stop("Unable to download StormData.csv.bz2 file")
  }
}
# The compressed archive is passed to read.csv() directly; no separate
# decompression step is performed here.
StormData <- read.csv(archive_path)
# Cast the event type as a factor so it can be used as a grouping key.
StormData$EVTYPE <- as.factor(StormData$EVTYPE)
#Get libraries
library(dplyr, quietly = TRUE)
library(reshape2, quietly = TRUE)
library(knitr, quietly = TRUE)
# Scale the PROPDMG and CROPDMG fields to plain dollar amounts.
#
# damage_multiplier() maps a magnitude code to its numeric multiplier:
#   "K" -> 1E3, "M" -> 1E6, "B" -> 1E9, anything else -> 1 (amount unscaled).
# Codes are compared case-insensitively so that lower-case codes, if present
# in the data, are scaled the same way as their upper-case forms.
# A missing (NA) code yields an NA multiplier.
damage_multiplier <- function(exponent) {
  code <- toupper(as.character(exponent))
  if_else(code == "K", 1E3,
    if_else(code == "M", 1E6,
      if_else(code == "B", 1E9, 1)
    )
  )
}
StormData <- transform(StormData,
  PROPDMG_VAL = PROPDMG * damage_multiplier(PROPDMGEXP),
  CROPDMG_VAL = CROPDMG * damage_multiplier(CROPDMGEXP)
)
# Add up the property and crop damage to get a total value per event.
StormData <- transform(StormData, DAMAGE_VAL = PROPDMG_VAL + CROPDMG_VAL)
# Treat a recorded 0 as "no value" (NA) in every harm/damage column, so that
# sums and means computed with na.rm count only events that reported a value.
zero_to_na <- function(amount) {
  if_else(amount == 0, NA_real_, amount)
}
StormData <- transform(
  StormData,
  FATALITIES = zero_to_na(FATALITIES),
  INJURIES = zero_to_na(INJURIES),
  PROPDMG_VAL = zero_to_na(PROPDMG_VAL),
  CROPDMG_VAL = zero_to_na(CROPDMG_VAL),
  DAMAGE_VAL = zero_to_na(DAMAGE_VAL)
)
# Total every harm measure per event type. Missing values are dropped from
# each sum, so a group with no recorded values totals 0.
by_event <- StormData %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES_TOT = sum(FATALITIES, na.rm = TRUE),
    INJURIES_TOT = sum(INJURIES, na.rm = TRUE),
    PROPDMG_VAL_TOT = sum(PROPDMG_VAL, na.rm = TRUE),
    CROPDMG_VAL_TOT = sum(CROPDMG_VAL, na.rm = TRUE),
    DAMAGE_VAL_TOT = sum(DAMAGE_VAL, na.rm = TRUE)
  )
# Get the top event types by fatalities, injuries and total damage.
# Rows are ranked in descending order of the relevant total. head() is used
# instead of [1:how_many] so that fewer than how_many event types simply gives
# a shorter table rather than padding it with NA rows.
how_many <- 10
top_by_fatalities <- by_event[
  head(order(-by_event$FATALITIES_TOT), how_many),
  c("EVTYPE", "FATALITIES_TOT")
]
top_by_injuries <- by_event[
  head(order(-by_event$INJURIES_TOT), how_many),
  c("EVTYPE", "INJURIES_TOT")
]
top_by_damage <- by_event[
  head(order(-by_event$DAMAGE_VAL_TOT), how_many),
  c("EVTYPE", "DAMAGE_VAL_TOT", "PROPDMG_VAL_TOT", "CROPDMG_VAL_TOT")
]
The top 10 most harmful weather events over all time by fatalities and injuries are:
# Draw the fatality and injury rankings side by side. The enlarged bottom
# margin leaves room for the event-type labels, which las = 2 draws
# perpendicular to the axis.
par(mar = c(11, 4, 4, 2) + 0.1, mfrow = c(1, 2))
with(
  top_by_fatalities,
  barplot(FATALITIES_TOT, names.arg = EVTYPE, las = 2, main = "Fatalities")
)
with(
  top_by_injuries,
  barplot(INJURIES_TOT, names.arg = EVTYPE, las = 2, main = "Injuries")
)
# Fatalities table with reader-friendly column headings.
top_by_fatalities %>%
  rename(Event.Type = EVTYPE, Fatalities.Total = FATALITIES_TOT) %>%
  kable()
Event.Type | Fatalities.Total |
---|---|
TORNADO | 5633 |
EXCESSIVE HEAT | 1903 |
FLASH FLOOD | 978 |
HEAT | 937 |
LIGHTNING | 816 |
TSTM WIND | 504 |
FLOOD | 470 |
RIP CURRENT | 368 |
HIGH WIND | 248 |
AVALANCHE | 224 |
# Injuries table with reader-friendly column headings.
top_by_injuries %>%
  rename(Event.Type = EVTYPE, Injuries.Total = INJURIES_TOT) %>%
  kable()
Event.Type | Injuries.Total |
---|---|
TORNADO | 91346 |
TSTM WIND | 6957 |
FLOOD | 6789 |
EXCESSIVE HEAT | 6525 |
LIGHTNING | 5230 |
HEAT | 2100 |
ICE STORM | 1975 |
FLASH FLOOD | 1777 |
THUNDERSTORM WIND | 1488 |
HAIL | 1361 |
Table Caption: Top 10 Fatalities and Injuries by Event Type
The top 10 most harmful weather events over all time by total damage are:
# Simple plot of the total damage (kept for reference):
# barplot(top_by_damage$DAMAGE_VAL_TOT, names.arg = top_by_damage$EVTYPE, las=2, main="Total Damage")

# Stacked bar chart of property vs. crop damage per event type.
# Pivot to long form with reshape2. The id variable is named explicitly
# (instead of letting melt() guess it) and columns are selected by name
# rather than by position.
x <- melt(
  top_by_damage[, c("EVTYPE", "PROPDMG_VAL_TOT", "CROPDMG_VAL_TOT")],
  id.vars = "EVTYPE"
)
x$value <- x$value / 1E9 # Scale to billions of dollars

# Back to wide form: one row per damage kind, one column per event type,
# which is the matrix layout barplot() stacks.
plot_data <- dcast(x, variable ~ EVTYPE, value.var = "value")
row.names(plot_data) <- plot_data$variable
plot_data$variable <- NULL
# Rearrange the columns to order by total damage, descending.
plot_data <- plot_data[, as.character(top_by_damage$EVTYPE)]

par(mfrow = c(1, 1), mar = c(11, 4, 4, 2) + 0.1)
barplot(as.matrix(plot_data),
  main = "Economic Damage by Event Type",
  ylab = "Dollars",
  las = 2,
  col = c("red", "green"),
  axes = FALSE
)
# Default axes are suppressed above; draw a y axis labelled in $ billions.
y_lab_val <- pretty(top_by_damage$DAMAGE_VAL_TOT / 1E9)
axis(2, at = y_lab_val, labels = paste0("$", y_lab_val, "B"))
legend("topright", legend = c("Property", "Crops"), fill = c("red", "green"))

# Damage table with reader-friendly headings and thousands separators.
kable(
  rename(
    top_by_damage[, c(1, 2, 3, 4)],
    Event.Type = EVTYPE,
    Damage.Total = DAMAGE_VAL_TOT,
    Property.Damage = PROPDMG_VAL_TOT,
    Crop.Damage = CROPDMG_VAL_TOT
  ),
  format.args = list(big.mark = ",")
)
Event.Type | Damage.Total | Property.Damage | Crop.Damage |
---|---|---|---|
FLOOD | 150,319,678,257 | 144,657,709,807 | 5,661,968,450 |
HURRICANE/TYPHOON | 71,913,712,800 | 69,305,840,000 | 2,607,872,800 |
TORNADO | 57,340,614,060 | 56,925,660,790 | 414,953,270 |
STORM SURGE | 43,323,541,000 | 43,323,536,000 | 5,000 |
HAIL | 18,752,904,943 | 15,727,367,053 | 3,025,537,890 |
FLASH FLOOD | 17,562,129,167 | 16,140,812,067 | 1,421,317,100 |
DROUGHT | 15,018,672,000 | 1,046,106,000 | 13,972,566,000 |
HURRICANE | 14,610,229,010 | 11,868,319,010 | 2,741,910,000 |
RIVER FLOOD | 10,148,404,500 | 5,118,945,500 | 5,029,459,000 |
ICE STORM | 8,967,041,360 | 3,944,927,860 | 5,022,113,500 |
# Same damage table, shown with the original column names and thousands
# separators.
top_by_damage[, 1:4] %>%
  kable(format.args = list(big.mark = ","))
EVTYPE | DAMAGE_VAL_TOT | PROPDMG_VAL_TOT | CROPDMG_VAL_TOT |
---|---|---|---|
FLOOD | 150,319,678,257 | 144,657,709,807 | 5,661,968,450 |
HURRICANE/TYPHOON | 71,913,712,800 | 69,305,840,000 | 2,607,872,800 |
TORNADO | 57,340,614,060 | 56,925,660,790 | 414,953,270 |
STORM SURGE | 43,323,541,000 | 43,323,536,000 | 5,000 |
HAIL | 18,752,904,943 | 15,727,367,053 | 3,025,537,890 |
FLASH FLOOD | 17,562,129,167 | 16,140,812,067 | 1,421,317,100 |
DROUGHT | 15,018,672,000 | 1,046,106,000 | 13,972,566,000 |
HURRICANE | 14,610,229,010 | 11,868,319,010 | 2,741,910,000 |
RIVER FLOOD | 10,148,404,500 | 5,118,945,500 | 5,029,459,000 |
ICE STORM | 8,967,041,360 | 3,944,927,860 | 5,022,113,500 |
Table Caption: Top 10 Financial Damage by Event Type