This analysis examines the different impacts that natural disasters have on the United States both in terms of fatalities and monetary damage. It utilizes the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
The figures in the results section compare the total cost in dollars of the ten events causing the most property damage and the ten events causing the most crop damage. While the two lists share several events, they differ in their leading causes and in the relative size of the effects. Flood and Hurricane/Typhoon are the clear front runners for property damage, while Drought and Flood cause the most crop damage.
The figure in the results section shows the top ten leading causes of mortality in the US. It also includes each event’s injury totals, as the top ten for both of these groups are very similar. Tornadoes cause both the most fatalities and injuries.
First, load the packages needed later, download the storm data, and convert it to a data table.
library("data.table")
library("ggplot2")
# Fetch the compressed NOAA storm data and load it into a data.table.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Build the local path once so the download target and the file that is read
# cannot drift apart.
dataFile <- "/Users/mylesjoyce/Desktop/repdata%2Fdata%2FStormData.csv.bz2"
# Skip the download when a copy is already on disk, so re-running the analysis
# does not fetch the archive again.
if (!file.exists(dataFile)) {
  download.file(fileUrl, destfile = dataFile)
}
# Read the .bz2 file directly with read.csv, then convert the resulting
# data.frame to a data.table for the steps that follow.
DF <- read.csv(dataFile)
DT <- as.data.table(DF)
Trying to get a first idea of what the data looks like.
# Report the size of the table as (rows, columns).
c(nrow(DT), ncol(DT))
## [1] 902297 37
# List every column name available in the raw table.
names(DT)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
This data table is massive. Identifying which columns will be of use for this analysis and removing the non-useful columns.
# Columns needed for the health and economic analysis.
usefulcolumns <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
# Derive the columns to drop from the keep-list instead of repeating the names,
# and without copying the table just to read its column names.
badcolumns <- setdiff(colnames(DT), usefulcolumns)
# Assigning NULL with `:=` deletes the columns from DT in place. The guard
# keeps the step safe to re-run once the extra columns are already gone.
if (length(badcolumns) > 0) {
  DT[, (badcolumns) := NULL]
}
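Remove the records that have no injuries, fatalities, property damage, or crop damage. Then convert the damage exponent codes into numeric multipliers and compute the property and crop cost of each record.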
# Keep only records with a known event type and at least one recorded injury,
# fatality, property-damage amount, or crop-damage amount.
DT <- DT[(EVTYPE != "?" & (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0))]

# Multiplier for each damage-exponent code: digits are powers of ten,
# H/K/M/B are hundred/thousand/million/billion, and the symbols count as 1.
rules <- c(
  "0" = 10^0, "1" = 10^1, "2" = 10^2, "3" = 10^3, "4" = 10^4,
  "5" = 10^5, "6" = 10^6, "7" = 10^7, "8" = 10^8, "9" = 10^9,
  "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9,
  "-" = 10^0, "+" = 10^0, "?" = 10^0
)

# Look the codes up case-insensitively so that a lower-case "h", "k", "m", or
# "b" gets its real multiplier instead of silently falling through to 1.
# Event totals that include such records come out larger than with a
# case-sensitive lookup. unname() stops the lookup names from being carried
# into the column.
DT[, PROPDMGEXP := unname(rules[toupper(as.character(PROPDMGEXP))])]
# Blank or unrecognised codes have no entry in `rules` and come back as NA;
# treat them as a multiplier of 1.
DT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]

DT[, CROPDMGEXP := unname(rules[toupper(as.character(CROPDMGEXP))])]
DT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]

# Add the dollar cost of each record: reported amount times its multiplier.
DT <- DT[, .(
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP,
  PCOST = PROPDMG * PROPDMGEXP,
  CCOST = CROPDMG * CROPDMGEXP
)]
Next, sum the property and crop costs by event type, and keep only the ten event types with the highest cost in each category.
# Total property, crop, and combined cost for each event type.
ecoDT <- DT[
  ,
  list(
    PCOST = sum(PCOST),
    CCOST = sum(CCOST),
    TCOST = sum(PCOST) + sum(CCOST)
  ),
  by = "EVTYPE"
]
# Rank by crop cost, largest first, and keep the ten costliest event types.
crop_ecoDT <- head(ecoDT[order(-CCOST)], n = 10)
crop_ecoDT
## EVTYPE PCOST CCOST TCOST
## 1: DROUGHT 1046106000 13972566000 15018672000
## 2: FLOOD 144657709807 5661968450 150319678257
## 3: RIVER FLOOD 5118945500 5029459000 10148404500
## 4: ICE STORM 3944927860 5022113500 8967041360
## 5: HAIL 15730367518 3025537890 18755905408
## 6: HURRICANE 11868319010 2741910000 14610229010
## 7: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 8: FLASH FLOOD 16822673978 1421317100 18243991078
## 9: EXTREME COLD 67737400 1292973000 1360710400
## 10: FROST/FREEZE 9480000 1094086000 1103566000
# Rank by property cost, largest first, and keep the ten costliest event types.
prop_ecoDT <- head(ecoDT[order(-PCOST)], n = 10)
prop_ecoDT
## EVTYPE PCOST CCOST TCOST
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56935880688 414953270 57350833958
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: FLASH FLOOD 16822673978 1421317100 18243991078
## 6: HAIL 15730367518 3025537890 18755905408
## 7: HURRICANE 11868319010 2741910000 14610229010
## 8: TROPICAL STORM 7703890550 678346000 8382236550
## 9: WINTER STORM 6688497251 26944000 6715441251
## 10: HIGH WIND 5270046295 638571300 5908617595
# Total fatalities and injuries for each event type.
healthDT <- DT[
  ,
  list(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES)),
  by = "EVTYPE"
]
# Rank by fatalities, largest first, and keep the ten deadliest event types.
fatalDT <- head(healthDT[order(-FATALITIES)], n = 10)
fatalDT
## EVTYPE FATALITIES INJURIES
## 1: TORNADO 5633 91346
## 2: EXCESSIVE HEAT 1903 6525
## 3: FLASH FLOOD 978 1777
## 4: HEAT 937 2100
## 5: LIGHTNING 816 5230
## 6: TSTM WIND 504 6957
## 7: FLOOD 470 6789
## 8: RIP CURRENT 368 232
## 9: HIGH WIND 248 1137
## 10: AVALANCHE 224 170
# Rank by injuries, largest first, and keep the ten event types with the most.
injuryDT <- head(healthDT[order(-INJURIES)], n = 10)
injuryDT
## EVTYPE FATALITIES INJURIES
## 1: TORNADO 5633 91346
## 2: TSTM WIND 504 6957
## 3: FLOOD 470 6789
## 4: EXCESSIVE HEAT 1903 6525
## 5: LIGHTNING 816 5230
## 6: HEAT 937 2100
## 7: ICE STORM 89 1975
## 8: FLASH FLOOD 978 1777
## 9: THUNDERSTORM WIND 133 1488
## 10: HAIL 15 1361
# Reshape both top-ten tables to long form: one row per event type and cost
# measure, with the measure's name in Damage_Type and its amount in value.
prop_damage <- melt(
  prop_ecoDT,
  id.vars = "EVTYPE",
  variable.name = "Damage_Type"
)
crop_damage <- melt(
  crop_ecoDT,
  id.vars = "EVTYPE",
  variable.name = "Damage_Type"
)
The upcoming figure shows the effect of disasters on crops. Events that displace large amounts of water are common in all of them aside from Extreme Cold.
# Bar chart of crop damage for the ten event types with the highest crop cost.
# crop_damage holds one row per event type and cost measure (PCOST, CCOST,
# TCOST). Plotting all of them stacks the three measures into one bar, so keep
# only the crop-cost rows: each bar is then that event's crop damage, and the
# bars are ordered by it.
crop_cost <- crop_damage[Damage_Type == "CCOST"]
crop_eco <- ggplot(crop_cost, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("Event") +
  ylab("Cost($)") +
  ggtitle("Top 10 Natural Disaster Effects on Crop Damage") +
  theme(plot.title = element_text(hjust = 0.5))
crop_eco
The upcoming figure shows similarities to the prior figure; however, the loss of the extreme temperatures and the addition of the Tornado and High Wind events suggest that wind-related events have a higher chance of causing property rather than crop damage.
# Bar chart of property damage for the ten event types with the highest
# property cost.
# prop_damage holds one row per event type and cost measure (PCOST, CCOST,
# TCOST). Plotting all of them stacks the three measures into one bar, so keep
# only the property-cost rows: each bar is then that event's property damage,
# and the bars are ordered by it.
prop_cost <- prop_damage[Damage_Type == "PCOST"]
prop_eco <- ggplot(prop_cost, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("Event") +
  ylab("Cost($)") +
  ggtitle("Top 10 Natural Disaster Effects on Property Damage") +
  theme(plot.title = element_text(hjust = 0.5))
prop_eco
# Reshape the top-ten fatality table to long form: one row per event type and
# measure (FATALITIES or INJURIES), with the measure's name in `change`.
fatalities <- melt(
  fatalDT,
  id.vars = "EVTYPE",
  variable.name = "change"
)
The upcoming figure shows the devastating effects of tornadoes on U.S. mortality. This prominence is exemplified by the sum of the second tier of events (Excessive Heat, TSTM Wind, Flood, and Lightning) not even reaching the effects of tornadoes.
# Stacked bar chart of fatalities and injuries for the ten deadliest event
# types; the fill colour separates the two measures.
health <- ggplot(fatalities, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(mapping = aes(fill = change), stat = "identity") +
  labs(
    x = "Event",
    y = "Frequency",
    title = "Top 10 Natural Disaster Effects on Mortality"
  ) +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
health