Use the NOAA Storm Database to look at the Event Type (EVTYPE) that causes the most FATALITIES and INJURIES as well as the most Property Damage (PROPDMG). I will sort the data for most FATALITIES and INJURIES, sum by category and type, then plot across both categories. A similar analysis will be done for Property Damage (PROPDMG) to determine the top contributors to economic consequences.
library(ggplot2)
# Load the raw storm records from the compressed CSV. The header row and the
# comma separator are read.csv's defaults, so they are not spelled out.
data <- read.csv("repdata%2Fdata%2FStormData.csv.bz2")
This was interpreted as which Event Type creates the most fatalities and injuries.
First, sum up total Fatalities by Event Type, and total Injuries by Event Type.
# One row per EVTYPE holding the summed FATALITIES and INJURIES columns.
sumPeople <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = data,
  FUN = sum
)
Next, combine both data sets, sort FATALITIES for Top 5
# Ascending sort on FATALITIES (INJURIES breaks ties), so the largest
# fatality counts end up in the last rows.
fatINJor <- sumPeople[with(sumPeople, order(FATALITIES, INJURIES)), ]
Top 5 Events for FATALITIES
# The data is sorted ascending, so the last 5 rows are the top 5.
tail(fatINJor, 5)
## EVTYPE FATALITIES INJURIES
## 464 LIGHTNING 816 5230
## 275 HEAT 937 2100
## 153 FLASH FLOOD 978 1777
## 130 EXCESSIVE HEAT 1903 6525
## 834 TORNADO 5633 91346
Top 5 Events for INJURIES
# Re-sort ascending on INJURIES (FATALITIES breaks ties) and show the last
# 5 rows, i.e. the event types with the most injuries.
fatINJor <- sumPeople[with(sumPeople, order(INJURIES, FATALITIES)), ]
tail(fatINJor, 5)
## EVTYPE FATALITIES INJURIES
## 464 LIGHTNING 816 5230
## 130 EXCESSIVE HEAT 1903 6525
## 170 FLOOD 470 6789
## 856 TSTM WIND 504 6957
## 834 TORNADO 5633 91346
Subset original data to include Top 7 Event Types for BOTH Fatalities and Injuries, then find Total Fatalities and Injuries by Event Type
# Keep only the records whose EVTYPE is one of the seven event types that
# appear in either top-5 list (fatalities or injuries).
harmful_events <- c(
  "TORNADO", "TSTM WIND", "FLOOD", "EXCESSIVE HEAT",
  "LIGHTNING", "FLASH FLOOD", "HEAT"
)
topsev <- subset(data, EVTYPE %in% harmful_events)
Find Totals for FATALITIES & INJURIES
# Summed FATALITIES and INJURIES for each of the selected event types.
topsevTotal <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = topsev,
  FUN = sum
)
print(topsevTotal)
## EVTYPE FATALITIES INJURIES
## 1 EXCESSIVE HEAT 1903 6525
## 2 FLASH FLOOD 978 1777
## 3 FLOOD 470 6789
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TORNADO 5633 91346
## 7 TSTM WIND 504 6957
The weather event that is most harmful in relation to population health in the US is Tornadoes, which cause by far the most injuries and fatalities. The next most detrimental event is Excessive Heat, which produces slightly more fatalities than the cluster of the remaining 5 Events (Flash Flood, Flood, Heat, Lightning, Thunderstorm Wind).
Plot Fatalities VS Injuries by Event Type
library(ggplot2)
# Scatter plot of total FATALITIES (x) against total INJURIES (y), one point
# per event type, coloured by EVTYPE. Built with ggplot() rather than the
# deprecated qplot() shortcut.
people_plot <- ggplot(
  topsevTotal,
  aes(x = FATALITIES, y = INJURIES, color = EVTYPE)
) +
  geom_point() +
  ggtitle("Fatalities And Injuries by 7 Weather Events in US")
print(people_plot)
# Pass the plot explicitly so the saved file does not depend on whichever plot
# happened to be drawn last.
ggsave("people.png", plot = people_plot)
## Saving 7 x 5 in image
# ggsave() opens and closes its own graphics device, so only shut a device
# down here if one is actually open; dev.off() on the null device is an error.
if (dev.cur() > 1L) dev.off()
## null device
## 1
Sum up the Property Damage & Crop Damage amounts by Event Type, and reorder
# Per-event-type sums of the raw PROPDMG and CROPDMG columns (the *EXP
# columns are not applied here), sorted ascending by PROPDMG.
damsum <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = data,
  FUN = sum
)
damsum <- damsum[order(damsum[["PROPDMG"]]), ]
Top 10 Property Damaging Events
# The data is sorted ascending, so the last 10 rows are the 10 largest
# PROPDMG sums.
tail(damsum, 10)
## EVTYPE PROPDMG CROPDMG
## 972 WINTER STORM 132720.6 1978.99
## 359 HIGH WIND 324731.6 17283.21
## 786 THUNDERSTORM WINDS 446293.2 18684.93
## 464 LIGHTNING 603351.8 3580.61
## 244 HAIL 688693.4 579596.28
## 760 THUNDERSTORM WIND 876844.2 66791.45
## 170 FLOOD 899938.5 168037.88
## 856 TSTM WIND 1335965.6 109202.60
## 153 FLASH FLOOD 1420124.6 179200.46
## 834 TORNADO 3212258.2 100018.52
Top 10 Crop Damaging Events
damsum <- damsum[order(damsum$CROPDMG), ]
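Subset original data to include only the top Economically Damaging Events. Note: the filter below matches only 7 of the 10 event types listed above; FLOOD, LIGHTNING, and HIGH WIND are not included.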
# Keep only the records for the selected high-damage event types.
# NOTE(review): this list has 7 distinct event types; FLOOD, LIGHTNING and
# HIGH WIND from the top-10 PROPDMG table are not in it - confirm whether that
# omission is intended before relying on the totals derived from top9.
damaging_events <- c(
  "TORNADO", "FLASH FLOOD", "TSTM WIND", "THUNDERSTORM WINDS",
  "THUNDERSTORM WIND", "HAIL", "WINTER STORM"
)
top9 <- subset(data, EVTYPE %in% damaging_events)
Subset Data to Only Needed Columns
# Columns needed for the damage analysis: the event type plus the damage
# amounts and their exponent codes.
sub <- c(
  "EVTYPE",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
top9 <- top9[, sub, drop = FALSE]
Adjust Multiplier Values for Property Damage Found Here: https://rstudio-pubs-static.s3.amazonaws.com/58957_37b6723ee52b455990e149edde45e5b6.html
# List the distinct property-damage exponent codes present in the subset.
unique(top9[["PROPDMGEXP"]])
## [1] K M B 0 5 m 6 ? 4 2 3 7 H + 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Translate each PROPDMGEXP code into a numeric multiplier for PROPDMG.
# Codes are upper-cased first so that lowercase variants present in the data
# (e.g. "m") get the same multiplier as their uppercase form instead of being
# left unmapped. Codes without an entry (including the empty string) yield NA.
# NOTE(review): "0" and "1" both map to 10 while digits 2-8 map to 10^digit -
# confirm against the exponent reference that this is intended.
prop_exp <- toupper(as.character(top9$PROPDMGEXP))
prop_mult <- c(
  "H" = 1e+2, "K" = 1e+3, "M" = 1e+6, "B" = 1e+9,
  "0" = 1e+1, "1" = 10, "2" = 1e+2, "3" = 1e+3, "4" = 1e+4,
  "5" = 1e+5, "6" = 1e+6, "7" = 1e+7, "8" = 1e+8,
  "+" = 0, "?" = 0, "-" = 0
)
top9$PROPDMGMULT <- unname(prop_mult[prop_exp])
Adjust Multiplier Values for Crop Damage
# List the distinct crop-damage exponent codes present in the subset.
unique(top9[["CROPDMGEXP"]])
## [1] K M 0 ? k 2
## Levels: ? 0 2 B k K m M
# Translate each CROPDMGEXP code into a numeric multiplier for CROPDMG.
# Codes are upper-cased first so "k"/"K" and "m"/"M" are treated alike, and
# "B" (billions) is mapped as well so such rows are not left unmapped.
# "+", "?", "-", "0" and the empty code all mean a multiplier of 0; any other
# unlisted code yields NA.
crop_exp <- toupper(as.character(top9$CROPDMGEXP))
crop_lookup <- c(
  "H" = 1e+2, "K" = 1e+3, "M" = 1e+6, "B" = 1e+9,
  "2" = 1e+2,
  "0" = 0, "+" = 0, "?" = 0, "-" = 0
)
crop_mult <- unname(crop_lookup[crop_exp])
# An empty string can never match a name in the lookup, so handle it here.
crop_mult[crop_exp %in% ""] <- 0
top9$CROPDMGMULT <- crop_mult
There are a few values in the original data set that have CROPDMGEXP marked as B, but they were not in the top 10 significant events and will not have significant effects on final totals.
# Count the records in the full data set whose crop exponent code is "B".
sum(data$CROPDMGEXP == "B", na.rm = TRUE)
## [1] 9
# Show those "B" records, restricted to the analysis columns named in `sub`.
data[which(data$CROPDMGEXP == "B"), sub]
## EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 188633 HEAT 0.00 0.40 B
## 198389 RIVER FLOOD 5.00 B 5.00 B
## 199733 DROUGHT 0.00 0.50 B
## 201256 FREEZE 0.00 0.20 B
## 211900 ICE STORM 500.00 K 5.00 B
## 581537 HURRICANE/TYPHOON 5.88 B 1.51 B
## 639347 DROUGHT 0.00 1.00 B
## 899222 DROUGHT 0.00 K 0.00 B
## 899608 DROUGHT 0.00 K 0.00 B
Adjust Property Damage & Crop Damage Values by Exponent Value
# Replace every NA cell with 0 (this covers multipliers that were never
# assigned), then scale both damage columns by their multipliers in place.
missing_cells <- is.na(top9)
top9[missing_cells] <- 0
top9$PROPDMG <- with(top9, PROPDMG * PROPDMGMULT)
top9$CROPDMG <- with(top9, CROPDMG * CROPDMGMULT)
head(top9)
## EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP PROPDMGMULT CROPDMGMULT
## 1 TORNADO 25000 K 0 1000 0
## 2 TORNADO 2500 K 0 1000 0
## 3 TORNADO 25000 K 0 1000 0
## 4 TORNADO 2500 K 0 1000 0
## 5 TORNADO 2500 K 0 1000 0
## 6 TORNADO 2500 K 0 1000 0
Sum up total Damage by Property/Crop and Event Type
# Total scaled property and crop damage per event type.
damsum <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = top9,
  FUN = sum
)
head(damsum)
## EVTYPE PROPDMG CROPDMG
## 1 FLASH FLOOD 16822676125 1421317100
## 2 HAIL 15730369577 3025954450
## 3 THUNDERSTORM WIND 3483123640 414843050
## 4 THUNDERSTORM WINDS 1942142931 190654700
## 5 TORNADO 56935881815 414953110
## 6 TSTM WIND 4484928990 554007350
# Number of event types in the aggregated table.
dim(damsum)[1]
## [1] 7
Reorder data By Property Damage, then Crop Damage to see Top 10 Events
# Sort ascending by property damage, using crop damage to break ties (the
# original passed PROPDMG twice, so the tie-breaker had no effect).
damsum <- damsum[order(damsum$PROPDMG, damsum$CROPDMG), ]
tail(damsum)
## EVTYPE PROPDMG CROPDMG
## 3 THUNDERSTORM WIND 3483123640 414843050
## 6 TSTM WIND 4484928990 554007350
## 7 WINTER STORM 6688497260 26944000
## 2 HAIL 15730369577 3025954450
## 1 FLASH FLOOD 16822676125 1421317100
## 5 TORNADO 56935881815 414953110
# Sort ascending by crop damage and show the first rows (smallest totals).
damsum <- damsum[order(damsum[["CROPDMG"]]), ]
head(damsum)
## EVTYPE PROPDMG CROPDMG
## 7 WINTER STORM 6688497260 26944000
## 4 THUNDERSTORM WINDS 1942142931 190654700
## 3 THUNDERSTORM WIND 3483123640 414843050
## 5 TORNADO 56935881815 414953110
## 6 TSTM WIND 4484928990 554007350
## 1 FLASH FLOOD 16822676125 1421317100
# Total damage per event type = property damage + crop damage.
damsum[["TTLDMG"]] <- with(damsum, PROPDMG + CROPDMG)
head(damsum)
## EVTYPE PROPDMG CROPDMG TTLDMG
## 7 WINTER STORM 6688497260 26944000 6715441260
## 4 THUNDERSTORM WINDS 1942142931 190654700 2132797631
## 3 THUNDERSTORM WIND 3483123640 414843050 3897966690
## 5 TORNADO 56935881815 414953110 57350834925
## 6 TSTM WIND 4484928990 554007350 5038936340
## 1 FLASH FLOOD 16822676125 1421317100 18243993225
Plot Total Damages By Event Type
# Point plot of total damage per event type; the x-axis tick labels are
# hidden because the colour legend already identifies each event type.
g <- ggplot(data = damsum, mapping = aes(x = EVTYPE, y = TTLDMG))
print(
  g +
    geom_point(mapping = aes(color = EVTYPE)) +
    theme(axis.text.x = element_blank()) +
    labs(x = "Weather Event", y = "Monetary Property & Crop Damage") +
    ggtitle("Monetary Property Damage for US by Top 10 Weather Events")
)
# Write the most recently drawn plot to disk.
ggsave(filename = "money.png")
## Saving 7 x 5 in image
# ggsave() opens and closes its own graphics device, so only shut a device
# down here if one is actually open; dev.off() on the null device is an error.
if (dev.cur() > 1L) dev.off()
## null device
## 1
Sum up 3 Thunderstorm categories to see if they beat out damage by Tornadoes
# Combined property + crop damage of the three thunderstorm-wind event types.
# Rows are selected by EVTYPE instead of by position, so the result does not
# depend on how damsum happens to be sorted at this point.
# NOTE(review): despite its name, tornDam holds the thunderstorm total, which
# is compared against the tornado row afterwards.
storm_types <- c("THUNDERSTORM WIND", "THUNDERSTORM WINDS", "TSTM WIND")
storm_rows <- damsum$EVTYPE %in% storm_types
tornDam <- sum(damsum$PROPDMG[storm_rows]) + sum(damsum$CROPDMG[storm_rows])
tornDam
## [1] 11069700661
# Show the TORNADO row, looked up by event type rather than by row position
# so it stays correct regardless of the current sort order of damsum.
damsum[damsum$EVTYPE == "TORNADO", ]
## EVTYPE PROPDMG CROPDMG TTLDMG
## 5 TORNADO 56935881815 414953110 57350834925
Tornado damage still beats out the sum of all Thunderstorm types.
The weather event that is most economically damaging is also Tornadoes, beating out the next closest event by a significant amount.