Synopsis

Use the NOAA Storm Database to identify the Event Type (EVTYPE) that causes the most FATALITIES and INJURIES, as well as the most Property Damage (PROPDMG). I will sum FATALITIES and INJURIES by event type, sort the totals to find the top events in each category, then plot across both categories. A similar analysis will be done for Property Damage (PROPDMG) to determine the top contributors to economic consequences.

Data Processing

Requirements to load data:

  • R version 3.4.1
  • data file named “repdata%2Fdata%2FStormData.csv.bz2” in your working directory
library(ggplot2)

# Load the raw storm records. read.csv() reads the bz2-compressed file
# directly and already defaults to a header row and a comma separator.
data <- read.csv(file = "repdata%2Fdata%2FStormData.csv.bz2")

Results Part 1 - Which EVTYPE (Event Type) is Most Harmful to Health?

This was interpreted as which Event Type creates the most fatalities and injuries.

First, sum up total Fatalities by Event Type, and total Injuries by Event Type.

# Per-event-type totals of deaths and injuries: one row per EVTYPE.
sumPeople <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = data,
  FUN = sum
)

Next, combine both data sets, sort FATALITIES for Top 5

# Ascending sort by fatalities (injuries break ties), so the deadliest event
# types end up in the last rows.
fatINJor <- sumPeople[with(sumPeople, order(FATALITIES, INJURIES)), ]

Top 5 Events for FATALITIES

# The sort is ascending, so the last five rows are the five largest totals.
tail(x = fatINJor, n = 5L)
##             EVTYPE FATALITIES INJURIES
## 464      LIGHTNING        816     5230
## 275           HEAT        937     2100
## 153    FLASH FLOOD        978     1777
## 130 EXCESSIVE HEAT       1903     6525
## 834        TORNADO       5633    91346

Top 5 Events for INJURIES

# Re-sort ascending by injuries (fatalities break ties) and show the last five
# rows, i.e. the event types with the most injuries.
fatINJor <- sumPeople[with(sumPeople, order(INJURIES, FATALITIES)), ]
tail(x = fatINJor, n = 5L)
##             EVTYPE FATALITIES INJURIES
## 464      LIGHTNING        816     5230
## 130 EXCESSIVE HEAT       1903     6525
## 170          FLOOD        470     6789
## 856      TSTM WIND        504     6957
## 834        TORNADO       5633    91346

Subset original data to include the Top 7 Event Types for BOTH Fatalities and Injuries, then find Total Fatalities and Injuries by Event Type

# Keep only the records whose event type appears in either top-five list
# (seven distinct types in total).
topsev <- subset(
  data,
  EVTYPE %in% c(
    "TORNADO", "TSTM WIND", "FLOOD", "EXCESSIVE HEAT",
    "LIGHTNING", "FLASH FLOOD", "HEAT"
  )
)

Find Totals for FATALITIES & INJURIES

# Total fatalities and injuries for each of the selected event types.
topsevTotal <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = topsev,
  FUN = sum
)
topsevTotal
##           EVTYPE FATALITIES INJURIES
## 1 EXCESSIVE HEAT       1903     6525
## 2    FLASH FLOOD        978     1777
## 3          FLOOD        470     6789
## 4           HEAT        937     2100
## 5      LIGHTNING        816     5230
## 6        TORNADO       5633    91346
## 7      TSTM WIND        504     6957

The weather event that is most harmful to population health in the US is the Tornado, which causes by far the most injuries and fatalities. The next most detrimental event is Excessive Heat, which produces more fatalities than any of the remaining 5 Events (Flash Flood, Flood, Heat, Lightning, Thunderstorm Wind).

Plot Fatalities VS Injuries by Event Type

library(ggplot2)
# Scatter of total fatalities against total injuries, one coloured point per
# event type. Built with ggplot() because qplot() is deprecated in ggplot2;
# the resulting plot (points, axis labels, title, colour legend) is the same.
ggplot(topsevTotal, aes(x = FATALITIES, y = INJURIES, color = EVTYPE)) +
  geom_point() +
  ggtitle("Fatalities And Injuries by 7 Weather Events in US")

# Save the plot to people.png in the working directory. No plot argument is
# given, so ggsave() falls back to its default choice of plot.
ggsave("people.png")
## Saving 7 x 5 in image
# Close the currently active graphics device.
# NOTE(review): ggsave() writes the file on its own, so this call is probably
# not needed for the PNG — confirm before removing.
dev.off()
## null device 
##           1

Results Part 2 - Which EVTYPE (Event Type) Has Most Economic Consequences?

Sum up the Property Damage & Crop Damage amounts by Event Type, and reorder

# Total the raw (not yet exponent-scaled) PROPDMG and CROPDMG figures for
# every event type, then sort ascending by property damage so the largest
# totals sit in the last rows.
damsum <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = data,
  FUN = sum
)
damsum <- damsum[order(damsum[["PROPDMG"]]), ]

Top 10 Property Damaging Events

# The sort is ascending, so the last ten rows are the ten largest totals.
tail(x = damsum, n = 10L)
##                 EVTYPE   PROPDMG   CROPDMG
## 972       WINTER STORM  132720.6   1978.99
## 359          HIGH WIND  324731.6  17283.21
## 786 THUNDERSTORM WINDS  446293.2  18684.93
## 464          LIGHTNING  603351.8   3580.61
## 244               HAIL  688693.4 579596.28
## 760  THUNDERSTORM WIND  876844.2  66791.45
## 170              FLOOD  899938.5 168037.88
## 856          TSTM WIND 1335965.6 109202.60
## 153        FLASH FLOOD 1420124.6 179200.46
## 834            TORNADO 3212258.2 100018.52

Top 10 Crop Damaging Events

damsum <- damsum[order(damsum$CROPDMG), ]

Subset original data to include only the top 10 Economically Damaging Events

# Restrict the data to the selected high-damage event types (seven distinct
# names).
# NOTE(review): FLOOD, LIGHTNING and HIGH WIND appear in the top-10 table
# above but are not selected here — confirm whether that is intentional.
top9 <- subset(
  data,
  EVTYPE %in% c(
    "TORNADO", "FLASH FLOOD", "TSTM WIND", "THUNDERSTORM WINDS",
    "THUNDERSTORM WIND", "HAIL", "WINTER STORM"
  )
)

Subset Data to Only Needed Columns

# Columns needed for the damage calculation: the event type plus each damage
# figure and its exponent code.
sub <- c(
  "EVTYPE",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
top9 <- top9[, sub, drop = FALSE]

Adjust Multiplier Values for Property Damage Found Here: https://rstudio-pubs-static.s3.amazonaws.com/58957_37b6723ee52b455990e149edde45e5b6.html

# List the distinct property-damage exponent codes present in the subset.
unique(top9[["PROPDMGEXP"]])
##  [1] K M   B 0 5 m 6 ? 4 2 3 7 H + 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Translate each PROPDMGEXP code into a numeric multiplier.
# Codes are upper-cased before the lookup so that lower-case variants such as
# "m" (present in this subset) and "h" receive the same multiplier as "M" and
# "H" instead of being left without one. Codes that have no entry in the
# table, including the blank code, get NA.
# NOTE(review): "0" and "1" both map to 10 while the other digits are treated
# as powers of ten — kept as-is; confirm against the linked write-up.
prop_codes <- c(
  "K", "3", "M", "6", "B", "0", "5", "4",
  "2", "H", "7", "+", "?", "-", "1", "8"
)
prop_mults <- c(
  1e+3, 1e+3, 1e+6, 1e+6, 1e+9, 1e+1, 1e+5, 1e+4,
  1e+2, 1e+2, 1e+7, 0, 0, 0, 10, 1e+8
)
top9$PROPDMGMULT <- prop_mults[
  match(toupper(as.character(top9$PROPDMGEXP)), prop_codes)
]

Adjust Multiplier Values for Crop Damage

# List the distinct crop-damage exponent codes present in the subset.
unique(top9[["CROPDMGEXP"]])
## [1]   K M 0 ? k 2
## Levels:  ? 0 2 B k K m M
# Translate each CROPDMGEXP code into a numeric multiplier.
# Codes are upper-cased before the lookup, so "k"/"K" and "m"/"M" are treated
# alike, and "B" (billions) is included so that such rows would be scaled
# rather than left without a multiplier. The blank code and the symbols
# "+", "?", "-" and "0" map to 0; any code not in the table gets NA.
crop_codes <- c("K", "M", "B", "+", "?", "-", "0", "", "2", "H")
crop_mults <- c(1e+3, 1e+6, 1e+9, 0, 0, 0, 0, 0, 1e+2, 1e+2)
top9$CROPDMGMULT <- crop_mults[
  match(toupper(as.character(top9$CROPDMGEXP)), crop_codes)
]

There are a few values in the original data set that have CROPDMGEXP marked as B, but they were not in the top 10 significant events and will not have significant effects on the final totals.

# Count the records in the full data set whose crop exponent is "B".
sum(data$CROPDMGEXP == "B", na.rm = TRUE)
## [1] 9
# Show those records, limited to the damage-related columns named in `sub`.
data[which(data$CROPDMGEXP == "B"), sub]
##                   EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 188633              HEAT    0.00               0.40          B
## 198389       RIVER FLOOD    5.00          B    5.00          B
## 199733           DROUGHT    0.00               0.50          B
## 201256            FREEZE    0.00               0.20          B
## 211900         ICE STORM  500.00          K    5.00          B
## 581537 HURRICANE/TYPHOON    5.88          B    1.51          B
## 639347           DROUGHT    0.00               1.00          B
## 899222           DROUGHT    0.00          K    0.00          B
## 899608           DROUGHT    0.00          K    0.00          B

Adjust Property Damage & Crop Damage Values by Exponent Value

# Rows whose exponent code received no multiplier hold NA; turn every NA in
# the table into 0 so those rows contribute nothing to the totals.
top9[is.na(top9)] <- 0
# Scale the recorded amounts by their multipliers, overwriting the raw values.
top9 <- transform(
  top9,
  PROPDMG = PROPDMG * PROPDMGMULT,
  CROPDMG = CROPDMG * CROPDMGMULT
)
head(top9)
##    EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP PROPDMGMULT CROPDMGMULT
## 1 TORNADO   25000          K       0                   1000           0
## 2 TORNADO    2500          K       0                   1000           0
## 3 TORNADO   25000          K       0                   1000           0
## 4 TORNADO    2500          K       0                   1000           0
## 5 TORNADO    2500          K       0                   1000           0
## 6 TORNADO    2500          K       0                   1000           0

Sum up total Damage by Property/Crop and Event Type

# Total the scaled property and crop damage for each selected event type.
damsum <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = top9,
  FUN = sum
)
head(damsum)
##               EVTYPE     PROPDMG    CROPDMG
## 1        FLASH FLOOD 16822676125 1421317100
## 2               HAIL 15730369577 3025954450
## 3  THUNDERSTORM WIND  3483123640  414843050
## 4 THUNDERSTORM WINDS  1942142931  190654700
## 5            TORNADO 56935881815  414953110
## 6          TSTM WIND  4484928990  554007350
# Number of event types in the summary (one row per type).
dim(damsum)[1L]
## [1] 7

Reorder data By Property Damage, then Crop Damage to see Top 10 Events

# Sort ascending by property damage, breaking ties by crop damage (the second
# key previously repeated PROPDMG and therefore had no effect). The largest
# property-damage totals end up in the last rows.
damsum <- damsum[order(damsum$PROPDMG, damsum$CROPDMG), ]
tail(damsum)
##              EVTYPE     PROPDMG    CROPDMG
## 3 THUNDERSTORM WIND  3483123640  414843050
## 6         TSTM WIND  4484928990  554007350
## 7      WINTER STORM  6688497260   26944000
## 2              HAIL 15730369577 3025954450
## 1       FLASH FLOOD 16822676125 1421317100
## 5           TORNADO 56935881815  414953110
# Sort ascending by crop damage, breaking ties by property damage (the second
# key previously repeated CROPDMG and therefore had no effect).
damsum <- damsum[order(damsum$CROPDMG, damsum$PROPDMG), ]
head(damsum)
##               EVTYPE     PROPDMG    CROPDMG
## 7       WINTER STORM  6688497260   26944000
## 4 THUNDERSTORM WINDS  1942142931  190654700
## 3  THUNDERSTORM WIND  3483123640  414843050
## 5            TORNADO 56935881815  414953110
## 6          TSTM WIND  4484928990  554007350
## 1        FLASH FLOOD 16822676125 1421317100
# Total damage per event type = property damage + crop damage.
damsum[["TTLDMG"]] <- with(damsum, PROPDMG + CROPDMG)
head(damsum)
##               EVTYPE     PROPDMG    CROPDMG      TTLDMG
## 7       WINTER STORM  6688497260   26944000  6715441260
## 4 THUNDERSTORM WINDS  1942142931  190654700  2132797631
## 3  THUNDERSTORM WIND  3483123640  414843050  3897966690
## 5            TORNADO 56935881815  414953110 57350834925
## 6          TSTM WIND  4484928990  554007350  5038936340
## 1        FLASH FLOOD 16822676125 1421317100 18243993225

Plot Total Damages By Event Type

# One coloured point per event type showing its total (property + crop)
# damage. The x-axis tick labels are hidden; the colour legend identifies the
# event types instead.
g <- ggplot(data = damsum, mapping = aes(x = EVTYPE, y = TTLDMG))
g +
  geom_point(mapping = aes(color = EVTYPE)) +
  ggtitle("Monetary Property Damage for US by Top 10 Weather Events") +
  labs(x = "Weather Event", y = "Monetary Property & Crop Damage") +
  theme(axis.text.x = element_blank())

# Save the plot to money.png in the working directory. No plot argument is
# given, so ggsave() falls back to its default choice of plot.
ggsave("money.png")
## Saving 7 x 5 in image
# Close the currently active graphics device.
# NOTE(review): ggsave() writes the file on its own, so this call is probably
# not needed for the PNG — confirm before removing.
dev.off()
## null device 
##           1

Sum up 3 Thunderstorm categories to see if they beat out damage by Tornados

# Combined property + crop damage of the three thunderstorm-wind event types.
# Rows are selected by event name rather than by position, so the result does
# not depend on how damsum happens to be sorted at this point.
# Despite its name, tornDam holds the thunderstorm total that is compared
# against tornado damage.
thunder_types <- c("THUNDERSTORM WINDS", "THUNDERSTORM WIND", "TSTM WIND")
thunder_rows <- damsum$EVTYPE %in% thunder_types
tornDam <- sum(damsum$PROPDMG[thunder_rows], damsum$CROPDMG[thunder_rows])
tornDam
## [1] 11069700661
# Show the tornado totals for comparison, selected by event name so the
# lookup does not depend on the current row order of damsum.
damsum[damsum$EVTYPE == "TORNADO", ]
##    EVTYPE     PROPDMG   CROPDMG      TTLDMG
## 5 TORNADO 56935881815 414953110 57350834925

Tornado damage still beats out the sum of all Thunderstorm types.

The weather event that is most economically damaging is also the Tornado, beating out the next closest event by a significant amount.

Overall, the most damaging event in the US in relation to people, property and crops is Tornadoes.