Synopsis

This analysis uses NOAA Storm Data from 1950 - 2011 to assess the damage caused by different types of weather events, specifically regarding population health and economic consequences. We find that TORNADO has by far the biggest effect on human casualties, followed by HEAT, WIND, FLOOD and LIGHTNING. In terms of property and crop damage, FLOOD has by far the biggest effect, followed by HURRICANE/TYPHOON, TORNADO, STORM SURGE and HAIL.

Data Processing

The raw data is read directly from the source file, and used to create a secondary data frame with a reformatted date and only the columns relevant for our analysis:

stormraw <- read.csv("repdata%2Fdata%2FStormData.csv.bz2",stringsAsFactors = FALSE) 
stormraw$EVDATE <- as.Date(stormraw$BGN_DATE, "%m/%d/%Y")
stormdata <- stormraw[ ,c(38,7,8,23:28)]
names(stormdata)

[1] “EVDATE” “STATE” “EVTYPE” “FATALITIES” “INJURIES”
[6] “PROPDMG” “PROPDMGEXP” “CROPDMG” “CROPDMGEXP”

Some basic data quality checks were also performed (see Appendix).

Finally, a multiplier for the property and crop damage amounts was calculated and applied using a lookup table:

explkup <- rbind(c("k","1e+03"),c("K","1e+03"),
                 c("m","1e+06"),c("M","1e+06"),
                 c("b","1e+09"),c("B","1e+09"))

stormdata$PROPDMGMULT <- explkup[match(stormdata$PROPDMGEXP, explkup),2]
stormdata$PROPDMGMULT[is.na(stormdata$PROPDMGMULT)] <- 0
stormdata$PROPDMGX <- stormdata$PROPDMG * as.numeric(stormdata$PROPDMGMULT)

stormdata$CROPDMGMULT <- explkup[match(stormdata$CROPDMGEXP, explkup),2]
stormdata$CROPDMGMULT[is.na(stormdata$CROPDMGMULT)] <- 0
stormdata$CROPDMGX <- stormdata$CROPDMG * as.numeric(stormdata$CROPDMGMULT)

stormdata$EVTYPE <- as.factor(stormdata$EVTYPE)

Note that property and crop damage were considered as zeroes for records with invalid multipliers, as the volume was very low.

Results

Population Health

human <- stormdata %>% group_by(EVTYPE) %>% 
                summarise(FATALITIES = sum(FATALITIES),INJURIES = sum(INJURIES),
                          TOTAL = sum(FATALITIES + INJURIES)) %>%
                arrange(desc(TOTAL))
kable(human[1:5, ], "html", caption = "Top 5 weather events affecting population health") %>%
                kable_styling(bootstrap_options = "striped", full_width = F)
Top 5 weather events affecting population health
EVTYPE FATALITIES INJURIES TOTAL
TORNADO 5633 91346 96979
EXCESSIVE HEAT 1903 6525 8428
TSTM WIND 504 6957 7461
FLOOD 470 6789 7259
LIGHTNING 816 5230 6046
humancutoff <- 100
humanx <- subset(human, TOTAL > humancutoff & !(EVTYPE == "TORNADO"))
humanother <- subset(human, TOTAL <= humancutoff & !(EVTYPE == "TORNADO"))
humanx <- rbind(humanx, c("OTHER", as.numeric(sum(humanother$FATALITIES)),
                as.numeric(sum(humanother$INJURIES)),as.numeric(sum(humanother$TOTAL))))
humanlabel <- paste("OTHER: ",count(humanother)," categories with each total less than ",
                humancutoff,".",sep = "")

humanmelt <- humanx[,c(1:3)] %>% melt(id.vars = "EVTYPE")
names(humanmelt) <- c("EVTYPE", "HHTYPE", "HHVALUE")
humanmelt$HHVALUE <- as.numeric(humanmelt$HHVALUE)

humanplot <- ggplot(data = humanmelt, aes(x = EVTYPE, y = HHVALUE, fill = HHTYPE)) +  
                geom_bar(stat = "identity") + coord_flip() + 
                ggtitle("Number of human casualties by weather event type") +
                labs(x = "",y = "") + theme(legend.title = element_blank())
humanplot

OTHER: 943 categories with each total less than 100..
Note that ‘TORNADO’ was excluded from this graph, to avoid crowding out the other results.

Economic Consequences

econ <- stormdata %>% group_by(EVTYPE) %>% 
                summarise(PROPDMG = sum(PROPDMGX),CROPDMG = sum(CROPDMGX),
                  TOTAL = sum(PROPDMGX + CROPDMGX)) %>%
                arrange(desc(TOTAL))
kable(econ[1:5, ], "html", caption = "Top 5 weather events by economic consequence", 
                format.args=list(big.mark = ',')) %>%
                kable_styling(bootstrap_options = "striped", full_width = F)
Top 5 weather events by economic consequence
EVTYPE PROPDMG CROPDMG TOTAL
FLOOD 144,657,709,800 5,661,968,450 150,319,678,250
HURRICANE/TYPHOON 69,305,840,000 2,607,872,800 71,913,712,800
TORNADO 56,937,160,480 414,953,110 57,352,113,590
STORM SURGE 43,323,536,000 5,000 43,323,541,000
HAIL 15,732,266,720 3,025,954,450 18,758,221,170
econcutoff <- 1000000000
econx <- subset(econ, TOTAL > econcutoff & !(EVTYPE == "FLOOD"))
econother <- subset(econ, TOTAL <= econcutoff & !(EVTYPE == "FLOOD")) 
econx <- rbind(econx, c("OTHER", as.numeric(sum(econother$PROPDMG)),
                as.numeric(sum(econother$CROPDMG)),as.numeric(sum(econother$TOTAL))))
econlabel  <- paste("OTHER: ",count(econother)," categories with each total less than $",
                    format(econcutoff, big.mark=",", scientific=FALSE), sep = "")

econmelt <- econx[,c(1:3)] %>% melt(id.vars = "EVTYPE")
names(econmelt) <- c("EVTYPE", "ECONTYPE", "ECONVALUE")
econmelt$ECONVALUE <- as.numeric(econmelt$ECONVALUE)
econmelt$ECONVALUEM <- econmelt$ECONVALUE/1000000

econplot <- ggplot(data = econmelt, aes(x = EVTYPE, y = ECONVALUEM, fill = ECONTYPE)) +  
                geom_bar(stat = "identity") + coord_flip() + 
                ggtitle("Property/Crop damage by weather event type ($M)") +
                labs(x = "",y = "") + theme(legend.title = element_blank())
econplot

OTHER: 958 categories with each total less than $1,000,000,000.
Note that ‘FLOOD’ was excluded from this graph, to avoid crowding out the other results.

Appendix

Data quality checks results:

qa <- complete.cases(stormdata)
summary(qa)

Mode TRUE logical 902297

range(stormdata$FATALITIES)

[1] 0 583

range(stormdata$INJURIES)

[1] 0 1700

range(stormdata$PROPDMG)

[1] 0 5000

range(stormdata$CROPDMG)

[1] 0 990

propqa <- stormdata %>% group_by(PROPDMGEXP) %>% summarise(COUNT=n()) %>% arrange(desc(COUNT))
kable(propqa, "html") %>% kable_styling(bootstrap_options = "striped", full_width = F)
PROPDMGEXP COUNT
465934
K 424665
M 11330
0 216
B 40
5 28
1 25
2 13
? 8
m 7
H 6
5
7 5
3 4
4 4
6 4
1
8 1
h 1
cropqa <- stormdata %>% group_by(CROPDMGEXP) %>% summarise(COUNT=n()) %>% arrange(desc(COUNT))
kable(cropqa, "html") %>% kable_styling(bootstrap_options = "striped", full_width = F)
CROPDMGEXP COUNT
618413
K 281832
M 1994
k 21
0 19
B 9
? 7
2 1
m 1