Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. NOAA storm database. This database tracks characteristics of major storms and weather events in the US, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Q1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Q2 : Across the United States, which types of events have the greatest economic consequences?

Data load into R

# Load the NOAA storm data into `df`.
# An already-extracted CSV is used when present (the original workflow);
# otherwise the compressed archive is used, downloading it first if needed.
# read.csv() can read a bzip2-compressed file directly, so no separate
# bunzip2 step (and no R.utils dependency) is required.
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
csv_path <- "./Data/StormData.csv"
bz2_path <- "./Data/StormData.csv.bz2"

if (!file.exists(csv_path) && !file.exists(bz2_path)) {
  dir.create(dirname(bz2_path), showWarnings = FALSE, recursive = TRUE)
  # mode = "wb" keeps the binary archive intact on Windows.
  download.file(data_url, bz2_path, mode = "wb")
}

df <- read.csv(if (file.exists(csv_path)) csv_path else bz2_path)
#head(df,5)

Q1: Population Health Impact

1.Data Processing

# Keep only the columns needed for the population-health question.
# Columns are selected by name rather than by position (8, 23, 24) so the
# subset stays correct even if the column order of the input changes.
df2 <- df[, c("EVTYPE", "FATALITIES", "INJURIES")]
#head(df2)

# Top 10 fatalities by EVTYPE: sum per event type, then sort descending.
aggFatalities <- aggregate(FATALITIES ~ EVTYPE, data = df2, FUN = sum)
aggFatalities <- aggFatalities[order(-aggFatalities$FATALITIES), ]
head(aggFatalities, 10)
##             EVTYPE FATALITIES
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504
## 170          FLOOD        470
## 585    RIP CURRENT        368
## 359      HIGH WIND        248
## 19       AVALANCHE        224
# Top 10 injuries by EVTYPE
# Sum injuries within each event type, then rank from most to fewest.
injury_totals <- aggregate(INJURIES ~ EVTYPE, data = df2, FUN = sum)
injury_rank <- order(-injury_totals[["INJURIES"]])
aggInjuries <- injury_totals[injury_rank, ]
head(aggInjuries, n = 10)
##                EVTYPE INJURIES
## 834           TORNADO    91346
## 856         TSTM WIND     6957
## 170             FLOOD     6789
## 130    EXCESSIVE HEAT     6525
## 464         LIGHTNING     5230
## 275              HEAT     2100
## 427         ICE STORM     1975
## 153       FLASH FLOOD     1777
## 760 THUNDERSTORM WIND     1488
## 244              HAIL     1361

2.Visualization for top 10 events with the highest Fatalities and Injuries by EVTYPE

# Draw the two top-10 bar charts side by side.
# par() returns the previous values of the settings it changes; they are kept
# so the layout and margins can be restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)

# head() rather than [1:10, ]: with fewer than ten event types, 1:10 indexing
# would pad the result with all-NA rows.
fatal10 <- head(aggFatalities, 10)
injury10 <- head(aggInjuries, 10)

# las = 2 rotates the event-type labels so long names fit under the bars.
barplot(fatal10$FATALITIES, las = 2,
        names.arg = fatal10$EVTYPE,
        main = "Top 10 Events with Highest Fatalities",
        ylab = "Number of fatalities", col = "orange")

barplot(injury10$INJURIES, las = 2,
        names.arg = injury10$EVTYPE,
        main = "Top 10 Events with Highest Injuries",
        ylab = "Number of injuries", col = "orange")

# Put the graphics settings back so later base plots are not affected.
par(old_par)

3.Results

Based on the table and graph, the events causing the most fatalities are Tornado, Excessive Heat and Flash Flood, among others. The events causing the most injuries are Tornado, Tstm Wind and Flood, among others.

Q2: Economic Consequences

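The data records two types of economic impact: property damage (PROPDMG) and crop damage (CROPDMG). Each amount is paired with an exponent code (PROPDMGEXP, CROPDMGEXP) that gives the magnitude of the US$ figure, e.g. K for thousands, M for millions and B for billions.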

1.Data Processing

#names(df)
# Columns needed for the economic question: event type, the two damage
# amounts, and their exponent codes. Selected by name rather than by position
# (8, 25-28) so the subset does not depend on the input's column order.
df3 <- df[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
head(df3)
##    EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO    25.0          K       0           
## 2 TORNADO     2.5          K       0           
## 3 TORNADO    25.0          K       0           
## 4 TORNADO     2.5          K       0           
## 5 TORNADO     2.5          K       0           
## 6 TORNADO     2.5          K       0
# List the distinct PROPDMGEXP codes in sorted order.
prop_exp_codes <- unique(as.character(df3$PROPDMGEXP))
Symbol <- sort(prop_exp_codes)
print(Symbol)
##  [1] ""  "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
# Exponent code -> multiplier (codes are matched case-insensitively):
#   H, h            -> hundreds = x10^2
#   K, k            -> kilos    = x10^3
#   M, m            -> millions = x10^6
#   B, b            -> billions = x10^9
#   digits 0..8     -> x10
#   (+)             -> x1
#   (-), (?), blank -> x0
#
# Each symbol is paired with its multiplier explicitly. Pairing a multiplier
# vector with the output of sort() by position is fragile, because the sort
# order of punctuation and of upper/lower case depends on the locale.
exp_symbols <- c("", "-", "?", "+", as.character(0:8), "H", "K", "M", "B")
Multiplier <- c(0, 0, 0, 1, rep(10, 9), 10^2, 10^3, 10^6, 10^9)
convertdf <- data.frame(Symbol = exp_symbols, Multiplier = Multiplier)

# Translate a vector of exponent codes into numeric multipliers.
# Codes are upper-cased first so that a lower-case code is recognised even
# when it only occurs in one of the two EXP columns (e.g. a lower-case "k"
# in CROPDMGEXP). Unrecognised codes are reported and counted as x0, like
# the other invalid codes; left as NA they would make TotalDmg NA and the
# whole row would be dropped silently by aggregate().
exp_to_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  mult <- convertdf$Multiplier[match(code, convertdf$Symbol)]
  unknown <- is.na(mult)
  if (any(unknown)) {
    warning("Unrecognised damage exponent code(s) treated as x0: ",
            paste(unique(code[unknown]), collapse = ", "),
            call. = FALSE)
    mult[unknown] <- 0
  }
  mult
}

df3$PropMulti <- exp_to_multiplier(df3$PROPDMGEXP)
df3$CropMulti <- exp_to_multiplier(df3$CROPDMGEXP)
df3$PropDmg <- df3$PROPDMG * df3$PropMulti
df3$CropDmg <- df3$CROPDMG * df3$CropMulti
df3$TotalDmg <- df3$PropDmg + df3$CropDmg

# Total damage per event type, largest first.
# NOTE(review): a table rendered before the case-insensitive matching was
# introduced may differ slightly from what this now prints; re-run to refresh.
aggTotalDmg <- aggregate(TotalDmg ~ EVTYPE, data = df3, FUN = sum)
aggTotalDmg <- aggTotalDmg[order(-aggTotalDmg$TotalDmg), ]
head(aggTotalDmg, 10)
##                EVTYPE     TotalDmg
## 170             FLOOD 150319678250
## 411 HURRICANE/TYPHOON  71913712800
## 834           TORNADO  57352117607
## 670       STORM SURGE  43323541000
## 244              HAIL  18757611527
## 153       FLASH FLOOD  17562132111
## 95            DROUGHT  15018672000
## 402         HURRICANE  14610229010
## 590       RIVER FLOOD  10148404500
## 427         ICE STORM   8967041810

2. Visualization of the top 10 events with the highest total economic damage (property and crop damage added together)

library(ggplot2)

# Kept from the original: resets the base-graphics panel layout to a single
# panel. It is not needed for the ggplot below (ggplot2 draws with grid), but
# later base plots may rely on the reset.
par(mfrow = c(1, 1))

# Bar chart of the ten event types with the largest total damage, ordered
# from largest to smallest. `fill` paints the bars themselves; `col`/`colour`
# would only colour their outline and leave the bars grey.
ggplot(head(aggTotalDmg, 10), aes(x = reorder(EVTYPE, -TotalDmg), y = TotalDmg)) +
  geom_col(fill = "orange") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ggtitle("Top 10 Events with Highest Economic Impact") +
  labs(x = "EVENT TYPE", y = "Total Economic Impact ($USD)")

3.Results

Based on the table and graph, the events with the greatest economic consequences are Flood, Hurricane/Typhoon and Tornado, among others.