Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The analysis answers the following two questions:

* Across the United States, which types of events are most harmful with respect to population health?
* Across the United States, which types of events have the greatest economic consequences?
Variables analysed for the first question are fatalities and injuries. Variables analysed for the second question are property damage and crop damage.
library(dplyr)
library(ggplot2)
library(forcats)
library(gridExtra)
library(R.utils)
# Fetch the compressed NOAA storm data (only if it is not already on disk),
# decompress it (only if the CSV is not already on disk), and load it into `df`.
# NOTE(review): `temp` is not used anywhere in the visible script; it is kept
# in case later code outside this view reads it.
temp <- tempfile()
# The check must use the same relative path that download.file() writes to;
# a leading "/" would look in the filesystem root and force a re-download
# on every run.
if (!file.exists("./stormData.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "./stormData.csv.bz2",
    mode = "wb"
  )
}
if (!file.exists("repdata_data_StormData.csv")) {
  # remove = FALSE keeps the archive so the download guard above stays valid.
  bunzip2("stormData.csv.bz2", "repdata_data_StormData.csv", remove = FALSE)
}
df <- read.csv("repdata_data_StormData.csv")
Finding the most harmful events with regard to injuries and fatalities
# Total fatalities (`fat`) and injuries (`inj`) per event type, ranked by
# fatalities with injuries as the tie-breaker.
# desc() accepts a single vector, so each sort key gets its own desc() call.
df2 <- df %>%
  group_by(EVTYPE) %>%
  summarise(fat = sum(FATALITIES), inj = sum(INJURIES)) %>%
  arrange(desc(fat), desc(inj))
head(df2, 10)
## # A tibble: 10 x 3
## EVTYPE fat inj
## <fct> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
# Horizontal bar charts of the ten deadliest and the ten most injurious event
# types, stacked in one figure.
# Each panel ranks df2 by its own measure: the ten event types with the most
# fatalities are not the same as the ten with the most injuries, so the
# injuries panel must not reuse the fatalities-ordered rows.
p1 <- df2 %>%
  arrange(desc(fat)) %>%
  head(10) %>%
  mutate(EVTYPE = fct_reorder(EVTYPE, fat)) %>%
  ggplot(aes(x = EVTYPE, y = fat)) +
  geom_bar(stat = "identity", fill = "Blue") +
  xlab("") +
  ylab("Fatalities") +
  coord_flip() +
  theme_bw() +
  ggtitle("Top 10 Harmful events regarding fatalities")
p2 <- df2 %>%
  arrange(desc(inj)) %>%
  head(10) %>%
  mutate(EVTYPE = fct_reorder(EVTYPE, inj)) %>%
  ggplot(aes(x = EVTYPE, y = inj)) +
  geom_bar(stat = "identity", fill = "Red") +
  xlab("") +
  ylab("Injuries") +
  coord_flip() +
  theme_bw() +
  ggtitle("Top 10 Harmful events regarding injuries")
grid.arrange(p1, p2, nrow = 2)
Calculating the property and crop damage based on the alpha designators.
# Convert PROPDMG to US dollars by applying the magnitude code stored in
# PROPDMGEXP, then drop the code column.
#   H/h = hundreds, K/k = thousands, M/m = millions, B/b = billions.
#   Digits 0-8 are read as a power of ten (10^digit), not as the multiplier
#   itself.
#   NOTE(review): the official documentation only defines the letter codes;
#   the power-of-ten reading of digit codes is an assumption to confirm.
#   Any other code (blank, "+", "-", "?") contributes 0, as before.
df3 <- df %>%
  mutate(
    PROPDMG = PROPDMG * case_when(
      PROPDMGEXP %in% c("b", "B") ~ 10^9,
      PROPDMGEXP %in% c("m", "M") ~ 10^6,
      PROPDMGEXP %in% c("k", "K") ~ 10^3,
      PROPDMGEXP %in% c("h", "H") ~ 10^2,
      # match() yields 1..9 for "0".."8" (NA otherwise), hence the "- 1".
      PROPDMGEXP %in% as.character(0:8) ~
        10^(match(PROPDMGEXP, as.character(0:8)) - 1),
      TRUE ~ 0
    ),
    PROPDMGEXP = NULL
  )
Finding the most harmful events with regard to property damage
# Sum property damage per event type and rank the event types from the
# largest total to the smallest; then show the ten largest.
prop_desc <- arrange(
  summarise(group_by(df3, EVTYPE), sumPROP = sum(PROPDMG)),
  desc(sumPROP)
)
head(prop_desc, n = 10)
## # A tibble: 10 x 2
## EVTYPE sumPROP
## <fct> <dbl>
## 1 FLOOD 144657709800
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56937160991
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16140812087.
## 6 HAIL 15732267370
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
## 9 WINTER STORM 6688497250
## 10 HIGH WIND 5270046260
Finding the most harmful events with regard to crop damage
# Total crop damage per event type, ranked from largest to smallest.
# CROPDMG is only a mantissa; its magnitude code is in CROPDMGEXP
# (H = hundreds, K = thousands, M = millions, B = billions, digit = 10^digit).
# Summing the raw mantissas mixes thousands with millions and billions, so the
# code is applied first.
# NOTE(review): the power-of-ten reading of digit codes is an assumption.
# sumCROP is expressed in THOUSANDS of USD, the unit the downstream plot
# expects when it divides by 10^3 to show millions.
crop_desc <- df3 %>%
  mutate(
    crop_exp = toupper(as.character(CROPDMGEXP)),
    crop_usd = CROPDMG * case_when(
      crop_exp == "B" ~ 10^9,
      crop_exp == "M" ~ 10^6,
      crop_exp == "K" ~ 10^3,
      crop_exp == "H" ~ 10^2,
      # match() yields 1..9 for "0".."8" (NA otherwise), hence the "- 1".
      crop_exp %in% as.character(0:8) ~
        10^(match(crop_exp, as.character(0:8)) - 1),
      TRUE ~ 0
    )
  ) %>%
  group_by(EVTYPE) %>%
  summarise(sumCROP = sum(crop_usd) / 10^3) %>%
  arrange(desc(sumCROP))
head(crop_desc, 10)
## # A tibble: 10 x 2
## EVTYPE sumCROP
## <fct> <dbl>
## 1 HAIL 579596.
## 2 FLASH FLOOD 179200.
## 3 FLOOD 168038.
## 4 TSTM WIND 109203.
## 5 TORNADO 100019.
## 6 THUNDERSTORM WIND 66791.
## 7 DROUGHT 33899.
## 8 THUNDERSTORM WINDS 18685.
## 9 HIGH WIND 17283.
## 10 HEAVY RAIN 11123.
Plotting the most harmful events regarding property and crop damage
# Two stacked horizontal bar charts: the first ten rows of prop_desc and of
# crop_desc (both already sorted in descending order), with bars ordered by
# size. Property totals are divided by 10^6 and crop totals by 10^3 before
# plotting.
p3 <- ggplot(
  mutate(prop_desc[seq_len(10), ], EVTYPE = fct_reorder(EVTYPE, sumPROP)),
  aes(x = EVTYPE, y = sumPROP / 10^6)
) +
  geom_bar(stat = "identity", fill = "DarkGreen") +
  labs(
    x = "",
    y = "Total damage in Millions USD",
    title = "Total property damage by top 10 harmful event types"
  ) +
  coord_flip() +
  theme_bw()
p4 <- ggplot(
  mutate(crop_desc[seq_len(10), ], EVTYPE = fct_reorder(EVTYPE, sumCROP)),
  aes(x = EVTYPE, y = sumCROP / 10^3)
) +
  geom_bar(stat = "identity", fill = "DarkBlue") +
  labs(
    x = "",
    y = "Total damage in Millions USD",
    title = "Total crop damage by top 10 harmful event types"
  ) +
  coord_flip() +
  theme_bw()
grid.arrange(p3, p4, nrow = 2)