Synopsis: Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
The objective of this project is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and to answer the following questions about severe weather events:
Across the United States, which types of events are most harmful with respect to population health? Across the United States, which types of events have the greatest economic consequences? Pre-Analysis: loading the necessary packages. We will use dplyr to clean and explore the data and ggplot2 to plot it.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.0.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.3
Loading and Processing the data
# Read the raw storm data, keep only the event type plus the casualty and
# damage columns used in the analysis, and store the result as a tibble.
noaa <- read.csv("repdata_data_StormData.csv") %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  as_tibble()
# Cleaning data ----
# PROPDMGEXP / CROPDMGEXP hold a magnitude code that scales PROPDMG / CROPDMG.
# List the codes that actually occur in each column.
unique(noaa$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(noaa$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"

# One lookup table shared by both damage columns, so the same code always
# gets the same multiplier:
#   ""            -> 1 (no scaling)
#   digit d       -> 10^d ("0" -> 1)
#   h/H, k/K, m/M -> hundred, thousand, million
#   B             -> billion
#   "+", "-", "?" -> 0 (damage is discarded)
# An empty CROPDMGEXP is mapped to 1, exactly like an empty PROPDMGEXP;
# without that entry those rows become NA and are silently dropped by
# aggregate().
exp_lookup <- data.frame(
  code = c(
    "", "0", "1", "2", "3", "4", "5", "6", "7", "8",
    "h", "H", "k", "K", "m", "M", "B",
    "+", "-", "?"
  ),
  multiplier = c(
    1, 1, 1e+01, 1e+02, 1e+03, 1e+04, 1e+05, 1e+06, 1e+07, 1e+08,
    1e+02, 1e+02, 1e+03, 1e+03, 1e+06, 1e+06, 1e+09,
    0, 0, 0
  ),
  stringsAsFactors = FALSE
)

# match() is used instead of indexing a named vector because the empty
# string cannot be looked up by name. Codes missing from the table yield NA.
prop_multiplier <- exp_lookup$multiplier[
  match(noaa$PROPDMGEXP, exp_lookup$code)
]
crop_multiplier <- exp_lookup$multiplier[
  match(noaa$CROPDMGEXP, exp_lookup$code)
]

# Scaled damage values; assigning whole columns avoids the tibble
# "Unknown or uninitialised column" warning.
noaa$PROPDMGVAL <- noaa$PROPDMG * prop_multiplier
noaa$CROPDMGVAL <- noaa$CROPDMG * crop_multiplier
Results:
Property Damage
# Sum the scaled property damage per event type, rank the event types from
# the largest total to the smallest, and print the first ten rows.
FstQ <- aggregate(PROPDMGVAL ~ EVTYPE, data = noaa, FUN = sum)
FstQ <- FstQ[with(FstQ, order(-PROPDMGVAL)), ]
FstQ[seq_len(10), ]
## EVTYPE PROPDMGVAL
## 170 FLOOD 144657709807
## 411 HURRICANE/TYPHOON 69305840000
## 834 TORNADO 56947380617
## 670 STORM SURGE 43323536000
## 153 FLASH FLOOD 16822673979
## 244 HAIL 15735267513
## 402 HURRICANE 11868319010
## 848 TROPICAL STORM 7703890550
## 972 WINTER STORM 6688497251
## 359 HIGH WIND 5270046260
library("ggplot2")
library("knitr")
# Bar chart of the first ten rows of FstQ (assumed to be sorted by
# descending property damage already).
# Event types are mapped to x and the totals to y, so geom_col() draws the
# bars directly; no swapped aesthetics or coord_flip() are needed.
# reorder() keeps the bars in descending order of damage instead of the
# alphabetical order a character column would get.
ggplot(
  data = head(FstQ, 10),
  aes(x = reorder(EVTYPE, -PROPDMGVAL), y = PROPDMGVAL)
) +
  geom_col(fill = "#34A287") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Total property damage (USD)",
    title = "Top Ten of event types with the highest Property damage"
  )
# Total scaled crop damage for every event type.
ScdQ <- aggregate(CROPDMGVAL ~ EVTYPE, data = noaa, FUN = sum)
FLOOD has the greatest economic consequence when we analyze Property Damage.
Crop Damage
# Rank the event types from the largest crop-damage total to the smallest
# and print the first ten rows.
ScdQ <- ScdQ[with(ScdQ, order(-CROPDMGVAL)), ]
ScdQ[seq_len(10), ]
## EVTYPE CROPDMGVAL
## 16 DROUGHT 13972566000
## 35 FLOOD 5661968450
## 99 RIVER FLOOD 5029459000
## 86 ICE STORM 5022113500
## 53 HAIL 3025954470
## 78 HURRICANE 2741910000
## 83 HURRICANE/TYPHOON 2607872800
## 30 FLASH FLOOD 1421317100
## 26 EXTREME COLD 1292973000
## 47 FROST/FREEZE 1094086000
library(ggplot2)
# Bar chart of the first ten rows of ScdQ (assumed to be sorted by
# descending crop damage already).
# Event types are mapped to x and the totals to y, so geom_col() draws the
# bars directly; no swapped aesthetics or coord_flip() are needed.
# reorder() keeps the bars in descending order of damage instead of the
# alphabetical order a character column would get.
ggplot(
  data = head(ScdQ, 10),
  aes(x = reorder(EVTYPE, -CROPDMGVAL), y = CROPDMGVAL)
) +
  geom_col(fill = "violet") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Total crop damage (USD)",
    title = "Top Ten of event types with the highest Crop damage"
  )
DROUGHT has the greatest economic consequence when we analyze Crop Damage.
Fatalities and Injuries
# Combined casualty count per record: fatalities plus injuries.
noaa[["HARM"]] <- with(noaa, FATALITIES + INJURIES)
# Sum the casualties per event type, rank the event types from the largest
# total to the smallest, and print the first ten rows.
TrdQ <- aggregate(HARM ~ EVTYPE, data = noaa, FUN = sum)
TrdQ <- TrdQ[with(TrdQ, order(-HARM)), ]
TrdQ[seq_len(10), ]
## EVTYPE HARM
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
## 275 HEAT 3037
## 153 FLASH FLOOD 2755
## 427 ICE STORM 2064
## 760 THUNDERSTORM WIND 1621
## 972 WINTER STORM 1527
library(ggplot2)
# Bar chart of the first ten rows of TrdQ (assumed to be sorted by
# descending casualty count already).
# Event types are mapped to x and the counts to y, so geom_col() draws the
# bars directly; no swapped aesthetics or coord_flip() are needed.
# reorder() keeps the bars in descending order of HARM instead of the
# alphabetical order a character column would get.
ggplot(
  data = head(TrdQ, 10),
  aes(x = reorder(EVTYPE, -HARM), y = HARM)
) +
  geom_col(fill = "skyblue3") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Types",
    y = "Total number of Fatalities + Injuries",
    title = "Top Ten of event types with the highest impact on Population Health"
  )
TORNADO events are the most harmful with respect to population health.