Synopsis: Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

The objective of this project is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and to answer the following questions about severe weather events:

1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?

Pre-Analysis

Loading the necessary packages. We will use dplyr to clean and explore the data and ggplot2 to plot it.

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.0.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.3

Loading and Processing the data

# Read the raw storm data, keep only the columns this analysis uses
# (event type, casualty counts, and the damage amounts with their exponent
# codes), and store the result as a tibble.
noaa <- read.csv("repdata_data_StormData.csv") %>%
  select(all_of(c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  ))) %>%
  as_tibble()
# Cleaning data ----
# PROPDMGEXP and CROPDMGEXP carry a magnitude code for PROPDMG and CROPDMG.
# List the codes that occur so the lookup table below can be checked against
# them.
unique(noaa$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(noaa$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"

#' Convert damage exponent codes to numeric multipliers
#'
#' Letter codes are matched case-insensitively (h = 1e2, k = 1e3, m = 1e6,
#' b = 1e9), a digit d maps to 10^d, an empty code maps to 1, and the symbols
#' "+", "-" and "?" map to 0.
#'
#' @param exp_code Character (or factor) vector of exponent codes.
#' @return Numeric vector the same length as `exp_code`; `NA` for any code
#'   that is not in the table.
damage_multiplier <- function(exp_code) {
  # match() is used instead of indexing a named vector because the empty
  # string cannot be looked up by name.
  codes <- c(
    "", "+", "-", "?", "H", "K", "M", "B",
    "0", "1", "2", "3", "4", "5", "6", "7", "8"
  )
  multipliers <- c(
    1, 0, 0, 0, 1e+02, 1e+03, 1e+06, 1e+09,
    1, 10, 100, 1e+03, 1e+04, 1e+05, 1e+06, 1e+07, 1e+08
  )
  multipliers[match(toupper(exp_code), codes)]
}

# Scale the raw amounts by their multipliers. Both columns share one table, so
# an empty CROPDMGEXP now yields a multiplier of 1 (as PROPDMGEXP already did)
# instead of NA; NA rows would be dropped silently by aggregate() later on.
noaa$PROPDMGVAL <- noaa$PROPDMG * damage_multiplier(noaa$PROPDMGEXP)
noaa$CROPDMGVAL <- noaa$CROPDMG * damage_multiplier(noaa$CROPDMGEXP)

Results:

Property Damage

# Total the scaled property damage (PROPDMGVAL) per event type and rank the
# event types from the largest total to the smallest.
FstQ <- aggregate(PROPDMGVAL ~ EVTYPE, data = noaa, FUN = sum)
FstQ <- FstQ[order(FstQ$PROPDMGVAL, decreasing = TRUE), ]
FstQ[seq_len(10), ]
##                EVTYPE   PROPDMGVAL
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380617
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673979
## 244              HAIL  15735267513
## 402         HURRICANE  11868319010
## 848    TROPICAL STORM   7703890550
## 972      WINTER STORM   6688497251
## 359         HIGH WIND   5270046260
library(ggplot2)
library(knitr)
# Bar chart of the ten highest-ranked event types. The total is mapped to x
# and the event type to y, and coord_flip() then swaps the two axes.
ggplot(FstQ[seq_len(10), ], aes(x = PROPDMGVAL, y = EVTYPE)) +
  geom_col(fill = "#34A287") +
  coord_flip() +
  labs(
    title = "Top Ten of event types with the highest Property damage",
    x = "Total number of Property damage",
    y = "Event Types"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Per-event-type totals of the scaled crop damage (CROPDMGVAL); the crop
# damage section that follows ranks and plots this table.
ScdQ <- aggregate(CROPDMGVAL ~ EVTYPE, data = noaa, FUN = sum)

FLOOD has the greatest economic consequence when we analyze Property Damage.

Crop Damage

# Rank the per-event-type crop damage totals from largest to smallest and
# show the first ten rows.
ScdQ <- ScdQ[order(ScdQ$CROPDMGVAL, decreasing = TRUE), ]
ScdQ[seq_len(10), ]
##               EVTYPE  CROPDMGVAL
## 16           DROUGHT 13972566000
## 35             FLOOD  5661968450
## 99       RIVER FLOOD  5029459000
## 86         ICE STORM  5022113500
## 53              HAIL  3025954470
## 78         HURRICANE  2741910000
## 83 HURRICANE/TYPHOON  2607872800
## 30       FLASH FLOOD  1421317100
## 26      EXTREME COLD  1292973000
## 47      FROST/FREEZE  1094086000
library(ggplot2)
# Author's reminder: consider a bigger fig.width for this figure.
# Bar chart of the ten highest-ranked event types. The total is mapped to x
# and the event type to y, and coord_flip() then swaps the two axes.
ggplot(ScdQ[seq_len(10), ], aes(x = CROPDMGVAL, y = EVTYPE)) +
  geom_col(fill = "violet") +
  coord_flip() +
  labs(
    title = "Top Ten of event types with the highest Crop damage",
    x = "Total number of Crop damage",
    y = "Event Types"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

DROUGHT has the greatest economic consequences in terms of crop damage.

Fatalities and Injuries

# Combine fatalities and injuries into one per-record casualty count, total
# it per event type, and rank the event types from most to least harmful.
noaa$HARM <- with(noaa, FATALITIES + INJURIES)
TrdQ <- aggregate(HARM ~ EVTYPE, data = noaa, FUN = sum)
TrdQ <- TrdQ[order(TrdQ$HARM, decreasing = TRUE), ]
TrdQ[seq_len(10), ]
##                EVTYPE  HARM
## 834           TORNADO 96979
## 130    EXCESSIVE HEAT  8428
## 856         TSTM WIND  7461
## 170             FLOOD  7259
## 464         LIGHTNING  6046
## 275              HEAT  3037
## 153       FLASH FLOOD  2755
## 427         ICE STORM  2064
## 760 THUNDERSTORM WIND  1621
## 972      WINTER STORM  1527
library(ggplot2)
# Author's reminder: consider a bigger fig.width for this figure.
# Bar chart of the ten highest-ranked event types. The total is mapped to x
# and the event type to y, and coord_flip() then swaps the two axes.
ggplot(TrdQ[seq_len(10), ], aes(x = HARM, y = EVTYPE)) +
  geom_col(fill = "skyblue3") +
  coord_flip() +
  labs(
    title = "Top Ten of event types with the highest impact on Population Health",
    x = "Total number of Fatalities + Injuries",
    y = "Event Types"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

TORNADO events are the most harmful with respect to population health.