# Libraries
library(readr)
library(dplyr)
library(ggplot2)
library(plotly)
Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
# Load the NOAA storm database export; the relative path is resolved against
# the current working directory.
storm_csv <- "repdata_data_StormData.csv"
StormData <- read_csv(file = storm_csv)
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl (1): COUNTYENDN
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to check how the columns were parsed
head(StormData, n = 6L)
## # A tibble: 6 × 37
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
## <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 1 4/18/1950… 0130 CST 97 MOBILE AL TORNA… 0
## 2 1 4/18/1950… 0145 CST 3 BALDWIN AL TORNA… 0
## 3 1 2/20/1951… 1600 CST 57 FAYETTE AL TORNA… 0
## 4 1 6/8/1951 … 0900 CST 89 MADISON AL TORNA… 0
## 5 1 11/15/195… 1500 CST 43 CULLMAN AL TORNA… 0
## 6 1 11/15/195… 2000 CST 77 LAUDERDALE AL TORNA… 0
## # ℹ 28 more variables: BGN_AZI <chr>, BGN_LOCATI <chr>, END_DATE <chr>,
## # END_TIME <chr>, COUNTY_END <dbl>, COUNTYENDN <lgl>, END_RANGE <dbl>,
## # END_AZI <chr>, END_LOCATI <chr>, LENGTH <dbl>, WIDTH <dbl>, F <dbl>,
## # MAG <dbl>, FATALITIES <dbl>, INJURIES <dbl>, PROPDMG <dbl>,
## # PROPDMGEXP <chr>, CROPDMG <dbl>, CROPDMGEXP <chr>, WFO <chr>,
## # STATEOFFIC <chr>, ZONENAMES <chr>, LATITUDE <dbl>, LONGITUDE <dbl>,
## # LATITUDE_E <dbl>, LONGITUDE_ <dbl>, REMARKS <chr>, REFNUM <dbl>
# Count the distinct event types recorded in EVTYPE
sum(!duplicated(StormData$EVTYPE))
## [1] 977
There are 977 distinct types of storms and other severe weather events listed in the data.
In order to know the types of events that are most harmful with
respect to population health, we can consider these two variables :
FATALITIES and INJURIES.
# Combined health impact of each record: fatalities plus injuries
StormData$HEALTH <- with(StormData, FATALITIES + INJURIES)
# Total health impact per event type, sorted from largest to smallest
health_by_event <- aggregate(HEALTH ~ EVTYPE, data = StormData, FUN = sum)
most_harmful <- arrange(health_by_event, desc(HEALTH))
head(most_harmful, n = 10)
## EVTYPE HEALTH
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
The ten types of events that are most harmful with respect to population health are:
TORNADO;
EXCESSIVE HEAT;
TSTM WIND;
FLOOD;
LIGHTNING;
HEAT;
FLASH FLOOD;
ICE STORM;
THUNDERSTORM WIND;
WINTER STORM.
# Horizontal bar chart of the ten event types with the highest combined
# number of fatalities and injuries, rendered as an interactive plotly widget.
p1 <- ggplot(
  head(most_harmful, 10),
  # reorder() sorts the bars by HEALTH so the largest sits on top after the flip
  aes(reorder(EVTYPE, HEALTH), HEALTH, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Top 10 events that are most harmful with respect to population health",
    x = "Event Type",
    y = "Number of fatalities and injuries"
  ) +
  # Each bar is already labelled on the axis, so the fill legend is redundant
  theme(legend.position = "none")
ggplotly(p1, dynamicTicks = TRUE)
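To find which types of events have the greatest economic consequences, we will consider the damage variables
PROPDMG and CROPDMG together with their exponent variables PROPDMGEXP and CROPDMGEXP.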
# Distribution of the raw property-damage figures
hist(x = StormData$PROPDMG)
# PROPDMGEXP: overview of the column holding the exponent codes
summary(StormData[["PROPDMGEXP"]])
## Length Class Mode
## 902297 character character
# List the distinct exponent codes that occur in PROPDMGEXP
unique(StormData[["PROPDMGEXP"]])
## [1] "K" "M" NA "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Convert the recorded damage figures into absolute amounts.
# PROPDMGEXP / CROPDMGEXP hold a code giving the magnitude of the matching
# PROPDMG / CROPDMG value. `ltr` lists the known codes and `mult` holds the
# multiplier applied for the code at the same position.
ltr <- c("B", "M", "K", "", "m", "0", "1", "2", "3", "4", "5", "6", "7", "8",
         "+", "-", "H", "h", "?")
mult <- c(10^9, 10^6, 10^3, 0, 10^6, 10, 10, 10, 10, 10, 10, 10, 10, 10,
          1, 0, 100, 100, 0)

# Look up the multiplier for a vector of exponent codes.
#   exp_code: vector of exponent codes; may contain NA.
#   Returns a numeric vector of the same length. A code that is still not in
#   `ltr` after normalisation yields NA.
damage_multiplier <- function(exp_code) {
  # read_csv() imports blank fields as NA, and match() never pairs NA with the
  # "" entry of `ltr`. Map NA to "" so these rows get the blank-code multiplier
  # instead of NA; NA totals are dropped by the formula interface of
  # aggregate(), which would discard the row's other damage component too.
  exp_code[is.na(exp_code)] <- ""
  # Lower-case codes such as "k" carry the same meaning as upper-case ones
  exp_code <- toupper(exp_code)
  mult[match(exp_code, ltr)]
}

StormData$PROPDMG2 <- StormData$PROPDMG * damage_multiplier(StormData$PROPDMGEXP)
StormData$CROPDMG2 <- StormData$CROPDMG * damage_multiplier(StormData$CROPDMGEXP)
# Total economic damage of each record: property plus crop damage
StormData$economy <- StormData$PROPDMG2 + StormData$CROPDMG2
hist(StormData$economy)
In summary, this code processes the storm damage estimates by
converting them into a consistent unit (dollars). Each exponent code
listed in the ltr vector is looked up and replaced by the multiplier
stored at the same position in the mult vector. The code then
calculates an “economy” metric by summing the adjusted property and
crop damage estimates for each entry in the dataset. The aim is to
provide a standardized way of quantifying the economic impact of
storms.
# Total economic damage per event type, sorted from largest to smallest
damage_by_event <- aggregate(economy ~ EVTYPE, data = StormData, FUN = sum)
economy_harmful <- arrange(damage_by_event, desc(economy))
head(economy_harmful, n = 10)
## EVTYPE economy
## 1 FLOOD 138007444500
## 2 HURRICANE/TYPHOON 29348167800
## 3 TORNADO 16570328280
## 4 HURRICANE 12405268000
## 5 RIVER FLOOD 10108369000
## 6 HAIL 10044983890
## 7 FLASH FLOOD 8715885664
## 8 ICE STORM 5925151300
## 9 STORM SURGE/TIDE 4641493000
## 10 THUNDERSTORM WIND 3813647990
Across the United States, floods have the greatest economic consequences, with damage estimated at 138,007,444,500 dollars (about 138 billion dollars).
# Horizontal bar chart of the ten costliest event types, shown as an
# interactive plotly widget.
top_economy <- head(economy_harmful, n = 10)
p2 <- ggplot(
  data = top_economy,
  mapping = aes(x = reorder(EVTYPE, economy), y = economy, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Top 10 events that have the greatest economic consequences",
    x = "Event Type",
    y = "Cost of damage ($)"
  ) +
  # Bars are labelled on the axis, so the fill legend is hidden
  theme(legend.position = "none")
ggplotly(p2, dynamicTicks = TRUE)