# Libraries
library(readr)
library(dplyr)
library(ggplot2)
library(plotly)

Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data

# Load the raw NOAA storm records from the CSV file (path is relative to the
# working directory)
StormData <- readr::read_csv(file = "repdata_data_StormData.csv")
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl  (1): COUNTYENDN
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six records
head(StormData, n = 6L)
## # A tibble: 6 × 37
##   STATE__ BGN_DATE   BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
##     <dbl> <chr>      <chr>    <chr>      <dbl> <chr>      <chr> <chr>      <dbl>
## 1       1 4/18/1950… 0130     CST           97 MOBILE     AL    TORNA…         0
## 2       1 4/18/1950… 0145     CST            3 BALDWIN    AL    TORNA…         0
## 3       1 2/20/1951… 1600     CST           57 FAYETTE    AL    TORNA…         0
## 4       1 6/8/1951 … 0900     CST           89 MADISON    AL    TORNA…         0
## 5       1 11/15/195… 1500     CST           43 CULLMAN    AL    TORNA…         0
## 6       1 11/15/195… 2000     CST           77 LAUDERDALE AL    TORNA…         0
## # ℹ 28 more variables: BGN_AZI <chr>, BGN_LOCATI <chr>, END_DATE <chr>,
## #   END_TIME <chr>, COUNTY_END <dbl>, COUNTYENDN <lgl>, END_RANGE <dbl>,
## #   END_AZI <chr>, END_LOCATI <chr>, LENGTH <dbl>, WIDTH <dbl>, F <dbl>,
## #   MAG <dbl>, FATALITIES <dbl>, INJURIES <dbl>, PROPDMG <dbl>,
## #   PROPDMGEXP <chr>, CROPDMG <dbl>, CROPDMGEXP <chr>, WFO <chr>,
## #   STATEOFFIC <chr>, ZONENAMES <chr>, LATITUDE <dbl>, LONGITUDE <dbl>,
## #   LATITUDE_E <dbl>, LONGITUDE_ <dbl>, REMARKS <chr>, REFNUM <dbl>

Across the United States, which types of events are most harmful with respect to population health?

# How many distinct event-type labels does EVTYPE contain?
n_distinct(StormData$EVTYPE)
## [1] 977

There are 977 distinct storm and other severe weather event types listed in the data.

To identify the types of events that are most harmful with respect to population health, we consider two variables: FATALITIES and INJURIES.

# Combined casualty count per record: fatalities plus injuries
StormData[["HEALTH"]] <- with(StormData, FATALITIES + INJURIES)

# Sum the casualties within each event type and rank from most to least harmful
most_harmful <- arrange(
  aggregate(HEALTH ~ EVTYPE, data = StormData, FUN = sum),
  desc(HEALTH)
)

head(most_harmful, n = 10)
##               EVTYPE HEALTH
## 1            TORNADO  96979
## 2     EXCESSIVE HEAT   8428
## 3          TSTM WIND   7461
## 4              FLOOD   7259
## 5          LIGHTNING   6046
## 6               HEAT   3037
## 7        FLASH FLOOD   2755
## 8          ICE STORM   2064
## 9  THUNDERSTORM WIND   1621
## 10      WINTER STORM   1527

The ten event types that are most harmful with respect to population health are shown below:

# Horizontal bar chart of the ten event types with the highest combined
# fatalities and injuries, with bars ordered by their casualty total.
p1 <- ggplot(
  head(most_harmful, 10),
  aes(x = reorder(EVTYPE, HEALTH), y = HEALTH, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Top 10 events that are most harmful with respect to population health",
    x = "Event Type",
    y = "Number of fatalities and injuries"
  ) +
  # Each bar is already labelled on the axis, so the fill legend is redundant
  theme(legend.position = "none")

# Render the static plot as an interactive plotly widget
ggplotly(p1, dynamicTicks = TRUE)

Across the United States, which types of events have the greatest economic consequences?

To answer this question, we will consider the damage magnitudes PROPDMG and CROPDMG together with their exponent codes PROPDMGEXP and CROPDMGEXP.

# Distribution of the raw property-damage magnitudes (PROPDMG), i.e. before
# the exponent code in PROPDMGEXP is applied
hist(StormData$PROPDMG)

# PROPDMGEXP holds the exponent code that scales PROPDMG; it is a character
# column, so summary() only reports its length and type
summary(StormData$PROPDMGEXP)
##    Length     Class      Mode 
##    902297 character character
# List the distinct exponent codes that need a multiplier
unique(StormData$PROPDMGEXP)
##  [1] "K" "M" NA  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Let's create the variable related to economic consequences.
#
# PROPDMG / CROPDMG hold a damage magnitude and PROPDMGEXP / CROPDMGEXP hold a
# code that scales it. `ltr` lists the known codes and `mult` gives the
# multiplier stored at the same position.
ltr <- c(
  "B", "M", "K", "", "m",
  "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "+", "-", "H", "h", "?"
)
mult <- c(
  10^9, 10^6, 10^3, 0, 10^6,
  10, 10, 10, 10, 10, 10, 10, 10, 10,
  1, 0, 100, 100, 0
)

# Convert a vector of exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes; may contain NA.
# Returns a numeric vector of the same length that never contains NA, so no
# record is later dropped from the per-event totals because of its code.
damage_multiplier <- function(exp_code) {
  exp_code <- as.character(exp_code)
  # read_csv() imports empty fields as NA, so treat NA as the blank code ""
  exp_code[is.na(exp_code)] <- ""
  # Compare case-insensitively so a lower-case variant of a known code
  # (e.g. "k") is matched as well
  idx <- match(toupper(exp_code), toupper(ltr))
  unknown <- unique(exp_code[is.na(idx)])
  if (length(unknown) > 0) {
    warning(
      "Unrecognised damage exponent code(s) treated as multiplier 0: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  multiplier <- mult[idx]
  # Unknown codes contribute no damage, like the other invalid codes "-" and "?"
  multiplier[is.na(idx)] <- 0
  multiplier
}

StormData$PROPDMG2 <- StormData$PROPDMG * damage_multiplier(StormData$PROPDMGEXP)
StormData$CROPDMG2 <- StormData$CROPDMG * damage_multiplier(StormData$CROPDMGEXP)

# Total economic damage per record: property plus crop damage
StormData$economy <- StormData$PROPDMG2 + StormData$CROPDMG2
hist(StormData$economy)

In summary, this code converts the storm damage estimates into a consistent unit (dollars) by looking up each exponent code in the `ltr` vector and applying the corresponding multiplier from the `mult` vector. It then calculates an “economy” metric by summing the adjusted property and crop damage estimates for each entry in the dataset. The aim is to provide a standardized way of quantifying the economic impact of storms.

# Total economic damage for each event type, ranked from the costliest down
economy_harmful <- arrange(
  aggregate(economy ~ EVTYPE, data = StormData, FUN = sum),
  desc(economy)
)

head(economy_harmful, n = 10)
##               EVTYPE      economy
## 1              FLOOD 138007444500
## 2  HURRICANE/TYPHOON  29348167800
## 3            TORNADO  16570328280
## 4          HURRICANE  12405268000
## 5        RIVER FLOOD  10108369000
## 6               HAIL  10044983890
## 7        FLASH FLOOD   8715885664
## 8          ICE STORM   5925151300
## 9   STORM SURGE/TIDE   4641493000
## 10 THUNDERSTORM WIND   3813647990

Across the United States, floods have the greatest economic consequences, with an estimated 138,007,444,500 dollars (about $138 billion) in combined property and crop damage.

# Horizontal bar chart of the ten event types with the largest total
# economic damage, with bars ordered by their damage total.
p2 <- ggplot(
  data = head(economy_harmful, n = 10),
  mapping = aes(x = reorder(EVTYPE, economy), y = economy, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  # Each bar is already labelled on the axis, so hide the fill legend
  theme(legend.position = "none") +
  labs(
    title = "Top 10 events that have the greatest economic consequences",
    x = "Event Type",
    y = "Cost of damage ($)"
  )

# Render the static plot as an interactive plotly widget
plotly::ggplotly(p2, dynamicTicks = TRUE)