Exploring the NOAA Storm Database: US severe weather impacts on Public Health and Economics

## Synopsis

This analysis explores severe weather events that have significant public health and economic impacts. The data comes from the NOAA storm database, which records major storms and severe weather episodes in the United States (1950-2011). It includes when and where they occurred, as well as estimates of any fatalities, injuries, and property damage.

Data processing

The data can be downloaded from the course website: Storm Data

Documentation of the database is available here:

- National Weather Service Storm Data Documentation
- National Climatic Data Center Storm Events FAQ

# New project created in RStudio called: "RFinalProject"
dir()
## [1] "NOAA_Storm_Data.csv.bz2" "RFinalProject.pdf"      
## [3] "RFinalProject.Rmd"       "RFinalProject.Rproj"
library(knitr)
## Warning: package 'knitr' was built under R version 4.0.3
library(plyr)
library(ggplot2)
library(lattice)
## Warning: package 'lattice' was built under R version 4.0.3
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.3
library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.0.3
# NOAA Weather Impact Data
fileUrl <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Local copy of the archive, relative to the working directory. One path is
# shared by the download and the read so both steps refer to the same file.
dataFile <- "NOAA_Storm_Data.csv.bz2"
# Downloading the data (skipped when the archive is already present locally)
if (!file.exists(dataFile)) {
  download.file(fileUrl, destfile = dataFile, method = "curl")
}
date()
## [1] "Mon Dec 28 15:08:14 2020"
# Reading BZip format: bzfile() opens the compressed archive as a connection,
# so read.csv() parses it without a separate decompression step.
system.time(df <- read.csv(bzfile(dataFile),
        header = TRUE,
        strip.white = TRUE,
        stringsAsFactors = FALSE))
##    user  system elapsed 
##   49.81    0.65   50.54

Variables selected

Event Type (EVTYPE), Begin Date of Event (BGN_DATE), Event Human Fatalities (FATALITIES), Event Human Injuries (INJURIES), Event Estimated Property Damages (PROPDMG), Event Property Damage Decimal Precision (PROPDMGEXP), Event Estimated Crop Damages (CROPDMG), Event Crop Damage Decimal Precision (CROPDMGEXP)

# Keep only the eight columns used in this analysis: event type and begin date,
# fatality and injury counts, and the property/crop damage amounts together
# with their *DMGEXP codes.
df <- df[c("EVTYPE", "BGN_DATE",
           "FATALITIES", "INJURIES",
           "PROPDMG", "PROPDMGEXP",
           "CROPDMG", "CROPDMGEXP")]

Element transformation

# Parse the raw BGN_DATE strings (month/day/year hour:minute:second) into
# POSIXct date-times.
df[["BGN_DATE"]] <- as.POSIXct(df[["BGN_DATE"]], format = "%m/%d/%Y %H:%M:%S")

# Inspect the column types of the data frame, then sample the distinct event
# type labels.
str(df)
## 'data.frame':    902297 obs. of  8 variables:
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_DATE  : POSIXct, format: "1950-04-18" "1950-04-18" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
head(unique(df[["EVTYPE"]]))
## [1] "TORNADO"               "TSTM WIND"             "HAIL"                 
## [4] "FREEZING RAIN"         "SNOW"                  "ICE STORM/FLASH FLOOD"

QUESTION 1

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Total fatalities and total injuries for each event type
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = df, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = df, FUN = sum)

# Rank event types from the highest total downwards (ties broken by event
# name) and keep the first ten rows of each ranking
fatalities <- arrange(fatalities, desc(FATALITIES), EVTYPE)
fatalities <- fatalities[1:10, ]
injuries <- arrange(injuries, desc(INJURIES), EVTYPE)
injuries <- injuries[1:10, ]

# Turn EVTYPE into a factor whose levels follow the ranked row order
fatalities$EVTYPE <- with(fatalities, factor(EVTYPE, levels = EVTYPE))
injuries$EVTYPE <- with(injuries, factor(EVTYPE, levels = EVTYPE))
fatalities
##            EVTYPE FATALITIES
## 1         TORNADO       5633
## 2  EXCESSIVE HEAT       1903
## 3     FLASH FLOOD        978
## 4            HEAT        937
## 5       LIGHTNING        816
## 6       TSTM WIND        504
## 7           FLOOD        470
## 8     RIP CURRENT        368
## 9       HIGH WIND        248
## 10      AVALANCHE        224
injuries
##               EVTYPE INJURIES
## 1            TORNADO    91346
## 2          TSTM WIND     6957
## 3              FLOOD     6789
## 4     EXCESSIVE HEAT     6525
## 5          LIGHTNING     5230
## 6               HEAT     2100
## 7          ICE STORM     1975
## 8        FLASH FLOOD     1777
## 9  THUNDERSTORM WIND     1488
## 10              HAIL     1361
# Bar chart: total fatalities per event type (one bar per row of `fatalities`)
fatalitiesbyweather <- ggplot(fatalities, aes(EVTYPE, FATALITIES)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Event Type", y = "Fatalities") +
  # Rotate the event-type labels so they stay readable on the x axis
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Bar chart: total injuries per event type (one bar per row of `injuries`)
injuriesbyweather <- ggplot(injuries, aes(EVTYPE, INJURIES)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Event Type", y = "Injuries") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## QUESTION 2

Across the United States, which types of events have the greatest economic consequences?

#Normalize Event Damage Amount to Integer Format

# Translate PROPDMGEXP / CROPDMGEXP codes into numeric multipliers.
#   exp_code: character vector of codes
#   returns:  numeric vector of the same length as exp_code
# Mapping (letters are matched case-insensitively, so "h"/"H", "k"/"K" and
# "m"/"M" always agree):
#   "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
#   "0" to "8" = 10^0 to 10^8
#   "", "+", "-", "?" = 1 (amount left unscaled)
# Any other code maps to NA rather than being silently scaled.
damage_multiplier <- function(exp_code) {
  codes <- c("", "+", "-", "?", "H", "K", "M", "B",
             "0", "1", "2", "3", "4", "5", "6", "7", "8")
  multipliers <- c(1, 1, 1, 1, 1e2, 1e3, 1e6, 1e9,
                   1, 10, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8)
  # match() returns the position of each code in the table; indexing the
  # multipliers with it yields a numeric vector directly.
  multipliers[match(toupper(exp_code), codes)]
}

tmpPROPDMG <- damage_multiplier(df$PROPDMGEXP)
tmpCROPDMG <- damage_multiplier(df$CROPDMGEXP)

# Make Numeric Property and Crop Damage
df$TOTAL_PROPDMG <- tmpPROPDMG * df$PROPDMG
df$TOTAL_CROPDMG <- tmpCROPDMG * df$CROPDMG

# List the columns of df, which now include the two computed damage totals
names(df)
##  [1] "EVTYPE"        "BGN_DATE"      "FATALITIES"    "INJURIES"     
##  [5] "PROPDMG"       "PROPDMGEXP"    "CROPDMG"       "CROPDMGEXP"   
##  [9] "TOTAL_PROPDMG" "TOTAL_CROPDMG"
# Combined damage per record: property total plus crop total
df$TOTALDMG <- with(df, TOTAL_PROPDMG + TOTAL_CROPDMG)

# Per-event-type sums of property, crop, and combined damage
propdamage <- aggregate(TOTAL_PROPDMG ~ EVTYPE, data = df, FUN = sum)
cropdamage <- aggregate(TOTAL_CROPDMG ~ EVTYPE, data = df, FUN = sum)
totaldamage <- aggregate(TOTALDMG ~ EVTYPE, data = df, FUN = sum)

# Rank each table from the largest amount downwards (ties broken by event
# name) and keep the first ten rows
propdamage <- arrange(propdamage, desc(TOTAL_PROPDMG), EVTYPE)[1:10, ]
cropdamage <- arrange(cropdamage, desc(TOTAL_CROPDMG), EVTYPE)[1:10, ]
totaldamage <- arrange(totaldamage, desc(TOTALDMG), EVTYPE)[1:10, ]

# Turn EVTYPE into a factor whose levels follow the ranked row order
propdamage$EVTYPE <- with(propdamage, factor(EVTYPE, levels = EVTYPE))
cropdamage$EVTYPE <- with(cropdamage, factor(EVTYPE, levels = EVTYPE))
totaldamage$EVTYPE <- with(totaldamage, factor(EVTYPE, levels = EVTYPE))
propdamage
##               EVTYPE TOTAL_PROPDMG
## 1              FLOOD  144657709807
## 2  HURRICANE/TYPHOON   69305840000
## 3            TORNADO   56947380677
## 4        STORM SURGE   43323536000
## 5        FLASH FLOOD   16822673979
## 6               HAIL   15735267513
## 7          HURRICANE   11868319010
## 8     TROPICAL STORM    7703890550
## 9       WINTER STORM    6688497251
## 10         HIGH WIND    5270046295
# Bar chart: total property damage per event type (one bar per row of
# `propdamage`)
propplotdamage <- ggplot(propdamage, aes(EVTYPE, TOTAL_PROPDMG)) +
  geom_bar(stat = "identity", fill = "green") +
  labs(x = "Event Type", y = "Property Damages ($)") +
  # Rotate the event-type labels so they stay readable on the x axis
  theme(axis.text.x = element_text(angle = 90, hjust = 1))


cropdamage
##               EVTYPE TOTAL_CROPDMG
## 1            DROUGHT   13972566000
## 2              FLOOD    5661968450
## 3        RIVER FLOOD    5029459000
## 4          ICE STORM    5022113500
## 5               HAIL    3025954473
## 6          HURRICANE    2741910000
## 7  HURRICANE/TYPHOON    2607872800
## 8        FLASH FLOOD    1421317100
## 9       EXTREME COLD    1292973000
## 10      FROST/FREEZE    1094086000
# Bar chart: total crop damage per event type (one bar per row of
# `cropdamage`)
cropplotdamage <- ggplot(cropdamage, aes(EVTYPE, TOTAL_CROPDMG)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Event Type", y = "Crop Damages ($)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))


totaldamage
##               EVTYPE     TOTALDMG
## 1              FLOOD 150319678257
## 2  HURRICANE/TYPHOON  71913712800
## 3            TORNADO  57362333947
## 4        STORM SURGE  43323541000
## 5               HAIL  18761221986
## 6        FLASH FLOOD  18243991079
## 7            DROUGHT  15018672000
## 8          HURRICANE  14610229010
## 9        RIVER FLOOD  10148404500
## 10         ICE STORM   8967041360

Results

Public Health effects

* Tornadoes represent the most significant harm to public health.

# Draw the fatalities and injuries charts side by side under a shared title.
# NOTE(review): font = 6 does not look like one of the usual font-face codes;
# how it renders is presumably device-dependent - confirm the intended face.
grid.arrange(
  fatalitiesbyweather, injuriesbyweather,
  nrow = 1, ncol = 2,
  top = textGrob(
    label = "Public Health Impact - Fatalities & Injuries from top 10 Weather Events",
    gp = gpar(fontsize = 14, font = 6)
  )
)

Economic consequences

* Floods cause the most significant total damage (property and crop).

* Drought events inflict the most damage to crops.

* Floods cause the most harm to property.

# Bar chart: combined property and crop damage per event type (one bar per
# row of `totaldamage`)
totplotdamage <- ggplot(totaldamage, aes(EVTYPE, TOTALDMG)) +
  geom_bar(stat = "identity", fill = "yellow") +
  labs(x = "Event Type", y = "Total Prop & Crop Damages ($)") +
  # Rotate the event-type labels so they stay readable on the x axis
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

#plot(totplotdamage)
# Draw the property, crop, and combined damage charts in one row under a
# shared title.
grid.arrange(
  propplotdamage, cropplotdamage, totplotdamage,
  nrow = 1, ncol = 3,
  top = textGrob(
    label = "Damage Impact - Property, Crop, & Overall from top 10 Weather Events ",
    gp = gpar(fontsize = 14, font = 3)
  )
)