1. Synopsis

This study explores the NOAA Storm Database and explores the effects of severe weather events on both population and econommy. The database covers the time period between 1950 and November 2011

The following analysis investigates which types of severe weather events are the most harmful on:

  1. Health (both injuries and fatalities)
  2. Property and crops (economic consequences)

2. Data Processing

2.1. Data Loading

Download the raw data file, extract the data into a dataframe and convert it into a data.table.

library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(fileURL, destfile = paste0('/Users/bhara/Desktop/6th Sem/PE/R_Studio_files/Reproducible_Research/RR_project2', '/repdata%2Fdata%2FStormData.csv.bz2'))

2.2. Converting the data to a data table and examining the column names

stormDF <- read.csv('/Users/bhara/Desktop/6th Sem/PE/R_Studio_files/Reproducible_Research/RR_project2/repdata%2Fdata%2FStormData.csv.bz2')

# Convert the data.frame to a data.table
stormDT <- as.data.table(stormDF)
# Reading the column names 
colnames(stormDT)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

2.3. Data Subsetting

Not all columns in the dataset are relevant. Subset the dataset and extract the parameters of interest by removing the columsn we don’t need for clairity.

# Finding columsn to remove 
cols2Remove <- colnames(stormDT[, !c('EVTYPE'
                                     , "FATALITIES"
                                     , "INJURIES"
                                     , "PROPDMG"
                                     , "PROPDMGEXP"
                                     , "CROPDMG"
                                     , "CROPDMGEXP")])

# Removing columns 
stormDT[, c(cols2Remove):= NULL]

# Only use data where fatalities or injuries occurred.  
stormDT <- stormDT[(EVTYPE != "?" & 
             (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)),
             c("EVTYPE",
               "FATALITIES",
               "INJURIES",
               "PROPDMG",
               "PROPDMGEXP",
               "CROPDMG",
               "CROPDMGEXP") ]

Convert the Property Damage and Crop Damage Exponent Columns into Actual Exponents instead of (-,+,H,K,etc)

# Change all damage exponents to uppercase.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
stormDT[,  (cols) := c(lapply(.SD, toupper)), .SDcols = cols]

# Map property damage alphanumeric exponents to numeric values.
propDmgKey <-  c("\"\"" = 10^0,
                 "-" = 10^0, 
                 "+" = 10^0,
                 "0" = 10^0,
                 "1" = 10^1,
                 "2" = 10^2,
                 "3" = 10^3,
                 "4" = 10^4,
                 "5" = 10^5,
                 "6" = 10^6,
                 "7" = 10^7,
                 "8" = 10^8,
                 "9" = 10^9,
                 "H" = 10^2,
                 "K" = 10^3,
                 "M" = 10^6,
                 "B" = 10^9)

# Map crop damage alphanumeric exponents to numeric values
cropDmgKey <-  c("\"\"" = 10^0,
                "?" = 10^0, 
                "0" = 10^0,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)

stormDT[, PROPDMGEXP := propDmgKey[as.character(stormDT[,PROPDMGEXP])]]
stormDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0 ]

stormDT[, CROPDMGEXP := cropDmgKey[as.character(stormDT[,CROPDMGEXP])] ]
stormDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0 ]

2.5. Making Economic Cost Columns

stormDT <- stormDT[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP, CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP)]

2.6. Calculating the Total Property and Crop Cost

totalCostDT <- stormDT[, .(propCost = sum(propCost), cropCost = sum(cropCost), Total_Cost = sum(propCost) + sum(cropCost)), by = .(EVTYPE)]

totalCostDT <- totalCostDT[order(-Total_Cost), ]

totalCostDT <- totalCostDT[1:10, ]

head(totalCostDT, 5)
##               EVTYPE     propCost   cropCost   Total_Cost
##               <char>        <num>      <num>        <num>
## 1:             FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
## 3:           TORNADO  56947380677  414953270  57362333947
## 4:       STORM SURGE  43323536000       5000  43323541000
## 5:              HAIL  15735267513 3025954473  18761221986

2.7. Calculating the Total Fatalities and Injuries

totalInjuriesDT <- stormDT[, .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), totals = sum(FATALITIES) + sum(INJURIES)), by = .(EVTYPE)]

totalInjuriesDT <- totalInjuriesDT[order(-FATALITIES), ]

totalInjuriesDT <- totalInjuriesDT[1:10, ]

head(totalInjuriesDT, 5)
##            EVTYPE FATALITIES INJURIES totals
##            <char>      <num>    <num>  <num>
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755
## 4:           HEAT        937     2100   3037
## 5:      LIGHTNING        816     5230   6046

3. Results

Events that are the Most Harmful to Population Health

Melting data.table so that it is easier to put in bar graph format

Injuries_bar_format <- melt(totalInjuriesDT, id.vars="EVTYPE", variable.name = "FATALITIES")
head(Injuries_bar_format, 5)
##            EVTYPE FATALITIES value
##            <char>     <fctr> <num>
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816

Creating the chart

# Create chart
healthChart <- ggplot(Injuries_bar_format, aes(x=reorder(EVTYPE, -value), y=value))

# Plot data as bar chart
healthChart = healthChart + geom_bar(stat="identity", aes(fill=FATALITIES), position="dodge")

# Format y-axis scale and set y-axis label
healthChart = healthChart + ylab("Frequency Count") 

# Set x-axis label
healthChart = healthChart + xlab("Event Type") 

# Rotate x-axis tick labels 
healthChart = healthChart + theme(axis.text.x = element_text(angle=45, hjust=1))

# Set chart title and center it
healthChart = healthChart + ggtitle("Top 10 Killers(US)") + theme(plot.title = element_text(hjust = 0.5))

healthChart

## 3.2. Events that have the Greatest Economic Consequences Once again melting data.table so that it is easier to put into bar graph format

econ_consequences <- melt(totalCostDT, id.vars="EVTYPE", variable.name = "Damage_Type")
head(econ_consequences, 5)
##               EVTYPE Damage_Type        value
##               <char>      <fctr>        <num>
## 1:             FLOOD    propCost 144657709807
## 2: HURRICANE/TYPHOON    propCost  69305840000
## 3:           TORNADO    propCost  56947380677
## 4:       STORM SURGE    propCost  43323536000
## 5:              HAIL    propCost  15735267513

Creating the chart

# Create chart
econChart <- ggplot(econ_consequences, aes(x=reorder(EVTYPE, -value), y=value))

# Plot data as bar chart
econChart = econChart + geom_bar(stat="identity", aes(fill=Damage_Type), position="dodge")

# Format y-axis scale and set y-axis label
econChart = econChart + ylab("Cost (dollars)") 

# Set x-axis label
econChart = econChart + xlab("Event Type") 

# Rotate x-axis tick labels 
econChart = econChart + theme(axis.text.x = element_text(angle=45, hjust=1))

# Set chart title and center it
econChart = econChart + ggtitle("Top 10 US Storm Events causing Economic Consequences") + theme(plot.title = element_text(hjust = 0.5))

econChart