Impact of Weather Events on Personal and Property Damage

Synopsis

In this analysis, the NOAA Storm Database is used to answer some basic questions about severe weather events. The study assesses the severity of different event types on three parameters: fatalities, injuries and economic damage. It identifies the top five event types for each parameter and compares them with the others. The results are presented as bar plots of the top five event types.

Data processing

The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. After the data are downloaded from the website, the file is decompressed and read into the R environment.

# Download the compressed storm data only when it is not already on disk, so
# re-running the analysis does not fetch the archive again.
storm_data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_data_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_data_file)) {
    # mode = "wb" keeps the bzip2 archive byte-exact on every platform.
    download.file(storm_data_url, destfile = storm_data_file, mode = "wb")
}

# Reading data
# bzfile() decompresses on the fly, so no separate extraction step is needed.
StormData <- read.csv(bzfile(storm_data_file))

Cleaning the data

Key variables used for the analysis are:

- EVTYPE : Type of the event
- FATALITIES : Number of fatalities from the event
- INJURIES : Number of injuries from the event
- PROPDMG : Property damage measured
- CROPDMG : Crop damage measured
- PROPDMGEXP : Property damage exponent (hundreds, thousands, millions, billions, etc.)
- CROPDMGEXP : Crop damage exponent (hundreds, thousands, millions, billions, etc.)

The last two variables do not contain clean data, as shown below.

# List the distinct raw exponent codes of both damage columns.
unique(StormData[["PROPDMGEXP"]])
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"

First, make everything upper case

# Upper-case both exponent columns in one pass so that lower- and upper-case
# spellings of the same code collapse into a single value.
exponent_columns <- c("PROPDMGEXP", "CROPDMGEXP")
StormData[exponent_columns] <- lapply(StormData[exponent_columns], toupper)
unique(StormData[["PROPDMGEXP"]])
##  [1] "K" "M" ""  "B" "+" "0" "5" "6" "?" "4" "2" "3" "H" "7" "-" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] ""  "M" "K" "B" "?" "0" "2"

Convert blank, "+", "-" and "?" exponents to zero

# Blank and symbol codes are all replaced by the exponent "0".
symbol_codes <- c("", "+", "-", "?")
StormData$PROPDMGEXP <- replace(
    StormData$PROPDMGEXP,
    StormData$PROPDMGEXP %in% symbol_codes,
    "0"
)
StormData$CROPDMGEXP <- replace(
    StormData$CROPDMGEXP,
    StormData$CROPDMGEXP %in% symbol_codes,
    "0"
)
unique(StormData[["PROPDMGEXP"]])
##  [1] "K" "M" "0" "B" "5" "6" "4" "2" "3" "H" "7" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] "0" "M" "K" "B" "2"

Create 10^x substitutions for Billion, Hundred, Kilo, and Million

# Translate the letter codes into power-of-ten exponents through a single
# lookup table; values that are not letter codes are left untouched.
letter_exponents <- c(B = "9", M = "6", K = "3", H = "2")

prop_is_letter <- StormData$PROPDMGEXP %in% names(letter_exponents)
StormData$PROPDMGEXP[prop_is_letter] <-
    unname(letter_exponents[StormData$PROPDMGEXP[prop_is_letter]])

crop_is_letter <- StormData$CROPDMGEXP %in% names(letter_exponents)
StormData$CROPDMGEXP[crop_is_letter] <-
    unname(letter_exponents[StormData$CROPDMGEXP[crop_is_letter]])

unique(StormData[["PROPDMGEXP"]])
##  [1] "3" "6" "0" "9" "5" "4" "2" "7" "1" "8"
unique(StormData[["CROPDMGEXP"]])
## [1] "0" "6" "3" "9" "2"

Now combine the exponent with the value

# Scale each reported damage figure by ten raised to its recorded exponent.
prop_exponent <- as.numeric(StormData[["PROPDMGEXP"]])
crop_exponent <- as.numeric(StormData[["CROPDMGEXP"]])
StormData[["PROPDMGTOTAL"]] <- StormData[["PROPDMG"]] * 10^prop_exponent
StormData[["CROPDMGTOTAL"]] <- StormData[["CROPDMG"]] * 10^crop_exponent
# Total damage per record is property damage plus crop damage.
StormData[["DMGTOTAL"]] <-
    StormData[["PROPDMGTOTAL"]] + StormData[["CROPDMGTOTAL"]]

Data Analysis

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Total fatalities, injuries and damage for every event type.
StormDataByEvent <- group_by(StormData, EVTYPE)
SummStormData <- summarise(
    StormDataByEvent,
    SUMFATALITIES = sum(FATALITIES),
    SUMINJURIES = sum(INJURIES),
    SUMPROPDMG = sum(PROPDMGTOTAL),
    SUMCROPDMG = sum(CROPDMGTOTAL),
    TOTALDMG = sum(DMGTOTAL)
)

# Preview the first rows of the per-event summary.
head(SummStormData, n = 6)
## # A tibble: 6 × 6
##   EVTYPE                SUMFATALITIES SUMINJURIES SUMPROPDMG SUMCROPDMG TOTALDMG
##   <chr>                         <dbl>       <dbl>      <dbl>      <dbl>    <dbl>
## 1 "   HIGH SURF ADVISO…             0           0     200000          0   200000
## 2 " COASTAL FLOOD"                  0           0          0          0        0
## 3 " FLASH FLOOD"                    0           0      50000          0    50000
## 4 " LIGHTNING"                      0           0          0          0        0
## 5 " TSTM WIND"                      0           0    8100000          0  8100000
## 6 " TSTM WIND (G45)"                0           0       8000          0     8000

Results

Identifying events that caused most fatalities

# Rank event types by total fatalities (highest first) and keep the top six.
SummStormDataFatality <- SummStormData %>%
    arrange(desc(SUMFATALITIES))
FatalityData <- head(SummStormDataFatality, n = 6)
print(FatalityData)
## # A tibble: 6 × 6
##   EVTYPE         SUMFATALITIES SUMINJURIES   SUMPROPDMG SUMCROPDMG     TOTALDMG
##   <chr>                  <dbl>       <dbl>        <dbl>      <dbl>        <dbl>
## 1 TORNADO                 5633       91346 56947380676.  414953270 57362333946.
## 2 EXCESSIVE HEAT          1903        6525     7753700   492402000   500155700 
## 3 FLASH FLOOD              978        1777 16822673978. 1421317100 18243991078.
## 4 HEAT                     937        2100     1797000   401461500   403258500 
## 5 LIGHTNING                816        5230   930379430.   12092090   942471520.
## 6 TSTM WIND                504        6957  4484928495   554007350  5038935845

Plotting the five event types with the most fatalities

# Bar plot of the five event types with the most fatalities.
# A discrete x axis is sorted alphabetically by default, which hides the
# ranking; reorder() sorts the bars from the highest to the lowest count.
ggplot(FatalityData[1:5, ],
       aes(x = reorder(EVTYPE, -SUMFATALITIES), y = SUMFATALITIES)) +
    geom_col() +
    xlab("Event Type") +
    ylab("Number of Fatalities") +
    ggtitle("Fatalities by Event type")

We can see that tornadoes caused the most fatalities.

Identifying events that caused most injuries

# Rank event types by total injuries (highest first) and keep the top six.
SummStormDataInjuries <- SummStormData %>%
    arrange(desc(SUMINJURIES))
InjuriesData <- head(SummStormDataInjuries, n = 6)
print(InjuriesData)
## # A tibble: 6 × 6
##   EVTYPE         SUMFATALITIES SUMINJURIES    SUMPROPDMG SUMCROPDMG     TOTALDMG
##   <chr>                  <dbl>       <dbl>         <dbl>      <dbl>        <dbl>
## 1 TORNADO                 5633       91346  56947380676.  414953270      5.74e10
## 2 TSTM WIND                504        6957   4484928495   554007350      5.04e 9
## 3 FLOOD                    470        6789 144657709807  5661968450      1.50e11
## 4 EXCESSIVE HEAT          1903        6525      7753700   492402000      5.00e 8
## 5 LIGHTNING                816        5230    930379430.   12092090      9.42e 8
## 6 HEAT                     937        2100      1797000   401461500      4.03e 8

Plotting the five event types with the most injuries

# Bar plot of the five event types with the most injuries.
# A discrete x axis is sorted alphabetically by default, which hides the
# ranking; reorder() sorts the bars from the highest to the lowest count.
ggplot(InjuriesData[1:5, ],
       aes(x = reorder(EVTYPE, -SUMINJURIES), y = SUMINJURIES)) +
    geom_col() +
    xlab("Event Type") +
    ylab("Number of Injuries") +
    ggtitle("Injuries by Event type")

We can see that tornadoes caused the most injuries.

Identifying events that caused most damage

# Rank event types by total damage (highest first) and keep the top six.
SummStormDataDamage <- SummStormData %>%
    arrange(desc(TOTALDMG))
DamageData <- head(SummStormDataDamage, n = 6)
print(DamageData)
## # A tibble: 6 × 6
##   EVTYPE            SUMFATALITIES SUMINJURIES    SUMPROPDMG SUMCROPDMG  TOTALDMG
##   <chr>                     <dbl>       <dbl>         <dbl>      <dbl>     <dbl>
## 1 FLOOD                       470        6789 144657709807  5661968450   1.50e11
## 2 HURRICANE/TYPHOON            64        1275  69305840000  2607872800   7.19e10
## 3 TORNADO                    5633       91346  56947380676.  414953270   5.74e10
## 4 STORM SURGE                  13          38  43323536000        5000   4.33e10
## 5 HAIL                         15        1361  15735267513. 3025954473   1.88e10
## 6 FLASH FLOOD                 978        1777  16822673978. 1421317100   1.82e10

Plotting the five event types with the highest total damage

# Bar plot of the five event types with the highest total damage.
# A discrete x axis is sorted alphabetically by default, which hides the
# ranking; reorder() sorts the bars from the highest to the lowest total.
ggplot(DamageData[1:5, ],
       aes(x = reorder(EVTYPE, -TOTALDMG), y = TOTALDMG)) +
    geom_col() +
    xlab("Event Type") +
    ylab("Total Damage") +
    ggtitle("Total damage by Event type")

We can see that floods caused the most damage.

Summary

Across the three parameters, floods are responsible for the most economic damage, while tornadoes cause the most injuries and fatalities.