Analysis of the Adverse Health and Economic Impacts of US Storms

1: Synopsis

The goal of the assignment is to explore the NOAA Storm Database and assess the effects of severe weather events on both population and economy. The database covers the time period between 1950 and November 2011.

The following analysis investigates which types of severe weather events are most harmful to:

  1. Health (injuries and fatalities)
  2. Property and crops (economic consequences)

2: Data Processing

2.1: Data Loading

Load the data using the read.csv() function.

# Read the raw storm CSV into `data`. Text columns are kept as character
# vectors (not factors) because stringsAsFactors is FALSE.
data <- read.csv(
  "repdata_data_StormData.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)

2.2: Data Subsetting

# Columns kept for the analysis: the event label, the casualty counts, and
# the damage amounts together with their exponent codes.
used <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
# Select those columns by name and preview the first rows.
dataused <- data[used]
head(dataused)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

2.3: Tidy up the data

The EVTYPE column contains inconsistently formatted labels. Normalize them by converting to upper case and stripping leading and trailing whitespace with a regular expression.

# Remove leading and trailing whitespace from every element of `x`.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}

# Normalise the event labels: upper-case them, then strip the padding.
dataused$EVTYPE <- trim(toupper(dataused$EVTYPE))

2.4: Converting Exponent Codes (-, +, H, K, etc.) into Numeric Multipliers

Convert the PROPDMGEXP and CROPDMGEXP codes into numeric multipliers so they can be used to calculate property and crop damage costs.

# Translate a vector of damage-exponent codes into numeric multipliers.
#
# Rules are applied in order: NA -> 0; empty string and any code containing
# "-", "+" or "?" -> 1; H/h -> 100; K/k -> 1000; M/m -> 1e6; B/b -> 1e9.
# Every remaining value is handed to as.numeric() as-is, so a digit code
# such as "5" becomes the multiplier 5.
# NOTE(review): digit codes may be meant as powers of ten -- confirm against
# the NOAA documentation before changing this, because it alters the totals.
#
# @param codes Character vector of exponent codes.
# @return Numeric vector of multipliers, the same length as `codes`.
exp_to_multiplier <- function(codes) {
  codes[is.na(codes)] <- 0
  codes[codes == ""] <- 1
  # Pattern -> multiplier. Order matters: each rule rewrites the entries it
  # matches before the next pattern is tested.
  rules <- c(
    "[-+?]" = 1,
    "[Hh]" = 100,
    "[Kk]" = 1000,
    "[Mm]" = 1e+06,
    "[Bb]" = 1e+09
  )
  for (pattern in names(rules)) {
    codes[grepl(pattern, codes)] <- rules[[pattern]]
  }
  as.numeric(codes)
}

# Property damage: replace the code by its multiplier, then scale the amount.
dataused$PROPDMGEXP <- exp_to_multiplier(dataused$PROPDMGEXP)
dataused$PROPDMG <- dataused$PROPDMGEXP * dataused$PROPDMG

# Crop damage: same treatment.
dataused$CROPDMGEXP <- exp_to_multiplier(dataused$CROPDMGEXP)
dataused$CROPDMG <- dataused$CROPDMGEXP * dataused$CROPDMG

2.5: Calculate the sum of FATALITIES, INJURIES, PROPDMG and CROPDMG by EVTYPE

# Sum each outcome measure within every event type; each result is a
# two-column data frame (EVTYPE plus the summed measure).
total_fatalities <- aggregate(FATALITIES ~ EVTYPE, data = dataused, FUN = sum)
total_injuries <- aggregate(INJURIES ~ EVTYPE, data = dataused, FUN = sum)
total_propdmg <- aggregate(PROPDMG ~ EVTYPE, data = dataused, FUN = sum)
total_cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = dataused, FUN = sum)

2.6: Getting Total Population Health Damage and Total Economic Damage Cost

# Combine the per-event totals by joining on EVTYPE, so rows are matched by
# event name rather than by position, and no duplicate EVTYPE column is
# created.
health <- merge(total_fatalities, total_injuries, by = "EVTYPE")
health <- health[, c("EVTYPE", "FATALITIES", "INJURIES")]
# Overall health impact = deaths + injuries.
health$Total <- health$FATALITIES + health$INJURIES

economy <- merge(total_propdmg, total_cropdmg, by = "EVTYPE")
economy <- economy[, c("EVTYPE", "PROPDMG", "CROPDMG")]
# Overall economic impact = property damage + crop damage.
economy$Total <- economy$PROPDMG + economy$CROPDMG
head(economy)
##                 EVTYPE PROPDMG  CROPDMG    Total
## 1                    ?    5000        0     5000
## 2      ABNORMAL WARMTH       0        0        0
## 3       ABNORMALLY DRY       0        0        0
## 4       ABNORMALLY WET       0        0        0
## 5 ACCUMULATED SNOWFALL       0        0        0
## 6  AGRICULTURAL FREEZE       0 28820000 28820000
# Preview the first six rows of the health summary.
head(health, n = 6L)
##                 EVTYPE FATALITIES INJURIES Total
## 1                    ?          0        0     0
## 2      ABNORMAL WARMTH          0        0     0
## 3       ABNORMALLY DRY          0        0     0
## 4       ABNORMALLY WET          0        0     0
## 5 ACCUMULATED SNOWFALL          0        0     0
## 6  AGRICULTURAL FREEZE          0        0     0

2.7: Getting the Top 10 Most Harmful Events

# Rank the events by combined total, largest first, and keep the first ten
# rows of each table (original row names are retained).
health <- health[head(order(-health$Total), 10), ]
economy <- economy[head(order(-economy$Total), 10), ]

print(health)
##                EVTYPE FATALITIES INJURIES Total
## 750           TORNADO       5633    91346 96979
## 108    EXCESSIVE HEAT       1903     6525  8428
## 771         TSTM WIND        504     6957  7461
## 146             FLOOD        470     6789  7259
## 410         LIGHTNING        816     5230  6046
## 235              HEAT        937     2100  3037
## 130       FLASH FLOOD        978     1777  2755
## 379         ICE STORM         89     1975  2064
## 677 THUNDERSTORM WIND        133     1488  1621
## 880      WINTER STORM        206     1321  1527
# Show the ten costliest event types.
print(economy)
##                EVTYPE      PROPDMG     CROPDMG        Total
## 146             FLOOD 144657709807  5661968450 150319678257
## 364 HURRICANE/TYPHOON  69305840000  2607872800  71913712800
## 750           TORNADO  56937161054   414953110  57352114164
## 591       STORM SURGE  43323536000        5000  43323541000
## 204              HAIL  15732267427  3025954453  18758221880
## 130       FLASH FLOOD  16140862294  1421317100  17562179394
## 76            DROUGHT   1046106000 13972566000  15018672000
## 355         HURRICANE  11868319010  2741910000  14610229010
## 521       RIVER FLOOD   5118945500  5029459000  10148404500
## 379         ICE STORM   3944927810  5022113500   8967041310

3: Results

3.1: Events that are Most Harmful to Population Health

library(ggplot2)

# Bar chart of combined fatalities and injuries per event type.
# reorder(EVTYPE, -Total) places the bars from largest to smallest total
# instead of alphabetically, so the ranking is visible at a glance.
ggplot(health, aes(x = reorder(EVTYPE, -Total), y = Total)) +
  geom_bar(stat = "identity") +
  labs(x = "Event type", y = "Sum of Fatalities and Injuries") +
  ggtitle("Most Population Health Harm by various Events") +
  # Rotate the event labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90))

As the plot shows, TORNADO is by far the most harmful event type for population health.

3.2: Events that have the Greatest Economic Consequences

library(ggplot2)

# Bar chart of combined property and crop damage per event type.
# reorder(EVTYPE, -Total) places the bars from largest to smallest total
# instead of alphabetically, so the ranking is visible at a glance.
ggplot(economy, aes(x = reorder(EVTYPE, -Total), y = Total)) +
  geom_bar(stat = "identity") +
  labs(x = "Event type", y = "Sum of Property and Crop Damage") +
  ggtitle("Most Economic Harm by various Events") +
  # Rotate the event labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90))

As the plot shows, FLOOD has the greatest economic consequences.