1.0 Synopsis

The goal of this assignment is to explore the NOAA Storm Database and answer questions related to property damage and harmful effects caused by severe weather conditions.

1.1 The following questions are addressed

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

1.2 Dataset location and details

The data for this assignment comes as a comma-separated-value (CSV) file compressed with the bzip2 algorithm.
You can download the file from the course web site: StormData.

National Weather Service Storm Data Documentation

National Climatic Data Center Storm Events FAQ

2.0 Data Processing

2.1 Load Required Libraries

library(data.table)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

2.2 Download and Read the Data File

## Source archive and the local file name it is saved under
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destFileName <- "repdata_data_StormData.csv.bz2"

## Download only when the archive is not already present, so re-running the
## analysis does not fetch the file again. mode = "wb" transfers the bzip2
## archive as binary; a text-mode download can corrupt it on some platforms.
if (!file.exists(destFileName)) {
  download.file(fileUrl, destfile = destFileName, mode = "wb")
}

## read CSV file
## NOTE(review): read.csv2() defaults to dec = ",", so values such as "25.00"
## are not recognised as numbers and the count/amount columns arrive as
## character; they are converted with as.numeric() further down.
stormData <- read.csv2(destFileName, sep = ",", header = TRUE)

3.0 Data processing

## Keep just the event type, casualty and damage columns; the trimmed table
## replaces the full dataset under the same name.
# names(stormData)  # Examine Columns
stormData <- select(
  stormData,
  EVTYPE,
  FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)

## Convert to a data.table by reference, then inspect the column types
setDT(stormData)
str(stormData)
## Classes 'data.table' and 'data.frame':   902297 obs. of  7 variables:
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ INJURIES  : chr  "15.00" "0.00" "2.00" "2.00" ...
##  $ PROPDMG   : chr  "25.00" "2.50" "25.00" "2.50" ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  - attr(*, ".internal.selfref")=<externalptr>
## Checking stormData dimensions after the column selection
dim(stormData)    # 902297 rows and 7 columns
## [1] 902297      7

3.1 Data Conversion

## In this section we will:
##   1. Rename the damage and casualty columns to readable names
##   2. Convert the count/amount columns from character to numeric
##   3. Scale the damage amounts by their magnitude codes
##   4. Aggregate damage and casualties per event type

## 1. Rename PROPDMG/CROPDMG and FATALITIES/INJURIES
stormData <- stormData %>%
  rename(
    PropDamage = PROPDMG,
    CropDamage = CROPDMG,
    Fatalities = FATALITIES,
    Injuries = INJURIES
  )

## 2. Convert from character to numeric
stormData$Fatalities <- as.numeric(stormData$Fatalities)
stormData$Injuries <- as.numeric(stormData$Injuries)
stormData$PropDamage <- as.numeric(stormData$PropDamage)
stormData$CropDamage <- as.numeric(stormData$CropDamage)

## 3. Apply the magnitude codes stored in PROPDMGEXP / CROPDMGEXP.
## The Storm Data documentation defines "K" (thousands), "M" (millions) and
## "B" (billions). Without this step an amount of 25 with code "K" and an
## amount of 25 with code "B" would contribute equally to the totals.
## NOTE(review): any other code (blank, digits, "+", "?", "H", ...) is left
## unscaled (multiplier 1) because its meaning is not documented -- confirm
## whether these rows should be handled differently.
damageMultiplier <- function(expCode) {
  knownScale <- c(K = 1e3, M = 1e6, B = 1e9)
  # Lookup by name is case-insensitive; unknown or empty codes yield NA
  scale <- unname(knownScale[toupper(as.character(expCode))])
  scale[is.na(scale)] <- 1
  scale
}
stormData$PropDamage <- stormData$PropDamage *
  damageMultiplier(stormData$PROPDMGEXP)
stormData$CropDamage <- stormData$CropDamage *
  damageMultiplier(stormData$CROPDMGEXP)

## 4. Group stormData by EVTYPE and create a new dataset with the summed
## property and crop damage. na.rm = TRUE keeps one unparseable amount from
## turning a whole event type's total into NA.
summaryEconomicDamage <- stormData %>%
  group_by(EVTYPE) %>%
  summarize_at(c("PropDamage", "CropDamage"), sum, na.rm = TRUE)

## Get summed Fatalities and Injuries per event type
summaryHarmful <- stormData %>%
  group_by(EVTYPE) %>%
  summarize_at(c("Fatalities", "Injuries"), sum, na.rm = TRUE)

3.2 Calculating Total Property/Crop Damage, and Total Fatalities/Injuries

## Add TotalCost (property damage plus crop damage) for each event type,
## then keep the ten event types with the largest combined damage
summaryEconomicDamage <- mutate(
  summaryEconomicDamage,
  TotalCost = rowSums(select(summaryEconomicDamage, "PropDamage", "CropDamage"))
)
summaryEconomicDamage <- summaryEconomicDamage[
  with(summaryEconomicDamage, order(TotalCost, decreasing = TRUE)),
][1:10, ]


## Add TotalSuffering (fatalities plus injuries) for each event type,
## then keep the ten event types with the largest combined count
summaryHarmful <- mutate(
  summaryHarmful,
  TotalSuffering = rowSums(select(summaryHarmful, "Fatalities", "Injuries"))
)
summaryHarmful <- summaryHarmful[
  with(summaryHarmful, order(TotalSuffering, decreasing = TRUE)),
][1:10, ]

4.0 Results

4.1 Events that are Most Harmful to Population Health

## Convert the top-10 summary to a data.table before reshaping it
setDT(summaryHarmful)

## Long format: one row per event type and measure column, with the measure
## name stored in "series" and its number in "value"
top10Fatals_Injuries <- melt(
  summaryHarmful,
  id.vars = "EVTYPE",
  variable.name = "series"
)

## Dodged bars per event type, one bar per series, event types ordered by
## descending value; x labels are rotated so long event names stay readable
lossChart <- ggplot(
  top10Fatals_Injuries,
  aes(x = reorder(EVTYPE, -value), y = value)
)
lossChart +
  geom_bar(aes(fill = series), stat = "identity", position = "dodge") +
  labs(x = "Event Type", y = "Count of Fatalities/Injuries") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

4.2 Events that have the Greatest Economic Consequences

## Convert the top-10 summary to a data.table before reshaping it
setDT(summaryEconomicDamage)

## Long format: one row per event type and measure column, with the measure
## name stored in "series" and its number in "value"
top10_Prop_Crop_Loss <- melt(
  summaryEconomicDamage,
  id.vars = "EVTYPE",
  variable.name = "series"
)

## Dodged bars per event type, one bar per series, event types ordered by
## descending value; x labels are rotated so long event names stay readable
healthChart <- ggplot(
  top10_Prop_Crop_Loss,
  aes(x = reorder(EVTYPE, -value), y = value)
)
healthChart +
  geom_bar(aes(fill = series), stat = "identity", position = "dodge") +
  labs(x = "Event Type", y = "Value - Property/Crop Loss") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))