This course project analyzed the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database to determine the effects of weather events on the U.S. population and its economy. Injuries and fatalities followed similar weather-event patterns, with tornadoes inflicting the harshest toll. Economic impact, measured in crop and property damage, followed a different pattern of weather events, with floods causing the largest total damage.
A. Get the data
# NOTE(review): machine-specific absolute path. setwd() returns the directory
# that was current *before* the change, so `path` holds the previous working
# directory, not the project folder.
path <- setwd("/Users/sexybaboy/Documents/Files/Zetch/Online Courses/Data Science Specialization Feb18/R/Reproducible Research/Course Project 2")
# The remote file is a bzip2-compressed CSV; it is stored locally under a
# ".csv" name without being decompressed.
stormUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
stormFile <- "repdata-data-StormData.csv"
# Fetch the archive only when it is not already on disk, and write it in
# binary mode so the compressed bytes are stored unchanged on every platform.
if (!file.exists(stormFile)) {
  download.file(stormUrl, destfile = stormFile, mode = "wb")
}
B. Load and preprocess data
library(data.table)
library(ggplot2)
# Parse the downloaded storm file into a data.frame.
StormOrig <- read.csv(file = "./repdata-data-StormData.csv", sep = ",", header = TRUE)
# Build the data.table that the rest of the analysis works on.
StormNew <- as.data.table(StormOrig)
# List the columns that are available.
names(StormNew)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Columns required for the health and economic analysis.
keepCols <- c("EVTYPE", "FATALITIES", "INJURIES",
              "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
# Everything else is dropped from the table by reference.
cols2Remove <- setdiff(colnames(StormNew), keepCols)
StormNew[, (cols2Remove) := NULL]
# Keep rows whose event type is not "?" and that report at least one
# fatality, injury, property-damage or crop-damage amount above zero.
StormNew <- StormNew[
  EVTYPE != "?" &
    (FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  keepCols,
  with = FALSE
]
# Normalise the damage exponent codes to upper case, one column at a time.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
for (expCol in cols) {
  set(StormNew, j = expCol, value = toupper(StormNew[[expCol]]))
}
# Lookup table: property-damage exponent code -> numeric multiplier.
# NOTE(review): the first key is the two-character string of double quotes,
# not the empty string, so a blank exponent does not match it.
propDmgNum <- c(
  # Symbols and zero leave the damage figure unscaled.
  "\"\"" = 1, "-" = 1, "+" = 1, "0" = 1,
  # A digit d maps to 10^d.
  "1" = 1e1, "2" = 1e2, "3" = 1e3,
  "4" = 1e4, "5" = 1e5, "6" = 1e6,
  "7" = 1e7, "8" = 1e8, "9" = 1e9,
  # Letter codes: hundred, thousand, million, billion.
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
)
# Lookup table: crop-damage exponent code -> numeric multiplier.
cropDmgNum <- c(
  # Symbols and zero leave the damage figure unscaled.
  "\"\"" = 1, "?" = 1, "0" = 1,
  # Letter codes: thousand, million, billion.
  "K" = 1e3, "M" = 1e6, "B" = 1e9
)
# Swap each exponent code for its numeric multiplier from the lookup tables.
StormNew[, `:=`(
  PROPDMGEXP = propDmgNum[as.character(PROPDMGEXP)],
  CROPDMGEXP = cropDmgNum[as.character(CROPDMGEXP)]
)]
# Codes with no entry in the lookup come back as NA; treat them as a
# multiplier of 1 so the damage figure is used as recorded.
StormNew[is.na(PROPDMGEXP), PROPDMGEXP := 1]
StormNew[is.na(CROPDMGEXP), CROPDMGEXP := 1]
# Rebuild the table with the cost columns (amount * multiplier) placed next
# to the inputs they are derived from.
StormNew <- StormNew[, .(
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  propCost = PROPDMG * PROPDMGEXP,
  CROPDMG,
  CROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]
# Sum fatalities and injuries per event type, rank the event types by
# fatalities (highest first) and keep the first seven rows.
totalInjuriesStorm <- StormNew[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  Total = sum(FATALITIES) + sum(INJURIES)
), by = .(EVTYPE)][order(-FATALITIES)][1:7]
# Preview the five leading rows.
head(totalInjuriesStorm, n = 5)
## EVTYPE FATALITIES INJURIES Total
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
## 4: HEAT 937 2100 3037
## 5: LIGHTNING 816 5230 6046
# Sum property and crop costs per event type, rank the event types by
# combined cost (highest first) and keep the first seven rows.
totalCostStorm <- StormNew[, .(
  propCost = sum(propCost),
  cropCost = sum(cropCost),
  TotalCost = sum(propCost) + sum(cropCost)
), by = .(EVTYPE)][order(-TotalCost)][1:7]
# Preview the five leading rows.
head(totalCostStorm, n = 5)
## EVTYPE propCost cropCost TotalCost
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380676 414953270 57362333946
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: HAIL 15735267513 3025954473 18761221986
The effects on population health are recorded in the fatalities and injuries columns. The event types are then summarized by their total fatalities and injuries in a temporary data frame named ‘Events’.
# Reshape the per-event totals to long format: one row per event type and
# measure, with the measure name in `Cause` and its figure in `value`.
Events <- melt(
  data = totalInjuriesStorm,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "Total"),
  variable.name = "Cause"
)
# Preview the five leading rows.
head(Events, n = 5)
## EVTYPE Cause value
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
## 4: HEAT FATALITIES 937
## 5: LIGHTNING FATALITIES 816
# Dodged bar chart of the health figures per event type, one bar per Cause.
Health <- ggplot(Events, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = Cause), stat = "identity", position = "dodge") +
  # Axis labels and chart title.
  labs(
    x = "Event Type",
    y = "Count",
    title = "Top 7 Weather Events Harmful to Population Health"
  ) +
  # Tilt the x-axis tick labels and centre the title.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Render the chart.
Health
Economic consequences are recorded in the crop and property damage columns. The event types are then summarized by their total crop and property damage in a temporary data frame named ‘Effects’.
# Reshape the per-event costs to long format: one row per event type and
# cost measure, with the measure name in `DamageType` and its figure in `value`.
Effects <- melt(
  data = totalCostStorm,
  id.vars = "EVTYPE",
  measure.vars = c("propCost", "cropCost", "TotalCost"),
  variable.name = "DamageType"
)
# Preview the five leading rows.
head(Effects, n = 5)
## EVTYPE DamageType value
## 1: FLOOD propCost 144657709807
## 2: HURRICANE/TYPHOON propCost 69305840000
## 3: TORNADO propCost 56947380676
## 4: STORM SURGE propCost 43323536000
## 5: HAIL propCost 15735267513
# Dodged bar chart of the cost figures per event type, one bar per DamageType.
Economy <- ggplot(Effects, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = DamageType), stat = "identity", position = "dodge") +
  # Axis labels and chart title.
  labs(
    x = "Event Type",
    y = "Cost (dollars)",
    title = "Top 7 Weather Events Harmful to the Economy"
  ) +
  # Tilt the x-axis tick labels and centre the title.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Render the chart.
Economy
Tornadoes had the greatest impact on population health, while floods had the greatest impact on the economy.