Synopsis

This course project analyzed the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm database to determine the effects of weather events on the U.S. population and its economy. Injuries and fatalities were caused by similar weather event patterns, with tornadoes inflicting the harshest toll. Economic impact, measured in crop and property damage, followed a different pattern of weather events, with floods causing the largest total damage.

Data Processing

A. Get the data

# Fetch the compressed NOAA storm data into the current working directory.
# The working directory is left unchanged so the analysis runs on any machine;
# `path` still records the directory the script was started from.
path <- getwd()
stormUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# The download is bzip2-compressed even though the local name ends in ".csv";
# the name is kept because the loading step reads this exact file.
stormFile <- "repdata-data-StormData.csv"
# Skip the download when the file is already present, and force binary mode
# so the compressed bytes are written unmodified on every platform.
if (!file.exists(stormFile)) {
  download.file(stormUrl, stormFile, mode = "wb")
}

B. Load and preprocess data

  1. Load data
library(data.table)
library(ggplot2)
# Read the downloaded storm file (comma-separated, with a header row) into a
# data.frame, then convert it to a data.table for the processing that follows.
StormOrig <- utils::read.csv(
  file = "./repdata-data-StormData.csv",
  header = TRUE,
  sep = ","
)
StormNew <- as.data.table(x = StormOrig)
  1. Process data
# List the available column names before choosing which ones to keep.
names(StormNew)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
  1. Subset data
# Columns required for the health and economic analysis.
colsToKeep <- c(
  "EVTYPE",
  "FATALITIES",
  "INJURIES",
  "PROPDMG",
  "PROPDMGEXP",
  "CROPDMG",
  "CROPDMGEXP"
)

# Every other column is surplus. Work on the column names only, so the wide
# table is not copied just to find out which columns to drop.
cols2Remove <- setdiff(colnames(StormNew), colsToKeep)

# Drop the surplus columns by reference.
StormNew[, (cols2Remove) := NULL]

# Keep only events with a recorded impact (at least one injury or fatality,
# or a non-zero property or crop damage figure) and discard the "?" event type.
StormNew <- StormNew[
  EVTYPE != "?" &
    (FATALITIES > 0 | INJURIES > 0 | CROPDMG > 0 | PROPDMG > 0),
  .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
]
  1. Clean data
# Upper-case both damage exponent columns so that codes differing only in
# case (e.g. "k" and "K") are looked up identically later on.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
for (expCol in cols) {
  set(StormNew, j = expCol, value = toupper(StormNew[[expCol]]))
}

# Multipliers for the property damage exponent codes (PROPDMGEXP).
# Digits are read as powers of ten; H, K, M and B stand for hundred, thousand,
# million and billion; "-", "+" and "0" leave the damage figure unscaled.
# A blank code is deliberately not listed: indexing a named vector by ""
# never matches, so blanks (like any other unlisted code) come back as NA from
# the lookup and are defaulted to 10^0 where the table is applied.
propDmgNum <- c(
  "-" = 10^0,
  "+" = 10^0,
  "0" = 10^0,
  "1" = 10^1,
  "2" = 10^2,
  "3" = 10^3,
  "4" = 10^4,
  "5" = 10^5,
  "6" = 10^6,
  "7" = 10^7,
  "8" = 10^8,
  "9" = 10^9,
  "H" = 10^2,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9
)

# Multipliers for the crop damage exponent codes (CROPDMGEXP).
# Derived from the property table so the same code scales both damage types
# identically (a crop code such as "2" or "H" must not fall back to 10^0 while
# the same property code means 10^2). "?" is added as unscaled.
# NOTE(review): digit codes are interpreted as 10^digit here, following the
# property table's convention - confirm against the NOAA documentation.
cropDmgNum <- c("?" = 10^0, propDmgNum)

# Replace each exponent code with its numeric multiplier, column by column.
# Codes missing from the relevant lookup table (including blanks) yield NA
# from the lookup and are treated as "no scaling" (10^0).
exponentTables <- list(PROPDMGEXP = propDmgNum, CROPDMGEXP = cropDmgNum)
for (expCol in names(exponentTables)) {
  multiplier <- exponentTables[[expCol]][as.character(StormNew[[expCol]])]
  multiplier[is.na(multiplier)] <- 10^0
  set(StormNew, j = expCol, value = multiplier)
}
  1. Create columns
# Add the dollar cost columns (damage figure times its multiplier), then put
# each cost column directly after the pair of columns it was computed from.
StormNew[, `:=`(
  propCost = PROPDMG * PROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]
setcolorder(
  StormNew,
  c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "propCost",
    "CROPDMG", "CROPDMGEXP", "cropCost"
  )
)
  1. Calculate fatalities and injuries
# Total fatalities and injuries per event type.
totalInjuriesStorm <- StormNew[
  ,
  .(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES)),
  by = .(EVTYPE)
]

# Combined casualty count for each event type.
totalInjuriesStorm[, Total := FATALITIES + INJURIES]

# Rank event types by fatalities, deadliest first.
setorder(totalInjuriesStorm, -FATALITIES)

# Keep the top seven. head() returns fewer rows when fewer event types exist,
# whereas indexing with 1:7 would pad the table with all-NA rows.
totalInjuriesStorm <- head(totalInjuriesStorm, 7L)

head(totalInjuriesStorm, 5)
##            EVTYPE FATALITIES INJURIES Total
## 1:        TORNADO       5633    91346 96979
## 2: EXCESSIVE HEAT       1903     6525  8428
## 3:    FLASH FLOOD        978     1777  2755
## 4:           HEAT        937     2100  3037
## 5:      LIGHTNING        816     5230  6046
  1. Calculate total property and crop cost
# Total property and crop cost per event type.
totalCostStorm <- StormNew[
  ,
  .(propCost = sum(propCost), cropCost = sum(cropCost)),
  by = .(EVTYPE)
]

# Combined cost for each event type.
totalCostStorm[, TotalCost := propCost + cropCost]

# Rank event types by combined cost, most expensive first.
setorder(totalCostStorm, -TotalCost)

# Keep the top seven. head() returns fewer rows when fewer event types exist,
# whereas indexing with 1:7 would pad the table with all-NA rows.
totalCostStorm <- head(totalCostStorm, 7L)

head(totalCostStorm, 5)
##               EVTYPE     propCost   cropCost    TotalCost
## 1:             FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
## 3:           TORNADO  56947380676  414953270  57362333946
## 4:       STORM SURGE  43323536000       5000  43323541000
## 5:              HAIL  15735267513 3025954473  18761221986

Results:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

The effects on population health are recorded in the fatalities and injuries columns. The per-event-type sums of fatalities and injuries are then reshaped into a temporary data frame named ‘Events’ for plotting.

# Reshape the health summary to long format: one row per event type and
# measure, with the measure name in "Cause" and its number in "value".
Events <- melt(
  totalInjuriesStorm,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "Total"),
  variable.name = "Cause"
)
head(Events, 5)
##            EVTYPE      Cause value
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816
# Grouped bar chart of the health figures: one cluster of bars per event
# type, one bar per Cause, with the largest values placed first on the x-axis.
Health <- ggplot(Events, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = Cause), stat = "identity", position = "dodge") +
  labs(
    title = "Top 7 Weather Events Harmful to Population Health",
    x = "Event Type",
    y = "Count"
  ) +
  theme(
    # Slant the event names so long labels do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Centre the title above the panel.
    plot.title = element_text(hjust = 0.5)
  )

Health

  1. Across the United States, which types of events have the greatest economic consequences?

Economic consequences are recorded in the crop and property damage columns. The per-event-type sums of crop and property damage are then reshaped into a temporary data frame named ‘Effects’ for plotting.

# Reshape the cost summary to long format: one row per event type and
# cost measure, with the measure name in "DamageType" and its amount in "value".
Effects <- melt(
  totalCostStorm,
  id.vars = "EVTYPE",
  measure.vars = c("propCost", "cropCost", "TotalCost"),
  variable.name = "DamageType"
)
head(Effects, 5)
##               EVTYPE DamageType        value
## 1:             FLOOD   propCost 144657709807
## 2: HURRICANE/TYPHOON   propCost  69305840000
## 3:           TORNADO   propCost  56947380676
## 4:       STORM SURGE   propCost  43323536000
## 5:              HAIL   propCost  15735267513
# Grouped bar chart of the cost figures: one cluster of bars per event type,
# one bar per DamageType, with the largest values placed first on the x-axis.
Economy <- ggplot(Effects, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = DamageType), stat = "identity", position = "dodge") +
  labs(
    title = "Top 7 Weather Events Harmful to the Economy",
    x = "Event Type",
    y = "Cost (dollars)"
  ) +
  theme(
    # Slant the event names so long labels do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Centre the title above the panel.
    plot.title = element_text(hjust = 0.5)
  )

Economy

Tornadoes had the greatest impact on health while Floods had the greatest impact on the economy.