library(knitr)
knitr::opts_chunk$set(echo = TRUE)

Storm Effect Analysis

Synopsis

After cleaning and processing the NOAA Storm Database, we find that since 1996 tornadoes have caused the most fatalities and injuries, while floods have caused the greatest economic losses (property and crop damage combined). We discard the data recorded before 1996 (the year from which all event types were recorded) and the event labels that occur fewer than 5 times since 1996 (likely typos or rare events), and then categorize each remaining event into one of the 48 official event types. We then sum the fatalities and injuries caused by each event type and use this total as an indicator of the event’s effect on population health. Finally, we sum the property damage and crop damage for each event type and use this total as an indicator of the event’s economic effect.

Data Processing

Reading data

# Read the raw NOAA storm data. The file path is built explicitly instead of
# calling setwd(), so this chunk no longer changes the session's working
# directory for everything that runs after it.
# NOTE(review): data_dir is still machine-specific; point it at the folder
# that holds the CSV when running this report elsewhere.
data_dir <- "C:/Users/ADMIN/Desktop/Data_Analytics/Reproducible Research"
storm <- read.csv(file.path(data_dir, "repdata-data-StormData.csv"))

Cleaning data

  • Remove the variables that are not going to be used in this analysis.
  • Since all event types have been recorded only since January 1996, we use only the data from January 1st, 1996 onwards in this analysis.
  • To keep the data set concise, we remove the event labels that occur fewer than 5 times.
# Event type names that the recorded EVTYPE labels are matched against
# (48 entries, in alphabetical order).
evtype <- c(
  "Astronomical Low Tide", "Avalanche", "Blizzard", "Coastal Flood",
  "Cold/Wind Chill", "Debris Flow", "Dense Fog", "Dense Smoke",
  "Drought", "Dust Devil", "Dust Storm", "Excessive Heat",
  "Extreme Cold/Wind Chill", "Flash Flood", "Flood", "Freezing Fog",
  "Frost/Freeze", "Funnel Cloud", "Hail", "Heat",
  "Heavy Rain", "Heavy Snow", "High Surf", "High Wind",
  "Hurricane/Typhoon", "Ice Storm", "Lakeshore Flood", "Lake-Effect Snow",
  "Lightning", "Marine Hail", "Marine High Wind", "Marine Strong Wind",
  "Marine Thunderstorm Wind", "Rip Current", "Seiche", "Sleet",
  "Storm Tide", "Strong Wind", "Thunderstorm Wind", "Tornado",
  "Tropical Depression", "Tropical Storm", "Tsunami", "Volcanic Ash",
  "Waterspout", "Wildfire", "Winter Storm", "Winter Weather"
)
# Keep only the columns this analysis reads: begin date, event type,
# casualty counts, and the damage amounts with their exponent codes.
storm <- storm[, c("BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
                   "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Keep events that began on or after 1996-01-01. The comparison is inclusive
# (>=) so that events dated January 1st 1996 itself are part of the
# "since 1996" data; a strict > would silently drop that day.
# Rows whose date cannot be parsed are dropped, as subset() would do.
bgn_date <- as.Date(as.character(storm$BGN_DATE), format = "%m/%d/%Y")
storm <- storm[!is.na(bgn_date) & bgn_date >= as.Date("1996-01-01"), ]

# Normalise the recorded event labels: upper-case them, drop the first space
# (sub() replaces only the first match), then expand the first "TSTM"
# abbreviation to "THUNDERSTORM".
storm$EVTYPE <- sub(
  "TSTM", "THUNDERSTORM",
  sub(" ", "", toupper(storm$EVTYPE))
)

# Apply the same first-space removal and upper-casing to the reference names
# so both sides are compared in the same form.
evtype <- sub(" ", "", evtype)
evtype <- toupper(evtype)

# Count how often each normalised label occurs and keep only the records
# whose label appears at least 5 times.
library(plyr)
cnt <- count(storm, "EVTYPE")
cnt <- cnt[cnt$freq >= 5, ]
storm <- storm[storm$EVTYPE %in% cnt[, 1], ]
  • After cleaning the data set, we match the EVTYPE variable against the official event type names (allowing a maximum string distance of 3) and then remove the observations whose EVTYPE cannot be matched.
library(stringdist)
## Warning: package 'stringdist' was built under R version 3.4.1
# Replace each label with the position of its approximate match in evtype
# (string distance of at most 3); labels with no match become NA.
storm$EVTYPE <- amatch(x = storm$EVTYPE, table = evtype, maxDist = 3)
# Discard the records that could not be matched.
storm <- subset(storm, !is.na(EVTYPE))
# Turn the match positions back into the (normalised) reference names.
storm$EVTYPE <- evtype[storm$EVTYPE]

Effect on Population Health

Both fatalities and injuries are counted as harm to population health.

# Combined casualty count per record.
# NOTE: the sum is stored back into FATALITIES, so from here on that column
# holds fatalities + injuries, and re-running this line adds INJURIES again.
storm$FATALITIES <- with(storm, FATALITIES + INJURIES)

# Total casualties per event type, and the event type with the largest total.
pophlth <- tapply(X = storm$FATALITIES, INDEX = storm$EVTYPE, FUN = sum)
maxphev <- names(which.max(pophlth))

Economic Consequences

# Multiplier implied by a damage exponent code.
#
# exp_code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length:
#   "B" -> 1e9, "M" -> 1e6, "K" -> 1e3 (matched case-insensitively, so
#   lower-case codes are scaled too instead of being left as raw amounts),
#   "0".."8" -> 10, "-", " ", "?" -> 0 (amount is zeroed),
#   anything else -> 1 (amount is left unchanged).
damage_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  mult <- rep(1, length(code))
  mult[code %in% "B"] <- 1e9
  mult[code %in% "M"] <- 1e6
  mult[code %in% "K"] <- 1e3
  mult[code %in% as.character(0:8)] <- 10
  mult[code %in% c("-", " ", "?")] <- 0
  mult
}

# Scale the recorded amounts by their exponent codes. Each column is scaled
# in a single step from one lookup, rather than by a sequence of in-place
# updates that had to be repeated for property and crop damage.
storm$PROPDMG <- storm$PROPDMG * damage_multiplier(storm$PROPDMGEXP)
storm$CROPDMG <- storm$CROPDMG * damage_multiplier(storm$CROPDMGEXP)

# Total economic damage per record, summed per event type, and the event
# type with the largest total.
totaldam <- storm$PROPDMG + storm$CROPDMG
ecodam <- tapply(totaldam, storm$EVTYPE, sum)
maxedev <- names(which.max(ecodam))

RESULTS

Population Health

The total for each type of event includes both fatalities and injuries.

# Draw the bars in decreasing order so the most harmful event types come
# first, and rotate and shrink the axis labels (with a taller bottom margin)
# so the event names stay legible with this many categories.
old_par <- par(mar = c(12, 4, 4, 2) + 0.1)
barplot(sort(pophlth, decreasing = TRUE),
        las = 2, cex.names = 0.6,
        ylab = "Fatalities + injuries",
        main = "Total Fatalities and Injuries caused by the event")
par(old_par)  # restore the previous graphics settings

  • We can see that TORNADO has caused the largest combined number of fatalities and injuries.

Economic Damage

# Draw the bars in decreasing order so the costliest event types come first,
# and rotate and shrink the axis labels (with a taller bottom margin) so the
# event names stay legible with this many categories.
old_par <- par(mar = c(12, 4, 4, 2) + 0.1)
barplot(sort(ecodam, decreasing = TRUE),
        las = 2, cex.names = 0.6,
        ylab = "Property + crop damage",
        main = "Total loss caused by the event")
par(old_par)  # restore the previous graphics settings

  • We can see that FLOOD has caused the greatest economic damage.