library(knitr)
knitr::opts_chunk$set(echo = TRUE)
By cleaning and processing the NOAA Storm Database, we find that since 1996 tornadoes have caused the most fatalities and injuries, while floods have caused the largest economic losses (property and crop damage combined). We first drop the records from before 1996 (the year from which all event types were recorded) and the event labels that occur fewer than 5 times since 1996 (likely typos or rare events), and then categorize each remaining event into one of the 48 event types. We then calculate the sum of fatalities and injuries caused by each event type and use it as an indicator of that event type's effect on population health. Finally, we calculate the total property and crop damage for each event type and use it as an indicator of its economic effect.
# Load the raw storm data, declare the 48 target event types, and keep only
# the columns and years used by this analysis.
# NOTE(review): setwd() to a machine-specific absolute path makes the script
# non-portable. It is kept here so that any later relative paths keep
# resolving exactly as before; consider a project-relative path instead.
setwd("C:/Users/ADMIN/Desktop/Data_Analytics/Reproducible Research")
storm <- read.csv("repdata-data-StormData.csv")

# Event type names that every record is mapped onto further down.
evtype <- c(
  "Astronomical Low Tide", "Avalanche", "Blizzard", "Coastal Flood",
  "Cold/Wind Chill", "Debris Flow", "Dense Fog", "Dense Smoke", "Drought",
  "Dust Devil", "Dust Storm", "Excessive Heat", "Extreme Cold/Wind Chill",
  "Flash Flood", "Flood", "Freezing Fog", "Frost/Freeze", "Funnel Cloud",
  "Hail", "Heat", "Heavy Rain", "Heavy Snow", "High Surf", "High Wind",
  "Hurricane/Typhoon", "Ice Storm", "Lakeshore Flood", "Lake-Effect Snow",
  "Lightning", "Marine Hail", "Marine High Wind", "Marine Strong Wind",
  "Marine Thunderstorm Wind", "Rip Current", "Seiche", "Sleet", "Storm Tide",
  "Strong Wind", "Thunderstorm Wind", "Tornado", "Tropical Depression",
  "Tropical Storm", "Tsunami", "Volcanic Ash", "Waterspout", "Wildfire",
  "Winter Storm", "Winter Weather"
)

# Only the date, event type, casualty and damage columns are used below.
storm <- storm[, c("BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
                   "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Keep events from 1996 onward. The comparison is inclusive so that events
# beginning on 1996-01-01 are retained (a strict `>` silently dropped them).
# Rows whose date cannot be parsed are dropped by subset().
bgn_date <- as.Date(as.character(storm$BGN_DATE), format = "%m/%d/%Y")
storm <- subset(storm, bgn_date >= as.Date("1996-01-01"))
# Normalise the free-text EVTYPE labels, drop rare labels, and map every
# remaining record onto the closest entry of `evtype`.
library(plyr)
library(stringdist)

# Matching key: upper case with EVERY space removed. The previous sub() call
# removed only the first space, so multi-word and space-padded labels were
# only partially normalised.
normalise_label <- function(x) {
  gsub(" ", "", toupper(x), fixed = TRUE)
}

storm$EVTYPE <- sub("TSTM", "THUNDERSTORM", normalise_label(storm$EVTYPE),
                    fixed = TRUE)

# Keep the matching keys separate from the display names so that the final
# categories stay readable ("FLASH FLOOD" rather than "FLASHFLOOD").
evtype_key <- normalise_label(evtype)
evtype <- toupper(evtype)

# Discard labels seen fewer than 5 times (treated as typos / rare events).
# plyr::count is spelled out because other packages export a count() too.
cnt <- plyr::count(storm, "EVTYPE")
cnt <- subset(cnt, freq >= 5)
storm <- storm[storm$EVTYPE %in% cnt$EVTYPE, ]

# Fuzzy-match each label to the nearest key; amatch() yields NA when nothing
# is within maxDist edits, and those rows are dropped.
# NOTE(review): an edit distance of 3 is large relative to short names such
# as HAIL or HEAT, so unrelated short labels may be absorbed into them --
# worth auditing the label-to-category mapping.
match_idx <- amatch(storm$EVTYPE, evtype_key, maxDist = 3)
matched <- !is.na(match_idx)
storm <- storm[matched, ]
storm$EVTYPE <- evtype[match_idx[matched]]
The effect on population health is measured as the sum of fatalities and injuries for each event type.
# Population-health impact per event type.
# NOTE: FATALITIES is overwritten in place with fatalities + injuries, so from
# here on the column holds the combined casualty count.
storm$FATALITIES <- with(storm, FATALITIES + INJURIES)

# Total casualties for each event type.
pophlth <- tapply(X = storm$FATALITIES, INDEX = storm$EVTYPE, FUN = sum)

# Name of the event type with the highest casualty total.
worst_health_idx <- which.max(pophlth)
maxphev <- names(worst_health_idx)
# Economic impact per event type: scale the property and crop damage figures
# by their exponent codes, add them up, and total them by event type.

# Translate a vector of damage exponent codes into numeric multipliers.
#   B -> 1e9, M -> 1e6, K -> 1e3 (letters are matched case-insensitively, so
#   lower-case codes are no longer silently treated as a multiplier of 1)
#   "0".."8" -> 10
#   "-", " ", "?" -> 0 (the damage figure is discarded)
#   anything else, including the empty string -> 1 (value left unchanged)
# Accepts character or factor input; returns a numeric vector of equal length.
damage_multiplier <- function(exponent) {
  code <- toupper(as.character(exponent))
  multiplier <- rep(1, length(code))
  multiplier[code %in% "B"] <- 1e9
  multiplier[code %in% "M"] <- 1e6
  multiplier[code %in% "K"] <- 1e3
  multiplier[code %in% as.character(0:8)] <- 10
  multiplier[code %in% c("-", " ", "?")] <- 0
  multiplier
}

# The damage columns are rescaled in place, as before.
# NOTE(review): because of that, running this step twice on the same `storm`
# object applies the multipliers twice.
storm$PROPDMG <- storm$PROPDMG * damage_multiplier(storm$PROPDMGEXP)
storm$CROPDMG <- storm$CROPDMG * damage_multiplier(storm$CROPDMGEXP)

# Combined loss per record, then per event type.
totaldam <- storm$PROPDMG + storm$CROPDMG
ecodam <- tapply(totaldam, storm$EVTYPE, sum)

# Name of the event type with the largest combined loss.
maxedev <- names(which.max(ecodam))
The plots below show, for each type of event, the combined number of fatalities and injuries and the total economic loss.
# Bar charts of the per-event-type totals.
# Category names are drawn perpendicular to the axis in a smaller font, with a
# taller bottom margin, so that all of them fit; bars are ordered from largest
# to smallest so the leading event types are easy to spot.
old_par <- par(mar = c(11, 6, 4, 2) + 0.1)

barplot(pophlth[order(pophlth, decreasing = TRUE)],
        main = "Total Fatalities and Injuries caused by the event",
        las = 2, cex.names = 0.6)

barplot(ecodam[order(ecodam, decreasing = TRUE)],
        main = "Total loss caused by the event",
        las = 2, cex.names = 0.6)

# Restore the graphics settings that were in effect before plotting.
par(old_par)