SYNOPSIS
Storms and severe weather create risks to public health and can have an economic impact on communities and municipalities. This analysis is a brief exploration of the NOAA Storm Database to derive some basic observations relating to severe weather events. Since this analysis is limited in scope, it will focus on types of events, costs of damage, injuries and fatalities, and storm locations.
DATA PROCESSING
Read in the dataset. Source URL: http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
# Load the NOAA storm data. The compressed file is fetched once into the
# working directory (if it is not already there) so the analysis does not
# depend on a user-specific absolute path and can be re-run by anyone.
storm_url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "StormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the bzip2 archive byte-exact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
# read.csv() reads bzip2-compressed files directly. stringsAsFactors is set
# explicitly so text columns are character vectors regardless of the default
# of the R version in use.
StormData <- read.csv(storm_file, header = TRUE, sep = ",",
                      na.strings = "NA", stringsAsFactors = FALSE)
Review the data elements that are available for the analysis
# List the column names available in the raw storm data.
colnames(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Subset the data to include only the relevant fields
# Keep only the event type, the casualty counts and the damage columns.
keep_cols <- c("EVTYPE", "FATALITIES", "INJURIES",
               "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
StormDataSub <- StormData[, keep_cols]
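Initial observation of the data showed that dollar amounts are stored with a magnitude code: H (hundreds), K (thousands), M (millions), or B (billions). Convert these codes to multipliers in order to calculate property and crop damages.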
# Convert PROPDMG to dollars using its magnitude code in PROPDMGEXP.
# Codes are matched case-insensitively so that lower-case entries such as
# "k" or "m" are not silently valued at zero. Any code outside H/K/M/B
# (blank, other symbols, NA) contributes 0, as before.
prop_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
prop_factor <- unname(
  prop_multiplier[toupper(as.character(StormDataSub$PROPDMGEXP))]
)
# Unrecognised codes look up as NA in the table; map those rows to 0.
StormDataSub$pd <- ifelse(is.na(prop_factor), 0,
                          StormDataSub$PROPDMG * prop_factor)
Convert the H, K, M, B codes into multipliers in order to calculate crop damage
# Convert CROPDMG to dollars using its magnitude code in CROPDMGEXP.
# Codes are matched case-insensitively so that lower-case entries such as
# "k" or "m" are not silently valued at zero. Any code outside H/K/M/B
# (blank, other symbols, NA) contributes 0, as before.
crop_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
crop_factor <- unname(
  crop_multiplier[toupper(as.character(StormDataSub$CROPDMGEXP))]
)
# Unrecognised codes look up as NA in the table; map those rows to 0.
StormDataSub$cd <- ifelse(is.na(crop_factor), 0,
                          StormDataSub$CROPDMG * crop_factor)
Clean up the data by consolidating closely related event types into common categories
# Consolidate closely related event types into common categories.
#
# NOTE: an earlier draft had a rename.vars() call here. It was not valid R
# (typographic quotes, unbalanced string) and rename.vars() renames columns
# rather than recoding values, so it has been dropped.

# CleanEV keeps the untouched event labels; all matching is done against it
# so that one recode can never influence a later one.
StormDataSub$CleanEV <- StormDataSub$EVTYPE
# Recode on a character vector: assigning a label that is not an existing
# factor level would otherwise produce NA.
StormDataSub$EVTYPE <- as.character(StormDataSub$EVTYPE)

# pattern = replacement, applied top to bottom (later rules win on overlap).
evtype_recode <- c(
  "EXCESSIVE HEAT"    = "HEAT",
  "HEAT WAVE"         = "HEAT",
  "FLASH FLOOD"       = "FLOOD",
  "TSTM WIND"         = "WIND",
  "HIGH WIND"         = "WIND",
  "STRONG WIND"       = "WIND",
  "RIP CURRENT"       = "RIP CURRENTS",
  "THUNDERSTORM WIND" = "WIND",
  "HEAVY SNOW"        = "WINTER STORM",
  "BLIZZARD"          = "WINTER STORM",
  "EXTREME COLD"      = "EXTREME COLD/WIND CHILL"
)
for (pattern in names(evtype_recode)) {
  # Event labels are not guaranteed to be upper-case, so ignore case.
  hits <- grepl(pattern, StormDataSub$CleanEV, ignore.case = TRUE)
  StormDataSub$EVTYPE[hits] <- evtype_recode[[pattern]]
}
Created plots to visualize the data, starting with the Number of Fatalities by Event Type
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
# Total fatalities per event type, keeping (up to) the ten deadliest.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = StormDataSub, sum)
# head() avoids padding with NA rows when fewer than ten event types exist.
fatal <- head(fatal[order(-fatal$FATALITIES), ], 10)
# Set the factor levels in ranked order on EVTYPE itself, the column mapped
# to x, so the bars run from most to fewest fatalities. Writing the factor to
# a differently named column would leave the plot in alphabetical order.
fatal$EVTYPE <- factor(fatal$EVTYPE, levels = fatal$EVTYPE)
ggplot(fatal, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("EVENT TYPE") + ylab("FATALITIES") +
  ggtitle("Number of Fatalities by Top 10 Weather Events")
Then the total property and crop damages, categorized by the most costly event types
# Combined property and crop damage per event type, ten costliest first.
damage <- aggregate(pd + cd ~ EVTYPE, data = StormDataSub, FUN = sum)
colnames(damage) <- c("EVTYPE", "TDAMAGE")
damage_rank <- order(-damage$TDAMAGE)
damage <- damage[damage_rank[1:10], ]
# Lock the factor levels to the ranked order so the bars are drawn by cost.
damage$EVTYPE <- factor(damage$EVTYPE, levels = damage$EVTYPE)
damage_plot <- ggplot(damage, aes(x = EVTYPE, y = TDAMAGE)) +
  geom_bar(stat = "identity", fill = "red")
damage_plot +
  scale_y_continuous(name = "Damages(US$)", labels = scales::comma) +
  labs(x = "EVENT TYPE",
       title = "Property & Crop Damages by Top 10 Weather Events") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
RESULTS
Based on the data analysis described above, it is clear that flooding is the most costly type of event in terms of damage to property and crops, exceeding the next closest event by more than $80B. Large storms are also very expensive, as the next three events are hurricanes / typhoons, tornados and storm surges.
This is not mirrored in the fatality data, where tornados claim the highest number of lives, followed by heat.