This is an analysis of the public health and economic costs of severe weather events. Data come from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database from 1950 to 2011. Severe weather events are classified into event types. Each event has associated fatalities, injuries, and costs to property and crops. This analysis has ranked the top ten severe weather event types in terms of fatalities, injuries, costs to property, and costs to crops.
Set the working directory
# Switch to the author's project folder only when it exists on this machine.
# Elsewhere the script keeps the current working directory, which must then
# already contain the storm data archive, instead of stopping with an error.
project_dir <- "C:/Users/Mike/Desktop/Dropbox/Coursera/Reproducible Research/RepData_PeerAssessment2"
if (file.exists(project_dir)) {
  setwd(project_dir)
}
Now, let’s import the data:
# Read the bzip2-compressed storm database CSV through a bzfile connection
storm_archive <- "repdata-data-StormData.csv.bz2"
data <- read.csv(file = bzfile(storm_archive))
Let's examine some features of the data:
# Row and column counts of the imported table
c(nrow(data), ncol(data))
## [1] 902297 37
# Names of the columns in the imported table
colnames(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Now, we need to clean up the property and crop damage values. PROPDMG and CROPDMG are recorded in different units (e.g., dollars, thousands of dollars, millions of dollars, etc.). PROPDMGEXP and CROPDMGEXP hold the exponents: PROPDMG and CROPDMG must be multiplied by 10^PROPDMGEXP and 10^CROPDMGEXP, respectively, to obtain dollar amounts.
First, let’s clean up property damage.
# unique(data$PROPDMGEXP)
# Convert the PROPDMGEXP codes into numeric powers of ten.
# Letter codes are matched case-insensitively (H = 10^2, K = 10^3, M = 10^6,
# B = 10^9), digit codes are used as the exponent itself, and anything else
# (blank or unrecognised symbols) is treated as 10^0.
prop_exp_codes <- c(H = 2, K = 3, M = 6, B = 9)
prop_exp_raw <- toupper(as.character(data$PROPDMGEXP))
# Look up letter codes; every non-letter entry is NA at this point
prop_exp <- unname(prop_exp_codes[prop_exp_raw])
# Digit codes are converted explicitly, so no coercion warning is raised
prop_exp_is_digit <- grepl("^[0-9]+$", prop_exp_raw)
prop_exp[prop_exp_is_digit] <- as.numeric(prop_exp_raw[prop_exp_is_digit])
prop_exp[is.na(prop_exp)] <- 0
data$PROPDMGEXP <- prop_exp
unique(data$PROPDMGEXP)
## [1] 3 6 0 9 5 4 2 7 1 8
Now, let’s clean up crop damage.
# unique(data$CROPDMGEXP)
# Convert the CROPDMGEXP codes into numeric powers of ten.
# Letter codes are matched case-insensitively (H = 10^2, K = 10^3, M = 10^6,
# B = 10^9), digit codes are used as the exponent itself, and anything else
# (blank or unrecognised symbols) is treated as 10^0.
crop_exp_codes <- c(H = 2, K = 3, M = 6, B = 9)
crop_exp_raw <- toupper(as.character(data$CROPDMGEXP))
# Look up letter codes; every non-letter entry is NA at this point
crop_exp <- unname(crop_exp_codes[crop_exp_raw])
# Digit codes are converted explicitly, so no coercion warning is raised
crop_exp_is_digit <- grepl("^[0-9]+$", crop_exp_raw)
crop_exp[crop_exp_is_digit] <- as.numeric(crop_exp_raw[crop_exp_is_digit])
crop_exp[is.na(crop_exp)] <- 0
data$CROPDMGEXP <- crop_exp
unique(data$CROPDMGEXP)
## [1] 0 6 3 9 2
Finally, calculate the property and crop damage in dollar units
# Scale each damage figure by ten raised to its cleaned exponent column
data[["PROPDMG_new"]] <- data[["PROPDMG"]] * 10^data[["PROPDMGEXP"]]
data[["CROPDMG_new"]] <- data[["CROPDMG"]] * 10^data[["CROPDMGEXP"]]
There are 985 unique levels for event type
# Count the distinct event type labels (first occurrences only)
sum(!duplicated(data$EVTYPE))
## [1] 985
Make some replacements
# Collapse common spelling variants of the event types into one label each.
# Labels are first trimmed and upper-cased so that mixed-case or padded
# duplicates are merged and every pattern below can actually match.
# The rules are regular expressions applied in the order listed, so a more
# specific pattern must come before the general pattern that would otherwise
# rewrite part of it (e.g. "WINTER WEATHER/MIX" before "WINTER WEATHER",
# "THUNDERSTORM WIND" before the "TSTM WIND" clean-up rules).
evtype_rules <- c(
  "^TORNADO.*" = "TORNADO",
  # Thunderstorm wind variants: expand to "TSTM WIND" first, then tidy up
  "^THUNDESTORM WIND.*" = "TSTM WIND",
  "THUNDERSTORM WIND" = "TSTM WIND",
  "MARINE TSTM WIND" = "TSTM WIND",
  "^TSTM WIND.*" = "TSTM WIND",
  "TSTM WINDS" = "TSTM WIND",
  "^HAIL.*" = "HAIL",
  # Other wind variants; the anchored rule must precede the plain one
  "^STRONG WIND.*" = "HIGH WIND",
  "STRONG WIND" = "HIGH WIND",
  "HIGH WINDS" = "HIGH WIND",
  "^WIND.*" = "HIGH WIND",
  # Flood variants
  "FLASH FLOOD" = "FLOOD",
  "FLOODING" = "FLOOD",
  "RIVER FLOOD" = "FLOOD",
  "URBAN FLOOD" = "FLOOD",
  "FLOOD/FLOOD" = "FLOOD",
  "COASTAL FLOOD" = "FLOOD",
  "URBAN/SML STREAM FLD" = "FLOOD",
  "RIP CURRENTS" = "RIP CURRENT",
  "WILD/FOREST FIRE" = "WILDFIRE",
  # Winter variants
  "WINTER WEATHER/MIX" = "HEAVY SNOW",
  "WINTER WEATHER" = "HEAVY SNOW",
  "WINTER STORM" = "HEAVY SNOW",
  "BLIZZARD" = "HEAVY SNOW",
  # Frost and freeze variants all become "FROST"
  "FROST/FREEZE" = "FROST",
  "FREEZE" = "FROST",
  ".*FROST" = "FROST",
  "EXTREME COLD/WIND CHILL" = "EXTREME COLD",
  "COLD/WIND CHILL" = "EXTREME COLD",
  # Heat variants
  "HEAT WAVE" = "HEAT",
  "EXCESSIVE HEAT" = "HEAT",
  "EXTREME HEAT" = "HEAT",
  "HURRICANE/TYPHOON" = "HURRICANE"
)
evtype_clean <- gsub("^[[:space:]]+|[[:space:]]+$", "", as.character(data$EVTYPE))
evtype_clean <- toupper(evtype_clean)
for (rule_index in seq_along(evtype_rules)) {
  evtype_clean <- gsub(
    names(evtype_rules)[rule_index],
    evtype_rules[[rule_index]],
    evtype_clean
  )
}
data$EVTYPE <- evtype_clean
This certainly is not an exhaustive replacement of all similar event types, but it deals with some of the most common event types.
Aggregate data by human health impacts
# Sum fatalities and injuries within each event type
fatalities <- aggregate(
  x = data[["FATALITIES"]],
  by = list(data[["EVTYPE"]]),
  FUN = sum
)
injuries <- aggregate(
  x = data[["INJURIES"]],
  by = list(data[["EVTYPE"]]),
  FUN = sum
)
Aggregate data by economic impacts
# Sum the dollar-valued property and crop damage within each event type
property <- aggregate(
  x = data[["PROPDMG_new"]],
  by = list(data[["EVTYPE"]]),
  FUN = sum
)
crops <- aggregate(
  x = data[["CROPDMG_new"]],
  by = list(data[["EVTYPE"]]),
  FUN = sum
)
Only look at the top 10 of each
# Sort each summary by its total (second column, largest first) and keep at
# most ten rows. head() is used instead of [1:10, ] so that a summary with
# fewer than ten event types is not padded with all-NA rows.
fatalities <- head(fatalities[order(-fatalities[, 2]), ], 10)
injuries <- head(injuries[order(-injuries[, 2]), ], 10)
property <- head(property[order(-property[, 2]), ], 10)
crops <- head(crops[order(-crops[, 2]), ], 10)
Make sure the column names are right for everything
# Label the two columns of each summary: event type, then the summed measure
colnames(fatalities) <- c("EVTYPE", "fatalities")
colnames(injuries) <- c("EVTYPE", "injuries")
colnames(property) <- c("EVTYPE", "property")
colnames(crops) <- c("EVTYPE", "crops")
Make plots for human health impacts
library(ggplot2)
# Bar chart of fatalities per event type, bars ordered by the fatality total
fatalities_plot <- ggplot(
  fatalities,
  aes(reorder(EVTYPE, fatalities), fatalities)
) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("Fatalities") +
  theme_classic(base_size = 12) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Bar chart of injuries per event type, bars ordered by the injury total
injuries_plot <- ggplot(
  injuries,
  aes(reorder(EVTYPE, injuries), injuries)
) +
  geom_bar(stat = "identity") +
  xlab("Event Type") +
  ylab("Injuries") +
  theme_classic(base_size = 12) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Draw both human health charts in a single figure
library(gridExtra)
## Loading required package: grid
grid.arrange(fatalities_plot, injuries_plot)
The figure above shows the top ten event types with regard to fatalities and injuries. It appears that tornadoes, heat, and flooding are the top three events with the most fatalities. Tornadoes are the top event for injuries. Overall, tornadoes have the largest impact on public health.
Make plots for economic costs
# Bar chart of property damage per event type, bars ordered by the total
property_plot <- ggplot(
  property,
  aes(reorder(EVTYPE, property), property)
) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("Property Damage ($)") +
  theme_classic(base_size = 12) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Bar chart of crop damage per event type, bars ordered by the total
crops_plot <- ggplot(
  crops,
  aes(reorder(EVTYPE, crops), crops)
) +
  geom_bar(stat = "identity") +
  xlab("Event Type") +
  ylab("Crop Damage ($)") +
  theme_classic(base_size = 12) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Draw both economic charts in a single figure
grid.arrange(property_plot, crops_plot)
The figure above shows the top ten event types with regard to property and crop damage. It appears that flooding, hurricanes, and tornadoes are the top three events for property damage. Drought and flood are the top two events for crop damage. Overall, floods have the largest economic impact.