The U.S. National Oceanic and Atmospheric Administration (NOAA) has compiled a database of weather events from 1950 to 2011; the data come from various sources and are not meant to be comprehensive.
I am using this data to show which weather events are the most harmful to our health and to our economy. Below, I use the data to create two visualizations depicting the top 10 most harmful weather events in terms of fatalities, injuries, property damage, and crop damage.
Load packages
library(stringr)
library(dplyr)
library(ggplot2)
library(gridExtra)
Read in the data and keep only the columns needed for the analysis
# Load the complete storm CSV, then narrow it to the columns used downstream.
storm_data <- read.csv(file = "repdata_data_StormData.csv")
storm_data <- storm_data[
  ,
  c(
    "EVTYPE", "BGN_DATE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP", "TIME_ZONE"
  )
]
The values in the PROPDMG and CROPDMG columns must be multiplied by the multipliers encoded in their exponent columns, PROPDMGEXP and CROPDMGEXP respectively, to get the damage totals.
# Convert the PROPDMGEXP / CROPDMGEXP magnitude codes into numeric multipliers
# and scale PROPDMG / CROPDMG by them to obtain the full damage amounts.
storm_data_relabel <- storm_data

# Translate a vector of exponent codes into numeric multipliers.
#
# Mapping (letters are matched case-insensitively):
#   H = 100, K = 1,000, M = 1,000,000, B = 1,000,000,000,
#   a single digit 0-9 = 10, "+" = 1, and "-", "?", blank or NA = 0.
#
# Blank codes must map to 0 rather than NA: a single NA multiplier would
# otherwise make the summed damage of its whole event type NA.
# Any code outside the table is reported with a warning and treated as 0.
#
# exp_code: character (or factor) vector of exponent codes.
# Returns a numeric vector of multipliers, the same length as exp_code.
exp_to_multiplier <- function(exp_code) {
  code <- toupper(trimws(as.character(exp_code)))

  known <- c(
    "-" = 0, "?" = 0, "+" = 1,
    H = 1e2, K = 1e3, M = 1e6, B = 1e9
  )
  multiplier <- unname(known[code])
  multiplier[grepl("^[0-9]$", code)] <- 10
  multiplier[is.na(code) | code == ""] <- 0

  unknown <- is.na(multiplier)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s) treated as 0: ",
      paste(unique(code[unknown]), collapse = ", "),
      call. = FALSE
    )
    multiplier[unknown] <- 0
  }
  multiplier
}

# Property damage: replace the code column by its multiplier, then scale.
storm_data_relabel$PROPDMGEXP <-
  exp_to_multiplier(storm_data_relabel$PROPDMGEXP)
storm_data_relabel$PROPDMG <-
  with(storm_data_relabel, PROPDMG * PROPDMGEXP)

# Crop damage: same treatment.
storm_data_relabel$CROPDMGEXP <-
  exp_to_multiplier(storm_data_relabel$CROPDMGEXP)
storm_data_relabel$CROPDMG <-
  with(storm_data_relabel, CROPDMG * CROPDMGEXP)
Group data by EVTYPE and then sum over FATALITIES and INJURIES to create dataframes: fatalities and injuries. Then arrange aggregated sums in descending order and take the top 10 event types in each category
# Total deaths per event type, ranked from most to fewest; keep rows 1-10.
fatalities <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(fatal_sum = sum(FATALITIES)) %>%
  arrange(desc(fatal_sum))
fatalities <- fatalities[seq_len(10), ]

# Total injuries per event type, ranked from most to fewest; keep rows 1-10.
injuries <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(inj_sum = sum(INJURIES)) %>%
  arrange(desc(inj_sum))
injuries <- injuries[seq_len(10), ]
Group data by EVTYPE and then sum over PROPDMG and CROPDMG to create dataframes: storm_data_top_prop and storm_data_top_crop. Then arrange aggregated sums in descending order and take the top 10 event types in each category
# Rank event types by the damage they accumulated over the whole record.
#
# na.rm = TRUE matters here: any row with a missing (NA) damage value would
# otherwise make the total for its entire event type NA, and arrange(desc())
# sorts NA totals last, silently dropping that event type from the top 10.

# Total property damage per event type, largest first; keep the top 10.
storm_data_top_prop <- storm_data_relabel %>%
  group_by(EVTYPE) %>%
  summarise(total_property_damage = sum(PROPDMG, na.rm = TRUE)) %>%
  arrange(desc(total_property_damage))
storm_data_top_prop <- storm_data_top_prop[seq_len(10), ]

# Total crop damage per event type, largest first; keep the top 10.
storm_data_top_crop <- storm_data_relabel %>%
  group_by(EVTYPE) %>%
  summarise(total_crop_damage = sum(CROPDMG, na.rm = TRUE)) %>%
  arrange(desc(total_crop_damage))
storm_data_top_crop <- storm_data_top_crop[seq_len(10), ]
Plots the Top 10 Events that have caused fatalities and injuries
# Bar chart of the ten event types with the most deaths, tallest bar first.
fplot <- ggplot(
  data = fatalities,
  mapping = aes(x = reorder(EVTYPE, -fatal_sum), y = fatal_sum)
) +
  geom_col(fill = "darkblue") +
  labs(
    title = "Top 10 Events with Highest Deaths",
    x = "",
    y = "Total Fatalities"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Bar chart of the ten event types with the most injuries, tallest bar first.
iplot <- ggplot(
  data = injuries,
  mapping = aes(x = reorder(EVTYPE, -inj_sum), y = inj_sum)
) +
  geom_col(fill = "darkblue") +
  labs(
    title = "Top 10 Events with Highest injuries",
    x = "",
    y = "Total Injuries"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Draw both charts side by side in a single row.
grid.arrange(fplot, iplot, nrow = 1)
Tornadoes result in the highest number of deaths and injuries.
Plots the Top 10 Events that have caused Property and Crop Damage
# Bar chart of the ten event types with the largest property damage totals.
pplot <- ggplot(
  data = storm_data_top_prop,
  mapping = aes(
    x = reorder(EVTYPE, -total_property_damage),
    y = total_property_damage
  )
) +
  geom_col(fill = "darkgreen") +
  labs(
    title = "Top 10 Property Damage Events",
    x = "",
    y = "Total Property Damage Cost"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Bar chart of the ten event types with the largest crop damage totals.
cplot <- ggplot(
  data = storm_data_top_crop,
  mapping = aes(
    x = reorder(EVTYPE, -total_crop_damage),
    y = total_crop_damage
  )
) +
  geom_col(fill = "darkgreen") +
  labs(
    title = "Top 10 Events Damaging to Crops",
    x = "",
    y = "Total Crop Damage Cost"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Draw both charts side by side, crop damage on the left.
grid.arrange(cplot, pplot, nrow = 1)
Tornadoes with thunderstorm wind and hail are the number one cause of property damage and excessive wetness causes the most crop damage.