Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
# Load the raw storm-event CSV into a data frame.
# NOTE(review): the path is absolute and specific to one machine, so the script
# only runs where this exact directory layout exists.
stormdf <- read.csv(
  file = "~/Online Courses/Johns Hopkins University Data Science Foundations using R Specialization/Assignment/Module 5/RepData_PeerAssessment2/repdata_data_StormData.csv"
)
library('magrittr')
library('dplyr')
library('ggplot2')
library('patchwork')
# Keep only the identifier, location, event-type, casualty and damage columns
# used by the rest of the analysis, then inspect the resulting structure.
# NOTE(review): PROPDMG and CROPDMG are carried forward without any magnitude
# or unit column, yet later axis labels present their sums as "($)". Presumably
# the raw data has exponent columns (e.g. PROPDMGEXP / CROPDMGEXP) that should
# scale these values -- TODO confirm against the NOAA storm data documentation.
stormdf2 <- stormdf[
  ,
  c(
    "REFNUM", "COUNTYNAME", "STATE", "EVTYPE",
    "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG"
  ),
  drop = FALSE
]
str(stormdf2)
## 'data.frame': 902297 obs. of 8 variables:
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## Normalise the three text columns in one pass: upper-case every value and
## strip leading/trailing whitespace, so that later grouping by these columns
## is not split by letter case or stray spaces.
stormdf2[c("COUNTYNAME", "STATE", "EVTYPE")] <- lapply(
  stormdf2[c("COUNTYNAME", "STATE", "EVTYPE")],
  function(values) trimws(toupper(values))
)
## Sum fatalities and injuries per event type, then collapse the two sums into
## a single combined casualty total for each event type.
pophealthdf <- with(
  aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE, data = stormdf2, FUN = sum),
  data.frame(
    EVTYPE = EVTYPE,
    TOTAL_FATALITIES_AND_INJURIES = FATALITIES + INJURIES,
    stringsAsFactors = FALSE
  )
)
## Sum crop and property damage per event type, then collapse the two sums
## into a single combined damage total for each event type.
## NOTE(review): the raw CROPDMG / PROPDMG numbers are added as-is; if the
## source data stores a separate magnitude column for them, these totals mix
## units -- TODO confirm before reading them as dollar amounts.
econdf <- with(
  aggregate(cbind(CROPDMG, PROPDMG) ~ EVTYPE, data = stormdf2, FUN = sum),
  data.frame(
    EVTYPE = EVTYPE,
    TOTAL_PROP_CROPDMG = CROPDMG + PROPDMG,
    stringsAsFactors = FALSE
  )
)
# Keep the five event types with the highest combined fatalities and injuries
# (rows tied at the cut-off are all kept), sorted from largest to smallest,
# then print summary statistics for the selection.
# slice_max() is used in place of the superseded top_n().
top5_pophealthdf <- pophealthdf %>%
  slice_max(TOTAL_FATALITIES_AND_INJURIES, n = 5, with_ties = TRUE) %>%
  arrange(desc(TOTAL_FATALITIES_AND_INJURIES))
summary(top5_pophealthdf)
## EVTYPE TOTAL_FATALITIES_AND_INJURIES
## Length:5 Min. : 6046
## Class :character 1st Qu.: 7259
## Mode :character Median : 7461
## Mean :25235
## 3rd Qu.: 8428
## Max. :96979
# Keep the five event types with the highest combined property and crop damage
# (rows tied at the cut-off are all kept), sorted from largest to smallest,
# then print summary statistics for the selection.
# slice_max() is used in place of the superseded top_n().
top5_econdf <- econdf %>%
  slice_max(TOTAL_PROP_CROPDMG, n = 5, with_ties = TRUE) %>%
  arrange(desc(TOTAL_PROP_CROPDMG))
summary(top5_econdf)
## EVTYPE TOTAL_PROP_CROPDMG
## Length:5 Min. :1067976
## Class :character 1st Qu.:1268290
## Mode :character Median :1445306
## Mean :1738645
## 3rd Qu.:1599375
## Max. :3312277
# Fill colour per event type, shared by the casualty and damage overview
# charts. The vector is named, so colours are looked up by event-type label.
plot1_colour <- c(
  "HAIL"              = "honeydew4",
  "FLOOD"             = "maroon4",
  "FLASH FLOOD"       = "lightsteelblue4",
  "THUNDERSTORM WIND" = "plum4",
  "LIGHTNING"         = "paleturquoise4",
  "TORNADO"           = "bisque4",
  "TSTM WIND"         = "lavenderblush4",
  "EXCESSIVE HEAT"    = "indianred4"
)
# Bar chart of combined fatalities and injuries for the top event types.
# EVTYPE is a character column, so ggplot would lay the bars out
# alphabetically; reorder() puts them in descending order of the total so the
# ranking is readable. geom_col() is the direct form of
# geom_bar(stat = "identity"). Fill still maps to the raw EVTYPE label so the
# named colour palette continues to match.
pophealthplot <- ggplot(
  top5_pophealthdf,
  aes(
    x = reorder(EVTYPE, -TOTAL_FATALITIES_AND_INJURIES),
    y = TOTAL_FATALITIES_AND_INJURIES,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_fill_manual(values = plot1_colour) +
  labs(
    x = "Event_Type",
    y = "Total Fatalities and Injuries",
    title = "Total Fatalities and Injuries by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Bar chart of combined property and crop damage for the top event types.
# EVTYPE is a character column, so ggplot would lay the bars out
# alphabetically; reorder() puts them in descending order of the total so the
# ranking is readable. geom_col() is the direct form of
# geom_bar(stat = "identity"). Fill still maps to the raw EVTYPE label so the
# named colour palette continues to match.
econplot <- ggplot(
  top5_econdf,
  aes(
    x = reorder(EVTYPE, -TOTAL_PROP_CROPDMG),
    y = TOTAL_PROP_CROPDMG,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_fill_manual(values = plot1_colour) +
  labs(
    x = "Event_Type",
    y = "Total_Properties_and_Crops_Damage_($)",
    title = "Total Properties and Crop Damage by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Compose the casualty chart and the damage chart into one two-row figure
# (casualties first), then display it.
combined_pophealth_econ_plot <- wrap_plots(
  pophealthplot,
  econplot,
  nrow = 2
)
combined_pophealth_econ_plot
## Per-event-type totals, kept as two separate tables so fatalities and
## injuries can each be ranked on their own.
fatdf <- aggregate(
  FATALITIES ~ EVTYPE,
  data = stormdf2,
  FUN = sum
)
injdf <- aggregate(
  INJURIES ~ EVTYPE,
  data = stormdf2,
  FUN = sum
)
# Keep the five event types with the most fatalities (rows tied at the cut-off
# are all kept), sorted from largest to smallest, then print summary
# statistics for the selection.
# slice_max() is used in place of the superseded top_n().
top5_evtypes_fat <- fatdf %>%
  slice_max(FATALITIES, n = 5, with_ties = TRUE) %>%
  arrange(desc(FATALITIES))
summary(top5_evtypes_fat)
## EVTYPE FATALITIES
## Length:5 Min. : 816
## Class :character 1st Qu.: 937
## Mode :character Median : 978
## Mean :2053
## 3rd Qu.:1903
## Max. :5633
# Keep the five event types with the most injuries (rows tied at the cut-off
# are all kept), sorted from largest to smallest, then print summary
# statistics for the selection.
# slice_max() is used in place of the superseded top_n().
top5_evtypes_inj <- injdf %>%
  slice_max(INJURIES, n = 5, with_ties = TRUE) %>%
  arrange(desc(INJURIES))
summary(top5_evtypes_inj)
## EVTYPE INJURIES
## Length:5 Min. : 5230
## Class :character 1st Qu.: 6525
## Mode :character Median : 6789
## Mean :23369
## 3rd Qu.: 6957
## Max. :91346
# Fill colour per event type for the fatalities and injuries charts. The
# vector is named, so colours are looked up by event-type label.
fat_inj_colour <- c(
  "EXCESSIVE HEAT" = "indianred4",
  "FLOOD"          = "maroon4",
  "FLASH FLOOD"    = "lightsteelblue4",
  "HEAT"           = "lightpink4",
  "LIGHTNING"      = "paleturquoise4",
  "TORNADO"        = "bisque4",
  "TSTM WIND"      = "lavenderblush4"
)
# Bar chart of total fatalities for the top event types.
# EVTYPE is a character column, so ggplot would lay the bars out
# alphabetically; reorder() puts them in descending order of the count so the
# ranking is readable. geom_col() is the direct form of
# geom_bar(stat = "identity"). Fill still maps to the raw EVTYPE label so the
# named colour palette continues to match.
fat <- ggplot(
  top5_evtypes_fat,
  aes(
    x = reorder(EVTYPE, -FATALITIES),
    y = FATALITIES,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_fill_manual(values = fat_inj_colour) +
  labs(
    x = "Event_Type",
    y = "Fatalities_Count",
    title = "Total Fatalities by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Bar chart of total injuries for the top event types.
# EVTYPE is a character column, so ggplot would lay the bars out
# alphabetically; reorder() puts them in descending order of the count so the
# ranking is readable. geom_col() is the direct form of
# geom_bar(stat = "identity"). Fill still maps to the raw EVTYPE label so the
# named colour palette continues to match.
inj <- ggplot(
  top5_evtypes_inj,
  aes(
    x = reorder(EVTYPE, -INJURIES),
    y = INJURIES,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_fill_manual(values = fat_inj_colour) +
  labs(
    x = "Event_Type",
    y = "Injuries_Count",
    title = "Total Injuries by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Compose the fatalities chart and the injuries chart into one two-row figure
# (fatalities first), then display it.
combined_fat_inj <- wrap_plots(
  fat,
  inj,
  nrow = 2
)
combined_fat_inj
## Per-event-type totals, kept as two separate tables so property damage and
## crop damage can each be ranked on their own.
## NOTE(review): the raw PROPDMG / CROPDMG numbers are summed as-is; if the
## source data stores a separate magnitude column for them, these totals mix
## units -- TODO confirm before reading them as dollar amounts.
property <- aggregate(
  PROPDMG ~ EVTYPE,
  data = stormdf2,
  FUN = sum
)
crop <- aggregate(
  CROPDMG ~ EVTYPE,
  data = stormdf2,
  FUN = sum
)
# Keep the five event types with the highest summed property damage (rows tied
# at the cut-off are all kept), sorted from largest to smallest, then print
# summary statistics for the selection.
# slice_max() is used in place of the superseded top_n().
top5_propdmg <- property %>%
  slice_max(PROPDMG, n = 5, with_ties = TRUE) %>%
  arrange(desc(PROPDMG))
summary(top5_propdmg)
## EVTYPE PROPDMG
## Length:5 Min. : 876844
## Class :character 1st Qu.: 899938
## Mode :character Median :1336104
## Mean :1549064
## 3rd Qu.:1420175
## Max. :3212258
# Keep the five event types with the highest summed crop damage (rows tied at
# the cut-off are all kept), sorted from largest to smallest, then print
# summary statistics for the selection.
# slice_max() is used in place of the superseded top_n().
top5_cropdmg <- crop %>%
  slice_max(CROPDMG, n = 5, with_ties = TRUE) %>%
  arrange(desc(CROPDMG))
summary(top5_cropdmg)
## EVTYPE CROPDMG
## Length:5 Min. :100019
## Class :character 1st Qu.:109203
## Mode :character Median :168038
## Mean :227211
## 3rd Qu.:179201
## Max. :579596
# Fill colour per event type for the property and crop damage charts. The
# vector is named, so colours are looked up by event-type label.
prop_cropdmg_colour <- c(
  "HAIL"              = "honeydew4",
  "FLOOD"             = "maroon4",
  "FLASH FLOOD"       = "lightsteelblue4",
  "THUNDERSTORM WIND" = "plum4",
  "LIGHTNING"         = "paleturquoise4",
  "TORNADO"           = "bisque4",
  "TSTM WIND"         = "lavenderblush4"
)
# Bar chart of summed property damage for the top event types.
# EVTYPE is a character column, so ggplot would lay the bars out
# alphabetically; reorder() puts them in descending order of the total so the
# ranking is readable. geom_col() is the direct form of
# geom_bar(stat = "identity"). Fill still maps to the raw EVTYPE label so the
# named colour palette continues to match.
propdmg <- ggplot(
  top5_propdmg,
  aes(
    x = reorder(EVTYPE, -PROPDMG),
    y = PROPDMG,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_fill_manual(values = prop_cropdmg_colour) +
  labs(
    x = "Event_Type",
    y = "Property_Damage_($)",
    title = "Total Property Damage by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Bar chart of summed crop damage for the top event types.
# EVTYPE is a character column, so ggplot would lay the bars out
# alphabetically; reorder() puts them in descending order of the total so the
# ranking is readable. geom_col() is the direct form of
# geom_bar(stat = "identity"). Fill still maps to the raw EVTYPE label so the
# named colour palette continues to match.
cropdmg <- ggplot(
  top5_cropdmg,
  aes(
    x = reorder(EVTYPE, -CROPDMG),
    y = CROPDMG,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_fill_manual(values = prop_cropdmg_colour) +
  labs(
    x = "Event_Type",
    y = "Crop_Damage_($)",
    title = "Total Crop Damage by Event Type",
    fill = "Event Type"
  ) +
  theme(
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 14),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
# Compose the property damage chart and the crop damage chart into one two-row
# figure (property damage first), then display it.
combined_prop_cropdmg <- wrap_plots(
  propdmg,
  cropdmg,
  nrow = 2
)
combined_prop_cropdmg