Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
This data visualisation was presented in a 2012 edition of the weekly Bloomberg Businessweek magazine. This edition of the magazine was released in the wake of Hurricane Sandy, one of the largest and most destructive storms in American history. The intent of this visualisation was to show the general public how the frequency and cost of different types natural disasters are trending over time in the United States.
The visualisation chosen had the following three main issues:
Reference
The following code was used to fix the issues identified in the original.
#NOAA Disasters over $1B USD 1980-2020
#Bubble plot with marginal histogram
##packages
library(readr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(magrittr)
library(forcats)
library(cowplot)
library(ggrepel)
##import and clean data
events <- read.csv("https://www.ncdc.noaa.gov/billions/events-US-1980-2020.csv", skip =1, header = TRUE, )
events_clean <- events %>%
mutate(Disaster = factor(Disaster),
Cost = as.numeric(Total.CPI.Adjusted.Cost..Millions.of.Dollars.)/1000,
Date = parse_datetime(as.character(events$End.Date)),
Year = format(Date, "%Y")) %>%
select(Name, Disaster, Cost, Deaths, Year, Date)
##new factors for Disaster
events_clean$Disaster <- fct_collapse(events_clean$Disaster,
Drought_Heat_wave = "Drought",
Blizzard_Freeze_Ice = c("Winter Storm", "Freeze"),
Fire = "Wildfire",
Hurricane = "Tropical Cyclone",
Tornado_Storm = "Severe Storm",
Flood = "Flooding")
events_clean$Disaster <- factor(events_clean$Disaster,
levels = c("Drought_Heat_wave",
"Flood",
"Blizzard_Freeze_Ice",
"Tornado_Storm",
"Hurricane",
"Fire"),
labels = c("Drought/Heat wave",
"Flood",
"Blizzard/Freeze/Ice",
"Tornado/Storm",
"Hurricane",
"Fire"))
Disaster_Summary <-
events_clean %>%
group_by(Year, Disaster)%>%
summarise(Total_Cost = sum(Cost), Count = n())%>%
mutate(Period = Year)%>%
select(Year, Period, Disaster,Total_Cost, Count)
Total <-
events_clean %>%
group_by(Year)%>%
summarise(Total_Cost = sum(Cost), Count = n())%>%
mutate(Disaster = "All Natural Disasters", Period = Year)%>%
select(Year, Period, Disaster,Total_Cost, Count)
Disaster_Summary$Period <- fct_collapse(Disaster_Summary$Period,
`80_84` = c("1980","1981","1982","1983","1984"),
`85_89` = c("1985","1986","1988","1989"),
`90_94` = c("1990","1991","1992","1993","1994"),
`95_99` = c("1995","1996","1997","1998","1999"),
`00_04` = c("2000","2001","2002","2003","2004"),
`05_09` = c("2005","2006","2007","2008","2009"),
`10_14` = c("2010","2011","2012","2013","2014"),
`15_19` = c("2015","2016","2017","2018","2019"))
Total$Period <- fct_collapse(Total$Period,
`80_84` = c("1980","1981","1982","1983","1984"),
`85_89` = c("1985","1986","1988","1989"),
`90_94` = c("1990","1991","1992","1993","1994"),
`95_99` = c("1995","1996","1997","1998","1999"),
`00_04` = c("2000","2001","2002","2003","2004"),
`05_09` = c("2005","2006","2007","2008","2009"),
`10_14` = c("2010","2011","2012","2013","2014"),
`15_19` = c("2015","2016","2017","2018","2019"))
Total_5y <-
Total%>%
filter(Year < "2020")%>%
group_by(Period, Disaster)%>%
summarise(Total_Cost = sum(Total_Cost), Count = sum(Count))
Disasters_5y <-
Disaster_Summary %>%
filter(Year < "2020")%>%
group_by(Period, Disaster)%>%
summarise(Total_Cost = sum(Total_Cost), Count = sum(Count))
p1 <- Disasters_5y %>%
ggplot(aes(x=Count, y = Total_Cost))+
geom_point(aes(colour = Period))+
geom_path(arrow = arrow(type = "closed", length = unit(0.05, "npc")), colour = 'black')+
facet_wrap(~Disaster)+
labs(
caption = "Source: ncdc.noaa.gov/billion/events/US/1980-2020\n Note:Cost is CPI adjusted "
)+
xlab("Number of Disasters")+
ylab("Total Cost ($ billions)")+
scale_colour_brewer(palette="Dark2")+
theme(
legend.position = "none",
axis.ticks = element_blank(),
panel.grid.major.x = element_line(colour = "grey80"),
panel.grid.major.y = element_line(colour = "grey80"),
plot.background = element_rect(fill = "#FFFFFF"),
panel.background = element_rect(fill = "#FFFFFF"),
plot.subtitle = element_text(face="italic"),
plot.title = element_text(face="bold")
)
p2 <- Total_5y %>%
ggplot(aes(x=Count, y = Total_Cost),colour = "black")+
geom_point(aes(colour = Period))+
geom_path(arrow = arrow(type = "closed", length = unit(0.1, "npc")))+
geom_label_repel(label = c("1980-1984", "1985-1989", "1990-1994", "1995-1999",
"2000-2004", "2005-2009", "2010-2014", "2015-2019"),
size = 2.5,
)+
annotate("rect", xmin = 0, xmax = 10, ymin = 400, ymax =500,
alpha = 0.2)+
annotate("segment", x = 1.25, y=425, xend = 1.25, yend = 475,
arrow = arrow(type = "closed", length = unit(0.02, "npc")))+
annotate("text", x = 2.25, y = 475, size = 2.3, hjust = 0, vjust =0.75,
label = "Moving in this direction\nindicates Total Cost\nhas increased between\nperiods.")+
annotate("rect", xmin = 60, xmax = 70, ymin = 100, ymax =200,
alpha = 0.2)+
annotate("segment", x = 62.5, y=110, xend = 67.5, yend = 110,
arrow = arrow(type = "closed", length = unit(0.02, "npc")))+
annotate("text", x = 60.5, y = 175, size = 2.3, hjust = 0, vjust =0.75,
label = "Moving in this direction\nindicates # of Disasters has\nincreased between periods.")+
facet_wrap(~Disaster)+
labs(
title = "U.S Natural disasters with a cost of $1 billion or more (1980-2019)",
subtitle = "Each point represents disasters that occured over a 5 year period.\nBegin in 1980-1984 at the arrow tail and progress to 2015-2019 at the arrow head."
)+
ylab("Total Cost ($ billions)")+
xlim(0,80)+
scale_colour_brewer(palette="Dark2")+
theme(
legend.position = "none",
axis.title.x = element_blank(),
axis.ticks = element_blank(),
panel.grid.major.x = element_line(colour = "grey80"),
panel.grid.minor.x = element_line(colour = "grey80"),
panel.grid.major.y = element_line(colour = "grey80"),
plot.background = element_rect(fill = "#FFFFFF"),
panel.background = element_rect(fill = "#FFFFFF"),
plot.subtitle = element_text(face="italic"),
plot.title = element_text(face="bold")
)
plot <- plot_grid(p2,p1, ncol = 1, align = 'b', axis = 'b')
Data Reference
The following plot fixes the main issues in the original.