Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
The visualisation chosen has the following three main issues:
u/notimeforarcs (2021). [OC] C02 Emissions vs Population - by country [Online forum post]. Reddit. Retrieved April 10, 2021, from https://www.reddit.com/r/dataisbeautiful/comments/mjumzz/oc_co2_emissions_vs_population_by_country/
The following code was used to fix the issues identified in the original.
# Load and Prepare Data ---------------------------------------------------
#Load all necessary packages
library(readxl)
library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
library(cowplot)
#Read in the enriched excel file (emissions data & World Bank income categorisation)
c02emissions <- read_excel("2018 C02 Emissions v4.xlsx")
#Rename the seven spreadsheet columns (by position) to short names used throughout the script
colnames(c02emissions) <- c("Country", "IsoCode", "IncomeCategory", "Total",
                            "PerCapita", "PopCategory", "Population")

#Convert the categorical columns into factors so the level order controls how they are displayed:
# - Country: levels ordered by ascending population
# - IncomeCategory: fixed order from high income down to low income
# - PopCategory: fixed order from the largest to the smallest population band; long labels are
#   split over two lines with a line break so they can be displayed efficiently
c02emissions$Country <- factor(
  c02emissions$Country,
  levels = c02emissions$Country[order(c02emissions$Population)]
)
c02emissions$IncomeCategory <- factor(
  c02emissions$IncomeCategory,
  levels = c("High income", "Upper middle income", "Lower middle income", "Low income")
)
c02emissions$PopCategory <- factor(
  c02emissions$PopCategory,
  levels = c(">1 Billion", "200 - 350 Million", "100 - 200 Million", "75 - 100 Million",
             "50 - 75 Million", "25 - 50 Million", "10 - 25 Million", "5 - 10 Million",
             "1 - 5 Million", "<1 Million"),
  labels = c(">1\nBillion", "200 - 350\nMillion", "100 - 200\nMillion", "75 - 100\nMillion",
             "50 - 75\nMillion", " 25 - 50 \nMillion", " 10 - 25 \nMillion", "5 - 10\nMillion",
             "1 - 5\nMillion", "<1 Million")
)

#Format the data labels - round all values to one digit
rounded_per_capita <- round(c02emissions$PerCapita, 1)
c02emissions$PerCapLabel <- format(rounded_per_capita, nsmall = 1)
#A non-zero value that rounds to 0.0 could confuse the viewer, so show it with two digits instead.
#This is selected from the data rather than by a fixed row/column position, so it keeps working
#if the spreadsheet is re-sorted or gains columns.
#NOTE(review): the original patched a single cell (row 16, described as the DRC value) to "0.02";
#confirm no other country is expected to keep a "0.0" label.
needs_two_digits <- !is.na(rounded_per_capita) &
  rounded_per_capita == 0 &
  c02emissions$PerCapita != 0
c02emissions$PerCapLabel[needs_two_digits] <- format(
  round(c02emissions$PerCapita[needs_two_digits], 2),
  nsmall = 2
)

#Create two subsets of the data showing the largest 100 countries by population size.
#Rank 1 is the most populous country; ranking explicitly avoids depending on the total
#number of rows in the spreadsheet.
population_rank <- min_rank(desc(c02emissions$Population))
Largest_50 <- filter(c02emissions, population_rank <= 50)
#Every country outside the largest 50 (kept because the original script created this object)
Subset <- filter(c02emissions, population_rank > 50)
Next_50 <- filter(c02emissions, population_rank > 50 & population_rank <= 100)
# Visualisation Creation - Shared Settings --------------------------------------------------
##To display the 100 values faceted neatly, create two segments of 50 countries each to be displayed side-by-side in a single visualisation

#Bar colours, named by income category so that each category keeps its colour in both
#segments and in the legend even if a category is missing from one segment
income_fill_colours <- c("High income" = "#0E5546",
                         "Upper middle income" = "#0D7853",
                         "Lower middle income" = "#6F8A9B",
                         "Low income" = "#007ECC")
#Data-label colours: same as the bars, except the two lighter colours are darkened to increase legibility
income_label_colours <- c("High income" = "#0E5546",
                          "Upper middle income" = "#0D7853",
                          "Lower middle income" = "#546A78",
                          "Low income" = "#2A74A2")

#Build one faceted bar-chart segment of emissions per capita.
#  data          - data frame with Country, PerCapita, PerCapLabel, IncomeCategory and PopCategory columns
#  left_margin   - left plot margin in points (used to line the two segments up side-by-side)
#  label_colours - colours of the data labels, named by income category
#Returns a ggplot object; the legend is left in place so the caller can keep or remove it.
build_section <- function(data, left_margin, label_colours = income_label_colours) {
  #Base plot of emissions per capita, sorted by population and colour coded by income category
  ggplot(data = data,
         aes(x = Country, y = PerCapita, label = PerCapLabel, fill = IncomeCategory)) +
    geom_col(width = 0.75) + theme_grey() + coord_flip() +
    scale_fill_manual(values = income_fill_colours) +
    scale_colour_manual(values = label_colours) +
    #Facet by the population category
    facet_grid(PopCategory ~ ., scales = "free", space = "free_y") +
    #Rotate the facet label text to make it legible, resize font and soften colour to reduce cognitive load
    theme(strip.text.y = element_text(angle = 0, size = 7, colour = "grey30")) +
    #Add data labels, coloured by income category
    geom_text(size = 2.5, hjust = -0.03, vjust = 0.3, aes(colour = IncomeCategory)) +
    #Remove the gap between the country label and the beginning of the value axis
    scale_y_continuous(limits = c(0, 25), expand = c(0.01, 0)) +
    #Reduce background shading to reduce cognitive load and draw attention to the values themselves
    theme(panel.background = element_rect(fill = "grey95"),
          strip.background = element_rect(fill = "grey93")) +
    #Remove the country axis title; give the value axis a more descriptive, smaller title
    labs(x = NULL, y = "Avg. Per Capita CO2 Emissions (Tonnes per Year)") +
    theme(axis.title = element_text(size = 7, colour = "grey40"),
          axis.text.x = element_text(colour = "grey40", size = 8)) +
    #Top, right and bottom margins are fixed; only the left margin differs between segments
    theme(plot.margin = unit(c(5.5, 5.5, 5.5, left_margin), "points"))
}

# Visualisation Creation - Section One --------------------------------------------------
#Left margin adjusted so the axis won't be materially different from the second section,
#despite the longer country names in the second section.
#Legend removed to ensure no duplication when two sections are displayed side-by-side.
section1 <- build_section(Largest_50, left_margin = 24) +
  theme(legend.position = "none")

# Visualisation Creation - Section Two --------------------------------------------------
#Version of the second section that keeps its legend, formatted in preparation of displaying
#it above the combined and faceted plot.
#Label colours are set equal to the bar colours here, as in the original script
#(presumably so the shared legend key shows a single colour per category - TODO confirm).
legend_title <- "2018 data. Segment label indicates population size. Colour represents World Bank economy classification by income level"
section2 <- build_section(Next_50, left_margin = 22, label_colours = income_fill_colours) +
  labs(fill = legend_title, colour = legend_title) +
  theme(legend.title = element_text(size = 9, colour = "grey25"),
        legend.text = element_text(size = 8, colour = "grey30"),
        legend.key.size = unit(10, "points")) +
  guides(fill = guide_legend(nrow = 1))
#Create an object of the legend as a standalone object
leg <- get_legend(section2)
#Section Two as displayed: legend removed and the more legible label colours applied.
#Built directly (rather than by replacing the colour scale on section2) so no
#"scale already present" message is raised; the legend-only settings are not needed here.
section3 <- build_section(Next_50, left_margin = 22) +
  theme(legend.position = "none")
# Arrange visual elements into a single display ---------------------------
#Combine both faceted sections into one plot, side-by-side (second section slightly wider)
PlotGrid <- plot_grid(section1, section3, rel_widths = c(1, 1.05))

#Overall title of the plot, carried by an otherwise empty ggplot object
#("CO2" spelled with the letter O; the original string used a zero)
TitleText <- ggplot() +
  labs(title = "CO2 Consumption Emissions Per Capita, in Largest 100 Countries (by Population)") +
  theme(plot.margin = margin(t = 15, r = 0, b = 0, l = 15),
        plot.title = element_text(size = 15, face = "bold"))

#Postscript to help readers understand the approach taken where consumption data was unavailable for a country
PostscriptText <- ggplot() +
  labs(subtitle = "Note: * indicates that only total emission data was available for that country and was used in lieu of consumption data") +
  theme(plot.margin = margin(t = 0, r = 15, b = 0, l = 140),
        plot.subtitle = element_text(size = 8, colour = "gray65"))

#Legend row: the legend followed by an empty (NULL) cell, which left-aligns the text and
#legend for ease of user orientation (Nussbaumer Knaflic 2015)
LegendRow <- plot_grid(leg, NULL, nrow = 1, ncol = 2, rel_widths = c(1, 0.2675))

#Stack all elements into a single visualisation: title, legend, the two sections, postscript.
#Relative heights keep the text rows thin compared with the main plot row.
Final_Chart <- plot_grid(TitleText, LegendRow, PlotGrid, PostscriptText,
                         ncol = 1,
                         rel_heights = c(0.055, 0.06, 1, 0.025))
# Final plot has been created as the object "Final_Chart" ----------------
Data References
Ritchie, H., & Roser, M. (2020). CO₂ and Greenhouse Gas Emissions. Retrieved April 24, 2021, from Our World In Data website: https://ourworldindata.org/co2-and-other-greenhouse-gas-emissions
The World Bank. (2021). World Bank Country and Lending Groups. Retrieved April 24, 2021, from The World Bank website: https://datahelpdesk.worldbank.org/knowledgebase/articles/906519
Other References
Nussbaumer Knaflic, C. (2015). Storytelling with Data. John Wiley & Sons.
The following plot fixes the main issues in the original.