Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.

Original


Source: Bloomberg Markets (Nov 2012).


Objective

This data visualisation has three kinds of issue: the data proportions, the arrangement, and the comparative statistical representation. The proportions are deceptive because the sizes of the image halves are not representative of the percentages provided, nor even consistent with one another. The halves are also not labelled as to what they represent. The arrangement contains irrelevant elements: a stray strawberry and a lobster add only clutter, and the remaining visuals are strewn randomly across the display, whereas we typically read left to right and notice patterns of change from majority to minority in the same fashion. The statistics for the countries with the highest imports are merely single numbers. These support no immediate inference and force the reader to do a lot more processing to gain any insight from them.

The visualisation chosen had the following three main issues:

  • Incorrect and deceptive proportions of imports: for example, the lamb portion is drawn well over 52% in size, and it is not stated which portion is imported and which is US-produced.
  • Non-insightful metrics of imports by country: only a single percentage is shown for each country, which makes quick or efficient comparison difficult.
  • Redundant images in the arrangement: both the strawberry and the lobster add nothing to the message of the visualisation.

Reference

Code

The following code was used to fix the issues identified in the original.

# Data Visualisation Assignment 2

# Switch to the author's assignment folder only when it exists on this
# machine. None of the code below in this file reads or writes files, so the
# working directory is a convenience rather than a requirement; an
# unconditional setwd() would abort the whole script on any other computer.
project_dir <- "C:/Users/samgh/Desktop/The Everything/Masters of Statistics and Operations Research/Year 2/Sem 2/Data Visualisation/Assignments/Assignment 2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

library(tidyverse)


# Initial critiques
#
# proportions are unclear
# decorative graphics that carry no meaning
# items are not in any order
# cannot compare the top import countries; only a single number is given for each
# arrangement is confusing, with unrelated elements
# it is not made clear that each country's percentage is a share of that category's imports
# the import categories shown are only sub-categories, too
# no interactivity

# Create data
# Create Source data
#
# Percentage of each commodity group that is imported versus supplied by the
# USA. The vectors below are parallel: element i of each one describes row i
# of `cat_df`.

# Facet label shared by all ten commodity rows. This used to be a 12-element
# vector with stray 'ALL' entries that could not be bound to the 10-row frame,
# so the frame bypassed it with a hard-coded rep(); it now has the right
# length and is actually used.
category <- rep('Commodity', 10)

commodity <- rep(
  c(
    'Fruit and Nut',
    'Fresh Veg/Melons',
    'Honey',
    'Lamb',
    'Seafood'
  ),
  2
)

source <- c(rep('Imported', 5), rep('USA', 5))

# First line: imported share per commodity; second line: USA share.
# Each imported/USA pair adds up to 100.
category_perc <- c(
  51, 20, 61, 52, 88,
  49, 80, 39, 48, 12
)

# create dataframe of Source data
cat_df <- data.frame(Category = category,
                     Commodity = commodity,
                     Source = source,
                     Percentage = category_perc,
                     stringsAsFactors = FALSE)

# Level order for the x axis: lowest imported share first (Fresh Veg/Melons)
# up to the highest (Seafood). Plain rev() avoids needing the magrittr pipe
# for a single call.
ord_com <- rev(c('Seafood', 'Honey', 'Lamb', 'Fruit and Nut', 'Fresh Veg/Melons'))

# labels default to the levels, so they are not repeated here.
cat_df$Commodity <- factor(cat_df$Commodity,
                           levels = ord_com,
                           ordered = TRUE)
# USA first so that it takes the first fill colour wherever Source is mapped.
cat_df$Source <- factor(cat_df$Source,
                        levels = c('USA', 'Imported'),
                        ordered = TRUE)

# Overall split of all food by origin, one row per source.
# The first column is spelled All.Foods because that is the name data.frame()
# gives to a non-syntactic `All Foods` column, and p1 facets on All.Foods.
# Source is a plain (unordered) factor with USA as the first level.
all_df <- data.frame(
  All.Foods = rep('All Foods', 2),
  Percentage = c(80, 20),
  Source = factor(c('USA', 'Imported'), levels = c('USA', 'Imported'))
)

# Vegetable imports by country: the two largest suppliers plus 'Other'.
# Country is an ordered factor; no levels are given, so they take the
# default sorted order of the values.
veg_df <- data.frame(
  Import = rep('Veg', 3),
  Percentage = c(50, 22, 28),
  Country = factor(c('Mexico', 'Canada', 'Other'), ordered = TRUE)
)

# Fruit imports by country: the two largest suppliers plus 'Other'.
# Country is an ordered factor with the default sorted levels.
fruit_df <- data.frame(
  Import = rep('Fruit', 3),
  Percentage = c(26, 13, 61),
  Country = factor(c('Mexico', 'Chile', 'Other'), ordered = TRUE)
)

# Seafood imports by country: the two largest suppliers plus 'Other'.
# Country is an ordered factor with the default sorted levels.
sea_df <- data.frame(
  Import = rep('Seafood', 3),
  Percentage = c(16, 6.5, 77.5),
  Country = factor(c('China', 'Vietnam', 'Other'), ordered = TRUE)
)
# Font family used for the strip, legend and axis text in the theme below.
font <- 'sans'

# The One Theme to Rule Them All
# Shared styling that every plot in this script adds. The palette and the two
# small element helpers live inside local() so that only `tottrta` itself is
# created in the calling environment.
tottrta <- local({
  ink <- '#442211'        # text and tick colour
  paper <- '#fff5eb'      # plot and legend background
  panel_fill <- '#fee6ce'
  accent <- '#fdae6b'     # panel border and facet strip

  # Solid white grid line of the given width.
  white_grid <- function(width) {
    element_line(size = width, linetype = 'solid', colour = "white")
  }
  # Text element in the shared ink colour and font family.
  ink_text <- function(...) {
    element_text(colour = ink, family = font, ...)
  }

  theme(
    plot.subtitle = element_text(face = 'italic'),
    title = element_text(size = 13),
    plot.background = element_rect(fill = paper),
    panel.background = element_rect(fill = panel_fill, colour = accent),
    panel.grid.major = white_grid(0.5),
    panel.grid.minor = white_grid(0.25),
    strip.background = element_rect(fill = accent),
    strip.text.x = ink_text(size = 12, face = 'bold'),
    legend.background = element_rect(fill = paper),
    legend.text = ink_text(),
    legend.title = ink_text(),
    axis.title.x = ink_text(size = 11),
    axis.text = ink_text(size = 11),
    axis.title.y = ink_text(size = 11, face = 'bold'),
    axis.ticks = element_line(colour = ink)
  )
})

# Visualise
# Panel 1: USA vs imported share across all food, with a short text note on
# how the imported share changed between 2002 and 2012.
p1 <- ggplot(data = all_df, aes(x = Source, y = Percentage, fill = Source)) +
  geom_bar(stat = 'identity', position = 'dodge', color = '#664433', size = 1.5) +
  facet_grid(~All.Foods) +
  labs(title = "Where Does America's Food Come From?" , x = '', y = 'Percentage (%)', 
       subtitle = 'For all food within the US for 2012') +
  scale_y_continuous(limits = c(0,100)) +
  scale_fill_manual(values = c('#b32134','#BBBFFF')) +
  annotate(x = 1.65, y = 50, hjust = 0, geom = 'text', label = 'Food imports to America \nhave increased from \n10% in 2002 to \n20% in 2012', 
           size = 3, col = '#442211') +
  # Vertical rule just left of the note. It sits at fixed coordinates, so it
  # is drawn with annotate(): a geom_segment() whose aes() holds only
  # constants is evaluated against the plot data and repeats the same
  # segment for every row of all_df.
  annotate(geom = 'segment', x = 1.62, y = 20, xend = 1.62, yend = 62,
           colour = '#442211') +
  # The fill legend would only repeat the x-axis labels.
  theme(legend.position = 'none') +
  tottrta



# Panel 2: USA vs imported share for each commodity group, with two call-outs.
# Title, subtitle and axis labels are empty strings rather than omitted.
p2 <- ggplot(data = cat_df, aes(x = Commodity, y = Percentage, fill = Source)) +
  geom_bar(stat = 'identity', position = 'dodge', color = '#444444', size = 1) +
  labs(title = "" , subtitle = '', x = '', y = '') +  
  scale_y_continuous(limits = c(0,100)) +
  scale_fill_manual(values = c('#b32134','#BBBFFF')) +
  facet_grid(~Category) +
  tottrta +
  annotate(x = 1.2, y = 86, hjust = 0, geom = 'text', label = 'The US provides themselves 80% \nof consumed Veg and Melons', 
           size = 3, col = '#442211') +
  # The rules under each call-out sit at fixed coordinates, so they are drawn
  # with annotate(): a geom_segment() whose aes() holds only constants is
  # evaluated against the plot data and repeats the same segment for every
  # row of cat_df.
  annotate(geom = 'segment', x = 1, y = 78, xend = 2.5, yend = 78,
           colour = '#442211') +
  annotate(x = 4.9, y = 80, hjust = 1, geom = 'text', label = 'Where as 88% of Seafoods are \nsourced from imports', 
           size = 3, col = '#442211') +
  annotate(geom = 'segment', x = 4, y = 72, xend = 5, yend = 72,
           colour = '#442211')


# Panel 3: vegetable imports by country.
# Fill colours in bar order (descending percentage): Mexico, Other, Canada.
v_col <- c('#fb8072', '#bebada', '#fdb462')
# Percentages in data-frame row order: Mexico, Canada, Other.
v_perc <- veg_df$Percentage
p3 <- ggplot(data = veg_df, aes(x = Import, y = Percentage, fill = reorder(Country, -Percentage))) +
  geom_bar(stat = 'identity', position = 'dodge', color = '#442211', size = 0.1) +
  labs(title = 'Where Do Imports Come From?',
       subtitle = 'The two highest countries are shown',
       x = '', y = 'Percentage (%)') +
  scale_y_continuous(limits = c(0,100)) + 
  scale_fill_manual(values = v_col) +
  # Country names are placed 4 units above each bar. The x values 0.7 / 1 /
  # 1.3 are meant to sit over the three dodged bars, and the v_perc indices
  # follow the bar order (Mexico, Other, Canada), not the row order.
  annotate(x = 0.7, y = v_perc[1] + 4, geom = 'text', label = 'Mexico', col = '#442211', size = 4) +
  annotate(x = 1, y = v_perc[3] + 4, geom = 'text', label = 'Other', col = '#442211', size = 4) +
  annotate(x = 1.3, y = v_perc[2] + 4, geom = 'text', label = 'Canada', col = '#442211', size = 4) +
  # NOTE(review): tottrta is added after this theme() call and also sets
  # axis.title.x, so it presumably takes precedence over the element_blank()
  # here; p4 and p5 add tottrta first. The order is left as it was so the
  # rendered layout does not change -- confirm which order is intended.
  theme(plot.caption = element_text(face = 'italic', colour = '#442211'),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.subtitle = element_text(face = 'italic'),
        legend.position =  'none') +
  facet_grid(~ Import, scales = 'free_x') +
  tottrta


# Panel 4: fruit imports by country.
# Percentages in data-frame row order: Mexico, Chile, Other.
f_perc <- fruit_df$Percentage
# Fill colours in bar order (descending percentage): Other, Mexico, Chile.
f_col <- c('#bebada','#fb8072','#80b1d3')
p4 <- ggplot(
  data = fruit_df,
  aes(x = Import, y = Percentage, fill = reorder(Country, -Percentage))
) +
  geom_bar(stat = 'identity', position = 'dodge', color = '#442211', size = 0.1) +
  # Empty strings (not omitted) for title, subtitle and x label.
  labs(title = '', subtitle = '', x = '') +
  scale_y_continuous(limits = c(0,100)) +
  scale_fill_manual(values = f_col) +
  # One text layer for all three country names, each 4 units above its bar;
  # f_perc is re-indexed from row order into bar order.
  annotate(
    geom = 'text',
    x = c(0.7, 1, 1.3),
    y = f_perc[c(3, 1, 2)] + 4,
    label = c('Other', 'Mexico', 'Chile'),
    col = '#442211',
    size = 4
  ) +
  tottrta +
  # Plot-specific overrides go after the shared theme.
  theme(
    plot.caption = element_text(face = 'italic', colour = '#442211'),
    axis.text.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.x = element_blank(),
    plot.subtitle = element_text(face = 'italic'),
    legend.position =  'none'
  ) +
  facet_grid(~ Import, scales = 'free_x')


# Panel 5: seafood imports by country; this panel also carries the caption
# naming the data source.
# Fill colours in bar order (descending percentage): Other, China, Vietnam.
s_col <- c('#bebada','#f1b6da', '#ffffb3')
# Percentages in data-frame row order: China, Vietnam, Other.
s_perc <- sea_df$Percentage
p5 <- ggplot(data = sea_df, aes(x = Import, y = Percentage, fill = reorder(Country, -Percentage))) +
  geom_bar(stat = 'identity', position = 'dodge', color = '#442211', size = 0.1) +
  scale_y_continuous(limits = c(0,100)) + 
  scale_fill_manual(values = s_col) +
  # The caption now reads "Economic Research Service", the agency name used
  # in this document's data reference (it previously said "Economics").
  labs(caption = 'Source: USDA Economic Research Service', title = '', subtitle = '') +
  # Country names 4 units above each bar; the s_perc indices follow the bar
  # order (Other, China, Vietnam), not the row order.
  annotate(x = 0.7, y = s_perc[3] + 4, geom = 'text', label = 'Other', col = '#442211', size = 4) +
  annotate(x = 1, y = s_perc[1] + 4, geom = 'text', label = 'China', col = '#442211', size = 4) +
  annotate(x = 1.3, y = s_perc[2] + 4, geom = 'text', label = 'Vietnam', col = '#442211', size = 4) +
  tottrta +
  # Plot-specific overrides go after the shared theme.
  theme(plot.caption = element_text(face = 'italic', colour = '#442211'),
        axis.title.x = element_blank(),
        axis.text.x  = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_blank(),
        plot.subtitle = element_text(face = 'italic'),
        legend.position =  'none') +
  facet_grid(~ Import, scales = 'free_x') 
 

# Top row: overall split (p1) beside the per-commodity split (p2), with p2
# given twice the width of p1.
p_grid <- gridExtra::grid.arrange(
  grobs = list(p1, p2),
  nrow = 1,
  widths = c(1, 2)
)

# Bottom row: the three import-by-country panels side by side.
p_grid2 <- gridExtra::grid.arrange(
  grobs = list(p3, p4, p5),
  nrow = 1
)

Data Reference

  • USDA Economic Research Service (2012). Recreated from the data displayed on the Bloomberg Markets (Nov 2012) Where Your Food Comes From visualisation.

Reconstruction

The following plot fixes the main issues in the original.