Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.

Original

# Build the path to the image of the original data visualisation ("J.png").
# The path is anchored at the current working directory, so the script must be
# run from the directory that contains the image.
proj_dir <- getwd()
file_path <- file.path(proj_dir, "J.png")

Source: Japan Tourism Statistics Breakdown by Country/Area Section.


Objective

The original visualization aimed to show that total visitor arrivals to Japan were generally lower in March 2023 than in March 2019. It was intended mainly for the general public, including tourists and local citizens. The visualization had the following three main issues:

  • The change from Mar 2019 to Mar 2023 is hard to read.
  • The two pie charts make it very difficult to compare categories from different years as there is no common baseline.
  • The colors are not labeled, and so many different colors are used in a single graph that some viewers may find it hard to read.

Reference

Code

The following code was used to fix the issues identified in the original.

#Load the related packages
library(ggplot2)
library(scales)

# Visitor arrivals to Japan by country/area, entered by hand from the source.
#   Country      - country/area of origin
#   Arrival_2019 - visitor arrivals in March 2019
#   Arrival_2023 - visitor arrivals in March 2023
#   Growth_Rate  - percentage change from March 2019 to March 2023
#                  (agrees with the two arrival columns to one decimal place)
data <- data.frame(
  Country = c(
    "South Korea", "China", "Taiwan", "Hong Kong", "Thailand",
    "Singapore", "Malaysia", "Indonesia", "Philippines", "Vietnam",
    "India", "Australia", "United States", "Canada", "Mexico",
    "United Kingdom", "France", "Germany", "Italy", "Spain",
    "Russia", "Middle East", "Others"
  ),
  Arrival_2019 = c(
    585586, 691279, 402433, 171430, 147443,
    43687, 50615, 39609, 48277, 47881,
    17752, 44175, 176564, 37959, 5740,
    38610, 29408, 28659, 14956, 8916,
    11701, 11726, 105730
  ),
  Arrival_2023 = c(
    466800, 75700, 278900, 144900, 108000,
    52700, 38900, 33200, 46600, 53600,
    14900, 45200, 203000, 36400, 5800,
    34800, 21200, 25900, 12700, 7600,
    3400, 12300, 95000
  ),
  Growth_Rate = c(
    -20.3, -89.0, -30.7, -15.5, -26.8,
    20.6, -23.1, -16.2, -3.5, 11.9,
    -16.1, 2.3, 15.0, -4.1, 1.0,
    -9.9, -27.9, -9.6, -15.1, -14.8,
    -70.9, 4.9, -10.1
  )
)

# Make Country a factor whose levels run from the smallest to the largest
# March 2023 arrival count, so plots that use Country as a discrete axis
# follow that order instead of alphabetical order.
data$Country <- factor(
  data$Country,
  levels = unique(with(data, Country[order(Arrival_2023)]))
)

# First graph: horizontal bar chart of March 2023 visitor arrivals.
# One bar per country/area (x = Country, flipped onto the vertical axis by
# coord_flip()), filled on a red-to-green gradient by Growth_Rate, with the
# arrival count printed at the end of each bar.
p1 <- ggplot(data, aes(x = Country, y = Arrival_2023, fill = Growth_Rate)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "red", high = "green") +
  # comma() formats each value on its own. format(x, big.mark = ",") pads
  # every label with leading spaces to a common width, which pushes the
  # centred text away from the bar end it belongs to.
  geom_text(aes(label = comma(Arrival_2023)),
            vjust = 1.2, size = 3, color = "black") +
  # The upper limit sits just above the largest 2023 value in the data
  # (466,800).
  scale_y_continuous(labels = comma, limits = c(0, 500000)) +
  xlab("Country/Area") + ylab("Visitor Arrivals Mar-2023") +
  ggtitle("Visitor Arrivals Mar-2023 by Country/Area") +
  # Minimal look: no grid or panel fill, black axis lines, framed white plot.
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
        axis.title = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8, angle = 0, hjust = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black", size = 0.5),
        plot.background = element_rect(fill = "white", colour = "black",
                                       size = 0.5)) +
  coord_flip()

# Second graph: horizontal bar chart of March 2019 visitor arrivals.
# One bar per country/area (x = Country, flipped onto the vertical axis by
# coord_flip()), filled on a red-to-green gradient by Growth_Rate, with the
# arrival count printed at the end of each bar.
p2 <- ggplot(data, aes(x = Country, y = Arrival_2019, fill = Growth_Rate)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "red", high = "green") +
  # comma() formats each value on its own. format(x, big.mark = ",") pads
  # every label with leading spaces to a common width, which pushes the
  # centred text away from the bar end it belongs to.
  geom_text(aes(label = comma(Arrival_2019)),
            vjust = 1.2, size = 3, color = "black") +
  # The upper limit sits just above the largest 2019 value in the data
  # (691,279).
  scale_y_continuous(labels = comma, limits = c(0, 700000)) +
  xlab("Country/Area") + ylab("Visitor Arrivals Mar-2019") +
  ggtitle("Visitor Arrivals Mar-2019 by Country/Area") +
  # Minimal look: no grid or panel fill, black axis lines, framed white plot.
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
        axis.title = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8, angle = 0, hjust = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black", size = 0.5),
        plot.background = element_rect(fill = "white", colour = "black",
                                       size = 0.5)) +
  coord_flip()

# Third graph: dumbbell-style comparison of March 2019 and March 2023.
# For each country/area a point marks the 2019 arrivals, a second point marks
# the 2023 arrivals, and a segment joins the two, coloured by whether arrivals
# increased or decreased.
#
# The colour aesthetic is mapped to descriptive category names and the colours
# are supplied as a NAMED vector. Mapping constant colour strings ("red",
# "blue", ...) and passing unnamed values/labels makes ggplot2 sort the
# categories alphabetically and pair colours and labels by position, which
# mislabels the 2019 and 2023 points in the legend.
p3 <- ggplot(data, aes(x = Country, y = Arrival_2019)) +
  geom_point(aes(y = Arrival_2019, color = "Arrival_2019"), size = 2) +
  geom_point(aes(y = Arrival_2023, color = "Arrival_2023"), size = 2) +
  # The segment starts at the inherited y (Arrival_2019) and ends at
  # Arrival_2023 for the same country.
  geom_segment(aes(xend = Country, yend = Arrival_2023,
                   color = ifelse(Arrival_2023 > Arrival_2019,
                                  "Increase", "Decrease")),
               size = 1) +
  scale_color_manual(values = c("Arrival_2019" = "red",
                                "Arrival_2023" = "blue",
                                "Increase" = "yellow",
                                "Decrease" = "purple"),
                     breaks = c("Arrival_2019", "Arrival_2023",
                                "Decrease", "Increase")) +
  theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 1)) +
  labs(x = "Country/Area", y = "Arrival",
       title = "Comparison of Arrival in March 2019 and March 2023 by Country/Area",
       color = "") +
  # The upper limit sits just above the largest value in either year
  # (691,279).
  scale_y_continuous(labels = comma, limits = c(0, 700000)) +
  coord_flip()

Data Reference

Reconstruction

The following plot fixes the main issues in the original.