Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.

Original


**


Objective

Explain the objective of the original data visualisation and the targeted audience.

The visualisation chosen had the following three main issues:

  • The visualisation states that the bars are colour-coded by the artist's home country, but no legend or key is provided to explain which colour represents which country.
  • Another graph, “Release Year”, is meant to show the years with the most expensive music videos, but it is not readable because the x-axis does not clearly separate the individual years. The scale below the graph is confusing, and the dots are crowded and difficult to follow.
  • The text inside the bars is confusing because it is hard to distinguish the title of the video from its artist(s), and no explanation of the labelling is given either.

Reference

Code

The following code was used to fix the issues identified in the original.

library(rvest)
library(dplyr)
library(gdata)
library(tidyverse)
library(ggplot2)
library(scales)
library(reprex)
library(ggh4x)
library(repr)
library(ggthemes)
library(ragg)

# Web scraping: download the Wikipedia page that holds the source data
url <- "https://en.m.wikipedia.org/wiki/List_of_most_expensive_music_videos"
v <- read_html(url)
# List every <table> node on the page to locate the one holding the data
html_nodes(v, "table")
## {xml_nodeset (5)}
## [1] <table class="box-Multiple_issues plainlinks metadata ambox ambox-content ...
## [2] <table class="box-More_citations_needed plainlinks metadata ambox ambox-c ...
## [3] <table class="box-Unreliable_sources plainlinks metadata ambox ambox-cont ...
## [4] <table class="wikitable sortable plainrowheaders">\n<caption>Most expensi ...
## [5] <table class="wikitable plainrowheaders">\n<caption>Timeline of most expe ...
# Select the ranking table by its CSS classes instead of by position: the
# maintenance banners at the top of the page are <table> nodes too, so a
# positional index breaks whenever a banner is added or removed.
# html_node() returns the first match, which is the sortable wikitable.
# header = FALSE reads every row as data; the first two rows (presumably the
# table's header rows) are then dropped.
video <- v %>%
  html_node("table.wikitable.sortable") %>%
  html_table(header = FALSE, fill = TRUE) %>%
  slice(-(1:2))
# Preview the scraped rows
video %>% as_tibble()
## # A tibble: 76 × 8
##    X1    X2                               X3       X4    X5    X6    X7    X8   
##    <chr> <chr>                            <chr>    <chr> <chr> <chr> <chr> <chr>
##  1 1     "\"Scream\""                     Michael… Mark… 1995  $7,0… $12,… [3][…
##  2 2     "\"Die Another Day\""            Madonna  Trak… 2002  $6,1… $9,1… [3][…
##  3 3     "\"Express Yourself\""           Madonna  Davi… 1989  $5,0… $10,… [3][…
##  4 3     "\"Bedtime Story\""              Madonna  Mark… 1995  $5,0… $8,8… [3][…
##  5 5     "\"Estranged\""                  Guns N'… Andy… 1993  $5,0… $9,3… [3][…
##  6 6     "\"Black or White\""             Michael… John… 1991  $4,0… $7,9… [3][…
##  7 6     "\"Make Me Like You\""           Gwen St… Soph… 2016  $4,0… $4,5… [5]  
##  8 8     "\"Cartoon Heroes\""             Aqua     Thom… 2000  $3,5… $5,5… [6]  
##  9 9     "\"Rollin' (Air Raid Vehicle)\"" Limp Bi… Fred… 2000  $3,0… $4,6… [7]  
## 10 10    "\"Victory\""                    Puff Da… Marc… 1998  $2,7… $4,4… [8]  
## # … with 66 more rows
# Name the eight scraped columns
col_names <- c(
  "Rank", "Title", "Artist(s)", "Director", "Year",
  "Cost Nominal (est.)", "Cost_Adjusted_(est.)", "Ref"
)
names(video) <- col_names

# Work on a plain data frame
video <- as.data.frame(video)

# Drop the reference column and the rank column (rank values repeat for ties)
video <- video[, setdiff(names(video), c("Ref", "Rank")), drop = FALSE]

# Adjusted cost: strip thousands separators and the dollar sign, then convert
# the remaining digits to integer
video[["Cost_Adjusted_(est.)"]] <- as.integer(
  gsub("\\$", "", gsub(",", "", video[["Cost_Adjusted_(est.)"]]))
)

# Title: remove the surrounding double quotes
video[["Title"]] <- gsub("\"", "", video[["Title"]])

# Artist(s): remove any parenthesised text together with leading whitespace
video[["Artist(s)"]] <- gsub(
  r"{\s*\([^\)]+\)}", "", as.character(video[["Artist(s)"]])
)

# Year: convert to integer
video[["Year"]] <- as.integer(video[["Year"]])

# Add a running row number as the first column; it is used further down to
# assign genres by position
video <- tibble::rowid_to_column(video, var = "index")

# Add a decade label for each release year. Years before 1990 all fall in
# "1980s" and years from 2010 onwards all fall in "2010s"; a missing year
# gives NA.
video$Year1 <- c("1980s", "1990s", "2000s", "2010s")[
  findInterval(video$Year, c(1990, 2000, 2010)) + 1L
]

# Add a music genre column.
# Genres are assigned by hand through the row index of each video. Rows whose
# index appears in none of the sets get an empty string.
video$Genre <- local({
  genre_rows <- list(
    "Hip-hop"   = c(10, 11, 14, 20, 27, 29, 30, 32, 46, 6, 12),
    "R&B"       = c(1, 18, 39, 40, 43, 47, 76),
    "Dance-pop" = c(2, 3, 28, 44, 45, 62, 64, 74, 75),
    "K-pop"     = c(42, 50, 53, 55, 65),
    "Pop"       = c(7, 49, 70, 73, 52),
    "Pop-rock"  = c(16, 17, 25, 38, 41, 61, 63),
    "J-pop"     = c(23, 24, 33, 22, 48),
    "Other"     = c(
      4, 5, 8, 9, 13, 15, 19, 21, 26, 31, 34, 35, 36, 37, 51, 54, 56, 57,
      58, 59, 60, 66, 67, 69, 71, 72, 68
    )
  )
  # Flatten the sets into two parallel vectors: row index -> genre label
  row_ids <- unlist(genre_rows, use.names = FALSE)
  labels <- rep(names(genre_rows), lengths(genre_rows))

  genre <- labels[match(video$index, row_ids)]
  genre[is.na(genre)] <- ""
  genre
})
# Fill palette, one colour per music genre. scale_fill_manual() takes a
# vector of aesthetic values, so the colours are kept in a character vector.
colors <- c(
  "#fbb4ae", "#b3cde3", "#ccebc5", "#decbe4",
  "#fed9a6", "#ffffcc", "#e5d8bd", "#fddaec"
)

# Build the plot: one bar per video, ordered by adjusted cost, filled by
# genre, with the artist name printed at the start of each bar and one facet
# per decade.
expensive_videos <- ggplot(
  video,
  aes(
    x = reorder(Title, `Cost_Adjusted_(est.)`),
    y = `Cost_Adjusted_(est.)`,
    fill = Genre
  )
) +
  geom_bar(stat = "identity", width = 0.8) +
  # Artist label anchored at the base of the bar (y = 1), left-aligned
  geom_text(
    aes(y = 1, label = `Artist(s)`),
    hjust = 0, color = "black", family = "mono", size = 8
  ) +
  scale_fill_manual(values = colors) +
  # One panel per decade, stacked in a single column; the argument is spelled
  # out as `scales` instead of relying on partial matching
  facet_wrap(
    ~Year1,
    dir = "v", scales = "free_y", ncol = 1, strip.position = "right"
  ) +
  # Relative panel heights (ggh4x), one value per decade panel
  force_panelsizes(rows = c(2, 8, 6.2, 6), cols = NULL) +
  theme(
    strip.background = element_rect(colour = "black", fill = "black"),
    strip.placement = "outside",
    strip.text = element_text(face = "bold", size = 22, color = "white"),
    panel.spacing = unit(0.5, "lines")
  ) +
  # Show costs in millions on the opposite side of the panel
  scale_y_continuous(
    labels = unit_format(unit = "M", scale = 1e-6),
    position = "right"
  ) +
  labs(
    title = "The Most Expensive Music Videos Of All Time",
    subtitle = "The Artist(s) Names Are Written Inside The Bars",
    x = "Video Title",
    y = "Cost in Millions ($USD)",
    fill = "Music Genre"
  ) +
  theme(
    plot.title = element_text(family = "Helvetica", face = "bold", size = 30, color = "black"),
    plot.subtitle = element_text(family = "mono", face = "bold", size = 18, color = "black"),
    legend.title = element_text(family = "Helvetica", face = "bold", size = 22, colour = "black"),
    legend.text = element_text(family = "Helvetica", face = "bold", size = 20, colour = "black"),
    axis.title = element_text(family = "Helvetica", face = "bold", size = 22, colour = "black"),
    axis.text = element_text(family = "mono", face = "bold", size = 20, color = "black"),
    legend.position = "bottom"
  )
# Flip the axes so the bars run horizontally and the video titles are readable
expensive_videos <- expensive_videos + coord_flip()

Data Reference * Wikipedia (April 2022), List of most expensive music videos, website: https://en.wikipedia.org/wiki/List_of_most_expensive_music_videos

Reconstruction

The following plot fixes the main issues in the original.